author | Olivier Brunel
<jjk@jjacky.com> 2023-02-25 16:54:29 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-02-25 16:54:29 UTC |
parent | 55043e20b6f5b2d8c77c8c14992cfde24826c121 |
doc/hmap_init.3.md | +86 | -0 |
include/hmap.h | +30 | -0 |
include/limb/hmap.h | +21 | -0 |
meta/libs/limb | +7 | -0 |
src/hmap/grow.c | +90 | -0 |
src/hmap/hmap_free.c | +7 | -0 |
src/hmap/hmap_get.c | +12 | -0 |
src/hmap/hmap_init.c | +22 | -0 |
src/hmap/hmap_set.c | +29 | -0 |
src/hmap/lookup.c | +16 | -0 |
diff --git a/doc/hmap_init.3.md b/doc/hmap_init.3.md new file mode 100644 index 0000000..5e51228 --- /dev/null +++ b/doc/hmap_init.3.md @@ -0,0 +1,86 @@ +% limb manual +% hmap_init(3) + +# NAME + +hmap\_init, hmap\_set, hmap\_get, hmap\_free - hash table management + +# SYNOPSIS + + #include <limb/hmap.h> + +```pre hl +int hmap_init(size_t <em>dlen</em>, size_t <em>n</em>, hmap *<em>hmap</em>) +int hmap_set(u32 <em>key</em>, void *<em>data</em>, hmap *<em>hmap</em>) +void *hmap_get(u32 <em>key</em>, hmap *<em>hmap</em>) +void hmap_free(hmap *<em>hmap</em>) +``` + +# DESCRIPTION + +These functions allow the caller to manage a hash table containing entries +consisting of a key (a 32bit unsigned integer) and its associated data. The last +argument, `hmap`, points to a structure that describes the table on which the +function operates, and should be treated as opaque (i.e. do not attempt to +directly access or modify the fields in this structure.). + +The size of the associated data are defined by the `hmap_init`() call, they can +therefore be different for each hash table, but are of fixed size on a per-table +basis (and cannot be modified). + +First, the `hmap_init`() function must be used to initialize the structure +pointed to by `hmap` for a hash table of `n` elements, using `dlen` bytes as +per-element data size. + +The hash table will automatically grow as needed when adding new elements, but +it is not possible to shrunk it down. Note also that the actual size of the +table must be a power of two, and will therefore be adjusted accordingly (e.g. +using 10 as `n` will actually result in a table initialized for 16 elements). + +The `hmap_set`() function searches the table for an element with the given `key` +and sets its associated data to that pointed to by `data` (which must be of the +size given as `dlen` to `hmap_init`() earlier). Note that the actual data +pointed will be copied into the table. + +If not such element exists, it is added to the table. If needed the table might +grow its size in order to add the new element. If `data` is NULL then the +element is "removed" from the table - that is, the memory is emptied out to be +available for use by another element, the table itself does not shrunk in size. + +To get a valid 32bit key from a data blob (e.g. a string), you might want to use +[hlookup32](3). + +The `hmap_get`() function searches the tale for an element with the given `key` +and returns a pointer to its associated data when found, or NULL if no such +element exists in the table. + +The `hmap_free`() function frees all memory allocated by the table and returns +it into a uninitialized state. It is then possible to re-use it with +`hmap_init`() with different parameters. + +# RETURN VALUE + +The `hmap_init`() and `hmap_set`() functions return 1 on success, and zero on +error, with `errno` set to indicate the cause of the error. + +The `hmap_get`() function returns a pointer to the data associated with the +element if found, or NULL otherwise. It cannot fail. + +# ERRORS + +The `hmap_init`() function may fail if : + +: *EINVAL* +:: `hmap` is NULL, `dlen` or `n` is zero + +: *ENOMEM* +:: Not enough memory to allocate the table + +The `hmap_set`() function may fail if : + +: *ENOENT* +:: `data` was NULL but no element with the given `key` was found + +: *ENOMEM* +:: Not enough memory to grow the table in order to add the element. Note that in +such a case, the table remains in valid state and can still be used as before. diff --git a/include/hmap.h b/include/hmap.h new file mode 100644 index 0000000..3087ebe --- /dev/null +++ b/include/hmap.h @@ -0,0 +1,30 @@ +#ifndef LIMB_INTERNAL_HMAP_H +#define LIMB_INTERNAL_HMAP_H + +#include <errno.h> +#include "limb/hmap.h" + +#define MINSIZE 8 +#define MAXSIZE ((u32) -1 / 2 + 1) + +/* IMMPORANT: the stralloc hmap->sa is *NOT* used as expected in here. Indeed, + * we determine the *exact* size we want allocated and ask for it. + * + * Then, sa.a is truned into our mask, i.e. the number of elements - 1 + * Similarly, sa.len is used as counter of used elements, but those aren't used + * in order and every element (even empty/zero ones) should be considered in + * use by the hmap. + */ + +struct item { + u32 key; + u8 data[]; +}; + +#define DLEN(hmap) (hmap->ilen - sizeof(u32)) +#define ITEM(i,hmap) (void *) (hmap->sa.s + (i) * hmap->ilen) + +struct item *lookup(u32 key, hmap *hmap); +int grow(size_t n, hmap *hmap); + +#endif /* LIMB_INTERNAL_HMAP_H */ diff --git a/include/limb/hmap.h b/include/limb/hmap.h new file mode 100644 index 0000000..7d62636 --- /dev/null +++ b/include/limb/hmap.h @@ -0,0 +1,21 @@ +#ifndef LIMB_HMAP_H +#define LIMB_HMAP_H + +#include <skalibs/stralloc.h> +#include "limb/int.h" + +typedef struct hmap hmap; + +struct hmap { + stralloc sa; + size_t ilen; +}; + +#define HMAP_ZERO { STRALLOC_ZERO, 0 } + +extern int hmap_init(size_t dlen, size_t n, hmap *hmap); +extern int hmap_set(u32 key, void *data, hmap *hmap); +extern void *hmap_get(u32 key, hmap *hmap); +extern void hmap_free(hmap *hmap); + +#endif /* LIMB_HMAP_H */ diff --git a/meta/libs/limb b/meta/libs/limb index dee4e65..d6e7319 100644 --- a/meta/libs/limb +++ b/meta/libs/limb @@ -29,6 +29,13 @@ obj/nextsplit_rabin.o obj/hlookup.o obj/hlookup32.o obj/hlookup64.o +# hmap +obj/hmap/lookup.o +obj/hmap/grow.o +obj/hmap/hmap_init.o +obj/hmap/hmap_set.o +obj/hmap/hmap_get.o +obj/hmap/hmap_free.o # SHA3 obj/sha3/byte_order.o obj/sha3/rhash_sha3_process_block.o diff --git a/src/hmap/grow.c b/src/hmap/grow.c new file mode 100644 index 0000000..05aadc0 --- /dev/null +++ b/src/hmap/grow.c @@ -0,0 +1,90 @@ +#include "hmap.h" + +static void +relocate(u32 idx, u32 min, u32 max, u32 *left, hmap *hmap) +{ + struct fullitem { + u32 key; + u8 data[DLEN(hmap)]; + } * it; + + it = ITEM(idx, hmap); + if (it->key) { + struct fullitem itmp, *new; + + /* remove item from current spot */ + itmp = *it; + memset(it, 0, sizeof(*it)); + + /* and look for its natural spot, or first available collision spot + * if needed */ + u32 i, j; + for (i = it->key, j = 1; ; i += j++) { + /* get item's wanted spot */ + new = ITEM(i & hmap->sa.a, hmap); + + /* put it back if it's where it was */ + if (new == it) + break; + + /* make sure its wanted spot has been relocated if needed */ + relocate(i & hmap->sa.a, min, max, left, hmap); + + /* settle there if it's now available */ + if (!new->key) + break; + } + /* put item in its (new) spot */ + *new = itmp; + /* decrease left iif it was within [min,max] *and* its new spot is + * outside of ]min,max] */ + i &= hmap->sa.a; + if (idx >= min && idx <= max && (i > max || i <= min)) + --*left; + } +} + +int +grow(size_t n, hmap *hmap) +{ + u32 oldsize, newsize; + + if (n > MAXSIZE) + n = MAXSIZE; + + for (newsize = MINSIZE; newsize < n; newsize <<= 1) + ; + + /* if sa.a was turned into the mask.. */ + if (hmap->sa.a) { + /* ..make it the (currently) allocated size again */ + ++hmap->sa.a; + hmap->sa.a *= hmap->ilen; + } + oldsize = hmap->sa.a; + + if (!stralloc_ready_tuned(&hmap->sa, newsize * hmap->ilen, 0, 0, 1)) { + --hmap->sa.a; + return 0; + } + + /* empty the new items */ + memset(hmap->sa.s + oldsize * hmap->ilen, 0, (newsize - oldsize) * hmap->ilen); + + /* make sa.a the new mask */ + hmap->sa.a /= hmap->ilen; + --hmap->sa.a; + + /* we'll (try to) relocate all items to their (new) "natural" spots, or + * a new collision/alternate spot if one got freed */ + u32 left = hmap->sa.len; + for (u32 i = 0; left; ++i) { + struct item *it = ITEM(i, hmap); + if (it->key) { + relocate(i, i, oldsize, &left, hmap); + --left; + } + } + + return 1; +} diff --git a/src/hmap/hmap_free.c b/src/hmap/hmap_free.c new file mode 100644 index 0000000..57ffcd4 --- /dev/null +++ b/src/hmap/hmap_free.c @@ -0,0 +1,7 @@ +#include "hmap.h" + +void +hmap_free(hmap *hmap) +{ + stralloc_free(&hmap->sa); +} diff --git a/src/hmap/hmap_get.c b/src/hmap/hmap_get.c new file mode 100644 index 0000000..b617fb7 --- /dev/null +++ b/src/hmap/hmap_get.c @@ -0,0 +1,12 @@ +#include "hmap.h" + +void * +hmap_get(u32 key, hmap *hmap) +{ + struct item *it; + + it = lookup(key, hmap); + if (it->key) + return it->data; + return NULL; +} diff --git a/src/hmap/hmap_init.c b/src/hmap/hmap_init.c new file mode 100644 index 0000000..0b50d89 --- /dev/null +++ b/src/hmap/hmap_init.c @@ -0,0 +1,22 @@ +#include <errno.h> +#include "limb/uint64.h" +#include "hmap.h" + +int +hmap_init(size_t dlen, size_t n, hmap *hmap) +{ + if (!hmap || !dlen || !n) + return (errno = EINVAL, 0); + + /* number of elements (n) must be a power of 2 */ + u32 p = msb64((u64) n) - 1; + if (n > 1 << p) + n = 1 << (p + 1); + + hmap->sa = stralloc_zero; + hmap->ilen = sizeof(u32) + dlen; + if (!grow(n, hmap)) + return 0; + + return 1; +} diff --git a/src/hmap/hmap_set.c b/src/hmap/hmap_set.c new file mode 100644 index 0000000..d51b614 --- /dev/null +++ b/src/hmap/hmap_set.c @@ -0,0 +1,29 @@ +#include <errno.h> +#include "hmap.h" + +int +hmap_set(u32 key, void *data, hmap *hmap) +{ + struct item *it; + + it = lookup(key, hmap); + if (!data) { + if (!it->key) + return (errno = ENOENT, 0); + memset(it->data, 0, hmap->ilen); + return 1; + } + + it->key = key; + memcpy(it->data, data, DLEN(hmap)); + + if (++hmap->sa.len > hmap->sa.a - hmap->sa.a / 4) { + if (!grow(2 * hmap->sa.len, hmap)) { + --hmap->sa.len; + memset(it, 0, hmap->ilen); + return (errno = ENOMEM, 0); + } + } + + return 1; +} diff --git a/src/hmap/lookup.c b/src/hmap/lookup.c new file mode 100644 index 0000000..aef3067 --- /dev/null +++ b/src/hmap/lookup.c @@ -0,0 +1,16 @@ +#include "hmap.h" + +struct item * +lookup(u32 key, hmap *hmap) +{ + struct item *it; + u32 i, j; + + for (i = key, j = 1; ; i += j++) { + it = ITEM(i & hmap->sa.a, hmap); + if (!it->key || it->key == key) + break; + } + + return it; +}