Welcome to little lamb

Code » limb » commit 59a538e

Add hmap_{init,set,get,free}() for hash table management

author Olivier Brunel
2023-02-25 16:54:29 UTC
committer Olivier Brunel
2023-02-25 16:54:29 UTC
parent 55043e20b6f5b2d8c77c8c14992cfde24826c121

Add hmap_{init,set,get,free}() for hash table management

Allow some simple hashmap, with 32bit unsigned integers as keys (see
hlookup32() for simple hash function) and data of user-defined size.

doc/hmap_init.3.md +86 -0
include/hmap.h +30 -0
include/limb/hmap.h +21 -0
meta/libs/limb +7 -0
src/hmap/grow.c +90 -0
src/hmap/hmap_free.c +7 -0
src/hmap/hmap_get.c +12 -0
src/hmap/hmap_init.c +22 -0
src/hmap/hmap_set.c +29 -0
src/hmap/lookup.c +16 -0

diff --git a/doc/hmap_init.3.md b/doc/hmap_init.3.md
new file mode 100644
index 0000000..5e51228
--- /dev/null
+++ b/doc/hmap_init.3.md
@@ -0,0 +1,86 @@
+% limb manual
+% hmap_init(3)
+
+# NAME
+
+hmap\_init, hmap\_set, hmap\_get, hmap\_free - hash table management
+
+# SYNOPSIS
+
+    #include <limb/hmap.h>
+
+```pre hl
+int hmap_init(size_t <em>dlen</em>, size_t <em>n</em>, hmap *<em>hmap</em>)
+int hmap_set(u32 <em>key</em>, void *<em>data</em>, hmap *<em>hmap</em>)
+void *hmap_get(u32 <em>key</em>, hmap *<em>hmap</em>)
+void hmap_free(hmap *<em>hmap</em>)
+```
+
+# DESCRIPTION
+
+These functions allow the caller to manage a hash table containing entries
+consisting of a key (a 32bit unsigned integer) and its associated data. The last
+argument, `hmap`, points to a structure that describes the table on which the
+function operates, and should be treated as opaque (i.e. do not attempt to
+directly access or modify the fields in this structure.).
+
+The size of the associated data are defined by the `hmap_init`() call, they can
+therefore be different for each hash table, but are of fixed size on a per-table
+basis (and cannot be modified).
+
+First, the `hmap_init`() function must be used to initialize the structure
+pointed to by `hmap` for a hash table of `n` elements, using `dlen` bytes as
+per-element data size.
+
+The hash table will automatically grow as needed when adding new elements, but
+it is not possible to shrunk it down. Note also that the actual size of the
+table must be a power of two, and will therefore be adjusted accordingly (e.g.
+using 10 as `n` will actually result in a table initialized for 16 elements).
+
+The `hmap_set`() function searches the table for an element with the given `key`
+and sets its associated data to that pointed to by `data` (which must be of the
+size given as `dlen` to `hmap_init`() earlier). Note that the actual data
+pointed will be copied into the table.
+
+If not such element exists, it is added to the table. If needed the table might
+grow its size in order to add the new element. If `data` is NULL then the
+element is "removed" from the table - that is, the memory is emptied out to be
+available for use by another element, the table itself does not shrunk in size.
+
+To get a valid 32bit key from a data blob (e.g. a string), you might want to use
+[hlookup32](3).
+
+The `hmap_get`() function searches the tale for an element with the given `key`
+and returns a pointer to its associated data when found, or NULL if no such
+element exists in the table.
+
+The `hmap_free`() function frees all memory allocated by the table and returns
+it into a uninitialized state. It is then possible to re-use it with
+`hmap_init`() with different parameters.
+
+# RETURN VALUE
+
+The `hmap_init`() and `hmap_set`() functions return 1 on success, and zero on
+error, with `errno` set to indicate the cause of the error.
+
+The `hmap_get`() function returns a pointer to the data associated with the
+element if found, or NULL otherwise. It cannot fail.
+
+# ERRORS
+
+The `hmap_init`() function may fail if :
+
+: *EINVAL*
+:: `hmap` is NULL, `dlen` or `n` is zero
+
+: *ENOMEM*
+:: Not enough memory to allocate the table
+
+The `hmap_set`() function may fail if :
+
+: *ENOENT*
+:: `data` was NULL but no element with the given `key` was found
+
+: *ENOMEM*
+:: Not enough memory to grow the table in order to add the element. Note that in
+such a case, the table remains in valid state and can still be used as before.
diff --git a/include/hmap.h b/include/hmap.h
new file mode 100644
index 0000000..3087ebe
--- /dev/null
+++ b/include/hmap.h
@@ -0,0 +1,30 @@
+#ifndef LIMB_INTERNAL_HMAP_H
+#define LIMB_INTERNAL_HMAP_H
+
+#include <errno.h>
+#include "limb/hmap.h"
+
+#define MINSIZE     8
+#define MAXSIZE     ((u32) -1 / 2 + 1)
+
+/* IMMPORANT: the stralloc hmap->sa is *NOT* used as expected in here. Indeed,
+ * we determine the *exact* size we want allocated and ask for it.
+ *
+ * Then, sa.a is truned into our mask, i.e. the number of elements - 1
+ * Similarly, sa.len is used as counter of used elements, but those aren't used
+ * in order and every element (even empty/zero ones) should be considered in
+ * use by the hmap.
+ */
+
+struct item {
+    u32 key;
+    u8 data[];
+};
+
+#define DLEN(hmap)          (hmap->ilen - sizeof(u32))
+#define ITEM(i,hmap)        (void *) (hmap->sa.s + (i) * hmap->ilen)
+
+struct item *lookup(u32 key, hmap *hmap);
+int grow(size_t n, hmap *hmap);
+
+#endif /* LIMB_INTERNAL_HMAP_H */
diff --git a/include/limb/hmap.h b/include/limb/hmap.h
new file mode 100644
index 0000000..7d62636
--- /dev/null
+++ b/include/limb/hmap.h
@@ -0,0 +1,21 @@
+#ifndef LIMB_HMAP_H
+#define LIMB_HMAP_H
+
+#include <skalibs/stralloc.h>
+#include "limb/int.h"
+
+typedef struct hmap hmap;
+
+struct hmap {
+    stralloc sa;
+    size_t ilen;
+};
+
+#define HMAP_ZERO       { STRALLOC_ZERO, 0 }
+
+extern int hmap_init(size_t dlen, size_t n, hmap *hmap);
+extern int hmap_set(u32 key, void *data, hmap *hmap);
+extern void *hmap_get(u32 key, hmap *hmap);
+extern void hmap_free(hmap *hmap);
+
+#endif /* LIMB_HMAP_H */
diff --git a/meta/libs/limb b/meta/libs/limb
index dee4e65..d6e7319 100644
--- a/meta/libs/limb
+++ b/meta/libs/limb
@@ -29,6 +29,13 @@ obj/nextsplit_rabin.o
 obj/hlookup.o
 obj/hlookup32.o
 obj/hlookup64.o
+# hmap
+obj/hmap/lookup.o
+obj/hmap/grow.o
+obj/hmap/hmap_init.o
+obj/hmap/hmap_set.o
+obj/hmap/hmap_get.o
+obj/hmap/hmap_free.o
 # SHA3
 obj/sha3/byte_order.o
 obj/sha3/rhash_sha3_process_block.o
diff --git a/src/hmap/grow.c b/src/hmap/grow.c
new file mode 100644
index 0000000..05aadc0
--- /dev/null
+++ b/src/hmap/grow.c
@@ -0,0 +1,90 @@
+#include "hmap.h"
+
+static void
+relocate(u32 idx, u32 min, u32 max, u32 *left, hmap *hmap)
+{
+    struct fullitem {
+        u32 key;
+        u8 data[DLEN(hmap)];
+    } * it;
+
+    it = ITEM(idx, hmap);
+    if (it->key) {
+        struct fullitem itmp, *new;
+
+        /* remove item from current spot */
+        itmp = *it;
+        memset(it, 0, sizeof(*it));
+
+        /* and look for its natural spot, or first available collision spot
+         * if needed */
+        u32 i, j;
+        for (i = it->key, j = 1; ; i += j++) {
+            /* get item's wanted spot */
+            new = ITEM(i & hmap->sa.a, hmap);
+
+            /* put it back if it's where it was */
+            if (new == it)
+                break;
+
+            /* make sure its wanted spot has been relocated if needed */
+            relocate(i & hmap->sa.a, min, max, left, hmap);
+
+            /* settle there if it's now available */
+            if (!new->key)
+                break;
+        }
+        /* put item in its (new) spot */
+        *new = itmp;
+        /* decrease left iif it was within [min,max] *and* its new spot is
+         * outside of ]min,max] */
+        i &= hmap->sa.a;
+        if (idx >= min && idx <= max && (i > max || i <= min))
+            --*left;
+    }
+}
+
+int
+grow(size_t n, hmap *hmap)
+{
+    u32 oldsize, newsize;
+
+    if (n > MAXSIZE)
+        n = MAXSIZE;
+
+    for (newsize = MINSIZE; newsize < n; newsize <<= 1)
+        ;
+
+    /* if sa.a was turned into the mask.. */
+    if (hmap->sa.a) {
+        /* ..make it the (currently) allocated size again */
+        ++hmap->sa.a;
+        hmap->sa.a *= hmap->ilen;
+    }
+    oldsize = hmap->sa.a;
+
+    if (!stralloc_ready_tuned(&hmap->sa, newsize * hmap->ilen, 0, 0, 1)) {
+        --hmap->sa.a;
+        return 0;
+    }
+
+    /* empty the new items */
+    memset(hmap->sa.s + oldsize * hmap->ilen, 0, (newsize - oldsize) * hmap->ilen);
+
+    /* make sa.a the new mask */
+    hmap->sa.a /= hmap->ilen;
+    --hmap->sa.a;
+
+    /* we'll (try to) relocate all items to their (new) "natural" spots, or
+     * a new collision/alternate spot if one got freed */
+    u32 left = hmap->sa.len;
+    for (u32 i = 0; left; ++i) {
+        struct item *it = ITEM(i, hmap);
+        if (it->key) {
+            relocate(i, i, oldsize, &left, hmap);
+            --left;
+        }
+    }
+
+    return 1;
+}
diff --git a/src/hmap/hmap_free.c b/src/hmap/hmap_free.c
new file mode 100644
index 0000000..57ffcd4
--- /dev/null
+++ b/src/hmap/hmap_free.c
@@ -0,0 +1,7 @@
+#include "hmap.h"
+
+void
+hmap_free(hmap *hmap)
+{
+    stralloc_free(&hmap->sa);
+}
diff --git a/src/hmap/hmap_get.c b/src/hmap/hmap_get.c
new file mode 100644
index 0000000..b617fb7
--- /dev/null
+++ b/src/hmap/hmap_get.c
@@ -0,0 +1,12 @@
+#include "hmap.h"
+
+void *
+hmap_get(u32 key, hmap *hmap)
+{
+    struct item *it;
+
+    it = lookup(key, hmap);
+    if (it->key)
+        return it->data;
+    return NULL;
+}
diff --git a/src/hmap/hmap_init.c b/src/hmap/hmap_init.c
new file mode 100644
index 0000000..0b50d89
--- /dev/null
+++ b/src/hmap/hmap_init.c
@@ -0,0 +1,22 @@
+#include <errno.h>
+#include "limb/uint64.h"
+#include "hmap.h"
+
+int
+hmap_init(size_t dlen, size_t n, hmap *hmap)
+{
+    if (!hmap || !dlen || !n)
+        return (errno = EINVAL, 0);
+
+    /* number of elements (n) must be a power of 2 */
+    u32 p = msb64((u64) n) - 1;
+    if (n > 1 << p)
+        n = 1 << (p + 1);
+
+    hmap->sa = stralloc_zero;
+    hmap->ilen = sizeof(u32) + dlen;
+    if (!grow(n, hmap))
+        return 0;
+
+    return 1;
+}
diff --git a/src/hmap/hmap_set.c b/src/hmap/hmap_set.c
new file mode 100644
index 0000000..d51b614
--- /dev/null
+++ b/src/hmap/hmap_set.c
@@ -0,0 +1,29 @@
+#include <errno.h>
+#include "hmap.h"
+
+int
+hmap_set(u32 key, void *data, hmap *hmap)
+{
+    struct item *it;
+
+    it = lookup(key, hmap);
+    if (!data) {
+        if (!it->key)
+            return (errno = ENOENT, 0);
+        memset(it->data, 0, hmap->ilen);
+        return 1;
+    }
+
+    it->key = key;
+    memcpy(it->data, data, DLEN(hmap));
+
+    if (++hmap->sa.len > hmap->sa.a - hmap->sa.a / 4) {
+        if (!grow(2 * hmap->sa.len, hmap)) {
+            --hmap->sa.len;
+            memset(it, 0, hmap->ilen);
+            return (errno = ENOMEM, 0);
+        }
+    }
+
+    return 1;
+}
diff --git a/src/hmap/lookup.c b/src/hmap/lookup.c
new file mode 100644
index 0000000..aef3067
--- /dev/null
+++ b/src/hmap/lookup.c
@@ -0,0 +1,16 @@
+#include "hmap.h"
+
+struct item *
+lookup(u32 key, hmap *hmap)
+{
+    struct item *it;
+    u32 i, j;
+
+    for (i = key, j = 1; ; i += j++) {
+        it = ITEM(i & hmap->sa.a, hmap);
+        if (!it->key || it->key == key)
+            break;
+    }
+
+    return it;
+}