Welcome to little lamb

Code » limb » commit 3a9d214

Add msb64() to find most significant bit set

author Olivier Brunel
2023-01-20 08:20:15 UTC
committer Olivier Brunel
2023-01-30 21:56:09 UTC
parent 40a43679f149fcf873d7a584f5f4a588202eca10

Add msb64() to find most significant bit set

doc/msb64.3.md +27 -0
include/limb/int.h +13 -0
meta/HISTORY +1 -1
meta/deps-lib +1 -1
src/msb64.c +84 -0

diff --git a/doc/msb64.3.md b/doc/msb64.3.md
new file mode 100644
index 0000000..5998845
--- /dev/null
+++ b/doc/msb64.3.md
@@ -0,0 +1,27 @@
+% limb manual
+% msb64(3)
+
+# NAME
+
+msb64 - return the position of the most significant bit set
+
+# SYNOPSIS
+
+    #include <limb/int.h>
+
+```pre hl
+int msb64(u64 <em>val</em>)
+```
+
+# DESCRIPTION
+
+The `msb64`() function returns the position of the last (most significant) bit
+set in `val`.
+
+The least significant bit is position 1 and the most significant bit is
+position 64.
+
+# RETURN VALUE
+
+`msb64`() returns the position of the most significant bit set (from 1 to 64),
+or 0 if no bits are set (i.e. `val` is zero).
diff --git a/include/limb/int.h b/include/limb/int.h
new file mode 100644
index 0000000..013981a
--- /dev/null
+++ b/include/limb/int.h
@@ -0,0 +1,13 @@
+#ifndef LIMB_INT_H
+#define LIMB_INT_H
+
+#include <stdint.h>
+
+typedef uint8_t  u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+extern int msb64(u64 val);
+
+#endif /* LIMB_INT_H */
diff --git a/meta/HISTORY b/meta/HISTORY
index aebd9af..6f6f2ec 100644
--- a/meta/HISTORY
+++ b/meta/HISTORY
@@ -1,6 +1,6 @@
 # Current development
 
-- Nothing yet.
+- Add msb64() to find the most significant bit set.
 
 # version 0.0.1 [released on 2023-01-16]
 
diff --git a/meta/deps-lib b/meta/deps-lib
index a8be909..0525b27 100644
--- a/meta/deps-lib
+++ b/meta/deps-lib
@@ -1 +1 @@
-limb: src/put.o src/open_exclat.o src/open_createat.o src/openc_exclat.o src/openc_createat.o skalibs
+limb: src/put.o src/open_exclat.o src/open_createat.o src/openc_exclat.o src/openc_createat.o src/msb64.o skalibs
diff --git a/src/msb64.c b/src/msb64.c
new file mode 100644
index 0000000..914fddb
--- /dev/null
+++ b/src/msb64.c
@@ -0,0 +1,84 @@
+#include <skalibs/sysdeps.h>
+#include "limb/int.h"
+
+/* For talks/tests and even graphs regarding those implementations and their
+ * performances:
+ * https://lila.oss/blog/2023-01-19-find-the-first-most-significant-bit-set-in-c
+ */
+
+
+/* GCC/Clang builtin; fastest solution */
+#ifdef __builtin_clzll
+
+int msb64(u64 v)
+{
+    if (!v) return 0;
+    return 64 - __builtin_clzll(v);
+}
+
+/* 64bit - de Bruijn sequence is fast */
+#elif SKALIBS_SIZEOFULONG == 8
+
+/* Kim Walisch, Mark Dickinson
+ * https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2
+ */
+
+const u8 index64[64] = {
+    0, 47,  1, 56, 48, 27,  2, 60,
+    57, 49, 41, 37, 28, 16,  3, 61,
+    54, 58, 35, 52, 50, 42, 21, 44,
+    38, 32, 29, 23, 17, 11,  4, 62,
+    46, 55, 26, 59, 40, 36, 15, 53,
+    34, 51, 20, 43, 31, 22, 10, 45,
+    25, 39, 14, 33, 19, 30,  9, 24,
+    13, 18,  8, 12,  7,  6,  5, 63
+};
+
+int msb64(u64 v)
+{
+    if (!v) return 0;
+
+    const u64 debruijn64 = 0x03F79D71B4CB0A89UL;
+
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+
+    return index64[(v * debruijn64) >> 58] + 1;
+}
+
+/* 32bit - unrolled tree is fastest then, but this is pretty close and much
+ * simpler to read */
+#else
+
+/* VoidStar */
+
+const u8 kTableLog2[256] = {
+0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+int msb64(u64 v)
+{
+    if (!v) return 0;
+
+    int  k = 0;
+    if (v > 0xFFFFFFFFUL) { v >>= 32; k  = 32; }
+    if (v > 0x00FFFFFFUL) { v >>= 24; k |= 24; }
+    if (v > 0x0000FFFFUL) { v >>= 16; k |= 16; }
+    if (v > 0x000000FFUL) { v >>=  8; k |=  8; }
+    k |= kTableLog2[v]; /* precompute the Log2 of the low byte */
+
+    return k + 1;
+}
+
+#endif