author | Olivier Brunel
<jjk@jjacky.com> 2023-01-20 08:20:15 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-01-30 21:56:09 UTC |
parent | 40a43679f149fcf873d7a584f5f4a588202eca10 |
doc/msb64.3.md | +27 | -0 |
include/limb/int.h | +13 | -0 |
meta/HISTORY | +1 | -1 |
meta/deps-lib | +1 | -1 |
src/msb64.c | +84 | -0 |
diff --git a/doc/msb64.3.md b/doc/msb64.3.md new file mode 100644 index 0000000..5998845 --- /dev/null +++ b/doc/msb64.3.md @@ -0,0 +1,27 @@ +% limb manual +% msb64(3) + +# NAME + +msb64 - return the position of the most significant bit set + +# SYNOPSIS + + #include <limb/int.h> + +```pre hl +int msb64(u64 <em>val</em>) +``` + +# DESCRIPTION + +The `msb64`() function returns the position of the last (most significant) bit +set in `val`. + +The least significant bit is position 1 and the most significant bit is +position 64. + +# RETURN VALUE + +`msb64`() returns the position of the most significant bit set (from 1 to 64), +or 0 if no bits are set (i.e. `val` is zero). diff --git a/include/limb/int.h b/include/limb/int.h new file mode 100644 index 0000000..013981a --- /dev/null +++ b/include/limb/int.h @@ -0,0 +1,13 @@ +#ifndef LIMB_INT_H +#define LIMB_INT_H + +#include <stdint.h> + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +extern int msb64(u64 val); + +#endif /* LIMB_INT_H */ diff --git a/meta/HISTORY b/meta/HISTORY index aebd9af..6f6f2ec 100644 --- a/meta/HISTORY +++ b/meta/HISTORY @@ -1,6 +1,6 @@ # Current development -- Nothing yet. +- Add msb64() to find the most significant bit set. # version 0.0.1 [released on 2023-01-16] diff --git a/meta/deps-lib b/meta/deps-lib index a8be909..0525b27 100644 --- a/meta/deps-lib +++ b/meta/deps-lib @@ -1 +1 @@ -limb: src/put.o src/open_exclat.o src/open_createat.o src/openc_exclat.o src/openc_createat.o skalibs +limb: src/put.o src/open_exclat.o src/open_createat.o src/openc_exclat.o src/openc_createat.o src/msb64.o skalibs diff --git a/src/msb64.c b/src/msb64.c new file mode 100644 index 0000000..914fddb --- /dev/null +++ b/src/msb64.c @@ -0,0 +1,84 @@ +#include <skalibs/sysdeps.h> +#include "limb/int.h" + +/* For talks/tests and even graphs regarding those implementations and their + * performances: + * https://lila.oss/blog/2023-01-19-find-the-first-most-significant-bit-set-in-c + */ + + +/* GCC/Clang builtin; fastest solution */ +#ifdef __builtin_clzll + +int msb64(u64 v) +{ + if (!v) return 0; + return 64 - __builtin_clzll(v); +} + +/* 64bit - de Bruijn sequence is fast */ +#elif SKALIBS_SIZEOFULONG == 8 + +/* Kim Walisch, Mark Dickinson + * https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2 + */ + +const u8 index64[64] = { + 0, 47, 1, 56, 48, 27, 2, 60, + 57, 49, 41, 37, 28, 16, 3, 61, + 54, 58, 35, 52, 50, 42, 21, 44, + 38, 32, 29, 23, 17, 11, 4, 62, + 46, 55, 26, 59, 40, 36, 15, 53, + 34, 51, 20, 43, 31, 22, 10, 45, + 25, 39, 14, 33, 19, 30, 9, 24, + 13, 18, 8, 12, 7, 6, 5, 63 +}; + +int msb64(u64 v) +{ + if (!v) return 0; + + const u64 debruijn64 = 0x03F79D71B4CB0A89UL; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return index64[(v * debruijn64) >> 58] + 1; +} + +/* 32bit - unrolled tree is fastest then, but this is pretty close and much + * simpler to read */ +#else + +/* VoidStar */ + +const u8 kTableLog2[256] = { +0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +int msb64(u64 v) +{ + if (!v) return 0; + + int k = 0; + if (v > 0xFFFFFFFFUL) { v >>= 32; k = 32; } + if (v > 0x00FFFFFFUL) { v >>= 24; k |= 24; } + if (v > 0x0000FFFFUL) { v >>= 16; k |= 16; } + if (v > 0x000000FFUL) { v >>= 8; k |= 8; } + k |= kTableLog2[v]; /* precompute the Log2 of the low byte */ + + return k + 1; +} + +#endif