/* This file is part of limb https://lila.oss/limb
* Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */
/* SPDX-License-Identifier: GPL-2.0-only */
#include <skalibs/sysdeps.h>
#include <limb/int.h>
/* For talks/tests and even graphs regarding those implementations and their
* performances:
* https://lila.oss/blog/2023-01-19-find-the-first-most-significant-bit-set-in-c
*/
/* GCC/Clang builtin; fastest solution */
#ifdef __builtin_clzll
int msb64(u64 v)
{
if (!v) return 0;
return 64 - __builtin_clzll(v);
}
/* 64bit - de Bruijn sequence is fast */
#elif SKALIBS_SIZEOFULONG == 8
/* Kim Walisch, Mark Dickinson
* https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2
*/
const u8 index64[64] = {
0, 47, 1, 56, 48, 27, 2, 60,
57, 49, 41, 37, 28, 16, 3, 61,
54, 58, 35, 52, 50, 42, 21, 44,
38, 32, 29, 23, 17, 11, 4, 62,
46, 55, 26, 59, 40, 36, 15, 53,
34, 51, 20, 43, 31, 22, 10, 45,
25, 39, 14, 33, 19, 30, 9, 24,
13, 18, 8, 12, 7, 6, 5, 63
};
int msb64(u64 v)
{
if (!v) return 0;
const u64 debruijn64 = 0x03F79D71B4CB0A89UL;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v |= v >> 32;
return index64[(v * debruijn64) >> 58] + 1;
}
/* 32bit - unrolled tree is fastest then, but this is pretty close and much
* simpler to read */
#else
/* VoidStar */
const u8 kTableLog2[256] = {
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
int msb64(u64 v)
{
if (!v) return 0;
int k = 0;
if (v > 0xFFFFFFFFUL) { v >>= 32; k = 32; }
if (v > 0x00FFFFFFUL) { v >>= 24; k |= 24; }
if (v > 0x0000FFFFUL) { v >>= 16; k |= 16; }
if (v > 0x000000FFUL) { v >>= 8; k |= 8; }
k |= kTableLog2[v]; /* precompute the Log2 of the low byte */
return k + 1;
}
#endif