Welcome to little lamb

Code » limb » master » tree

[master] / src / liblimb / u64.h / msb64.c

/* This file is part of limb                           https://lila.oss/limb
 * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
/* SPDX-License-Identifier: GPL-2.0-only */
#include <skalibs/sysdeps.h>
#include <limb/int.h>

/* For talks/tests and even graphs regarding those implementations and their
 * performances:
 * https://lila.oss/blog/2023-01-19-find-the-first-most-significant-bit-set-in-c
 */


/* GCC/Clang builtin; fastest solution */
#ifdef __builtin_clzll

int msb64(u64 v)
{
    if (!v) return 0;
    return 64 - __builtin_clzll(v);
}

/* 64bit - de Bruijn sequence is fast */
#elif SKALIBS_SIZEOFULONG == 8

/* Kim Walisch, Mark Dickinson
 * https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2
 */

const u8 index64[64] = {
    0, 47,  1, 56, 48, 27,  2, 60,
    57, 49, 41, 37, 28, 16,  3, 61,
    54, 58, 35, 52, 50, 42, 21, 44,
    38, 32, 29, 23, 17, 11,  4, 62,
    46, 55, 26, 59, 40, 36, 15, 53,
    34, 51, 20, 43, 31, 22, 10, 45,
    25, 39, 14, 33, 19, 30,  9, 24,
    13, 18,  8, 12,  7,  6,  5, 63
};

int msb64(u64 v)
{
    if (!v) return 0;

    const u64 debruijn64 = 0x03F79D71B4CB0A89UL;

    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v |= v >> 32;

    return index64[(v * debruijn64) >> 58] + 1;
}

/* 32bit - unrolled tree is fastest then, but this is pretty close and much
 * simpler to read */
#else

/* VoidStar */

const u8 kTableLog2[256] = {
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};

int msb64(u64 v)
{
    if (!v) return 0;

    int  k = 0;
    if (v > 0xFFFFFFFFUL) { v >>= 32; k  = 32; }
    if (v > 0x00FFFFFFUL) { v >>= 24; k |= 24; }
    if (v > 0x0000FFFFUL) { v >>= 16; k |= 16; }
    if (v > 0x000000FFUL) { v >>=  8; k |=  8; }
    k |= kTableLog2[v]; /* precompute the Log2 of the low byte */

    return k + 1;
}

#endif