Welcome to little lamb

Code » test-rolling-hash » next » tree

[next] / src / hashchop.c

/*
 * Copyright (c) 2011-2012 Scott Vokes <vokes.s@gmail.com>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <string.h> /* size_t */
#include <limb/int.h>

size_t
nextsplit(size_t min, size_t avg, const void *data, size_t dlen)
{
    if (dlen <= min) return dlen;
    u32 mask = (1 << (msb64(avg) - 1)) - 1;
    const u8 *buf = data;

    u32 a = 1, b = 0;
    size_t offset;

    for (u32 i = 0; i < min; ++i) {
        a += buf[i];
        b += (min - i + 1) * buf[i];
    }
    a &= mask;
    b &= mask;

    for (offset = min; offset < dlen; ++offset) {
        u32 k = offset - min;
        u32 l = offset;
        u8 nk = buf[k];
        u8 nl = buf[l];
        u32 na = (a - nk + nl);
        u32 nb = (b - (l - k + 1) * nk + na);
        u32 checksum = (na + (nb << 16)) & mask;
        if (checksum == 0) break;
        a = na;
        b = nb;
    }
    return offset;
}