Welcome to little lamb

Code » test-rolling-hash » master » tree

[master] / src / ae.c

#include <endian.h>
#include <string.h> /* size_t */
#include <limb/int.h>

/* Author: Yucheng Zhang
 * For more see paper: AE: An Asymmetric Extremum Content Defined Chunking
 * Algorithm for Fast and Bandwidth-Efficient Data Deduplication
 */

#if __BYTE_ORDER == __BIG_ENDIAN
# define GETLE64(u) (u)
#else
# define GETLE64(u) __builtin_bswap64(u)
#endif

static inline
int cmp(const u8 *x, const u8 *y)
{
    int ret;
    u64 a = GETLE64(* ((u64 *) x));
    u64 b = GETLE64(* ((u64 *) y));
    if (a > b)
        ret = 1;
    else
        ret = -1;
    return ret;
}

size_t
nextsplit(size_t min, size_t avg, const void *data, size_t dlen)
{
    if (dlen <= min) return dlen;

    double e = 2.718281828;
    int window_size = avg / (e - 1);
    /* cur points to the current position
     * max points to the position of max value
     * end points to the end of buffer
     */
    const u8 *cur = (const u8 *) data + 1;
    const u8 *max = (const u8 *) data;
    const u8 *end = (const u8 *) data + dlen - sizeof(u64);

    if (dlen <= window_size + sizeof(u64))
        return dlen;

    for ( ; cur <= end; ++cur) {
        if (cmp(cur, max) < 0) {
            max = cur;
            continue;
        }
        if (cur == max + window_size)
            return cur - (const u8 *) data;
    }
    return dlen;
}