Welcome to little lamb

Code » limb » release » tree

[release] / src / liblimb / nextsplit.h / nextsplit_ae.c

/* This file is part of limb                           https://lila.oss/limb
 * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
/* SPDX-License-Identifier: GPL-2.0-only */
#include <endian.h>
#include <limb/nextsplit.h>
#include <limb/int.h>

/* Author: Yucheng Zhang
 * For more see paper: AE: An Asymmetric Extremum Content Defined Chunking
 * Algorithm for Fast and Bandwidth-Efficient Data Deduplication
 */

#if __BYTE_ORDER == __BIG_ENDIAN
# define GETLE32(u) (u)
#else
# define GETLE32(u) __builtin_bswap32(u)
#endif

static inline int
cmp(const u8 *x, const u8 *y)
{
    int ret;
    u32 a = GETLE32(* ((u32 *) x));
    u32 b = GETLE32(* ((u32 *) y));
    if (a > b)
        ret = 1;
    else
        ret = -1;
    return ret;
}

size_t
nextsplit_ae(size_t min, size_t avg, const void *data, size_t dlen)
{
    if (dlen <= min) return dlen;

    double e = 2.718281828;
    int window_size = avg / (e - 1);
    /* cur points to the current position
     * max points to the position of max value
     * end points to the end of buffer
     */
    const u8 *cur = (const u8 *) data + 1;
    const u8 *max = (const u8 *) data;
    const u8 *end = (const u8 *) data + dlen - sizeof(u32);

    if (dlen <= window_size + sizeof(u32))
        return dlen;

    for ( ; cur <= end; ++cur) {
        if (cmp(cur, max) < 0) {
            max = cur;
            continue;
        }
        if (cur == max + window_size)
            return cur - (const u8 *) data;
    }
    return dlen;
}