/* This file is part of limb https://lila.oss/limb
* Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */
/* SPDX-License-Identifier: GPL-2.0-only */
#include <endian.h>
#include <limb/nextsplit.h>
#include <limb/int.h>
/* Author: Yucheng Zhang
* For more see paper: AE: An Asymmetric Extremum Content Defined Chunking
* Algorithm for Fast and Bandwidth-Efficient Data Deduplication
*/
#if __BYTE_ORDER == __BIG_ENDIAN
# define GETLE32(u) (u)
#else
# define GETLE32(u) __builtin_bswap32(u)
#endif
static inline int
cmp(const u8 *x, const u8 *y)
{
int ret;
u32 a = GETLE32(* ((u32 *) x));
u32 b = GETLE32(* ((u32 *) y));
if (a > b)
ret = 1;
else
ret = -1;
return ret;
}
size_t
nextsplit_ae(size_t min, size_t avg, const void *data, size_t dlen)
{
if (dlen <= min) return dlen;
double e = 2.718281828;
int window_size = avg / (e - 1);
/* cur points to the current position
* max points to the position of max value
* end points to the end of buffer
*/
const u8 *cur = (const u8 *) data + 1;
const u8 *max = (const u8 *) data;
const u8 *end = (const u8 *) data + dlen - sizeof(u32);
if (dlen <= window_size + sizeof(u32))
return dlen;
for ( ; cur <= end; ++cur) {
if (cmp(cur, max) < 0) {
max = cur;
continue;
}
if (cur == max + window_size)
return cur - (const u8 *) data;
}
return dlen;
}