#include <endian.h>
#include <string.h> /* size_t */
#include <limb/int.h>
/* Author: Yucheng Zhang
* For more see paper: AE: An Asymmetric Extremum Content Defined Chunking
* Algorithm for Fast and Bandwidth-Efficient Data Deduplication
*/
#if __BYTE_ORDER == __BIG_ENDIAN
# define GETLE64(u) (u)
#else
# define GETLE64(u) __builtin_bswap64(u)
#endif
static inline
int cmp(const u8 *x, const u8 *y)
{
int ret;
u64 a = GETLE64(* ((u64 *) x));
u64 b = GETLE64(* ((u64 *) y));
if (a > b)
ret = 1;
else
ret = -1;
return ret;
}
size_t
nextsplit(size_t min, size_t avg, const void *data, size_t dlen)
{
if (dlen <= min) return dlen;
double e = 2.718281828;
int window_size = avg / (e - 1);
/* cur points to the current position
* max points to the position of max value
* end points to the end of buffer
*/
const u8 *cur = (const u8 *) data + 1;
const u8 *max = (const u8 *) data;
const u8 *end = (const u8 *) data + dlen - sizeof(u64);
if (dlen <= window_size + sizeof(u64))
return dlen;
for ( ; cur <= end; ++cur) {
if (cmp(cur, max) < 0) {
max = cur;
continue;
}
if (cur == max + window_size)
return cur - (const u8 *) data;
}
return dlen;
}