#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <skalibs/stralloc.h>
#include <skalibs/unix-transactional.h>
#include <skalibs/djbunix.h>
#include <skalibs/uint32.h>
#include <skalibs/uint16.h>
#include <skalibs/fmtscan.h>
#include <limb/int.h>
#include <limb/output.h>
#include <limb/blake3.h>
/* Program name. Presumably picked up by the limb output helpers (warnusys(),
 * dief(), ...) as the message prefix -- TODO confirm against limb/output.h. */
const char *PROG = "test";
/* Splitter under test; defined in another translation unit.
 * As used by bench(): it is given the minimum and average sizes, a pointer to
 * the not-yet-split data and how many bytes may be examined, and its return
 * value is taken as the length of the next block starting at `data`. */
extern size_t nextsplit(size_t min, size_t avg, const void *data, size_t dlen);
/* State shared between main(), processdir() and bench(). */
struct bench {
/* passed as-is to nextsplit() as its `min` argument */
size_t min;
/* passed as-is to nextsplit() as its `avg` argument */
size_t avg;
/* upper bound on the number of bytes handed to nextsplit() in one call */
size_t max;
/* number of times bench() repeats the splitting pass over the data */
int iter;
/* buffer holding the data to split */
stralloc *sa;
/* output buffer receiving the block lengths, packed via u64_pack_trim() */
u8 *blocks;
/* offset into `blocks` where bench() starts writing; updated by bench() */
size_t blkpos;
/* offset into `sa` where bench() starts splitting */
size_t pos;
/* running sum of the bytes covered by each bench() call (one pass each) */
size_t total;
/* when non-zero, bench() prints each block's hash and size on its first pass */
int hashes;
};
static void
bench(struct bench *b)
{
size_t pos, curblkpos = b->blkpos;
for (int i = 0; i < b->iter; ++i) {
curblkpos = b->blkpos;
pos = b->pos;
while (pos < b->sa->len) {
size_t len = b->sa->len - pos;
size_t offset = nextsplit(b->min, b->avg,
b->sa->s + pos, (len > b->max) ? b->max : len);
curblkpos += u64_pack_trim((u64) offset, b->blocks + curblkpos);
/* special case: when scanning directories, we don't keep the
* entire dataset in memory, so we need to compute/print hashes
* right now within the benchmarking. */
if (i == 0 && b->hashes) {
unsigned char buf[32];
blake3(b->sa->s + pos, offset, buf);
char hash[32 * 2 + 1] = { 0 };
for (int i = 0; i < sizeof(buf); ++i) {
if (buf[i] & 0xf0)
hash[i * 2] = fmtscan_asc(buf[i] >> 4);
else
hash[i * 2] = '0';
hash[i * 2 + 1] = fmtscan_asc(buf[i] & 0x0f);
}
char size[UINT32_FMT];
size[uint32_fmt(size, offset)] = 0;
out(hash, " ", size);
}
pos += offset;
}
}
b->blkpos = curblkpos;
b->total += b->sa->len - b->pos;
}
/*
 * Recursively run bench() on every file found under directory `name`, which
 * is opened relative to `basefd`.
 *
 * Each entry is read in full into b->sa, benchmarked, and then dropped again
 * by restoring the previous length of b->sa. Entries that turn out to be
 * directories (reading them fails with EISDIR) are descended into; entries
 * that cannot be opened or read for any other reason only trigger a warning
 * and are skipped.
 *
 * Returns 0 on success, -1 if the directory could not be opened or read, or
 * if processing a sub-directory failed.
 */
static int
processdir(int basefd, const char *name, struct bench *b)
{
    int ret = -1;
    int fd;
    DIR *dir = NULL;
    struct dirent *de;

    fd = open_readat(basefd, name);
    if (fd < 0) goto err;
    dir = fdopendir(fd);
    if (!dir) goto err;
    /* the descriptor is now owned by dir; closedir() takes care of it */
    fd = -1;

    for (;;) {
        /* readdir() returns NULL both at end of directory and on error;
         * errno is the only way to tell them apart */
        errno = 0;
        de = readdir(dir);
        if (!de) {
            if (errno) goto err;
            break;
        }

        /* skip "." and ".." */
        if (de->d_name[0] == '.' &&
                (!de->d_name[1] || (de->d_name[1] == '.' && !de->d_name[2])))
            continue;

        size_t salen = b->sa->len;
        fd = open_readat(dirfd(dir), de->d_name);
        if (fd < 0 || !slurp(b->sa, fd)) {
            /* look at errno before anything else gets a chance to touch it */
            int isdir = (errno == EISDIR);

            /* Forget the descriptor as soon as it is closed: the cleanup
             * code at `err` closes any fd >= 0, and it is reached both on
             * failure and on normal exit from the loop, so a stale value
             * would be closed a second time. */
            if (fd >= 0) {
                fd_close(fd);
                fd = -1;
            }

            if (isdir) {
                if (processdir(dirfd(dir), de->d_name, b) < 0)
                    goto err;
            } else {
                warnusys("process ...", name, "/", de->d_name);
            }
            continue;
        }
        fd_close(fd);
        fd = -1;

        bench(b);
        /* drop the file content, the buffer gets reused for the next entry */
        b->sa->len = salen;
    }

    ret = 0;
err:
    if (fd >= 0) fd_close(fd);
    if (dir) closedir(dir);
    return ret;
}
#include <stdio.h>
/*
 * Usage: [-a AVGSIZE] [-m MINSIZE] [-M MAXSIZE] [-l | -H] FILE [ITER]
 *
 * Splits FILE (or, when FILE is a directory, every file found under it) into
 * blocks using nextsplit(), ITER times (default: 1), then reports on stderr
 * the number of blocks, their minimum/average/maximum size and the throughput.
 *
 *   -a/-m/-M  set the average/minimum/maximum sizes; the value may be attached
 *             to the option or given as the next argument
 *   -l        list the size of every block on stdout
 *   -H        same, with each size preceded by the BLAKE3 hash of the block
 *
 * Returns 0 on success, 2 if the sum of all block sizes does not match the
 * amount of data processed.
 */
int
main(int argc, const char *argv[])
{
    struct timespec ts1, ts2;
    stralloc sa = STRALLOC_ZERO;
    /* NOTE(review): bench() writes the packed block lengths in here without
     * any bound check, so a large enough input (or small enough sizes) can
     * overrun this buffer. */
    u8 blocks[800 << 10];
    struct bench b = {
        .min = ( 4 << 10),
        .avg = ( 8 << 10),
        .max = ( 1 << 20),
        .iter = 1,
        .sa = &sa,
        .blocks = blocks,
    };
    int list = 0;

    while (argc >= 2) {
        /* number of argv entries used up by the current option */
        int used = 1;

        if (!strncmp(argv[1], "-a", 2) || !strncmp(argv[1], "-m", 2)
                || !strncmp(argv[1], "-M", 2)) {
            /* the value is either attached ("-a8192") or the next argument */
            const char *s = argv[1] + 2;
            if (!*s) {
                /* only a detached value can be missing */
                if (argc < 3)
                    dief(1, "missing value for ", argv[1]);
                s = argv[2];
                used = 2;
            }

            u32 u;
            if (!uint32_scan(s, &u))
                dief(1, "invalid value for ", argv[1]);
            switch (argv[1][1]) {
            case 'a': b.avg = u; break;
            case 'm': b.min = u; break;
            case 'M': b.max = u; break;
            }
        } else if (!strcmp(argv[1], "-l")) {
            list = 1;
        } else if (!strcmp(argv[1], "-H")) {
            list = 2;
        } else {
            break;
        }

        /* Remove exactly what this option consumed, once. argv holds
         * argc + 1 entries (including the terminating NULL), so moving the
         * new argc entries brings the terminator along. */
        argc -= used;
        memmove(&argv[1], &argv[1 + used], argc * sizeof(*argv));
    }

    if (argc != 2 && argc != 3)
        dieusage(1, "[-a AVGSIZE] [-m MINSIZE] [-M MAXSIZE] [-l | -H] FILE [ITER]");
    if (argc == 3) {
        u16 u;
        /* zero passes would leave no block to report on */
        if (!uint16_scan(argv[2], &u) || !u)
            dief(1, "invalid ITER argument");
        b.iter = u;
    }

    int fd = open_read(argv[1]);
    if (fd < 0 || !slurp(&sa, fd)) {
        /* anything but "this is a directory" is fatal */
        if (errno != EISDIR)
            diefusys(2, "read ", argv[1]);
        if (fd >= 0) fd_close(fd);
        fd = -1;
        if (list == 2) {
            /* file contents are not kept around when scanning a directory,
             * so hashes must be printed by bench() as it goes */
            b.hashes = 1;
            list = 0;
        }
    }

    clock_gettime(CLOCK_MONOTONIC, &ts1);
    if (fd < 0)
        processdir(AT_FDCWD, argv[1], &b);
    else
        bench(&b);
    clock_gettime(CLOCK_MONOTONIC, &ts2);
    /* a zero byte marks the end of the packed block lengths */
    blocks[b.blkpos] = 0;
    if (fd >= 0) fd_close(fd);

    size_t min = b.total;
    size_t max = 0;
    size_t total = 0;
    int n = 0;
    size_t pos = 0;
    for (int o = 0; blocks[o]; ++n) {
        u64 u;
        o += u64_unpack_trim(blocks + o, &u);
        if (list) {
            if (list == 2) {
                unsigned char buf[32];
                blake3(sa.s + pos, u, buf);
                for (size_t i = 0; i < sizeof(buf); ++i)
                    fprintf(stdout, "%.02x", buf[i]);
                pos += u;
            } else if (list == 1) {
                fprintf(stdout, "block:");
            }
            fprintf(stdout, " %llu\n", (unsigned long long) u);
        }
        /* blocks[o] is only zero once the last block was read, i.e. the very
         * last block is left out of the minimum */
        if (u < min && blocks[o]) min = u;
        if (u > max) max = u;
        total += u;
    }

    if (total != b.total) {
        fprintf(stderr, "incorrect total size %zu != %zu\n", total, b.total);
        return 2;
    }

    /* empty input means no block at all: do not divide by zero */
    size_t avg = n ? total / (size_t) n : 0;
    fprintf(stderr, "%d blocks; min=%zu, avg=%zu, max=%zu\n", n, min, avg, max);

    /* tv_nsec may go negative here, which the sum below makes up for */
    ts2.tv_sec -= ts1.tv_sec;
    ts2.tv_nsec -= ts1.tv_nsec;
    double took = ts2.tv_sec + (ts2.tv_nsec / 1000000000.0);
    double speed = ((b.total * b.iter) / took) / (1 << 20);
    fprintf(stderr, "took %.09f seconds, %f MiB/s\n", took, speed);
    return 0;
}