Code » test-hashes » commit 245aa3e
Add SHA3 implementation from David Leon Gil
author | Olivier Brunel
<jjk@jjacky.com> 2023-01-25 15:07:47 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-01-25 15:07:47 UTC |
parent | 1432daef951399f33778db7cfda8105cc2b08b81 |
include/sha3-unrolled.h | +66 | -0 |
meta/AUTHORS | +1 | -0 |
meta/deps-bin | +1 | -0 |
project.mk | +1 | -0 |
src/sha3-unrolled-impl.c | +396 | -0 |
src/sha3-unrolled.c | +23 | -0 |
diff --git a/include/sha3-unrolled.h b/include/sha3-unrolled.h new file mode 100644 index 0000000..23316f4 --- /dev/null +++ b/include/sha3-unrolled.h @@ -0,0 +1,66 @@ +#ifndef KECCAK_FIPS202_H +#define KECCAK_FIPS202_H + +#include <stddef.h> +#include <stdint.h> + +#define KECCAK_MAX_RATE 200 + +/* Calculate the rate (block size) from the security target. */ +#define KECCAK_RATE(bits) (KECCAK_MAX_RATE - (bits / 4)) + +/* The internal structure of a FIPS202 hash/xof instance. Most callers + * should treat this as an opaque structure. + */ +typedef struct keccak_state { + uint8_t a[KECCAK_MAX_RATE]; + size_t rate; + uint8_t delim; + + uint8_t block[KECCAK_MAX_RATE]; + size_t offset; + + uint8_t finalized : 1; +} keccak_state; + +/* Initialize a Keccak instance suitable for SHA-3 hash functions. */ +int keccak_digest_init(keccak_state *s, size_t bits); + +/* Feed more data into the SHA-3 hash instance. */ +int keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len); + +/* Calculate the SHA-3 hash digest. The state is unmodified to support + * calculating multiple/rolling digests. + */ +int keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen); + +/* Initialize a Keccak instance suitable for XOFs (SHAKE-128/256). */ +int keccak_xof_init(keccak_state *s, size_t bits); + +/* Absorb more data into the XOF. Must not be called after a squeeze call. */ +int keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len); + +/* Squeeze data out of the XOF. Must not attempt to absorb additional data, + * after a squeeze has been called. + */ +int keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen); + +/* Clone an existing hash/XOF instance. */ +void keccak_clone(keccak_state *out, const keccak_state *in); + +/* Cleanse sensitive data from a given hash instance. */ +void keccak_cleanse(keccak_state *s); + +#define decshake(bits) \ + int shake##bits(uint8_t*, size_t, const uint8_t*, size_t); + +#define decsha3(bits) \ + int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t); + +decshake(128) +decshake(256) +decsha3(224) +decsha3(256) +decsha3(384) +decsha3(512) +#endif diff --git a/meta/AUTHORS b/meta/AUTHORS index e6db4eb..c9acca6 100644 --- a/meta/AUTHORS +++ b/meta/AUTHORS @@ -4,4 +4,5 @@ Main author: Contributors: * Markku-Juhani O. Saarinen [tiny-sha3] * Niels Möller [sha3-nettle-impl] +* David Leon Gil, TOR devs [sha3-unrolled] * Samuel Neves and Jack O'Connor [blake3-off, blake3-off-noopt] diff --git a/meta/deps-bin b/meta/deps-bin index e532be8..9170fa8 100644 --- a/meta/deps-bin +++ b/meta/deps-bin @@ -4,6 +4,7 @@ test-blake2s-ska: src/test.o src/blake2s-ska.o skalibs test-sha3-lila: src/test.o src/sha3-lila.o limb skalibs test-blake3-lila: src/test.o src/blake3-lila.o limb skalibs test-sha3-tiny: src/test.o skalibs src/tiny-sha3.o src/sha3-tiny.o +test-sha3-unrolled: src/test.o skalibs src/sha3-unrolled-impl.o src/sha3-unrolled.o test-sha3-nettle: src/test.o skalibs src/sha3-nettle-impl-le.o src/sha3-nettle-impl-memxor.o src/sha3-nettle-impl.o src/sha3-nettle.o test-blake3-off: src/test.o skalibs src/blake3-off-impl.o src/blake3-off-impl-portable.o src/blake3-off-impl-dispatch.o src/blake3_avx2_x86-64_unix.S src/blake3_avx512_x86-64_unix.S src/blake3_sse2_x86-64_unix.S src/blake3_sse41_x86-64_unix.S src/blake3-off.o test-blake3-off-noopt: src/test.o skalibs src/blake3-off-noopt-impl.o src/blake3-off-noopt-impl-portable.o src/blake3-off-noopt-impl-dispatch.o src/blake3-off-noopt.o diff --git a/project.mk b/project.mk index 8d5d706..a726c0a 100644 --- a/project.mk +++ b/project.mk @@ -2,6 +2,7 @@ BINS = test-sha1-ska test-sha256-ska test-blake2s-ska \ test-sha3-lila test-blake3-lila \ test-sha3-tiny \ + test-sha3-unrolled \ test-sha3-nettle \ test-blake3-off test-blake3-off-noopt diff --git a/src/sha3-unrolled-impl.c b/src/sha3-unrolled-impl.c new file mode 100644 index 0000000..1c57f03 --- /dev/null +++ b/src/sha3-unrolled-impl.c @@ -0,0 +1,396 @@ +/** libkeccak-tiny + * + * A single-file implementation of SHA-3 and SHAKE. + * + * Implementor: David Leon Gil + * License: CC0, attribution kindly requested. Blame taken too, + * but not liability. + */ +#include "sha3-unrolled.h" + +#include <string.h> +#include <byteswap.h> + +/******** Endianness conversion helpers ********/ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define le64toh(n) (n) +#else +#define le64toh(n) bswap_64(n) +#endif + +static inline uint64_t +loadu64le(const unsigned char *x) { + uint64_t r; + memcpy(&r, x, sizeof(r)); + return le64toh(r); +} + +static inline void +storeu64le(uint8_t *x, uint64_t u) { + uint64_t val = le64toh(u); + memcpy(x, &val, sizeof(u)); +} + +/******** The Keccak-f[1600] permutation ********/ + +/*** Constants. ***/ +static const uint8_t rho[24] = \ + { 1, 3, 6, 10, 15, 21, + 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, + 62, 18, 39, 61, 20, 44}; +static const uint8_t pi[24] = \ + {10, 7, 11, 17, 18, 3, + 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, + 20, 14, 22, 9, 6, 1}; +static const uint64_t RC[24] = \ + {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, + 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; + +/*** Helper macros to unroll the permutation. ***/ +#define rol(x, s) (((x) << s) | ((x) >> (64 - s))) +#define REPEAT6(e) e e e e e e +#define REPEAT24(e) REPEAT6(e e e e) +#define REPEAT5(e) e e e e e +#define FOR5(v, s, e) \ + v = 0; \ + REPEAT5(e; v += s;) + +/*** Keccak-f[1600] ***/ +static inline void keccakf(void* state) { + uint64_t* a = (uint64_t*)state; + uint64_t b[5] = {0}; + uint64_t t = 0; + uint8_t x, y, i = 0; + + REPEAT24( + // Theta + FOR5(x, 1, + b[x] = 0; + FOR5(y, 5, + b[x] ^= a[x + y]; )) + FOR5(x, 1, + FOR5(y, 5, + a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) + // Rho and pi + t = a[1]; + x = 0; + REPEAT24(b[0] = a[pi[x]]; + a[pi[x]] = rol(t, rho[x]); + t = b[0]; + x++; ) + // Chi + FOR5(y, + 5, + FOR5(x, 1, + b[x] = a[y + x];) + FOR5(x, 1, + a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) + // Iota + a[0] ^= RC[i]; + i++; ) +} + +/******** The FIPS202-defined functions. ********/ + +/*** Some helper macros. ***/ + +// `xorin` modified to handle Big Endian systems, `buf` being unaligned on +// systems that care about such things. Assumes that len is a multiple of 8, +// which is always true for the rates we use, and the modified finalize. +static inline void +xorin8(uint8_t *dst, const uint8_t *src, size_t len) { + uint64_t* a = (uint64_t*)dst; // Always aligned. + for (size_t i = 0; i < len; i += 8) { + a[i/8] ^= loadu64le(src + i); + } +} + +// `setout` likewise modified to handle Big Endian systems. Assumes that len +// is a multiple of 8, which is true for every rate we use. +static inline void +setout8(const uint8_t *src, uint8_t *dst, size_t len) { + const uint64_t *si = (const uint64_t*)src; // Always aligned. + for (size_t i = 0; i < len; i+= 8) { + storeu64le(dst+i, si[i/8]); + } +} + +#define P keccakf +#define Plen KECCAK_MAX_RATE + +#define KECCAK_DELIM_DIGEST 0x06 +#define KECCAK_DELIM_XOF 0x1f + +// Fold P*F over the full blocks of an input. +#define foldP(I, L, F) \ + while (L >= s->rate) { \ + F(s->a, I, s->rate); \ + P(s->a); \ + I += s->rate; \ + L -= s->rate; \ + } + +static inline void +keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks) +{ + size_t blen = nr_blocks * s->rate; + foldP(buf, blen, xorin8); +} + +static int +keccak_update(keccak_state *s, const uint8_t *buf, size_t len) +{ + if (s->finalized) + return -1; + if ((buf == NULL) && len != 0) + return -1; + + size_t remaining = len; + while (remaining > 0) { + if (s->offset == 0) { + const size_t blocks = remaining / s->rate; + size_t direct_bytes = blocks * s->rate; + if (direct_bytes > 0) { + keccak_absorb_blocks(s, buf, blocks); + remaining -= direct_bytes; + buf += direct_bytes; + } + } + + const size_t buf_avail = s->rate - s->offset; + const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail; + if (buf_bytes > 0) { + memcpy(&s->block[s->offset], buf, buf_bytes); + s->offset += buf_bytes; + remaining -= buf_bytes; + buf += buf_bytes; + } + if (s->offset == s->rate) { + keccak_absorb_blocks(s, s->block, 1); + s->offset = 0; + } + } + return 0; +} + +static void +keccak_finalize(keccak_state *s) +{ + // Xor in the DS and pad frame. + s->block[s->offset++] = s->delim; // DS. + for (size_t i = s->offset; i < s->rate; i++) { + s->block[i] = 0; + } + s->block[s->rate - 1] |= 0x80; // Pad frame. + + // Xor in the last block. + xorin8(s->a, s->block, s->rate); + + memset(s->block, 0, sizeof(s->block)); + s->finalized = 1; + s->offset = s->rate; +} + +static inline void +keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks) +{ + for (size_t n = 0; n < nr_blocks; n++) { + keccakf(s->a); + setout8(s->a, out, s->rate); + out += s->rate; + } +} + +static int +keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen) +{ + if (!s->finalized) + return -1; + + size_t remaining = outlen; + while (remaining > 0) { + if (s->offset == s->rate) { + const size_t blocks = remaining / s->rate; + const size_t direct_bytes = blocks * s->rate; + if (blocks > 0) { + keccak_squeeze_blocks(s, out, blocks); + out += direct_bytes; + remaining -= direct_bytes; + } + + if (remaining > 0) { + keccak_squeeze_blocks(s, s->block, 1); + s->offset = 0; + } + } + + const size_t buf_bytes = s->rate - s->offset; + const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes; + if (indirect_bytes > 0) { + memcpy(out, &s->block[s->offset], indirect_bytes); + out += indirect_bytes; + s->offset += indirect_bytes; + remaining -= indirect_bytes; + } + } + return 0; +} + +int +keccak_digest_init(keccak_state *s, size_t bits) +{ + if (s == NULL) + return -1; + if (bits != 224 && bits != 256 && bits != 384 && bits != 512) + return -1; + + keccak_cleanse(s); + s->rate = KECCAK_RATE(bits); + s->delim = KECCAK_DELIM_DIGEST; + return 0; +} + +int +keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len) +{ + if (s == NULL) + return -1; + if (s->delim != KECCAK_DELIM_DIGEST) + return -1; + + return keccak_update(s, buf, len); +} + +int +keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen) +{ + if (s == NULL) + return -1; + if (s->delim != KECCAK_DELIM_DIGEST) + return -1; + if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8) + return -1; + + // Work in a copy so that incremental/rolling hashes are easy. + keccak_state s_tmp; + keccak_clone(&s_tmp, s); + keccak_finalize(&s_tmp); + int ret = keccak_squeeze(&s_tmp, out, outlen); + keccak_cleanse(&s_tmp); + return ret; +} + +int +keccak_xof_init(keccak_state *s, size_t bits) +{ + if (s == NULL) + return -1; + if (bits != 128 && bits != 256) + return -1; + + keccak_cleanse(s); + s->rate = KECCAK_RATE(bits); + s->delim = KECCAK_DELIM_XOF; + return 0; +} + +int +keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len) +{ + if (s == NULL) + return -1; + if (s->delim != KECCAK_DELIM_XOF) + return -1; + + return keccak_update(s, buf, len); +} + +int +keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen) +{ + if (s == NULL) + return -1; + if (s->delim != KECCAK_DELIM_XOF) + return -1; + + if (!s->finalized) + keccak_finalize(s); + + return keccak_squeeze(s, out, outlen); +} + +void +keccak_clone(keccak_state *out, const keccak_state *in) +{ + memcpy(out, in, sizeof(keccak_state)); +} + +void +keccak_cleanse(keccak_state *s) +{ + memset(s, 0, sizeof(keccak_state)); +} + +/** The sponge-based hash construction. **/ +static inline int hash(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen, + size_t bits, uint8_t delim) { + if ((out == NULL) || ((in == NULL) && inlen != 0)) { + return -1; + } + + int ret = 0; + keccak_state s; + keccak_cleanse(&s); + + switch (delim) { + case KECCAK_DELIM_DIGEST: + ret |= keccak_digest_init(&s, bits); + ret |= keccak_digest_update(&s, in, inlen); + // Use the internal API instead of sum to avoid the memcpy. + keccak_finalize(&s); + ret |= keccak_squeeze(&s, out, outlen); + break; + case KECCAK_DELIM_XOF: + ret |= keccak_xof_init(&s, bits); + ret |= keccak_xof_absorb(&s, in, inlen); + ret |= keccak_xof_squeeze(&s, out, outlen); + break; + default: + return -1; + } + keccak_cleanse(&s); + return ret; +} + +/*** Helper macros to define SHA3 and SHAKE instances. ***/ +#define defshake(bits) \ + int shake##bits(uint8_t* out, size_t outlen, \ + const uint8_t* in, size_t inlen) { \ + return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF); \ + } +#define defsha3(bits) \ + int sha3_##bits(uint8_t* out, size_t outlen, \ + const uint8_t* in, size_t inlen) { \ + if (outlen > (bits/8)) { \ + return -1; \ + } \ + return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \ + } + +/*** FIPS202 SHAKE VOFs ***/ +defshake(128) +defshake(256) + +/*** FIPS202 SHA3 FOFs ***/ +defsha3(224) +defsha3(256) +defsha3(384) +defsha3(512) diff --git a/src/sha3-unrolled.c b/src/sha3-unrolled.c new file mode 100644 index 0000000..d6b4f4c --- /dev/null +++ b/src/sha3-unrolled.c @@ -0,0 +1,23 @@ +#include "sha3-unrolled.h" + +static keccak_state ctx; + +void init(void) +{ + keccak_digest_init(&ctx, 256); +} + +void update(const char *msg, size_t size) +{ + keccak_digest_update(&ctx, (const uint8_t *) msg, size); +} + +void final(unsigned char *md) +{ + keccak_digest_sum(&ctx, (uint8_t *) md, 32); +} + +int hashlen(void) +{ + return 32; +}