Welcome to little lamb

Code » test-hashes » commit 245aa3e

Add SHA3 implementation from David Leon Gil

author Olivier Brunel
2023-01-25 15:07:47 UTC
committer Olivier Brunel
2023-01-25 15:07:47 UTC
parent 1432daef951399f33778db7cfda8105cc2b08b81

Add SHA3 implementation from David Leon Gil

Fixed/tweaked by TOR devs

include/sha3-unrolled.h +66 -0
meta/AUTHORS +1 -0
meta/deps-bin +1 -0
project.mk +1 -0
src/sha3-unrolled-impl.c +396 -0
src/sha3-unrolled.c +23 -0

diff --git a/include/sha3-unrolled.h b/include/sha3-unrolled.h
new file mode 100644
index 0000000..23316f4
--- /dev/null
+++ b/include/sha3-unrolled.h
@@ -0,0 +1,66 @@
+#ifndef KECCAK_FIPS202_H
+#define KECCAK_FIPS202_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define KECCAK_MAX_RATE 200
+
+/* Calculate the rate (block size) from the security target. */
+#define KECCAK_RATE(bits) (KECCAK_MAX_RATE - (bits / 4))
+
+/* The internal structure of a FIPS202 hash/xof instance.  Most callers
+ * should treat this as an opaque structure.
+ */
+typedef struct keccak_state {
+  uint8_t a[KECCAK_MAX_RATE];
+  size_t rate;
+  uint8_t delim;
+
+  uint8_t block[KECCAK_MAX_RATE];
+  size_t offset;
+
+  uint8_t finalized : 1;
+} keccak_state;
+
+/* Initialize a Keccak instance suitable for SHA-3 hash functions. */
+int keccak_digest_init(keccak_state *s, size_t bits);
+
+/* Feed more data into the SHA-3 hash instance. */
+int keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len);
+
+/* Calculate the SHA-3 hash digest.  The state is unmodified to support
+ * calculating multiple/rolling digests.
+ */
+int keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen);
+
+/* Initialize a Keccak instance suitable for XOFs (SHAKE-128/256). */
+int keccak_xof_init(keccak_state *s, size_t bits);
+
+/* Absorb more data into the XOF.  Must not be called after a squeeze call. */
+int keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len);
+
+/* Squeeze data out of the XOF. Must not attempt to absorb additional data,
+ * after a squeeze has been called.
+ */
+int keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen);
+
+/* Clone an existing hash/XOF instance. */
+void keccak_clone(keccak_state *out, const keccak_state *in);
+
+/* Cleanse sensitive data from a given hash instance. */
+void keccak_cleanse(keccak_state *s);
+
+#define decshake(bits) \
+  int shake##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+#define decsha3(bits) \
+  int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+decshake(128)
+decshake(256)
+decsha3(224)
+decsha3(256)
+decsha3(384)
+decsha3(512)
+#endif
diff --git a/meta/AUTHORS b/meta/AUTHORS
index e6db4eb..c9acca6 100644
--- a/meta/AUTHORS
+++ b/meta/AUTHORS
@@ -4,4 +4,5 @@ Main author:
 Contributors:
 * Markku-Juhani O. Saarinen [tiny-sha3]
 * Niels Möller [sha3-nettle-impl]
+* David Leon Gil, TOR devs [sha3-unrolled]
 * Samuel Neves and Jack O'Connor [blake3-off, blake3-off-noopt]
diff --git a/meta/deps-bin b/meta/deps-bin
index e532be8..9170fa8 100644
--- a/meta/deps-bin
+++ b/meta/deps-bin
@@ -4,6 +4,7 @@ test-blake2s-ska: src/test.o src/blake2s-ska.o skalibs
 test-sha3-lila: src/test.o src/sha3-lila.o limb skalibs
 test-blake3-lila: src/test.o src/blake3-lila.o limb skalibs
 test-sha3-tiny: src/test.o skalibs src/tiny-sha3.o src/sha3-tiny.o
+test-sha3-unrolled: src/test.o skalibs src/sha3-unrolled-impl.o src/sha3-unrolled.o
 test-sha3-nettle: src/test.o skalibs src/sha3-nettle-impl-le.o src/sha3-nettle-impl-memxor.o src/sha3-nettle-impl.o src/sha3-nettle.o
 test-blake3-off: src/test.o skalibs src/blake3-off-impl.o src/blake3-off-impl-portable.o src/blake3-off-impl-dispatch.o src/blake3_avx2_x86-64_unix.S src/blake3_avx512_x86-64_unix.S src/blake3_sse2_x86-64_unix.S src/blake3_sse41_x86-64_unix.S src/blake3-off.o
 test-blake3-off-noopt: src/test.o skalibs src/blake3-off-noopt-impl.o src/blake3-off-noopt-impl-portable.o src/blake3-off-noopt-impl-dispatch.o src/blake3-off-noopt.o
diff --git a/project.mk b/project.mk
index 8d5d706..a726c0a 100644
--- a/project.mk
+++ b/project.mk
@@ -2,6 +2,7 @@
 BINS = test-sha1-ska test-sha256-ska test-blake2s-ska \
 	   test-sha3-lila test-blake3-lila \
 	   test-sha3-tiny \
+	   test-sha3-unrolled \
 	   test-sha3-nettle \
 	   test-blake3-off test-blake3-off-noopt
 
diff --git a/src/sha3-unrolled-impl.c b/src/sha3-unrolled-impl.c
new file mode 100644
index 0000000..1c57f03
--- /dev/null
+++ b/src/sha3-unrolled-impl.c
@@ -0,0 +1,396 @@
+/** libkeccak-tiny
+ *
+ * A single-file implementation of SHA-3 and SHAKE.
+ *
+ * Implementor: David Leon Gil
+ * License: CC0, attribution kindly requested. Blame taken too,
+ * but not liability.
+ */
+#include "sha3-unrolled.h"
+
+#include <string.h>
+#include <byteswap.h>
+
+/******** Endianness conversion helpers ********/
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define le64toh(n) (n)
+#else
+#define le64toh(n) bswap_64(n)
+#endif
+
+static inline uint64_t
+loadu64le(const unsigned char *x) {
+  uint64_t r;
+  memcpy(&r, x, sizeof(r));
+  return le64toh(r);
+}
+
+static inline void
+storeu64le(uint8_t *x, uint64_t u) {
+  uint64_t val = le64toh(u);
+  memcpy(x, &val, sizeof(u));
+}
+
+/******** The Keccak-f[1600] permutation ********/
+
+/*** Constants. ***/
+static const uint8_t rho[24] = \
+  { 1,  3,   6, 10, 15, 21,
+    28, 36, 45, 55,  2, 14,
+    27, 41, 56,  8, 25, 43,
+    62, 18, 39, 61, 20, 44};
+static const uint8_t pi[24] = \
+  {10,  7, 11, 17, 18, 3,
+    5, 16,  8, 21, 24, 4,
+   15, 23, 19, 13, 12, 2,
+   20, 14, 22,  9, 6,  1};
+static const uint64_t RC[24] = \
+  {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
+   0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
+   0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
+   0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
+   0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
+   0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
+
+/*** Helper macros to unroll the permutation. ***/
+#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
+#define REPEAT6(e) e e e e e e
+#define REPEAT24(e) REPEAT6(e e e e)
+#define REPEAT5(e) e e e e e
+#define FOR5(v, s, e) \
+  v = 0;            \
+  REPEAT5(e; v += s;)
+
+/*** Keccak-f[1600] ***/
+static inline void keccakf(void* state) {
+  uint64_t* a = (uint64_t*)state;
+  uint64_t b[5] = {0};
+  uint64_t t = 0;
+  uint8_t x, y, i = 0;
+
+  REPEAT24(
+      // Theta
+      FOR5(x, 1,
+           b[x] = 0;
+           FOR5(y, 5,
+                b[x] ^= a[x + y]; ))
+      FOR5(x, 1,
+           FOR5(y, 5,
+                a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
+      // Rho and pi
+      t = a[1];
+      x = 0;
+      REPEAT24(b[0] = a[pi[x]];
+               a[pi[x]] = rol(t, rho[x]);
+               t = b[0];
+               x++; )
+      // Chi
+      FOR5(y,
+         5,
+         FOR5(x, 1,
+              b[x] = a[y + x];)
+         FOR5(x, 1,
+              a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
+      // Iota
+      a[0] ^= RC[i];
+      i++; )
+}
+
+/******** The FIPS202-defined functions. ********/
+
+/*** Some helper macros. ***/
+
+// `xorin` modified to handle Big Endian systems, `buf` being unaligned on
+// systems that care about such things.  Assumes that len is a multiple of 8,
+// which is always true for the rates we use, and the modified finalize.
+static inline void
+xorin8(uint8_t *dst, const uint8_t *src, size_t len) {
+  uint64_t* a = (uint64_t*)dst; // Always aligned.
+  for (size_t i = 0; i < len; i += 8) {
+    a[i/8] ^= loadu64le(src + i);
+  }
+}
+
+// `setout` likewise modified to handle Big Endian systems.  Assumes that len
+// is a multiple of 8, which is true for every rate we use.
+static inline void
+setout8(const uint8_t *src, uint8_t *dst, size_t len) {
+  const uint64_t *si = (const uint64_t*)src; // Always aligned.
+  for (size_t i = 0; i < len; i+= 8) {
+    storeu64le(dst+i, si[i/8]);
+  }
+}
+
+#define P keccakf
+#define Plen KECCAK_MAX_RATE
+
+#define KECCAK_DELIM_DIGEST 0x06
+#define KECCAK_DELIM_XOF 0x1f
+
+// Fold P*F over the full blocks of an input.
+#define foldP(I, L, F) \
+  while (L >= s->rate) {  \
+    F(s->a, I, s->rate);  \
+    P(s->a);              \
+    I += s->rate;         \
+    L -= s->rate;         \
+  }
+
+static inline void
+keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks)
+{
+  size_t blen = nr_blocks * s->rate;
+  foldP(buf, blen, xorin8);
+}
+
+static int
+keccak_update(keccak_state *s, const uint8_t *buf, size_t len)
+{
+  if (s->finalized)
+    return -1;
+  if ((buf == NULL) && len != 0)
+    return -1;
+
+  size_t remaining = len;
+  while (remaining > 0) {
+    if (s->offset == 0) {
+      const size_t blocks = remaining / s->rate;
+      size_t direct_bytes = blocks * s->rate;
+      if (direct_bytes > 0) {
+        keccak_absorb_blocks(s, buf, blocks);
+        remaining -= direct_bytes;
+        buf += direct_bytes;
+      }
+    }
+
+    const size_t buf_avail = s->rate - s->offset;
+    const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail;
+    if (buf_bytes > 0) {
+      memcpy(&s->block[s->offset], buf, buf_bytes);
+      s->offset += buf_bytes;
+      remaining -= buf_bytes;
+      buf += buf_bytes;
+    }
+    if (s->offset == s->rate) {
+      keccak_absorb_blocks(s, s->block, 1);
+      s->offset = 0;
+    }
+  }
+  return 0;
+}
+
+static void
+keccak_finalize(keccak_state *s)
+{
+  // Xor in the DS and pad frame.
+  s->block[s->offset++] = s->delim; // DS.
+  for (size_t i = s->offset; i < s->rate; i++) {
+    s->block[i] = 0;
+  }
+  s->block[s->rate - 1] |= 0x80; // Pad frame.
+
+  // Xor in the last block.
+  xorin8(s->a, s->block, s->rate);
+
+  memset(s->block, 0, sizeof(s->block));
+  s->finalized = 1;
+  s->offset = s->rate;
+}
+
+static inline void
+keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks)
+{
+  for (size_t n = 0; n < nr_blocks; n++) {
+    keccakf(s->a);
+    setout8(s->a, out, s->rate);
+    out += s->rate;
+  }
+}
+
+static int
+keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
+{
+  if (!s->finalized)
+    return -1;
+
+  size_t remaining = outlen;
+  while (remaining > 0) {
+    if (s->offset == s->rate) {
+      const size_t blocks = remaining / s->rate;
+      const size_t direct_bytes = blocks * s->rate;
+      if (blocks > 0) {
+        keccak_squeeze_blocks(s, out, blocks);
+        out += direct_bytes;
+        remaining -= direct_bytes;
+      }
+
+      if (remaining > 0) {
+        keccak_squeeze_blocks(s, s->block, 1);
+        s->offset = 0;
+      }
+    }
+
+    const size_t buf_bytes = s->rate - s->offset;
+    const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes;
+    if (indirect_bytes > 0) {
+      memcpy(out, &s->block[s->offset], indirect_bytes);
+      out += indirect_bytes;
+      s->offset += indirect_bytes;
+      remaining -= indirect_bytes;
+    }
+  }
+  return 0;
+}
+
+int
+keccak_digest_init(keccak_state *s, size_t bits)
+{
+  if (s == NULL)
+    return -1;
+  if (bits != 224 && bits != 256 && bits != 384 && bits != 512)
+    return -1;
+
+  keccak_cleanse(s);
+  s->rate = KECCAK_RATE(bits);
+  s->delim = KECCAK_DELIM_DIGEST;
+  return 0;
+}
+
+int
+keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len)
+{
+  if (s == NULL)
+    return -1;
+  if (s->delim != KECCAK_DELIM_DIGEST)
+    return -1;
+
+  return keccak_update(s, buf, len);
+}
+
+int
+keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen)
+{
+  if (s == NULL)
+    return -1;
+  if (s->delim != KECCAK_DELIM_DIGEST)
+    return -1;
+  if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8)
+    return -1;
+
+  // Work in a copy so that incremental/rolling hashes are easy.
+  keccak_state s_tmp;
+  keccak_clone(&s_tmp, s);
+  keccak_finalize(&s_tmp);
+  int ret = keccak_squeeze(&s_tmp, out, outlen);
+  keccak_cleanse(&s_tmp);
+  return ret;
+}
+
+int
+keccak_xof_init(keccak_state *s, size_t bits)
+{
+  if (s == NULL)
+    return -1;
+  if (bits != 128 && bits != 256)
+    return -1;
+
+  keccak_cleanse(s);
+  s->rate = KECCAK_RATE(bits);
+  s->delim = KECCAK_DELIM_XOF;
+  return 0;
+}
+
+int
+keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len)
+{
+  if (s == NULL)
+    return -1;
+  if (s->delim != KECCAK_DELIM_XOF)
+    return -1;
+
+  return keccak_update(s, buf, len);
+}
+
+int
+keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
+{
+  if (s == NULL)
+    return -1;
+  if (s->delim != KECCAK_DELIM_XOF)
+    return -1;
+
+  if (!s->finalized)
+    keccak_finalize(s);
+
+  return keccak_squeeze(s, out, outlen);
+}
+
+void
+keccak_clone(keccak_state *out, const keccak_state *in)
+{
+  memcpy(out, in, sizeof(keccak_state));
+}
+
+void
+keccak_cleanse(keccak_state *s)
+{
+  memset(s, 0, sizeof(keccak_state));
+}
+
+/** The sponge-based hash construction. **/
+static inline int hash(uint8_t* out, size_t outlen,
+                       const uint8_t* in, size_t inlen,
+                       size_t bits, uint8_t delim) {
+  if ((out == NULL) || ((in == NULL) && inlen != 0)) {
+    return -1;
+  }
+
+  int ret = 0;
+  keccak_state s;
+  keccak_cleanse(&s);
+
+  switch (delim) {
+    case KECCAK_DELIM_DIGEST:
+      ret |= keccak_digest_init(&s, bits);
+      ret |= keccak_digest_update(&s, in, inlen);
+      // Use the internal API instead of sum to avoid the memcpy.
+      keccak_finalize(&s);
+      ret |= keccak_squeeze(&s, out, outlen);
+      break;
+    case KECCAK_DELIM_XOF:
+      ret |= keccak_xof_init(&s, bits);
+      ret |= keccak_xof_absorb(&s, in, inlen);
+      ret |= keccak_xof_squeeze(&s, out, outlen);
+      break;
+    default:
+      return -1;
+  }
+  keccak_cleanse(&s);
+  return ret;
+}
+
+/*** Helper macros to define SHA3 and SHAKE instances. ***/
+#define defshake(bits)                                            \
+  int shake##bits(uint8_t* out, size_t outlen,                    \
+                  const uint8_t* in, size_t inlen) {              \
+    return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF);  \
+  }
+#define defsha3(bits)                                             \
+  int sha3_##bits(uint8_t* out, size_t outlen,                    \
+                  const uint8_t* in, size_t inlen) {              \
+    if (outlen > (bits/8)) {                                      \
+      return -1;                                                  \
+    }                                                             \
+    return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST);  \
+  }
+
+/*** FIPS202 SHAKE VOFs ***/
+defshake(128)
+defshake(256)
+
+/*** FIPS202 SHA3 FOFs ***/
+defsha3(224)
+defsha3(256)
+defsha3(384)
+defsha3(512)
diff --git a/src/sha3-unrolled.c b/src/sha3-unrolled.c
new file mode 100644
index 0000000..d6b4f4c
--- /dev/null
+++ b/src/sha3-unrolled.c
@@ -0,0 +1,23 @@
+#include "sha3-unrolled.h"
+
+static keccak_state ctx;
+
+void init(void)
+{
+    keccak_digest_init(&ctx, 256);
+}
+
+void update(const char *msg, size_t size)
+{
+    keccak_digest_update(&ctx, (const uint8_t *) msg, size);
+}
+
+void final(unsigned char *md)
+{
+    keccak_digest_sum(&ctx, (uint8_t *) md, 32);
+}
+
+int hashlen(void)
+{
+    return 32;
+}