Code » test-hashes » commit 8ddee0a
Add SHA3 implementation from nettle 3.4
author | Olivier Brunel
<jjk@jjacky.com> 2023-01-25 12:17:39 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-01-25 12:20:12 UTC |
parent | 3a4cd5acea4c1c07459e32cec46ea20ab240ef9f |
include/sha3-nettle-impl-le.h | +54 | -0 |
include/sha3-nettle-impl-memxor.h | +49 | -0 |
include/sha3-nettle-impl.h | +42 | -0 |
meta/AUTHORS | +1 | -0 |
meta/deps-bin | +1 | -0 |
project.mk | +1 | -1 |
src/sha3-nettle-impl-le.c | +30 | -0 |
src/sha3-nettle-impl-memxor.c | +390 | -0 |
src/sha3-nettle-impl.c | +238 | -0 |
src/sha3-nettle.c | +23 | -0 |
diff --git a/include/sha3-nettle-impl-le.h b/include/sha3-nettle-impl-le.h new file mode 100644 index 0000000..6f3be72 --- /dev/null +++ b/include/sha3-nettle-impl-le.h @@ -0,0 +1,54 @@ +#ifndef LE_H +#define LE_H + +#include <stdint.h> +#include <string.h> /* size_t */ + +/* The masking of the right shift is needed to allow n == 0 (using + just 32 - n and 64 - n results in undefined behaviour). Most uses + of these macros use a constant and non-zero rotation count. */ +#define ROTL32(n,x) (((x)<<(n)) | ((x)>>((-(n)&31)))) + +#define LE_READ_UINT32(p) \ + ( (((uint32_t) (p)[3]) << 24) \ + | (((uint32_t) (p)[2]) << 16) \ + | (((uint32_t) (p)[1]) << 8) \ + | ((uint32_t) (p)[0])) + +#define LE_WRITE_UINT32(p, i) \ + do { \ + (p)[3] = ((i) >> 24) & 0xff; \ + (p)[2] = ((i) >> 16) & 0xff; \ + (p)[1] = ((i) >> 8) & 0xff; \ + (p)[0] = (i) & 0xff; \ + } while (0) + + +#define ROTL64(n,x) (((x)<<(n)) | ((x)>>((-(n))&63))) + +#define LE_READ_UINT64(p) \ + ( (((uint64_t) (p)[7]) << 56) \ + | (((uint64_t) (p)[6]) << 48) \ + | (((uint64_t) (p)[5]) << 40) \ + | (((uint64_t) (p)[4]) << 32) \ + | (((uint64_t) (p)[3]) << 24) \ + | (((uint64_t) (p)[2]) << 16) \ + | (((uint64_t) (p)[1]) << 8) \ + | ((uint64_t) (p)[0])) + +#define LE_WRITE_UINT64(p, i) \ + do { \ + (p)[7] = ((i) >> 56) & 0xff; \ + (p)[6] = ((i) >> 48) & 0xff; \ + (p)[5] = ((i) >> 40) & 0xff; \ + (p)[4] = ((i) >> 32) & 0xff; \ + (p)[3] = ((i) >> 24) & 0xff; \ + (p)[2] = ((i) >> 16) & 0xff; \ + (p)[1] = ((i) >> 8) & 0xff; \ + (p)[0] = (i) & 0xff; \ + } while (0) + + +void _nettle_write_le64(size_t length, uint8_t *dst, const uint64_t *src); + +#endif /* LE_H */ diff --git a/include/sha3-nettle-impl-memxor.h b/include/sha3-nettle-impl-memxor.h new file mode 100644 index 0000000..6f36a20 --- /dev/null +++ b/include/sha3-nettle-impl-memxor.h @@ -0,0 +1,49 @@ +#ifndef MEMXOR_H +#define MEMXOR_H + +#include <stdint.h> +#include <stdlib.h> + +void *memxor(void *dst, const void *src, size_t n); +void *memxor3(void *dst, const 
void *a, const void *b, size_t n); + + +/* memxor-internal */ + +/* The word_t type is intended to be the native word size. */ +#if defined(__x86_64__) || defined(__arch64__) +/* Including on M$ windows, where unsigned long is only 32 bits */ +typedef uint64_t word_t; +#else +typedef unsigned long int word_t; +#endif + +#define ALIGN_OFFSET(p) ((uintptr_t) (p) % sizeof(word_t)) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define MERGE(w0, sh_1, w1, sh_2) \ + (((w0) >> (sh_1)) | ((w1) << (sh_2))) +#else +#define MERGE(w0, sh_1, w1, sh_2) \ + (((w0) << (sh_1)) | ((w1) >> (sh_2))) +#endif + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define READ_PARTIAL(r,p,n) do { \ + word_t _rp_x; \ + unsigned _rp_i; \ + for (_rp_i = (n), _rp_x = (p)[--_rp_i]; _rp_i > 0;) \ + _rp_x = (_rp_x << CHAR_BIT) | (p)[--_rp_i]; \ + (r) = _rp_x; \ +} while (0) +#else +#define READ_PARTIAL(r,p,n) do { \ + word_t _rp_x; \ + unsigned _rp_i; \ + for (_rp_x = (p)[0], _rp_i = 1; _rp_i < (n); _rp_i++) \ + _rp_x = (_rp_x << CHAR_BIT) | (p)[_rp_i]; \ + (r) = _rp_x; \ +} while (0) +#endif + +#endif /* MEMXOR_H */ diff --git a/include/sha3-nettle-impl.h b/include/sha3-nettle-impl.h new file mode 100644 index 0000000..f883833 --- /dev/null +++ b/include/sha3-nettle-impl.h @@ -0,0 +1,42 @@ +#ifndef SHA3_H +#define SHA3_H + +#include <stdint.h> +#include <stddef.h> /* For size_t */ + +#define SHA3_ROUNDS 24 + +/* The sha3 state is a 5x5 matrix of 64-bit words. In the notation of + Keccak description, S[x,y] is element x + 5*y, so if x is + interpreted as the row index and y the column index, it is stored + in column-major order. 
*/ +#define SHA3_STATE_LENGTH 25 + +/* The "width" is 1600 bits or 200 octets */ +struct sha3_state { + uint64_t a[SHA3_STATE_LENGTH]; +}; + +void sha3_permute (struct sha3_state *state); + +unsigned _sha3_update (struct sha3_state *state, unsigned block_size, uint8_t *block, + unsigned pos, size_t length, const uint8_t *data); +void _sha3_pad (struct sha3_state *state, unsigned block_size, uint8_t *block, unsigned pos); + + +typedef struct { + struct sha3_state state; + unsigned index; + uint8_t block[200]; + int mdlen, blksize; +} sha3_ctx_t; + +// OpenSSL-like interface +int sha3_init(sha3_ctx_t *c, int mdlen); // mdlen = hash output in bytes +int sha3_update(sha3_ctx_t *c, const void *data, size_t len); +int sha3_final(void *md, sha3_ctx_t *c); // digest goes to md + +// compute a sha3 hash (md) of given byte length from "in" +void *sha3(const void *in, size_t inlen, void *md, int mdlen); + +#endif /* SHA3_H */ diff --git a/meta/AUTHORS b/meta/AUTHORS index ea28dfb..2b65874 100644 --- a/meta/AUTHORS +++ b/meta/AUTHORS @@ -3,3 +3,4 @@ Main author: Contributors: * Andrey Jivsov.
crypto@brainhub.org [sha3-impl] +* Niels Möller [sha3-nettle-impl] diff --git a/meta/deps-bin b/meta/deps-bin index 0b1c3ec..56f416e 100644 --- a/meta/deps-bin +++ b/meta/deps-bin @@ -4,3 +4,4 @@ test-blake2s-ska: src/test.o src/blake2s-ska.o skalibs test-sha3-lila: src/test.o src/sha3-lila.o limb skalibs test-blake3-lila: src/test.o src/blake3-lila.o limb skalibs test-sha3: src/test.o skalibs src/sha3-impl.o src/sha3.o +test-sha3-nettle: src/test.o skalibs src/sha3-nettle-impl-le.o src/sha3-nettle-impl-memxor.o src/sha3-nettle-impl.o src/sha3-nettle.o diff --git a/project.mk b/project.mk index 5b56fc0..d38b145 100644 --- a/project.mk +++ b/project.mk @@ -1,4 +1,4 @@ # binaries: -- don't forget to set meta/deps-bin with all deps & .o files BINS = test-sha1-ska test-sha256-ska test-blake2s-ska \ test-sha3-lila test-blake3-lila \ - test-sha3 + test-sha3 test-sha3-nettle diff --git a/src/sha3-nettle-impl-le.c b/src/sha3-nettle-impl-le.c new file mode 100644 index 0000000..cee1a4a --- /dev/null +++ b/src/sha3-nettle-impl-le.c @@ -0,0 +1,30 @@ +#include "sha3-nettle-impl-le.h" + +void +_nettle_write_le64(size_t length, uint8_t *dst, + const uint64_t *src) +{ + size_t i; + size_t words; + unsigned leftover; + + words = length / 8; + leftover = length % 8; + + for (i = 0; i < words; i++, dst += 8) + LE_WRITE_UINT64(dst, src[i]); + + if (leftover) + { + uint64_t word; + + word = src[i]; + + do + { + *dst++ = word & 0xff; + word >>= 8; + } + while (--leftover); + } +} diff --git a/src/sha3-nettle-impl-memxor.c b/src/sha3-nettle-impl-memxor.c new file mode 100644 index 0000000..348b741 --- /dev/null +++ b/src/sha3-nettle-impl-memxor.c @@ -0,0 +1,390 @@ +/* memxor.c + + Copyright (C) 2010, 2014 Niels Möller + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* Implementation inspired by memcmp in glibc, contributed to the FSF + by Torbjorn Granlund. + */ + +#include <assert.h> +#include <limits.h> + +#include "sha3-nettle-impl-memxor.h" + +#define WORD_T_THRESH 16 + +/* XOR word-aligned areas. n is the number of words, not bytes. */ +static void memxor_common_alignment (word_t *dst, const word_t *src, size_t n) +{ + /* FIXME: Require n > 0? */ + /* FIXME: Unroll four times, like memcmp? Probably not worth the + effort. */ + + if (n & 1) + { + n--; + dst[n] ^= src[n]; + } + while (n >= 2) + { + n -= 2; + dst[n+1] ^= src[n+1]; + dst[n] ^= src[n]; + } +} + +/* XOR *un-aligned* src-area onto aligned dst area. n is number of + words, not bytes. Assumes we can read complete words at the start + and end of the src operand. 
*/ +static void memxor_different_alignment (word_t *dst, const unsigned char *src, size_t n) +{ + int shl, shr; + const word_t *src_word; + unsigned offset = ALIGN_OFFSET (src); + word_t s0, s1; + + assert (n > 0); + shl = CHAR_BIT * offset; + shr = CHAR_BIT * (sizeof(word_t) - offset); + + src_word = (const word_t *) ((uintptr_t) src & -sizeof(word_t)); + + /* Read top offset bytes, in native byte order. */ + READ_PARTIAL (s0, (unsigned char *) &src_word[n], offset); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s0 <<= shr; /* FIXME: Eliminate this shift? */ +#endif + + /* Do n-1 regular iterations */ + if (n & 1) + s1 = s0; + else + { + n--; + s1 = src_word[n]; + dst[n] ^= MERGE (s1, shl, s0, shr); + } + + assert (n & 1); + while (n > 2) + { + n -= 2; + s0 = src_word[n+1]; + dst[n+1] ^= MERGE(s0, shl, s1, shr); + s1 = src_word[n]; /* FIXME: Overread on last iteration */ + dst[n] ^= MERGE(s1, shl, s0, shr); + } + assert (n == 1); + /* Read low wordsize - offset bytes */ + READ_PARTIAL (s0, src, sizeof(word_t) - offset); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s0 <<= shl; /* FIXME: eliminate shift? */ +#endif /* !WORDS_BIGENDIAN */ + + dst[0] ^= MERGE(s0, shl, s1, shr); +} + +/* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45 + cycles/byte unaligned. */ + +/* XOR LEN bytes starting at SRCADDR onto DESTADDR. Result undefined + if the source overlaps with the destination. Return DESTADDR. */ +void *memxor(void *dst_in, const void *src_in, size_t n) +{ + unsigned char *dst = dst_in; + const unsigned char *src = src_in; + + if (n >= WORD_T_THRESH) + { + unsigned i; + unsigned offset; + size_t nwords; + /* There are at least some bytes to compare. No need to test + for N == 0 in this alignment loop. 
*/ + for (i = ALIGN_OFFSET(dst + n); i > 0; i--) + { + n--; + dst[n] ^= src[n]; + } + offset = ALIGN_OFFSET(src + n); + nwords = n / sizeof (word_t); + n %= sizeof (word_t); + + if (offset) + memxor_different_alignment ((word_t *) (dst+n), src+n, nwords); + else + memxor_common_alignment ((word_t *) (dst+n), + (const word_t *) (src+n), nwords); + } + while (n > 0) + { + n--; + dst[n] ^= src[n]; + } + + return dst; +} + +/* XOR word-aligned areas. n is the number of words, not bytes. */ +static void memxor3_common_alignment (word_t *dst, const word_t *a, const word_t *b, size_t n) +{ + /* FIXME: Require n > 0? */ + if (n & 1) + { + n--; + dst[n] = a[n] ^ b[n]; + } + while (n > 0) + { + n -= 2; + dst[n+1] = a[n+1] ^ b[n+1]; + dst[n] = a[n] ^ b[n]; + } +} + +static void memxor3_different_alignment_b (word_t *dst, const word_t *a, + const unsigned char *b, unsigned offset, size_t n) +{ + int shl, shr; + const word_t *b_word; + + word_t s0, s1; + + assert (n > 0); + + shl = CHAR_BIT * offset; + shr = CHAR_BIT * (sizeof(word_t) - offset); + + b_word = (const word_t *) ((uintptr_t) b & -sizeof(word_t)); + + /* Read top offset bytes, in native byte order. 
*/ + READ_PARTIAL (s0, (unsigned char *) &b_word[n], offset); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s0 <<= shr; +#endif + + if (n & 1) + s1 = s0; + else + { + n--; + s1 = b_word[n]; + dst[n] = a[n] ^ MERGE (s1, shl, s0, shr); + } + + while (n > 2) + { + n -= 2; + s0 = b_word[n+1]; + dst[n+1] = a[n+1] ^ MERGE(s0, shl, s1, shr); + s1 = b_word[n]; + dst[n] = a[n] ^ MERGE(s1, shl, s0, shr); + } + assert (n == 1); + /* Read low wordsize - offset bytes */ + READ_PARTIAL (s0, b, sizeof(word_t) - offset); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s0 <<= shl; +#endif /* !WORDS_BIGENDIAN */ + + dst[0] = a[0] ^ MERGE(s0, shl, s1, shr); +} + +static void memxor3_different_alignment_ab (word_t *dst, + const unsigned char *a, const unsigned char *b, unsigned offset, size_t n) +{ + int shl, shr; + const word_t *a_word; + const word_t *b_word; + + word_t s0, s1, t; + + assert (n > 0); + + shl = CHAR_BIT * offset; + shr = CHAR_BIT * (sizeof(word_t) - offset); + + a_word = (const word_t *) ((uintptr_t) a & -sizeof(word_t)); + b_word = (const word_t *) ((uintptr_t) b & -sizeof(word_t)); + + /* Read top offset bytes, in native byte order. 
*/ + READ_PARTIAL (s0, (unsigned char *) &a_word[n], offset); + READ_PARTIAL (t, (unsigned char *) &b_word[n], offset); + s0 ^= t; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s0 <<= shr; +#endif + + if (n & 1) + s1 = s0; + else + { + n--; + s1 = a_word[n] ^ b_word[n]; + dst[n] = MERGE (s1, shl, s0, shr); + } + + while (n > 2) + { + n -= 2; + s0 = a_word[n+1] ^ b_word[n+1]; + dst[n+1] = MERGE(s0, shl, s1, shr); + s1 = a_word[n] ^ b_word[n]; + dst[n] = MERGE(s1, shl, s0, shr); + } + assert (n == 1); + /* Read low wordsize - offset bytes */ + READ_PARTIAL (s0, a, sizeof(word_t) - offset); + READ_PARTIAL (t, b, sizeof(word_t) - offset); + s0 ^= t; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s0 <<= shl; +#endif /* !WORDS_BIGENDIAN */ + + dst[0] = MERGE(s0, shl, s1, shr); +} + +static void memxor3_different_alignment_all (word_t *dst, + const unsigned char *a, const unsigned char *b, + unsigned a_offset, unsigned b_offset, size_t n) +{ + int al, ar, bl, br; + const word_t *a_word; + const word_t *b_word; + + word_t a0, a1, b0, b1; + + al = CHAR_BIT * a_offset; + ar = CHAR_BIT * (sizeof(word_t) - a_offset); + bl = CHAR_BIT * b_offset; + br = CHAR_BIT * (sizeof(word_t) - b_offset); + + a_word = (const word_t *) ((uintptr_t) a & -sizeof(word_t)); + b_word = (const word_t *) ((uintptr_t) b & -sizeof(word_t)); + + /* Read top offset bytes, in native byte order. 
*/ + READ_PARTIAL (a0, (unsigned char *) &a_word[n], a_offset); + READ_PARTIAL (b0, (unsigned char *) &b_word[n], b_offset); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + a0 <<= ar; + b0 <<= br; +#endif + + if (n & 1) + { + a1 = a0; b1 = b0; + } + else + { + n--; + a1 = a_word[n]; + b1 = b_word[n]; + + dst[n] = MERGE (a1, al, a0, ar) ^ MERGE (b1, bl, b0, br); + } + while (n > 2) + { + n -= 2; + a0 = a_word[n+1]; b0 = b_word[n+1]; + dst[n+1] = MERGE(a0, al, a1, ar) ^ MERGE(b0, bl, b1, br); + a1 = a_word[n]; b1 = b_word[n]; + dst[n] = MERGE(a1, al, a0, ar) ^ MERGE(b1, bl, b0, br); + } + assert (n == 1); + /* Read low wordsize - offset bytes */ + READ_PARTIAL (a0, a, sizeof(word_t) - a_offset); + READ_PARTIAL (b0, b, sizeof(word_t) - b_offset); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + a0 <<= al; + b0 <<= bl; +#endif /* !WORDS_BIGENDIAN */ + + dst[0] = MERGE(a0, al, a1, ar) ^ MERGE(b0, bl, b1, br); +} + +/* Current implementation processes data in descending order, to + support overlapping operation with one of the sources overlapping + the start of the destination area. This feature is used only + internally by cbc decrypt, and it is not advertised or documented + to nettle users. 
*/ +void *memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n) +{ + unsigned char *dst = dst_in; + const unsigned char *a = a_in; + const unsigned char *b = b_in; + + if (n >= WORD_T_THRESH) + { + unsigned i; + unsigned a_offset; + unsigned b_offset; + size_t nwords; + + for (i = ALIGN_OFFSET(dst + n); i > 0; i--) + { + n--; + dst[n] = a[n] ^ b[n]; + } + + a_offset = ALIGN_OFFSET(a + n); + b_offset = ALIGN_OFFSET(b + n); + + nwords = n / sizeof (word_t); + n %= sizeof (word_t); + + if (a_offset == b_offset) + { + if (!a_offset) + memxor3_common_alignment((word_t *) (dst + n), + (const word_t *) (a + n), + (const word_t *) (b + n), nwords); + else + memxor3_different_alignment_ab((word_t *) (dst + n), + a + n, b + n, a_offset, + nwords); + } + else if (!a_offset) + memxor3_different_alignment_b((word_t *) (dst + n), + (const word_t *) (a + n), b + n, + b_offset, nwords); + else if (!b_offset) + memxor3_different_alignment_b((word_t *) (dst + n), + (const word_t *) (b + n), a + n, + a_offset, nwords); + else + memxor3_different_alignment_all((word_t *) (dst + n), a + n, b + n, + a_offset, b_offset, nwords); + + } + while (n-- > 0) + dst[n] = a[n] ^ b[n]; + + return dst; +} diff --git a/src/sha3-nettle-impl.c b/src/sha3-nettle-impl.c new file mode 100644 index 0000000..f5e3194 --- /dev/null +++ b/src/sha3-nettle-impl.c @@ -0,0 +1,238 @@ + +#include <assert.h> +#include <string.h> +#include "sha3-nettle-impl.h" +#include "sha3-nettle-impl-le.h" +#include "sha3-nettle-impl-memxor.h" + +int sha3_init(sha3_ctx_t *c, int mdlen) +{ + memset(c, 0, offsetof (sha3_ctx_t, block)); + c->mdlen = mdlen; + c->blksize = 200 - 2 * mdlen; + return 1; +} + +int sha3_update(sha3_ctx_t *c, const void *data, size_t len) +{ + c->index = _sha3_update (&c->state, c->blksize, c->block, c->index, len, data); + return 1; +} + +int sha3_final(void *md, sha3_ctx_t *c) +{ + _sha3_pad (&c->state, c->blksize, c->block, c->index); + _nettle_write_le64 (c->mdlen, md, c->state.a); + 
return 1; +} + +void *sha3(const void *in, size_t inlen, void *md, int mdlen) +{ + sha3_ctx_t sha3; + + sha3_init(&sha3, mdlen); + sha3_update(&sha3, in, inlen); + sha3_final(md, &sha3); + + return md; +} + +static void +sha3_absorb (struct sha3_state *state, unsigned length, const uint8_t *data) +{ + assert ( (length & 7) == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + { + uint64_t *p; + for (p = state->a; length > 0; p++, length -= 8, data += 8) + *p ^= LE_READ_UINT64 (data); + } +#else /* !WORDS_BIGENDIAN */ + memxor (state->a, data, length); +#endif + + sha3_permute (state); +} + +unsigned +_sha3_update (struct sha3_state *state, + unsigned block_size, uint8_t *block, + unsigned pos, + size_t length, const uint8_t *data) +{ + if (pos > 0) + { + unsigned left = block_size - pos; + if (length < left) + { + memcpy (block + pos, data, length); + return pos + length; + } + else + { + memcpy (block + pos, data, left); + data += left; + length -= left; + sha3_absorb (state, block_size, block); + } + } + for (; length >= block_size; length -= block_size, data += block_size) + sha3_absorb (state, block_size, data); + + memcpy (block, data, length); + return length; +} + +void +_sha3_pad (struct sha3_state *state, + unsigned block_size, uint8_t *block, unsigned pos) +{ + assert (pos < block_size); + block[pos++] = 6; + + memset (block + pos, 0, block_size - pos); + block[block_size - 1] |= 0x80; + + sha3_absorb (state, block_size, block); +} + +void +sha3_permute (struct sha3_state *state) +{ + static const uint64_t rc[SHA3_ROUNDS] = { + 0x0000000000000001ULL, 0X0000000000008082ULL, + 0X800000000000808AULL, 0X8000000080008000ULL, + 0X000000000000808BULL, 0X0000000080000001ULL, + 0X8000000080008081ULL, 0X8000000000008009ULL, + 0X000000000000008AULL, 0X0000000000000088ULL, + 0X0000000080008009ULL, 0X000000008000000AULL, + 0X000000008000808BULL, 0X800000000000008BULL, + 0X8000000000008089ULL, 0X8000000000008003ULL, + 0X8000000000008002ULL, 0X8000000000000080ULL, + 
0X000000000000800AULL, 0X800000008000000AULL, + 0X8000000080008081ULL, 0X8000000000008080ULL, + 0X0000000080000001ULL, 0X8000000080008008ULL, + }; + + /* Original permutation: + + 0,10,20, 5,15, + 16, 1,11,21, 6, + 7,17, 2,12,22, + 23, 8,18, 3,13, + 14,24, 9,19, 4 + + Rotation counts: + + 0, 1, 62, 28, 27, + 36, 44, 6, 55, 20, + 3, 10, 43, 25, 39, + 41, 45, 15, 21, 8, + 18, 2, 61, 56, 14, + */ + + /* In-place implementation. Permutation done as a long sequence of + 25 moves "following" the permutation. + + T <-- 1 + 1 <-- 6 + 6 <-- 9 + 9 <-- 22 + 22 <-- 14 + 14 <-- 20 + 20 <-- 2 + 2 <-- 12 + 12 <-- 13 + 13 <-- 19 + 19 <-- 23 + 23 <-- 15 + 15 <-- 4 + 4 <-- 24 + 24 <-- 21 + 21 <-- 8 + 8 <-- 16 + 16 <-- 5 + 5 <-- 3 + 3 <-- 18 + 18 <-- 17 + 17 <-- 11 + 11 <-- 7 + 7 <-- 10 + 10 <-- T + +*/ + uint64_t C[5], D[5], T, X; + unsigned i, y; + +#define A state->a + + C[0] = A[0] ^ A[5+0] ^ A[10+0] ^ A[15+0] ^ A[20+0]; + C[1] = A[1] ^ A[5+1] ^ A[10+1] ^ A[15+1] ^ A[20+1]; + C[2] = A[2] ^ A[5+2] ^ A[10+2] ^ A[15+2] ^ A[20+2]; + C[3] = A[3] ^ A[5+3] ^ A[10+3] ^ A[15+3] ^ A[20+3]; + C[4] = A[4] ^ A[5+4] ^ A[10+4] ^ A[15+4] ^ A[20+4]; + + for (i = 0; i < SHA3_ROUNDS; i++) + { + D[0] = C[4] ^ ROTL64(1, C[1]); + D[1] = C[0] ^ ROTL64(1, C[2]); + D[2] = C[1] ^ ROTL64(1, C[3]); + D[3] = C[2] ^ ROTL64(1, C[4]); + D[4] = C[3] ^ ROTL64(1, C[0]); + + A[0] ^= D[0]; + X = A[ 1] ^ D[1]; T = ROTL64(1, X); + X = A[ 6] ^ D[1]; A[ 1] = ROTL64 (44, X); + X = A[ 9] ^ D[4]; A[ 6] = ROTL64 (20, X); + X = A[22] ^ D[2]; A[ 9] = ROTL64 (61, X); + X = A[14] ^ D[4]; A[22] = ROTL64 (39, X); + X = A[20] ^ D[0]; A[14] = ROTL64 (18, X); + X = A[ 2] ^ D[2]; A[20] = ROTL64 (62, X); + X = A[12] ^ D[2]; A[ 2] = ROTL64 (43, X); + X = A[13] ^ D[3]; A[12] = ROTL64 (25, X); + X = A[19] ^ D[4]; A[13] = ROTL64 ( 8, X); + X = A[23] ^ D[3]; A[19] = ROTL64 (56, X); + X = A[15] ^ D[0]; A[23] = ROTL64 (41, X); + X = A[ 4] ^ D[4]; A[15] = ROTL64 (27, X); + X = A[24] ^ D[4]; A[ 4] = ROTL64 (14, X); + X = A[21] ^ D[1]; A[24] = 
ROTL64 ( 2, X); + X = A[ 8] ^ D[3]; A[21] = ROTL64 (55, X); /* row 4 done */ + X = A[16] ^ D[1]; A[ 8] = ROTL64 (45, X); + X = A[ 5] ^ D[0]; A[16] = ROTL64 (36, X); + X = A[ 3] ^ D[3]; A[ 5] = ROTL64 (28, X); + X = A[18] ^ D[3]; A[ 3] = ROTL64 (21, X); /* row 0 done */ + X = A[17] ^ D[2]; A[18] = ROTL64 (15, X); + X = A[11] ^ D[1]; A[17] = ROTL64 (10, X); /* row 3 done */ + X = A[ 7] ^ D[2]; A[11] = ROTL64 ( 6, X); /* row 1 done */ + X = A[10] ^ D[0]; A[ 7] = ROTL64 ( 3, X); + A[10] = T; /* row 2 done */ + + D[0] = ~A[1] & A[2]; + D[1] = ~A[2] & A[3]; + D[2] = ~A[3] & A[4]; + D[3] = ~A[4] & A[0]; + D[4] = ~A[0] & A[1]; + + A[0] ^= D[0] ^ rc[i]; C[0] = A[0]; + A[1] ^= D[1]; C[1] = A[1]; + A[2] ^= D[2]; C[2] = A[2]; + A[3] ^= D[3]; C[3] = A[3]; + A[4] ^= D[4]; C[4] = A[4]; + + for (y = 5; y < 25; y+= 5) + { + D[0] = ~A[y+1] & A[y+2]; + D[1] = ~A[y+2] & A[y+3]; + D[2] = ~A[y+3] & A[y+4]; + D[3] = ~A[y+4] & A[y+0]; + D[4] = ~A[y+0] & A[y+1]; + + A[y+0] ^= D[0]; C[0] ^= A[y+0]; + A[y+1] ^= D[1]; C[1] ^= A[y+1]; + A[y+2] ^= D[2]; C[2] ^= A[y+2]; + A[y+3] ^= D[3]; C[3] ^= A[y+3]; + A[y+4] ^= D[4]; C[4] ^= A[y+4]; + } + } +#undef A +} diff --git a/src/sha3-nettle.c b/src/sha3-nettle.c new file mode 100644 index 0000000..e8eebbd --- /dev/null +++ b/src/sha3-nettle.c @@ -0,0 +1,23 @@ +#include "sha3-nettle-impl.h" + +sha3_ctx_t ctx; + +void init(void) +{ + sha3_init(&ctx, 32); +} + +void update(const char *msg, size_t size) +{ + sha3_update(&ctx, msg, size); +} + +void final(unsigned char *md) +{ + sha3_final(md, &ctx); +} + +int hashlen(void) +{ + return 32; +}