Welcome to little lamb

Code » test-hashes » commit 8ddee0a

Add SHA3 implementation from nettle 3.4

author Olivier Brunel
2023-01-25 12:17:39 UTC
committer Olivier Brunel
2023-01-25 12:20:12 UTC
parent 3a4cd5acea4c1c07459e32cec46ea20ab240ef9f

Add SHA3 implementation from nettle 3.4

include/sha3-nettle-impl-le.h +54 -0
include/sha3-nettle-impl-memxor.h +49 -0
include/sha3-nettle-impl.h +42 -0
meta/AUTHORS +1 -0
meta/deps-bin +1 -0
project.mk +1 -1
src/sha3-nettle-impl-le.c +30 -0
src/sha3-nettle-impl-memxor.c +390 -0
src/sha3-nettle-impl.c +238 -0
src/sha3-nettle.c +23 -0

diff --git a/include/sha3-nettle-impl-le.h b/include/sha3-nettle-impl-le.h
new file mode 100644
index 0000000..6f3be72
--- /dev/null
+++ b/include/sha3-nettle-impl-le.h
@@ -0,0 +1,54 @@
+#ifndef LE_H
+#define LE_H
+
+#include <stdint.h>
+#include <string.h> /* size_t */
+
+/* Little-endian load/store helpers and rotate-left macros, imported
+   from GNU nettle. All accesses are byte-oriented, so they are
+   correct regardless of host endianness and alignment. */
+/* The masking of the right shift is needed to allow n == 0 (using
+   just 32 - n and 64 - n results in undefined behaviour). Most uses
+   of these macros use a constant and non-zero rotation count. */
+#define ROTL32(n,x) (((x)<<(n)) | ((x)>>((-(n)&31))))
+
+/* Assemble a uint32_t from 4 bytes, least-significant byte first. */
+#define LE_READ_UINT32(p)                       \
+	  (  (((uint32_t) (p)[3]) << 24)            \
+	   | (((uint32_t) (p)[2]) << 16)            \
+	   | (((uint32_t) (p)[1]) << 8)             \
+	   |  ((uint32_t) (p)[0]))
+
+/* Store a uint32_t as 4 bytes, least-significant byte first. */
+#define LE_WRITE_UINT32(p, i)                   \
+	do {                                        \
+		(p)[3] = ((i) >> 24) & 0xff;            \
+		(p)[2] = ((i) >> 16) & 0xff;            \
+		(p)[1] = ((i) >> 8) & 0xff;             \
+		(p)[0] = (i) & 0xff;                    \
+	} while (0)
+
+
+/* 64-bit rotate-left; the (-(n))&63 mask keeps n == 0 well-defined. */
+#define ROTL64(n,x) (((x)<<(n)) | ((x)>>((-(n))&63)))
+
+/* Assemble a uint64_t from 8 bytes, least-significant byte first. */
+#define LE_READ_UINT64(p)                       \
+	  (  (((uint64_t) (p)[7]) << 56)            \
+	   | (((uint64_t) (p)[6]) << 48)            \
+	   | (((uint64_t) (p)[5]) << 40)            \
+	   | (((uint64_t) (p)[4]) << 32)            \
+	   | (((uint64_t) (p)[3]) << 24)            \
+	   | (((uint64_t) (p)[2]) << 16)            \
+	   | (((uint64_t) (p)[1]) << 8)             \
+	   |  ((uint64_t) (p)[0]))
+
+/* Store a uint64_t as 8 bytes, least-significant byte first. */
+#define LE_WRITE_UINT64(p, i)                   \
+	do {                                        \
+		(p)[7] = ((i) >> 56) & 0xff;            \
+		(p)[6] = ((i) >> 48) & 0xff;            \
+		(p)[5] = ((i) >> 40) & 0xff;            \
+		(p)[4] = ((i) >> 32) & 0xff;            \
+		(p)[3] = ((i) >> 24) & 0xff;            \
+		(p)[2] = ((i) >> 16) & 0xff;            \
+		(p)[1] = ((i) >> 8) & 0xff;             \
+		(p)[0] = (i) & 0xff;                    \
+	} while (0)
+
+
+/* Write `length` bytes of the word array `src` to `dst` in
+   little-endian order; a trailing partial word is emitted one byte
+   at a time. */
+void _nettle_write_le64(size_t length, uint8_t *dst, const uint64_t *src);
+
+#endif /* LE_H */
diff --git a/include/sha3-nettle-impl-memxor.h b/include/sha3-nettle-impl-memxor.h
new file mode 100644
index 0000000..6f36a20
--- /dev/null
+++ b/include/sha3-nettle-impl-memxor.h
@@ -0,0 +1,49 @@
+#ifndef MEMXOR_H
+#define MEMXOR_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* dst ^= src over n bytes; returns dst. Regions must not overlap. */
+void *memxor(void *dst, const void *src, size_t n);
+/* dst = a ^ b over n bytes; returns dst. */
+void *memxor3(void *dst, const void *a, const void *b, size_t n);
+
+
+/* memxor-internal */
+
+/* The word_t type is intended to be the native word size. */
+#if defined(__x86_64__) || defined(__arch64__)
+/* NOTE(review): "__arch64__" looks like a typo for "__aarch64__"
+   (carried over from nettle 3.4); on arm64 this silently falls
+   through to the unsigned long branch -- confirm against upstream. */
+/* Including on M$ windows, where unsigned long is only 32 bits */
+typedef uint64_t word_t;
+#else
+typedef unsigned long int word_t;
+#endif
+
+/* Byte offset of pointer p past the previous word_t boundary. */
+#define ALIGN_OFFSET(p) ((uintptr_t) (p) % sizeof(word_t))
+
+/* Merge two adjacent misaligned words into one aligned word; the
+   shift directions depend on the host byte order. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define MERGE(w0, sh_1, w1, sh_2) \
+  (((w0) >> (sh_1)) | ((w1) << (sh_2)))
+#else
+#define MERGE(w0, sh_1, w1, sh_2) \
+  (((w0) << (sh_1)) | ((w1) >> (sh_2)))
+#endif
+
+/* Read n bytes (1 <= n <= sizeof(word_t)) from p into word r, in
+   native byte order. NOTE(review): expands to CHAR_BIT but this
+   header does not include <limits.h>; users must provide it. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define READ_PARTIAL(r,p,n) do {			\
+	word_t _rp_x;					\
+	unsigned _rp_i;					\
+	for (_rp_i = (n), _rp_x = (p)[--_rp_i]; _rp_i > 0;)	\
+	_rp_x = (_rp_x << CHAR_BIT) | (p)[--_rp_i];	\
+	(r) = _rp_x;					\
+} while (0)
+#else
+#define READ_PARTIAL(r,p,n) do {			\
+	word_t _rp_x;						\
+	unsigned _rp_i;						\
+	for (_rp_x = (p)[0], _rp_i = 1; _rp_i < (n); _rp_i++)	\
+	_rp_x = (_rp_x << CHAR_BIT) | (p)[_rp_i];			\
+	(r) = _rp_x;						\
+} while (0)
+#endif
+
+#endif /* MEMXOR_H */
diff --git a/include/sha3-nettle-impl.h b/include/sha3-nettle-impl.h
new file mode 100644
index 0000000..f883833
--- /dev/null
+++ b/include/sha3-nettle-impl.h
@@ -0,0 +1,42 @@
+#ifndef SHA3_H
+#define SHA3_H
+
+#include <stdint.h>
+#include <stddef.h> /* For size_t */
+
+/* Number of rounds of the Keccak-f[1600] permutation. */
+#define SHA3_ROUNDS 24
+
+/* The sha3 state is a 5x5 matrix of 64-bit words. In the notation of
+   Keccak description, S[x,y] is element x + 5*y, so if x is
+   interpreted as the row index and y the column index, it is stored
+   in column-major order. */
+#define SHA3_STATE_LENGTH 25
+
+/* The "width" is 1600 bits or 200 octets */
+struct sha3_state {
+	uint64_t a[SHA3_STATE_LENGTH];
+};
+
+/* Apply the full 24-round Keccak-f[1600] permutation in place. */
+void sha3_permute (struct sha3_state *state);
+
+/* Absorb `length` bytes of `data` into `state`; `block`/`pos` carry
+   a partial block between calls. Returns the new buffered count. */
+unsigned _sha3_update (struct sha3_state *state, unsigned block_size, uint8_t *block,
+		unsigned pos, size_t length, const uint8_t *data);
+/* Append SHA-3 padding to the buffered block and absorb it. */
+void _sha3_pad (struct sha3_state *state, unsigned block_size, uint8_t *block, unsigned pos);
+
+
+typedef struct {
+	struct sha3_state state;
+	unsigned index;     /* bytes currently buffered in block[] */
+	uint8_t block[200]; /* one rate-sized block (200 octets max) */
+	int mdlen, blksize; /* digest length and rate, in bytes */
+} sha3_ctx_t;
+
+// OpenSSL-like interface
+int sha3_init(sha3_ctx_t *c, int mdlen);    // mdlen = hash output in bytes
+int sha3_update(sha3_ctx_t *c, const void *data, size_t len);
+int sha3_final(void *md, sha3_ctx_t *c);    // digest goes to md
+
+// compute a sha3 hash (md) of given byte length from "in"
+void *sha3(const void *in, size_t inlen, void *md, int mdlen);
+
+#endif /* SHA3_H */
diff --git a/meta/AUTHORS b/meta/AUTHORS
index ea28dfb..2b65874 100644
--- a/meta/AUTHORS
+++ b/meta/AUTHORS
@@ -3,3 +3,4 @@ Main author:
 
 Contributors:
 * Andrey Jivsov. crypto@brainhub.org [sha3-impl]
+* Niels Möller [sha3-nettle-impl]
diff --git a/meta/deps-bin b/meta/deps-bin
index 0b1c3ec..56f416e 100644
--- a/meta/deps-bin
+++ b/meta/deps-bin
@@ -4,3 +4,4 @@ test-blake2s-ska: src/test.o src/blake2s-ska.o skalibs
 test-sha3-lila: src/test.o src/sha3-lila.o limb skalibs
 test-blake3-lila: src/test.o src/blake3-lila.o limb skalibs
 test-sha3: src/test.o skalibs src/sha3-impl.o src/sha3.o
+test-sha3-nettle: src/test.o skalibs src/sha3-nettle-impl-le.o src/sha3-nettle-impl-memxor.o src/sha3-nettle-impl.o src/sha3-nettle.o
diff --git a/project.mk b/project.mk
index 5b56fc0..d38b145 100644
--- a/project.mk
+++ b/project.mk
@@ -1,4 +1,4 @@
 # binaries: -- don't forget to set meta/deps-bin with all deps & .o files
 BINS = test-sha1-ska test-sha256-ska test-blake2s-ska \
 	   test-sha3-lila test-blake3-lila \
-	   test-sha3
+	   test-sha3 test-sha3-nettle
diff --git a/src/sha3-nettle-impl-le.c b/src/sha3-nettle-impl-le.c
new file mode 100644
index 0000000..cee1a4a
--- /dev/null
+++ b/src/sha3-nettle-impl-le.c
@@ -0,0 +1,30 @@
+#include "sha3-nettle-impl-le.h"
+
+/* Write `length` bytes of src[] to dst in little-endian order:
+   whole 64-bit words first, then the trailing partial word (if any)
+   one byte at a time, least-significant byte first. */
+void
+_nettle_write_le64(size_t length, uint8_t *dst,
+                   const uint64_t *src)
+{
+	size_t i;
+	size_t words;
+	unsigned leftover;
+
+	words = length / 8;
+	leftover = length % 8;
+
+	for (i = 0; i < words; i++, dst += 8)
+		LE_WRITE_UINT64(dst, src[i]);
+
+	if (leftover)
+	{
+		uint64_t word;
+
+		word = src[i];
+
+		/* Emit the low `leftover` bytes of the next word. */
+		do
+		{
+			*dst++ = word & 0xff;
+			word >>= 8;
+		}
+		while (--leftover);
+	}
+}
diff --git a/src/sha3-nettle-impl-memxor.c b/src/sha3-nettle-impl-memxor.c
new file mode 100644
index 0000000..348b741
--- /dev/null
+++ b/src/sha3-nettle-impl-memxor.c
@@ -0,0 +1,390 @@
+/* memxor.c
+
+   Copyright (C) 2010, 2014 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+/* Implementation inspired by memcmp in glibc, contributed to the FSF
+   by Torbjorn Granlund.
+ */
+
+#include <assert.h>
+#include <limits.h>
+
+#include "sha3-nettle-impl-memxor.h"
+
+/* Below this many bytes the word-wise paths are not worth taking. */
+#define WORD_T_THRESH 16
+
+/* XOR word-aligned areas. n is the number of words, not bytes. */
+static void memxor_common_alignment (word_t *dst, const word_t *src, size_t n)
+{
+	/* FIXME: Require n > 0? */
+	/* FIXME: Unroll four times, like memcmp? Probably not worth the
+	   effort. */
+
+	/* Peel one word if n is odd; then XOR two words per iteration,
+	   working from the top of the area downwards. */
+	if (n & 1)
+	{
+		n--;
+		dst[n] ^= src[n];
+	}
+	while (n >= 2)
+	{
+		n -= 2;
+		dst[n+1] ^= src[n+1];
+		dst[n] ^= src[n];
+	}
+}
+
+/* XOR *un-aligned* src-area onto aligned dst area. n is number of
+   words, not bytes. Assumes we can read complete words at the start
+   and end of the src operand. */
+static void memxor_different_alignment (word_t *dst, const unsigned char *src, size_t n)
+{
+	int shl, shr;
+	const word_t *src_word;
+	unsigned offset = ALIGN_OFFSET (src);
+	word_t s0, s1;
+
+	assert (n > 0);
+	/* Shift counts (in bits) used by MERGE to re-align src words. */
+	shl = CHAR_BIT * offset;
+	shr = CHAR_BIT * (sizeof(word_t) - offset);
+
+	/* Round src down to the enclosing word boundary. */
+	src_word = (const word_t *) ((uintptr_t) src & -sizeof(word_t));
+
+	/* Read top offset bytes, in native byte order. */
+	READ_PARTIAL (s0, (unsigned char *) &src_word[n], offset);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	s0 <<= shr; /* FIXME: Eliminate this shift? */
+#endif
+
+	/* Do n-1 regular iterations */
+	if (n & 1)
+		s1 = s0;
+	else
+	{
+		n--;
+		s1 = src_word[n];
+		dst[n] ^= MERGE (s1, shl, s0, shr);
+	}
+
+	/* Main loop: top-down, two words per iteration, carrying one
+	   previously-read src word (s1) across iterations. */
+	assert (n & 1);
+	while (n > 2)
+	{
+		n -= 2;
+		s0 = src_word[n+1];
+		dst[n+1] ^= MERGE(s0, shl, s1, shr);
+		s1 = src_word[n]; /* FIXME: Overread on last iteration */
+		dst[n] ^= MERGE(s1, shl, s0, shr);
+	}
+	assert (n == 1);
+	/* Read low wordsize - offset bytes */
+	READ_PARTIAL (s0, src, sizeof(word_t) - offset);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	s0 <<= shl; /* FIXME: eliminate shift? */
+#endif /* !WORDS_BIGENDIAN */
+
+	dst[0] ^= MERGE(s0, shl, s1, shr);
+}
+
+/* Performance, Intel SU1400 (x86_64): 0.25 cycles/byte aligned, 0.45
+   cycles/byte unaligned. */
+
+/* XOR LEN bytes starting at SRCADDR onto DESTADDR. Result undefined
+   if the source overlaps with the destination. Return DESTADDR. */
+void *memxor(void *dst_in, const void *src_in, size_t n)
+{
+	unsigned char *dst = dst_in;
+	const unsigned char *src = src_in;
+
+	if (n >= WORD_T_THRESH)
+	{
+		unsigned i;
+		unsigned offset;
+		size_t nwords;
+		/* There are at least some bytes to compare.  No need to test
+		   for N == 0 in this alignment loop.  */
+		/* Top-down: peel bytes until dst+n hits a word boundary,
+		   then dispatch the word-sized middle on src alignment. */
+		for (i = ALIGN_OFFSET(dst + n); i > 0; i--)
+		{
+			n--;
+			dst[n] ^= src[n];
+		}
+		offset = ALIGN_OFFSET(src + n);
+		nwords = n / sizeof (word_t);
+		n %= sizeof (word_t);
+
+		if (offset)
+			memxor_different_alignment ((word_t *) (dst+n), src+n, nwords);
+		else
+			memxor_common_alignment ((word_t *) (dst+n),
+					(const word_t *) (src+n), nwords);
+	}
+	/* Remaining (or, for short inputs, all) bytes one at a time. */
+	while (n > 0)
+	{
+		n--;
+		dst[n] ^= src[n];
+	}
+
+	return dst;
+}
+
+/* XOR word-aligned areas. n is the number of words, not bytes. */
+static void memxor3_common_alignment (word_t *dst, const word_t *a, const word_t *b, size_t n)
+{
+	/* FIXME: Require n > 0? */
+	/* Peel one word if n is odd; n is even when the loop is entered,
+	   so the two-words-per-iteration step cannot underflow. */
+	if (n & 1)
+	{
+		n--;
+		dst[n] = a[n] ^ b[n];
+	}
+	while (n > 0)
+	{
+		n -= 2;
+		dst[n+1] = a[n+1] ^ b[n+1];
+		dst[n] = a[n] ^ b[n];
+	}
+}
+
+/* dst = a ^ b where dst and a are word-aligned but b is misaligned
+   by `offset` bytes. n is the number of words. Works top-down,
+   merging pairs of b words with MERGE to re-align them. */
+static void memxor3_different_alignment_b (word_t *dst, const word_t *a,
+		const unsigned char *b, unsigned offset, size_t n)
+{
+	int shl, shr;
+	const word_t *b_word;
+
+	word_t s0, s1;
+
+	assert (n > 0);
+
+	shl = CHAR_BIT * offset;
+	shr = CHAR_BIT * (sizeof(word_t) - offset);
+
+	/* Round b down to the enclosing word boundary. */
+	b_word = (const word_t *) ((uintptr_t) b & -sizeof(word_t));
+
+	/* Read top offset bytes, in native byte order. */
+	READ_PARTIAL (s0, (unsigned char *) &b_word[n], offset);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	s0 <<= shr;
+#endif
+
+	/* Make n odd before the two-per-iteration main loop. */
+	if (n & 1)
+		s1 = s0;
+	else
+	{
+		n--;
+		s1 = b_word[n];
+		dst[n] = a[n] ^ MERGE (s1, shl, s0, shr);
+	}
+
+	while (n > 2)
+	{
+		n -= 2;
+		s0 = b_word[n+1];
+		dst[n+1] = a[n+1] ^ MERGE(s0, shl, s1, shr);
+		s1 = b_word[n];
+		dst[n] = a[n] ^ MERGE(s1, shl, s0, shr);
+	}
+	assert (n == 1);
+	/* Read low wordsize - offset bytes */
+	READ_PARTIAL (s0, b, sizeof(word_t) - offset);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	s0 <<= shl;
+#endif /* !WORDS_BIGENDIAN */
+
+	dst[0] = a[0] ^ MERGE(s0, shl, s1, shr);
+}
+
+/* dst = a ^ b where dst is word-aligned and a, b share the same
+   misalignment `offset`. Since the offsets match, each a^b word pair
+   can be XORed first and re-aligned with a single MERGE. */
+static void memxor3_different_alignment_ab (word_t *dst,
+		const unsigned char *a, const unsigned char *b, unsigned offset, size_t n)
+{
+	int shl, shr;
+	const word_t *a_word;
+	const word_t *b_word;
+
+	word_t s0, s1, t;
+
+	assert (n > 0);
+
+	shl = CHAR_BIT * offset;
+	shr = CHAR_BIT * (sizeof(word_t) - offset);
+
+	a_word = (const word_t *) ((uintptr_t) a & -sizeof(word_t));
+	b_word = (const word_t *) ((uintptr_t) b & -sizeof(word_t));
+
+	/* Read top offset bytes, in native byte order. */
+	READ_PARTIAL (s0, (unsigned char *) &a_word[n], offset);
+	READ_PARTIAL (t,  (unsigned char *) &b_word[n], offset);
+	s0 ^= t;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	s0 <<= shr;
+#endif
+
+	/* Make n odd before the two-per-iteration main loop. */
+	if (n & 1)
+		s1 = s0;
+	else
+	{
+		n--;
+		s1 = a_word[n] ^ b_word[n];
+		dst[n] = MERGE (s1, shl, s0, shr);
+	}
+
+	while (n > 2)
+	{
+		n -= 2;
+		s0 = a_word[n+1] ^ b_word[n+1];
+		dst[n+1] = MERGE(s0, shl, s1, shr);
+		s1 = a_word[n] ^ b_word[n];
+		dst[n] = MERGE(s1, shl, s0, shr);
+	}
+	assert (n == 1);
+	/* Read low wordsize - offset bytes */
+	READ_PARTIAL (s0, a, sizeof(word_t) - offset);
+	READ_PARTIAL (t,  b, sizeof(word_t) - offset);
+	s0 ^= t;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	s0 <<= shl;
+#endif /* !WORDS_BIGENDIAN */
+
+	dst[0] = MERGE(s0, shl, s1, shr);
+}
+
+/* dst = a ^ b where dst is word-aligned and a, b have two different
+   non-zero misalignments; each operand is re-aligned independently
+   with its own MERGE shifts before the XOR. */
+static void memxor3_different_alignment_all (word_t *dst,
+		const unsigned char *a, const unsigned char *b,
+		unsigned a_offset, unsigned b_offset, size_t n)
+{
+	int al, ar, bl, br;
+	const word_t *a_word;
+	const word_t *b_word;
+
+	word_t a0, a1, b0, b1;
+
+	al = CHAR_BIT * a_offset;
+	ar = CHAR_BIT * (sizeof(word_t) - a_offset);
+	bl = CHAR_BIT * b_offset;
+	br = CHAR_BIT * (sizeof(word_t) - b_offset);
+
+	a_word = (const word_t *) ((uintptr_t) a & -sizeof(word_t));
+	b_word = (const word_t *) ((uintptr_t) b & -sizeof(word_t));
+
+	/* Read top offset bytes, in native byte order. */
+	READ_PARTIAL (a0, (unsigned char *) &a_word[n], a_offset);
+	READ_PARTIAL (b0, (unsigned char *) &b_word[n], b_offset);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	a0 <<= ar;
+	b0 <<= br;
+#endif
+
+	/* Make n odd before the two-per-iteration main loop. */
+	if (n & 1)
+	{
+		a1 = a0; b1 = b0;
+	}
+	else
+	{
+		n--;
+		a1 = a_word[n];
+		b1 = b_word[n];
+
+		dst[n] = MERGE (a1, al, a0, ar) ^ MERGE (b1, bl, b0, br);
+	}
+	while (n > 2)
+	{
+		n -= 2;
+		a0 = a_word[n+1]; b0 = b_word[n+1];
+		dst[n+1] = MERGE(a0, al, a1, ar) ^ MERGE(b0, bl, b1, br);
+		a1 = a_word[n]; b1 = b_word[n];
+		dst[n] = MERGE(a1, al, a0, ar) ^ MERGE(b1, bl, b0, br);
+	}
+	assert (n == 1);
+	/* Read low wordsize - offset bytes */
+	READ_PARTIAL (a0, a, sizeof(word_t) - a_offset);
+	READ_PARTIAL (b0, b, sizeof(word_t) - b_offset);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	a0 <<= al;
+	b0 <<= bl;
+#endif /* !WORDS_BIGENDIAN */
+
+	dst[0] = MERGE(a0, al, a1, ar) ^ MERGE(b0, bl, b1, br);
+}
+
+/* Current implementation processes data in descending order, to
+   support overlapping operation with one of the sources overlapping
+   the start of the destination area. This feature is used only
+   internally by cbc decrypt, and it is not advertised or documented
+   to nettle users. */
+void *memxor3(void *dst_in, const void *a_in, const void *b_in, size_t n)
+{
+	unsigned char *dst = dst_in;
+	const unsigned char *a = a_in;
+	const unsigned char *b = b_in;
+
+	if (n >= WORD_T_THRESH)
+	{
+		unsigned i;
+		unsigned a_offset;
+		unsigned b_offset;
+		size_t nwords;
+
+		/* Peel bytes until dst+n is word-aligned. */
+		for (i = ALIGN_OFFSET(dst + n); i > 0; i--)
+		{
+			n--;
+			dst[n] = a[n] ^ b[n];
+		}
+
+		a_offset = ALIGN_OFFSET(a + n);
+		b_offset = ALIGN_OFFSET(b + n);
+
+		nwords = n / sizeof (word_t);
+		n %= sizeof (word_t);
+
+		/* Dispatch on the four possible alignment combinations; the
+		   "_b" helper handles one misaligned operand, XOR being
+		   commutative so a and b can be swapped. */
+		if (a_offset == b_offset)
+		{
+			if (!a_offset)
+				memxor3_common_alignment((word_t *) (dst + n),
+						(const word_t *) (a + n),
+						(const word_t *) (b + n), nwords);
+			else
+				memxor3_different_alignment_ab((word_t *) (dst + n),
+						a + n, b + n, a_offset,
+						nwords);
+		}
+		else if (!a_offset)
+			memxor3_different_alignment_b((word_t *) (dst + n),
+					(const word_t *) (a + n), b + n,
+					b_offset, nwords);
+		else if (!b_offset)
+			memxor3_different_alignment_b((word_t *) (dst + n),
+					(const word_t *) (b + n), a + n,
+					a_offset, nwords);
+		else
+			memxor3_different_alignment_all((word_t *) (dst + n), a + n, b + n,
+					a_offset, b_offset, nwords);
+
+	}
+	/* Remaining (or, for short inputs, all) bytes one at a time. */
+	while (n-- > 0)
+		dst[n] = a[n] ^ b[n];
+
+	return dst;
+}
diff --git a/src/sha3-nettle-impl.c b/src/sha3-nettle-impl.c
new file mode 100644
index 0000000..f5e3194
--- /dev/null
+++ b/src/sha3-nettle-impl.c
@@ -0,0 +1,238 @@
+
+#include <assert.h>
+#include <string.h>
+#include "sha3-nettle-impl.h"
+#include "sha3-nettle-impl-le.h"
+#include "sha3-nettle-impl-memxor.h"
+
+/* Initialize a context for a digest of mdlen bytes. The rate
+   (blksize) is the 200-octet Keccak width minus twice the digest
+   length. Only the fields before block[] (state, index) are zeroed;
+   block[] is always written before being read. Always returns 1. */
+int sha3_init(sha3_ctx_t *c, int mdlen)
+{
+	memset(c, 0, offsetof (sha3_ctx_t, block));
+	c->mdlen = mdlen;
+	c->blksize = 200 - 2 * mdlen;
+	return 1;
+}
+
+/* Absorb len bytes of data, buffering any partial block in the
+   context. Always returns 1. */
+int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
+{
+	c->index = _sha3_update (&c->state, c->blksize, c->block, c->index, len, data);
+	return 1;
+}
+
+/* Pad and absorb the final block, then write the mdlen-byte digest
+   (the low lanes of the state, little-endian) to md. Returns 1. */
+int sha3_final(void *md, sha3_ctx_t *c)
+{
+	_sha3_pad (&c->state, c->blksize, c->block, c->index);
+	_nettle_write_le64 (c->mdlen, md, c->state.a);
+	return 1;
+}
+
+/* One-shot convenience wrapper: hash inlen bytes of `in` into an
+   mdlen-byte digest at md. Returns md. */
+void *sha3(const void *in, size_t inlen, void *md, int mdlen)
+{
+	sha3_ctx_t sha3;
+
+	sha3_init(&sha3, mdlen);
+	sha3_update(&sha3, in, inlen);
+	sha3_final(md, &sha3);
+
+	return md;
+}
+
+/* XOR one full rate-sized block into the state, interpreting the
+   data as little-endian 64-bit lanes, then run the permutation.
+   length must be a multiple of 8. On little-endian hosts the lane
+   layout matches memory, so a plain memxor suffices. */
+static void
+sha3_absorb (struct sha3_state *state, unsigned length, const uint8_t *data)
+{
+	assert ( (length & 7) == 0);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	{
+		uint64_t *p;
+		for (p = state->a; length > 0; p++, length -= 8, data += 8)
+			*p ^= LE_READ_UINT64 (data);
+	}
+#else /* !WORDS_BIGENDIAN */
+	memxor (state->a, data, length);
+#endif
+
+	sha3_permute (state);
+}
+
+/* Absorb `length` bytes of `data`. A partial block is carried in
+   `block` with `pos` bytes already buffered; the return value is the
+   number of bytes left buffered after this call. */
+unsigned
+_sha3_update (struct sha3_state *state,
+              unsigned block_size, uint8_t *block,
+              unsigned pos,
+              size_t length, const uint8_t *data)
+{
+	/* First top up and absorb any previously buffered partial block. */
+	if (pos > 0)
+	{
+		unsigned left = block_size - pos;
+		if (length < left)
+		{
+			memcpy (block + pos, data, length);
+			return pos + length;
+		}
+		else
+		{
+			memcpy (block + pos, data, left);
+			data += left;
+			length -= left;
+			sha3_absorb (state, block_size, block);
+		}
+	}
+	/* Absorb whole blocks directly from the caller's buffer. */
+	for (; length >= block_size; length -= block_size, data += block_size)
+		sha3_absorb (state, block_size, data);
+
+	/* Buffer the tail (possibly zero bytes) for the next call. */
+	memcpy (block, data, length);
+	return length;
+}
+
+/* Apply SHA-3 padding to the buffered block and absorb it: the 0x06
+   byte is the SHA-3 domain-separation suffix (01) plus the first
+   padding bit, and 0x80 sets the final padding bit of the block. */
+void
+_sha3_pad (struct sha3_state *state,
+           unsigned block_size, uint8_t *block, unsigned pos)
+{
+	assert (pos < block_size);
+	block[pos++] = 6;
+
+	memset (block + pos, 0, block_size - pos);
+	block[block_size - 1] |= 0x80;
+
+	sha3_absorb (state, block_size, block);  
+}
+
+/* Keccak-f[1600]: 24 rounds of theta, rho, pi, chi and iota applied
+   to the state in place. C[] carries the five column parities across
+   rounds so theta needs no extra pass; rho and pi are fused into one
+   25-move in-place permutation (move list in the comment below). */
+void
+sha3_permute (struct sha3_state *state)
+{
+	/* Round constants for the iota step. */
+	static const uint64_t rc[SHA3_ROUNDS] = {
+		0x0000000000000001ULL, 0X0000000000008082ULL,
+		0X800000000000808AULL, 0X8000000080008000ULL,
+		0X000000000000808BULL, 0X0000000080000001ULL,
+		0X8000000080008081ULL, 0X8000000000008009ULL,
+		0X000000000000008AULL, 0X0000000000000088ULL,
+		0X0000000080008009ULL, 0X000000008000000AULL,
+		0X000000008000808BULL, 0X800000000000008BULL,
+		0X8000000000008089ULL, 0X8000000000008003ULL,
+		0X8000000000008002ULL, 0X8000000000000080ULL,
+		0X000000000000800AULL, 0X800000008000000AULL,
+		0X8000000080008081ULL, 0X8000000000008080ULL,
+		0X0000000080000001ULL, 0X8000000080008008ULL,
+	};
+
+	/* Original permutation:
+
+	   0,10,20, 5,15,
+	   16, 1,11,21, 6,
+	   7,17, 2,12,22,
+	   23, 8,18, 3,13,
+	   14,24, 9,19, 4
+
+	   Rotation counts:
+
+	   0,  1, 62, 28, 27,
+	   36, 44,  6, 55, 20,
+	   3, 10, 43, 25, 39,
+	   41, 45, 15, 21,  8,
+	   18,  2, 61, 56, 14,
+	   */
+
+	/* In-place implementation. Permutation done as a long sequence of
+	   25 moves "following" the permutation.
+
+	   T <--  1
+	   1 <--  6
+	   6 <--  9
+	   9 <-- 22
+	   22 <-- 14
+	   14 <-- 20
+	   20 <--  2
+	   2 <-- 12
+	   12 <-- 13
+	   13 <-- 19
+	   19 <-- 23
+	   23 <-- 15
+	   15 <--  4
+	   4 <-- 24
+	   24 <-- 21
+	   21 <--  8
+	   8 <-- 16
+	   16 <--  5
+	   5 <--  3
+	   3 <-- 18
+	   18 <-- 17
+	   17 <-- 11
+	   11 <--  7
+	   7 <-- 10
+	   10 <--  T
+
+*/
+	uint64_t C[5], D[5], T, X;
+	unsigned i, y;
+
+#define A state->a
+
+	/* Initial column parities for the first round's theta step. */
+	C[0] = A[0] ^ A[5+0] ^ A[10+0] ^ A[15+0] ^ A[20+0];
+	C[1] = A[1] ^ A[5+1] ^ A[10+1] ^ A[15+1] ^ A[20+1];
+	C[2] = A[2] ^ A[5+2] ^ A[10+2] ^ A[15+2] ^ A[20+2];
+	C[3] = A[3] ^ A[5+3] ^ A[10+3] ^ A[15+3] ^ A[20+3];
+	C[4] = A[4] ^ A[5+4] ^ A[10+4] ^ A[15+4] ^ A[20+4];
+
+	for (i = 0; i < SHA3_ROUNDS; i++)
+	{
+		/* theta: D[x] = C[x-1] ^ rotl(C[x+1], 1). */
+		D[0] = C[4] ^ ROTL64(1, C[1]);
+		D[1] = C[0] ^ ROTL64(1, C[2]);
+		D[2] = C[1] ^ ROTL64(1, C[3]);
+		D[3] = C[2] ^ ROTL64(1, C[4]);
+		D[4] = C[3] ^ ROTL64(1, C[0]);
+
+		/* Fused theta-apply + rho (rotate) + pi (lane moves). */
+		A[0] ^= D[0];
+		X = A[ 1] ^ D[1];     T = ROTL64(1, X);
+		X = A[ 6] ^ D[1]; A[ 1] = ROTL64 (44, X);
+		X = A[ 9] ^ D[4]; A[ 6] = ROTL64 (20, X);
+		X = A[22] ^ D[2]; A[ 9] = ROTL64 (61, X);
+		X = A[14] ^ D[4]; A[22] = ROTL64 (39, X);
+		X = A[20] ^ D[0]; A[14] = ROTL64 (18, X);
+		X = A[ 2] ^ D[2]; A[20] = ROTL64 (62, X);
+		X = A[12] ^ D[2]; A[ 2] = ROTL64 (43, X);
+		X = A[13] ^ D[3]; A[12] = ROTL64 (25, X);
+		X = A[19] ^ D[4]; A[13] = ROTL64 ( 8, X);
+		X = A[23] ^ D[3]; A[19] = ROTL64 (56, X);
+		X = A[15] ^ D[0]; A[23] = ROTL64 (41, X);
+		X = A[ 4] ^ D[4]; A[15] = ROTL64 (27, X);
+		X = A[24] ^ D[4]; A[ 4] = ROTL64 (14, X);
+		X = A[21] ^ D[1]; A[24] = ROTL64 ( 2, X);
+		X = A[ 8] ^ D[3]; A[21] = ROTL64 (55, X); /* row 4 done */
+		X = A[16] ^ D[1]; A[ 8] = ROTL64 (45, X);
+		X = A[ 5] ^ D[0]; A[16] = ROTL64 (36, X);
+		X = A[ 3] ^ D[3]; A[ 5] = ROTL64 (28, X);
+		X = A[18] ^ D[3]; A[ 3] = ROTL64 (21, X); /* row 0 done */
+		X = A[17] ^ D[2]; A[18] = ROTL64 (15, X);
+		X = A[11] ^ D[1]; A[17] = ROTL64 (10, X); /* row 3 done */
+		X = A[ 7] ^ D[2]; A[11] = ROTL64 ( 6, X); /* row 1 done */
+		X = A[10] ^ D[0]; A[ 7] = ROTL64 ( 3, X);
+		A[10] = T;                                /* row 2 done */
+
+		/* chi on row 0, then iota (round constant into A[0]);
+		   C[] starts accumulating next round's column parities. */
+		D[0] = ~A[1] & A[2];
+		D[1] = ~A[2] & A[3];
+		D[2] = ~A[3] & A[4];
+		D[3] = ~A[4] & A[0];
+		D[4] = ~A[0] & A[1];
+
+		A[0] ^= D[0] ^ rc[i]; C[0] = A[0];
+		A[1] ^= D[1]; C[1] = A[1];
+		A[2] ^= D[2]; C[2] = A[2];
+		A[3] ^= D[3]; C[3] = A[3];
+		A[4] ^= D[4]; C[4] = A[4];
+
+		/* chi on rows 1..4, folding each lane into C[]. */
+		for (y = 5; y < 25; y+= 5)
+		{
+			D[0] = ~A[y+1] & A[y+2];
+			D[1] = ~A[y+2] & A[y+3];
+			D[2] = ~A[y+3] & A[y+4];
+			D[3] = ~A[y+4] & A[y+0];
+			D[4] = ~A[y+0] & A[y+1];
+
+			A[y+0] ^= D[0]; C[0] ^= A[y+0];
+			A[y+1] ^= D[1]; C[1] ^= A[y+1];
+			A[y+2] ^= D[2]; C[2] ^= A[y+2];
+			A[y+3] ^= D[3]; C[3] ^= A[y+3];
+			A[y+4] ^= D[4]; C[4] ^= A[y+4];
+		}
+	}
+#undef A
+}
diff --git a/src/sha3-nettle.c b/src/sha3-nettle.c
new file mode 100644
index 0000000..e8eebbd
--- /dev/null
+++ b/src/sha3-nettle.c
@@ -0,0 +1,23 @@
+#include "sha3-nettle-impl.h"
+
+/* Glue for the test harness: one global context, fixed to a 32-byte
+   (SHA3-256) digest. Not reentrant. */
+sha3_ctx_t ctx;
+
+/* Start a new hash. */
+void init(void)
+{
+    sha3_init(&ctx, 32);
+}
+
+/* Feed `size` bytes of `msg` into the running hash. */
+void update(const char *msg, size_t size)
+{
+    sha3_update(&ctx, msg, size);
+}
+
+/* Finish and write the 32-byte digest to md. */
+void final(unsigned char *md)
+{
+    sha3_final(md, &ctx);
+}
+
+/* Digest length in bytes, as expected by the harness. */
+int hashlen(void)
+{
+    return 32;
+}