/* This file is part of limb https://lila.oss/limb
* Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */
/* Based on official BLAKE3 implementation:
* https://github.com/BLAKE3-team/BLAKE3
* Copyright (C) 2019-2020 Samuel Neves and Jack O'Connor */
/* SPDX-License-Identifier: CC0-1.0 OR Apache-2.0 */
#include <string.h>
#include "blake3.h"
/* Rotate the word w right by c bit positions (32-bit rotation).
 *
 * Both shift counts are reduced modulo 32, so c == 0 (or any multiple of 32)
 * yields w unchanged instead of shifting by the full width of the type, which
 * C leaves undefined. For c in 1..31 the result is the same as the plain
 * (w >> c) | (w << (32 - c)) form.
 *
 * NOTE(review): assumes u32 is an unsigned type exactly 32 bits wide -- confirm
 * against its typedef. */
INLINE u32
rotr32(u32 w, u32 c)
{
    return (w >> (c & 31)) | (w << ((32 - c) & 31));
}
/* Mixing step: stir the four state words selected by indices a, b, c, d
 * together with the two message words x and y.
 *
 * The step is two passes of add / xor / rotate-right; the first pass folds in
 * x and rotates by 16 then 12, the second folds in y and rotates by 8 then 7.
 * The state is updated in place. */
INLINE void
g(u32 *state, size_t a, size_t b, size_t c, size_t d, u32 x, u32 y)
{
    u32 *wa = &state[a];
    u32 *wb = &state[b];
    u32 *wc = &state[c];
    u32 *wd = &state[d];

    /* First pass, using message word x. */
    *wa = *wa + *wb + x;
    *wd = rotr32(*wd ^ *wa, 16);
    *wc = *wc + *wd;
    *wb = rotr32(*wb ^ *wc, 12);

    /* Second pass, using message word y. */
    *wa = *wa + *wb + y;
    *wd = rotr32(*wd ^ *wa, 8);
    *wc = *wc + *wd;
    *wb = rotr32(*wb ^ *wc, 7);
}
/* Apply one round to the 16-word state.
 *
 * msg holds the 16 message words; MSG_SCHEDULE[round] gives the order in
 * which they are consumed for this round. The first eight schedule entries
 * feed the four column mixes, the last eight feed the four diagonal mixes
 * (columns/diagonals of the state viewed as a 4x4 row-major matrix). */
INLINE void
round_fn(u32 state[16], const u32 *msg, size_t round)
{
    /* Schedule entries for the column pass and for the diagonal pass. */
    const u8 *col = MSG_SCHEDULE[round];
    const u8 *diag = col + 8;

    /* Column pass: words i, i + 4, i + 8, i + 12. */
    g(state, 0, 4,  8, 12, msg[col[0]], msg[col[1]]);
    g(state, 1, 5,  9, 13, msg[col[2]], msg[col[3]]);
    g(state, 2, 6, 10, 14, msg[col[4]], msg[col[5]]);
    g(state, 3, 7, 11, 15, msg[col[6]], msg[col[7]]);

    /* Diagonal pass: each lower row is taken one position further along. */
    g(state, 0, 5, 10, 15, msg[diag[0]], msg[diag[1]]);
    g(state, 1, 6, 11, 12, msg[diag[2]], msg[diag[3]]);
    g(state, 2, 7,  8, 13, msg[diag[4]], msg[diag[5]]);
    g(state, 3, 4,  9, 14, msg[diag[6]], msg[diag[7]]);
}
/* Set up the 16-word compression state for one block and run seven rounds.
 *
 * state      output; every one of its 16 words is overwritten
 * cv         8 chaining-value words, copied to state[0..7]
 * block      input bytes, decoded four at a time with load32() into 16 words
 * block_len  stored in state[14]
 * counter    split by counter_low()/counter_high() into state[12]/state[13]
 * flags      stored in state[15]
 *
 * state[8..11] start out as IV[0..3]. No feed-forward is applied here; that
 * is left to the callers. */
INLINE void
compress_pre(u32 state[16], const u32 cv[8], const u8 block[BLAKE3_BLOCK_LEN],
             u8 block_len, u64 counter, u8 flags)
{
    u32 block_words[16];

    /* Decode the block into sixteen message words. */
    for (size_t i = 0; i < 16; ++i)
        block_words[i] = load32(block + 4 * i);

    /* Upper half of the state: the chaining value. */
    for (size_t i = 0; i < 8; ++i)
        state[i] = cv[i];

    /* Lower half: four IV words, the counter halves, block length, flags. */
    for (size_t i = 0; i < 4; ++i)
        state[8 + i] = IV[i];
    state[12] = counter_low(counter);
    state[13] = counter_high(counter);
    state[14] = (u32) block_len;
    state[15] = (u32) flags;

    /* Rounds are written out so each call passes a literal round index. */
    round_fn(state, block_words, 0);
    round_fn(state, block_words, 1);
    round_fn(state, block_words, 2);
    round_fn(state, block_words, 3);
    round_fn(state, block_words, 4);
    round_fn(state, block_words, 5);
    round_fn(state, block_words, 6);
}
/* Compress one block and replace cv with the resulting chaining value.
 *
 * cv is read as the input chaining value, then each of its 8 words is
 * overwritten with state[i] ^ state[i + 8] of the state produced by
 * compress_pre(). block, block_len, counter and flags are passed through to
 * compress_pre() unchanged. */
void
blake3_compress_in_place_portable(u32 cv[8],
                                  const u8 block[BLAKE3_BLOCK_LEN],
                                  u8 block_len, u64 counter, u8 flags)
{
    u32 state[16];

    compress_pre(state, cv, block, block_len, counter, flags);

    /* Fold the lower half of the state onto the upper half. */
    for (size_t i = 0; i < 8; ++i)
        cv[i] = state[i] ^ state[i + 8];
}
/* Compress one block and write the full 64-byte output to out.
 *
 * Unlike the in-place variant, cv is left untouched. The 16 output words are
 * written with store32():
 *   words 0..7   state[i] ^ state[i + 8]
 *   words 8..15  state[i + 8] ^ cv[i]
 * where state is the result of compress_pre() on the given arguments. */
void
blake3_compress_xof_portable(const u32 cv[8],
                             const u8 block[BLAKE3_BLOCK_LEN],
                             u8 block_len, u64 counter,
                             u8 flags, u8 out[64])
{
    u32 state[16];

    compress_pre(state, cv, block, block_len, counter, flags);

    /* First 32 bytes: the two state halves XORed together. */
    for (size_t i = 0; i < 8; ++i)
        store32(&out[i * 4], state[i] ^ state[i + 8]);

    /* Last 32 bytes: lower state half XORed with the input chaining value. */
    for (size_t i = 0; i < 8; ++i)
        store32(&out[(i + 8) * 4], state[i + 8] ^ cv[i]);
}
/* Hash `blocks` consecutive blocks of BLAKE3_BLOCK_LEN bytes from input into
 * a single chaining value, and write it to out with store_cv_words().
 *
 * The chaining value starts as the first BLAKE3_KEY_LEN bytes of key and is
 * updated by blake3_compress_in_place_portable() for every block, always with
 * the same counter. Each block is compressed with `flags`; the first block
 * additionally gets flags_start and the last one flags_end (a lone block gets
 * both). With blocks == 0 the key is written out as-is.
 *
 * NOTE(review): assumes BLAKE3_KEY_LEN does not exceed sizeof(u32[8]) --
 * confirm against the header. */
INLINE void
hash_one_portable(const u8 *input, size_t blocks,
                  const u32 key[8], u64 counter,
                  u8 flags, u8 flags_start,
                  u8 flags_end, u8 out[BLAKE3_OUT_LEN])
{
    u32 cv[8];

    memcpy(cv, key, BLAKE3_KEY_LEN);

    for (size_t i = 0; i < blocks; ++i, input += BLAKE3_BLOCK_LEN) {
        u8 block_flags = flags;

        if (i == 0)
            block_flags |= flags_start;
        if (i + 1 == blocks)
            block_flags |= flags_end;

        blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN,
                                          counter, block_flags);
    }

    store_cv_words(out, cv);
}
/* Hash num_inputs independent inputs, one after the other.
 *
 * Every inputs[i] is hashed with hash_one_portable() over `blocks` blocks,
 * using the same key and flag arguments. Results are written back to back in
 * out, BLAKE3_OUT_LEN bytes per input. When increment_counter is non-zero the
 * counter goes up by one after each input; otherwise all inputs share the
 * same counter value. */
void
blake3_hash_many_portable(const u8 *const *inputs, size_t num_inputs,
                          size_t blocks, const u32 key[8],
                          u64 counter, char increment_counter,
                          u8 flags, u8 flags_start,
                          u8 flags_end, u8 *out)
{
    for (; num_inputs > 0; --num_inputs, ++inputs, out += BLAKE3_OUT_LEN) {
        hash_one_portable(*inputs, blocks, key, counter, flags, flags_start,
                          flags_end, out);
        if (increment_counter)
            counter += 1;
    }
}