Welcome to little lamb

Code » limb » master » tree

[master] / src / liblimb / blake3.h / blake3_portable.c

/* This file is part of limb                           https://lila.oss/limb
 * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
/* Based on official BLAKE3 implementation:
 *  https://github.com/BLAKE3-team/BLAKE3
 * Copyright (C) 2019-2020 Samuel Neves and Jack O'Connor */
/* SPDX-License-Identifier: CC0-1.0 OR Apache-2.0 */
#include <string.h>
#include "blake3.h"

/* Rotate the 32-bit word w right by c bits.
 *
 * Both shift counts are reduced modulo 32, so neither shift can ever reach
 * the operand width: a count of 0 (or any multiple of 32) returns w unchanged
 * instead of evaluating an out-of-range `w << 32`. For counts 1..31 the
 * result is the same as the plain two-shift formulation.
 * Assumes u32 is an unsigned type exactly 32 bits wide. */
INLINE u32
rotr32(u32 w, u32 c)
{
    c &= 31;
    return (w >> c) | (w << ((32 - c) & 31));
}

/* Quarter-round mixing step: folds the two message words x and y into the
 * four state words selected by the indices a, b, c and d, in place.
 *
 * Each half adds into state[a], xor-rotates state[d], adds into state[c] and
 * xor-rotates state[b]; the rotation amounts are 16 and 12 for the first half
 * (which consumes x), 8 and 7 for the second (which consumes y). */
INLINE void
g(u32 *state, size_t a, size_t b, size_t c, size_t d, u32 x, u32 y)
{
    /* First half: message word x, rotations 16 / 12. */
    state[a] += state[b] + x;
    state[d] ^= state[a];
    state[d] = rotr32(state[d], 16);
    state[c] += state[d];
    state[b] ^= state[c];
    state[b] = rotr32(state[b], 12);

    /* Second half: message word y, rotations 8 / 7. */
    state[a] += state[b] + y;
    state[d] ^= state[a];
    state[d] = rotr32(state[d], 8);
    state[c] += state[d];
    state[b] ^= state[c];
    state[b] = rotr32(state[b], 7);
}

/* Run one full round over the 16-word state.
 *
 * state: working state, updated in place.
 * msg:   message words; they are read through the indices stored in
 *        MSG_SCHEDULE[round], never modified.
 * round: selects which row of MSG_SCHEDULE to use.
 *
 * The round is eight g() calls: four on the index groups (i, i+4, i+8, i+12)
 * and four on the groups (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
 * Viewing state as a 4x4 row-major matrix, those are its columns and its
 * diagonals respectively. */
INLINE void
round_fn(u32 state[16], const u32 *msg, size_t round)
{
    /* Row of message-word indices for this round. */
    const u8 *perm = MSG_SCHEDULE[round];

/* Apply g() to state words (a, b, c, d), feeding it the n-th pair of
 * scheduled message words. */
#define MIX(a, b, c, d, n) \
    g(state, (a), (b), (c), (d), msg[perm[2 * (n)]], msg[perm[2 * (n) + 1]])

    /* Columns. */
    MIX(0, 4,  8, 12, 0);
    MIX(1, 5,  9, 13, 1);
    MIX(2, 6, 10, 14, 2);
    MIX(3, 7, 11, 15, 3);

    /* Diagonals. */
    MIX(0, 5, 10, 15, 4);
    MIX(1, 6, 11, 12, 5);
    MIX(2, 7,  8, 13, 6);
    MIX(3, 4,  9, 14, 7);

#undef MIX
}

/* Shared front half of the compression function.
 *
 * Fills state[0..7] from the chaining value cv, state[8..11] from the first
 * four IV words, state[12..13] from the low/high parts of counter, and
 * state[14], state[15] from block_len and flags. It then runs seven rounds
 * over the sixteen 32-bit words decoded from block with load32().
 *
 * The final feed-forward xor is NOT applied here; callers combine the halves
 * of state themselves. */
INLINE void
compress_pre(u32 state[16], const u32 cv[8], const u8 block[BLAKE3_BLOCK_LEN],
             u8 block_len, u64 counter, u8 flags)
{
    u32 words[16];

    /* Decode the input block, four bytes per message word. */
    for (size_t i = 0; i < 16; ++i)
        words[i] = load32(block + 4 * i);

    /* Lay out the initial state. */
    for (size_t i = 0; i < 8; ++i)
        state[i] = cv[i];
    for (size_t i = 0; i < 4; ++i)
        state[8 + i] = IV[i];
    state[12] = counter_low(counter);
    state[13] = counter_high(counter);
    state[14] = (u32) block_len;
    state[15] = (u32) flags;

    /* Seven rounds, kept unrolled so every round number is a literal. */
    round_fn(state, words, 0);
    round_fn(state, words, 1);
    round_fn(state, words, 2);
    round_fn(state, words, 3);
    round_fn(state, words, 4);
    round_fn(state, words, 5);
    round_fn(state, words, 6);
}

/* Compress one block and overwrite cv with the resulting chaining value.
 *
 * cv:        in: current chaining value; out: the new one.
 * block:     input block to compress.
 * block_len: value placed in the state's block-length word.
 * counter:   value split across the state's two counter words.
 * flags:     value placed in the state's flags word.
 *
 * The new chaining value is the xor of the lower and upper halves of the
 * state produced by compress_pre(). */
void
blake3_compress_in_place_portable(u32 cv[8],
                                  const u8 block[BLAKE3_BLOCK_LEN],
                                  u8 block_len, u64 counter, u8 flags)
{
    u32 state[16];

    compress_pre(state, cv, block, block_len, counter, flags);

    for (size_t i = 0; i < 8; ++i)
        cv[i] = state[i] ^ state[i + 8];
}

/* Compress one block and write the full 64-byte output to out.
 *
 * cv is left untouched. The first 32 bytes of out hold the xor of the lower
 * and upper halves of the state produced by compress_pre(); the last 32
 * bytes hold the upper half of the state xored with cv. Every 32-bit word is
 * serialised with store32(). */
void
blake3_compress_xof_portable(const u32 cv[8],
                             const u8 block[BLAKE3_BLOCK_LEN],
                             u8 block_len, u64 counter,
                             u8 flags, u8 out[64])
{
    u32 state[16];

    compress_pre(state, cv, block, block_len, counter, flags);

    /* Output words 0..7: lower half xor upper half. */
    for (size_t i = 0; i < 8; ++i)
        store32(&out[i * 4], state[i] ^ state[i + 8]);

    /* Output words 8..15: upper half xor input chaining value. */
    for (size_t i = 0; i < 8; ++i)
        store32(&out[(i + 8) * 4], state[i + 8] ^ cv[i]);
}

/* Hash `blocks` consecutive BLAKE3_BLOCK_LEN-byte blocks starting at input,
 * chaining from key, and write the final chaining value to out.
 *
 * Every block is compressed with `flags`; the first block additionally gets
 * flags_start and the last one flags_end (a single block gets both). The same
 * counter value is used for all blocks. When blocks is 0 nothing is
 * compressed and out receives the words of key as-is. */
INLINE void
hash_one_portable(const u8 *input, size_t blocks,
                  const u32 key[8], u64 counter,
                  u8 flags, u8 flags_start,
                  u8 flags_end, u8 out[BLAKE3_OUT_LEN])
{
    u32 cv[8];

    memcpy(cv, key, BLAKE3_KEY_LEN);

    for (size_t i = 0; i < blocks; ++i) {
        u8 block_flags = flags;

        if (i == 0) {
            block_flags |= flags_start;
        }
        if (i + 1 == blocks) {
            block_flags |= flags_end;
        }

        blake3_compress_in_place_portable(cv, &input[i * BLAKE3_BLOCK_LEN],
                                          BLAKE3_BLOCK_LEN, counter,
                                          block_flags);
    }

    store_cv_words(out, cv);
}

/* Hash num_inputs independent inputs, one after the other.
 *
 * inputs:            array of num_inputs pointers, each to `blocks` blocks.
 * key:               starting chaining value shared by every input.
 * counter:           counter used for the first input.
 * increment_counter: when non-zero, counter is bumped by one after each
 *                    input; otherwise every input uses the same value.
 * flags, flags_start, flags_end: forwarded to hash_one_portable().
 * out:               receives BLAKE3_OUT_LEN bytes per input, back to back,
 *                    in input order. */
void blake3_hash_many_portable(const u8 *const *inputs, size_t num_inputs,
                               size_t blocks, const u32 key[8],
                               u64 counter, char increment_counter,
                               u8 flags, u8 flags_start,
                               u8 flags_end, u8 *out)
{
    for (size_t i = 0; i < num_inputs; ++i) {
        hash_one_portable(inputs[i], blocks, key, counter, flags, flags_start,
                          flags_end, &out[i * BLAKE3_OUT_LEN]);
        if (increment_counter) {
            ++counter;
        }
    }
}