Welcome to little lamb

Code » limb » master » tree

[master] / src / liblimb / poly1305.h / poly1305_final.c

/* This file is part of limb                           https://lila.oss/limb
 * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
/* Based on poly1305-donna
 * Copyright (C) 2016 Andrew Moon */
/* SPDX-License-Identifier: GPL-2.0-only */
#include <string.h>
#include <limb/poly1305.h>
#include "poly1305.h"

void
poly1305_final(void *dst_, void *ctx_)
{
    struct poly1305_ctx *ctx = ctx_;
    u8 *dst = dst_;

    u32 g0, g1, g2, g3, g4;
    u32 c;
    u64 f;
    u32 mask;

    /* process remaining block */
    if (ctx->leftover) {
        size_t i = ctx->leftover;
        ctx->buf[i++] = 1;
        memset(ctx->buf + i, 0, POLY1305_BLOCKSIZE - i);
        ctx->final = 1;
        poly1305_blocks(ctx->buf, POLY1305_BLOCKSIZE, ctx);
    }

    /* fully carry a */
                        c = ctx->a[1] >> 26; ctx->a[1] = ctx->a[1] & 0x03ffffff;
    ctx->a[2] += c    ; c = ctx->a[2] >> 26; ctx->a[2] = ctx->a[2] & 0x03ffffff;
    ctx->a[3] += c    ; c = ctx->a[3] >> 26; ctx->a[3] = ctx->a[3] & 0x03ffffff;
    ctx->a[4] += c    ; c = ctx->a[4] >> 26; ctx->a[4] = ctx->a[4] & 0x03ffffff;
    ctx->a[0] += c * 5; c = ctx->a[0] >> 26; ctx->a[0] = ctx->a[0] & 0x03ffffff;
    ctx->a[1] += c;

    /* compute a + -p */
    g0 = ctx->a[0] + 5; c = g0 >> 26; g0 &= 0x03ffffff;
    g1 = ctx->a[1] + c; c = g1 >> 26; g1 &= 0x03ffffff;
    g2 = ctx->a[2] + c; c = g2 >> 26; g2 &= 0x03ffffff;
    g3 = ctx->a[3] + c; c = g3 >> 26; g3 &= 0x03ffffff;
    g4 = ctx->a[4] + c - (U32_C(1) << 26);

    /* select a if a < p, or a + -p if a >= p */
    mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
    g0 &= mask;
    g1 &= mask;
    g2 &= mask;
    g3 &= mask;
    g4 &= mask;
    mask = ~mask;
    ctx->a[0] = (ctx->a[0] & mask) | g0;
    ctx->a[1] = (ctx->a[1] & mask) | g1;
    ctx->a[2] = (ctx->a[2] & mask) | g2;
    ctx->a[3] = (ctx->a[3] & mask) | g3;
    ctx->a[4] = (ctx->a[4] & mask) | g4;

    /* a = a % (2^128) */
    ctx->a[0] = ((ctx->a[0]      ) | (ctx->a[1] << 26)) & 0xffffffff;
    ctx->a[1] = ((ctx->a[1] >>  6) | (ctx->a[2] << 20)) & 0xffffffff;
    ctx->a[2] = ((ctx->a[2] >> 12) | (ctx->a[3] << 14)) & 0xffffffff;
    ctx->a[3] = ((ctx->a[3] >> 18) | (ctx->a[4] <<  8)) & 0xffffffff;

    /* mac = (a + s) % (2^128) */
    f = (u64) ctx->a[0] + ctx->s[0];             ctx->a[0] = (u32) f;
    f = (u64) ctx->a[1] + ctx->s[1] + (f >> 32); ctx->a[1] = (u32) f;
    f = (u64) ctx->a[2] + ctx->s[2] + (f >> 32); ctx->a[2] = (u32) f;
    f = (u64) ctx->a[3] + ctx->s[3] + (f >> 32); ctx->a[3] = (u32) f;

    memcpy(dst, ctx->a, 4 * sizeof(*ctx->a));
    u32pa_le(dst, 4);
}