/* This file is part of limb https://lila.oss/limb
* Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */
/* Based on poly1305-donna
* Copyright (C) 2016 Andrew Moon */
/* SPDX-License-Identifier: GPL-2.0-only */
#include <string.h>
#include <limb/poly1305.h>
#include "poly1305.h"
void
poly1305_final(void *dst_, void *ctx_)
{
struct poly1305_ctx *ctx = ctx_;
u8 *dst = dst_;
u32 g0, g1, g2, g3, g4;
u32 c;
u64 f;
u32 mask;
/* process remaining block */
if (ctx->leftover) {
size_t i = ctx->leftover;
ctx->buf[i++] = 1;
memset(ctx->buf + i, 0, POLY1305_BLOCKSIZE - i);
ctx->final = 1;
poly1305_blocks(ctx->buf, POLY1305_BLOCKSIZE, ctx);
}
/* fully carry a */
c = ctx->a[1] >> 26; ctx->a[1] = ctx->a[1] & 0x03ffffff;
ctx->a[2] += c ; c = ctx->a[2] >> 26; ctx->a[2] = ctx->a[2] & 0x03ffffff;
ctx->a[3] += c ; c = ctx->a[3] >> 26; ctx->a[3] = ctx->a[3] & 0x03ffffff;
ctx->a[4] += c ; c = ctx->a[4] >> 26; ctx->a[4] = ctx->a[4] & 0x03ffffff;
ctx->a[0] += c * 5; c = ctx->a[0] >> 26; ctx->a[0] = ctx->a[0] & 0x03ffffff;
ctx->a[1] += c;
/* compute a + -p */
g0 = ctx->a[0] + 5; c = g0 >> 26; g0 &= 0x03ffffff;
g1 = ctx->a[1] + c; c = g1 >> 26; g1 &= 0x03ffffff;
g2 = ctx->a[2] + c; c = g2 >> 26; g2 &= 0x03ffffff;
g3 = ctx->a[3] + c; c = g3 >> 26; g3 &= 0x03ffffff;
g4 = ctx->a[4] + c - (U32_C(1) << 26);
/* select a if a < p, or a + -p if a >= p */
mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
g0 &= mask;
g1 &= mask;
g2 &= mask;
g3 &= mask;
g4 &= mask;
mask = ~mask;
ctx->a[0] = (ctx->a[0] & mask) | g0;
ctx->a[1] = (ctx->a[1] & mask) | g1;
ctx->a[2] = (ctx->a[2] & mask) | g2;
ctx->a[3] = (ctx->a[3] & mask) | g3;
ctx->a[4] = (ctx->a[4] & mask) | g4;
/* a = a % (2^128) */
ctx->a[0] = ((ctx->a[0] ) | (ctx->a[1] << 26)) & 0xffffffff;
ctx->a[1] = ((ctx->a[1] >> 6) | (ctx->a[2] << 20)) & 0xffffffff;
ctx->a[2] = ((ctx->a[2] >> 12) | (ctx->a[3] << 14)) & 0xffffffff;
ctx->a[3] = ((ctx->a[3] >> 18) | (ctx->a[4] << 8)) & 0xffffffff;
/* mac = (a + s) % (2^128) */
f = (u64) ctx->a[0] + ctx->s[0]; ctx->a[0] = (u32) f;
f = (u64) ctx->a[1] + ctx->s[1] + (f >> 32); ctx->a[1] = (u32) f;
f = (u64) ctx->a[2] + ctx->s[2] + (f >> 32); ctx->a[2] = (u32) f;
f = (u64) ctx->a[3] + ctx->s[3] + (f >> 32); ctx->a[3] = (u32) f;
memcpy(dst, ctx->a, 4 * sizeof(*ctx->a));
u32pa_le(dst, 4);
}