Welcome to little lamb

Code » limb » commit 25cf80e

Add base{,32,64}.h & related functions

author Olivier Brunel
2023-04-12 15:48:25 UTC
committer Olivier Brunel
2023-05-20 18:06:36 UTC
parent 7ab8f5efcc42fe19e05202a2ea2515c29bff1e73

Add base{,32,64}.h & related functions

base_fmt() & base_scan() are generic functions to encode/decode a byte
array using a baseN algorithm as specified and given alphabet.

base32_fmt() & base32_scan() are base32 from RFC 4648, base64_fmt() and
base64_scan() are base64 from RFC 4648.

src/doc/base.h.0.md +28 -0
src/doc/base.h/base_fmt.3.md +73 -0
src/doc/base32.h.0.md +28 -0
src/doc/base32.h/base32_fmt.3.md +50 -0
src/doc/base64.h.0.md +28 -0
src/doc/base64.h/base64_fmt.3.md +50 -0
src/include/base32.h +11 -0
src/include/base64.h +11 -0
src/liblimb/base.h/base_fmt.c +64 -0
src/liblimb/base.h/base_scan.c +66 -0
src/liblimb/base32.h/base32_fmt.c +11 -0
src/liblimb/base32.h/base32_scan.c +13 -0
src/liblimb/base64.h/base64_fmt.c +11 -0
src/liblimb/base64.h/base64_scan.c +13 -0
src/liblimb/include/limb/base.h +12 -0
src/liblimb/include/limb/base32.h +12 -0
src/liblimb/include/limb/base64.h +12 -0

diff --git a/src/doc/base.h.0.md b/src/doc/base.h.0.md
new file mode 100644
index 0000000..2b4b9c6
--- /dev/null
+++ b/src/doc/base.h.0.md
@@ -0,0 +1,28 @@
+% limb manual
+% base.h(0)
+
+# NAME
+
+base.h - base-16\/32\/64\/128 encoding/decoding of byte arrays
+
+# SYNOPSIS
+
+    #include <limb/base.h>
+
+# DESCRIPTION
+
+This header defines the functions required to perform base 16\/32\/64\/128 encoding/decoding.
+
+## Functions
+
+The following functions are defined :
+
+: [base_fmt](3)
+:: Encode a byte array in the specified base using the given alphabet.
+
+: [base_scan](3)
+:: Decode a byte array in the specified base using the given alphabet.
+
+# SEE ALSO
+
+[base32.h](0), [base64.h](0)
diff --git a/src/doc/base.h/base_fmt.3.md b/src/doc/base.h/base_fmt.3.md
new file mode 100644
index 0000000..ca3535d
--- /dev/null
+++ b/src/doc/base.h/base_fmt.3.md
@@ -0,0 +1,73 @@
+% limb manual
+% base_fmt(3)
+
+# NAME
+
+base\_fmt, base\_scan - base-16/32/64/128 encode/decode a byte array
+
+# SYNOPSIS
+
+    #include <limb/base.h>
+
+```pre hl
+ssize_t base_fmt(char *<em>dst</em>, int <em>base</em>, const char *<em>data</em>, size_t <em>dlen</em>, const char *<em>alpha</em>, int <em>strict</em>)
+ssize_t base_scan(char *<em>dst</em>, int <em>base</em>, const char *<em>data</em>, size_t <em>dlen</em>, const char *<em>alpha</em>)
+```
+
+# DESCRIPTION
+
+The `base_fmt`() function will encode the byte array pointed to by `data` of
+length `dlen` bytes into the byte array pointed to by `dst` using the base
+algorithm specified with `base` and the alphabet pointed to by `alpha`.
+
+Valid values for `base` are 2, 4, 8, 16, 32, 64 and 128 for the corresponding
+base. Accordingly the specified alphabet `alpha` must be of length 3, 5, 9, 17,
+33, 65 and 129 bytes, with the last character to be used for padding.
+
+The output placed into `dst` will have necessary padding unless `strict` is
+zero, in which case only one padding character is put when padding is
+needed.
+Note that it will /not/ be NUL-terminated. If `dst` is *NULL*, the function
+simply returns the size required for `dst`.
+
+The `base_scan`() function will decode the byte array pointed to by `data` of
+length `dlen` into the byte array pointed to by `dst` using the base algorithm
+specified with `base` and the alphabet pointed to by `alpha`.
+
+Both arguments `base` and `alpha` are similar to their counterparts for
+`base_fmt`().
+
+The encoded `data` doesn't have to end with proper padding, that is a single
+padding character marking the end of data is enough (as done with `base_fmt`()
+when `strict` is zero).  Note that, as with `base_fmt`(), the result in `dst`
+will /not/ be NUL-terminated; and that `dst` can also be *NULL*, in which case
+only data validation is performed and the length required in `dst` is returned.
+
+! INFO:
+! Both functions implement algorithms that can be used to perform encoding
+! and decoding as described in [RFC 4648][rfc4648], assuming corresponding
+! alphabet is used.
+
+[rfc4648] (https://datatracker.ietf.org/doc/html/rfc4648)
+
+# RETURN VALUE
+
+Both functions return the length written into `dst` - or that would have been
+had it not been *NULL* - on success. Otherwise, they return -1 and set `errno`
+to indicate the error.
+
+# ERRORS
+
+The `base_fmt`() and `base_scan`() functions may fail if :
+
+: *ERANGE*
+:: The `base` argument isn't valid.
+
+The `base_scan`() function may also fail if :
+
+: *EINVAL*
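+:: The input `data` is invalid, e.g. it contains a character not from `alpha`, has a non-padding character after a padding character within a group, or ends with an incomplete group lacking padding.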
+
+# SEE ALSO
+
+[base32_fmt](3), [base64_fmt](3)
diff --git a/src/doc/base32.h.0.md b/src/doc/base32.h.0.md
new file mode 100644
index 0000000..fbf4bd7
--- /dev/null
+++ b/src/doc/base32.h.0.md
@@ -0,0 +1,28 @@
+% limb manual
+% base32.h(0)
+
+# NAME
+
+base32.h - base32 encode/decode a byte array
+
+# SYNOPSIS
+
+    #include <limb/base32.h>
+
+# DESCRIPTION
+
+This header defines the functions required to perform base32 encoding/decoding.
+
+## Functions
+
+The following functions are defined :
+
+: [base32_fmt](3)
+:: Encode a byte array using BASE32 algorithm as per RFC 4648.
+
+: [base32_scan](3)
+:: Decode a byte array using BASE32 algorithm as per RFC 4648.
+
+# SEE ALSO
+
+[base.h](0), [base64.h](0)
diff --git a/src/doc/base32.h/base32_fmt.3.md b/src/doc/base32.h/base32_fmt.3.md
new file mode 100644
index 0000000..4a42e53
--- /dev/null
+++ b/src/doc/base32.h/base32_fmt.3.md
@@ -0,0 +1,50 @@
+% limb manual
+% base32_fmt(3)
+
+# NAME
+
+base32\_fmt, base32\_scan - base32 encode/decode a byte array
+
+# SYNOPSIS
+
+    #include <limb/base32.h>
+
+```pre hl
+size_t base32_fmt(char *<em>dst</em>, const void *<em>data</em>, size_t <em>dlen</em>, int <em>strict</em>)
+ssize_t base32_scan(char *<em>dst</em>, const char *<em>data</em>, size_t <em>dlen</em>)
+```
+
+# DESCRIPTION
+
+The `base32_fmt`() function will encode the byte array pointed to by `data` of
+length `dlen` bytes into the byte array pointed to by `dst` using the base32
+algorithm as described in [RFC 4648][rfc4648], unless `strict` is zero in which
+case when padding is needed, only one padding character is used.
+
+Refer to [base_fmt](3) for more details.
+
+The `base32_scan`() function will decode the byte array pointed to by `data` of
+length `dlen` into the byte array pointed to by `dst` using the base32 algorithm
+as described in [RFC 4648][rfc4648].
+
+Refer to [base_scan](3) for more details.
+
+[rfc4648] (https://datatracker.ietf.org/doc/html/rfc4648)
+
+# RETURN VALUE
+
+The `base32_fmt`() function returns the length written into `dst` - or that
+would have been when `dst` is *NULL*.
+
+The `base32_scan`() function returns the length written into `dst` - or that
+would have been when `dst` is *NULL* - on success. Otherwise it returns -1 and
+sets `errno` to indicate the error.
+
+# ERRORS
+
+The `base32_scan`() function may fail for any of the errors described for
+[base_scan](3) except *ERANGE*.
+
+# SEE ALSO
+
+[base64_fmt](3)
diff --git a/src/doc/base64.h.0.md b/src/doc/base64.h.0.md
new file mode 100644
index 0000000..59e4d82
--- /dev/null
+++ b/src/doc/base64.h.0.md
@@ -0,0 +1,28 @@
+% limb manual
+% base64.h(0)
+
+# NAME
+
+base64.h - base64 encode/decode a byte array
+
+# SYNOPSIS
+
+    #include <limb/base64.h>
+
+# DESCRIPTION
+
+This header defines the functions required to perform base64 encoding/decoding.
+
+## Functions
+
+The following functions are defined :
+
+: [base64_fmt](3)
+:: Encode a byte array using BASE64 algorithm as per RFC 4648.
+
+: [base64_scan](3)
+:: Decode a byte array using BASE64 algorithm as per RFC 4648.
+
+# SEE ALSO
+
+[base.h](0), [base32.h](0)
diff --git a/src/doc/base64.h/base64_fmt.3.md b/src/doc/base64.h/base64_fmt.3.md
new file mode 100644
index 0000000..eabce51
--- /dev/null
+++ b/src/doc/base64.h/base64_fmt.3.md
@@ -0,0 +1,50 @@
+% limb manual
+% base64_fmt(3)
+
+# NAME
+
+base64\_fmt, base64\_scan - base64 encode/decode a byte array
+
+# SYNOPSIS
+
+    #include <limb/base64.h>
+
+```pre hl
+size_t base64_fmt(char *<em>dst</em>, const void *<em>data</em>, size_t <em>dlen</em>, int <em>strict</em>)
+ssize_t base64_scan(char *<em>dst</em>, const char *<em>data</em>, size_t <em>dlen</em>)
+```
+
+# DESCRIPTION
+
+The `base64_fmt`() function will encode the byte array pointed to by `data` of
+length `dlen` bytes into the byte array pointed to by `dst` using the base64
+algorithm as described in [RFC 4648][rfc4648], unless `strict` is zero in which
+case when padding is needed, only one padding character is used.
+
+Refer to [base_fmt](3) for more details.
+
+The `base64_scan`() function will decode the byte array pointed to by `data` of
+length `dlen` into the byte array pointed to by `dst` using the base64 algorithm
+as described in [RFC 4648][rfc4648].
+
+Refer to [base_scan](3) for more details.
+
+[rfc4648] (https://datatracker.ietf.org/doc/html/rfc4648)
+
+# RETURN VALUE
+
+The `base64_fmt`() function returns the length written into `dst` - or that
+would have been when `dst` is *NULL*.
+
+The `base64_scan`() function returns the length written into `dst` - or that
+would have been when `dst` is *NULL* - on success. Otherwise it returns -1 and
+sets `errno` to indicate the error.
+
+# ERRORS
+
+The `base64_scan`() function may fail for any of the errors described for
+[base_scan](3) except *ERANGE*.
+
+# SEE ALSO
+
+[base32_fmt](3)
diff --git a/src/include/base32.h b/src/include/base32.h
new file mode 100644
index 0000000..e87ef74
--- /dev/null
+++ b/src/include/base32.h
@@ -0,0 +1,11 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LIMB_LIMB_BASE32_H
+#define LIMB_LIMB_BASE32_H
+
+#include <limb/base32.h>
+
+extern const char base32[33]; /* base32 alphabet: 32 digits followed by the padding character; defined in base32_scan.c */
+
+#endif /* LIMB_LIMB_BASE32_H */
diff --git a/src/include/base64.h b/src/include/base64.h
new file mode 100644
index 0000000..f195d6f
--- /dev/null
+++ b/src/include/base64.h
@@ -0,0 +1,11 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LIMB_LIMB_BASE64_H
+#define LIMB_LIMB_BASE64_H
+
+#include <limb/base64.h>
+
+extern const char base64[65]; /* base64 alphabet: 64 digits followed by the padding character; defined in base64_scan.c */
+
+#endif /* LIMB_LIMB_BASE64_H */
diff --git a/src/liblimb/base.h/base_fmt.c b/src/liblimb/base.h/base_fmt.c
new file mode 100644
index 0000000..bc74fb1
--- /dev/null
+++ b/src/liblimb/base.h/base_fmt.c
@@ -0,0 +1,64 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <errno.h>
+#include <string.h>
+#include <limb/base.h>
+#include <limb/u64.h>
+
+ssize_t /* length written to dst (or required, when dst is NULL); -1 with errno set to ERANGE for an unsupported base */
+base_fmt(char *dst, int base, const char *data_, size_t dlen, const char *alpha, int strict) /* encode dlen bytes of data_ in the given base; alpha[0..base-1] are the digits, alpha[base] is the padding character */
+{
+    const unsigned char *data = (const unsigned char *) data_;
+    size_t bits, bin, bout, msk; /* bits per digit; input bytes and output digits per full group; mask selecting one digit */
+    size_t w = 0; /* running total of output characters */

+    switch (base) { /* a full group of bin input bytes becomes bout digits of `bits` bits each */
+        case   2: bits = 1; bin = 1; bout = 8; break;
+        case   4: bits = 2; bin = 1; bout = 4; break;
+        case   8: bits = 3; bin = 3; bout = 8; break;
+        case  16: bits = 4; bin = 1; bout = 2; break;
+        case  32: bits = 5; bin = 5; bout = 8; break;
+        case  64: bits = 6; bin = 3; bout = 4; break;
+        case 128: bits = 7; bin = 7; bout = 8; break;
+        default: return (errno = ERANGE, -1);
+    }
+    msk = (1 << bits) - 1; /* lowest `bits` bits set */

+    while (dlen) { /* one group per iteration */
+        size_t l, o, pad; /* input bytes consumed, digits produced, padding characters appended */
+        if (dlen < bin) { /* trailing partial group */
+            l = dlen;
+            o = bout * l / bin; /* digits needed to carry l bytes ... */
+            if (bout * l % bin)
+                ++o; /* ... rounded up */
+            if (strict) pad = bout - o; /* strict: pad up to a full group of bout characters */
+            else pad = 1; /* otherwise a single padding character marks the end */
+        } else {
+            l = bin;
+            o = bout;
+            pad = 0;
+        }

+        if (dst) { /* when dst is NULL only the length is computed */
+            u64 u = 0;
+            memcpy(&u, data, l); /* unused bytes of u stay zero */
+            u64p_be(&u); /* NOTE(review): presumably leaves the first input byte in the most significant position of u -- confirm in limb/u64.h */

+            for (size_t i = 0; i < o; ++i)
+                dst[i] = alpha[(u >> (64 - bits * (i + 1))) & msk]; /* i-th digit = i-th run of `bits` bits counted from the top of u */
+            memset(dst + o, alpha[base], pad);
+            o += pad;

+            dst += o;
+            data += l;
+        } else {
+            o += pad;
+        }

+        w += o;
+        dlen -= l;
+    }

+    return w;
+}
diff --git a/src/liblimb/base.h/base_scan.c b/src/liblimb/base.h/base_scan.c
new file mode 100644
index 0000000..130ed02
--- /dev/null
+++ b/src/liblimb/base.h/base_scan.c
@@ -0,0 +1,66 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <errno.h>
+#include <limb/base.h>
+#include <limb/bytestr.h>
+#include <limb/u64.h>
+
+ssize_t /* length written to dst (or required, when dst is NULL); -1 with errno set to ERANGE (unsupported base) or EINVAL (invalid input) */
+base_scan(char *dst, int base, const char *data, size_t dlen, const char *alpha) /* decode dlen characters of data in the given base; alpha holds base digits followed by the padding character */
+{
+    size_t bits, bin, bout; /* bits per digit; input digits and output bytes per full group */
+    ssize_t w = 0; /* running total of decoded bytes */

+    switch (base) { /* a full group of bin digits of `bits` bits each becomes bout bytes */
+        case   2: bits = 1; bin = 8; bout = 1; break;
+        case   4: bits = 2; bin = 4; bout = 1; break;
+        case   8: bits = 3; bin = 8; bout = 3; break;
+        case  16: bits = 4; bin = 2; bout = 1; break;
+        case  32: bits = 5; bin = 8; bout = 5; break;
+        case  64: bits = 6; bin = 4; bout = 3; break;
+        case 128: bits = 7; bin = 8; bout = 7; break;
+        default: return (errno = ERANGE, -1);
+    }

+    while (dlen) { /* one group per iteration */
+        size_t l = (dlen > bin) ? bin : dlen; /* characters taken for this group, at most bin */
+        size_t e = (size_t) -1; /* position of the first padding character in this group, (size_t) -1 while none seen */
+        u64 u = 0;

+        for (size_t i = 0; i < l; ++i) {
+            int n = byte_chr(alpha, base + 1, data[i]); /* NOTE(review): appears to yield the position of data[i] in alpha, or base + 1 when absent -- confirm against byte_chr() */
+            if (n == base + 1)
+                return (errno = EINVAL, -1);
+            if (e == (size_t) -1) {
+                if (n < base)
+                    u |= (u64) n << (64 - bits * (i + 1)); /* store the digit in the i-th run of `bits` bits counted from the top of u */
+                else /* n == base, i.e. padding */
+                    e = i;
+            } else if (n < base) {
+                /* got something after padding */
+                return (errno = EINVAL, -1);
+            }
+        }
+        u64p_be(&u); /* NOTE(review): presumably makes the most significant byte of u the first byte in memory -- confirm in limb/u64.h */

+        size_t n = bout;
+        /* padding means writing less than bout bytes */
+        if (e != (size_t) -1)
+            n = bout * e / bin; /* whole bytes carried by the e digits that precede the padding */
+        /* less than bin bytes in /requires/ padding */
+        if (l < bin && e == (size_t) -1)
+            return (errno = EINVAL, -1);

+        if (dst) { /* when dst is NULL the input is only validated and measured */
+            memcpy(dst, &u, n);
+            dst += n;
+        }

+        w += n;
+        data += l;
+        dlen -= l;
+    }

+    return w;
+}
diff --git a/src/liblimb/base32.h/base32_fmt.c b/src/liblimb/base32.h/base32_fmt.c
new file mode 100644
index 0000000..ebeb7be
--- /dev/null
+++ b/src/liblimb/base32.h/base32_fmt.c
@@ -0,0 +1,11 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <limb/base.h>
+#include "base32.h"
+
+size_t /* result of base_fmt(), i.e. the length written to dst or required when dst is NULL */
+base32_fmt(char *dst, const void *data, size_t dlen, int strict) /* base32-encode dlen bytes of data; strict selects full padding instead of a single padding character */
+{
+    return base_fmt(dst, 32, data, dlen, base32, strict); /* 32 is a base handled by base_fmt(), so its ERANGE path is not taken */
+}
diff --git a/src/liblimb/base32.h/base32_scan.c b/src/liblimb/base32.h/base32_scan.c
new file mode 100644
index 0000000..92f7f18
--- /dev/null
+++ b/src/liblimb/base32.h/base32_scan.c
@@ -0,0 +1,13 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <limb/base.h>
+#include "base32.h"
+
+const char base32[33] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567="; /* 32 digits then '=' as padding; the literal fills all 33 elements, so no terminating NUL is stored */

+ssize_t /* result of base_scan(): decoded length, or -1 with errno set */
+base32_scan(char *dst, const char *data, size_t dlen) /* decode dlen characters of base32 text from data into dst (dst may be NULL) */
+{
+    return base_scan(dst, 32, data, dlen, base32);
+}
diff --git a/src/liblimb/base64.h/base64_fmt.c b/src/liblimb/base64.h/base64_fmt.c
new file mode 100644
index 0000000..2f4f612
--- /dev/null
+++ b/src/liblimb/base64.h/base64_fmt.c
@@ -0,0 +1,11 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <limb/base.h>
+#include "base64.h"
+
+size_t /* result of base_fmt(), i.e. the length written to dst or required when dst is NULL */
+base64_fmt(char *dst, const void *data, size_t dlen, int strict) /* base64-encode dlen bytes of data; strict selects full padding instead of a single padding character */
+{
+    return base_fmt(dst, 64, data, dlen, base64, strict); /* 64 is a base handled by base_fmt(), so its ERANGE path is not taken */
+}
diff --git a/src/liblimb/base64.h/base64_scan.c b/src/liblimb/base64.h/base64_scan.c
new file mode 100644
index 0000000..969d368
--- /dev/null
+++ b/src/liblimb/base64.h/base64_scan.c
@@ -0,0 +1,13 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <limb/base.h>
+#include "base64.h"
+
+const char base64[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; /* 64 digits then '=' as padding; the literal fills all 65 elements, so no terminating NUL is stored */

+ssize_t /* result of base_scan(): decoded length, or -1 with errno set */
+base64_scan(char *dst, const char *data, size_t dlen) /* decode dlen characters of base64 text from data into dst (dst may be NULL) */
+{
+    return base_scan(dst, 64, data, dlen, base64);
+}
diff --git a/src/liblimb/include/limb/base.h b/src/liblimb/include/limb/base.h
new file mode 100644
index 0000000..c5e0cea
--- /dev/null
+++ b/src/liblimb/include/limb/base.h
@@ -0,0 +1,12 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LIMB_BASE_H
+#define LIMB_BASE_H
+
+#include <sys/types.h> /* ssize_t */
+
+extern ssize_t base_fmt(char *dst, int base, const char *data, size_t dlen, const char *alpha, int strict); /* encode data in the given base with alphabet alpha; returns the output length, or -1 with errno set */
+extern ssize_t base_scan(char *dst, int base, const char *data, size_t dlen, const char *alpha); /* decode data in the given base with alphabet alpha; returns the output length, or -1 with errno set */
+
+# endif /* LIMB_BASE_H */
diff --git a/src/liblimb/include/limb/base32.h b/src/liblimb/include/limb/base32.h
new file mode 100644
index 0000000..cdaa70c
--- /dev/null
+++ b/src/liblimb/include/limb/base32.h
@@ -0,0 +1,12 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LIMB_BASE32_H
+#define LIMB_BASE32_H
+
+#include <sys/types.h> /* ssize_t */
+
+extern size_t base32_fmt(char *dst, const void *data, size_t dlen, int strict); /* base32-encode data into dst; returns the output length */
+extern ssize_t base32_scan(char *dst, const char *data, size_t dlen); /* base32-decode data into dst; returns the output length, or -1 with errno set */
+
+# endif /* LIMB_BASE32_H */
diff --git a/src/liblimb/include/limb/base64.h b/src/liblimb/include/limb/base64.h
new file mode 100644
index 0000000..a617121
--- /dev/null
+++ b/src/liblimb/include/limb/base64.h
@@ -0,0 +1,12 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LIMB_BASE64_H
+#define LIMB_BASE64_H
+
+#include <sys/types.h> /* ssize_t */
+
+extern size_t base64_fmt(char *dst, const void *data, size_t dlen, int strict); /* base64-encode data into dst; returns the output length */
+extern ssize_t base64_scan(char *dst, const char *data, size_t dlen); /* base64-decode data into dst; returns the output length, or -1 with errno set */
+
+# endif /* LIMB_BASE64_H */