Welcome to little lamb

Code » limb » commit 0bc778f

Add buffer_putesc{all,s,} and ESC support for put()

author Olivier Brunel
2023-02-22 15:36:48 UTC
committer Olivier Brunel
2023-02-22 15:36:48 UTC
parent b12f4701fbbead12f55e0cb1e8c1616c41135143

Add buffer_putesc{all,s,} and ESC support for put()

The buffer_putesc* family of functions allows writing text to a buffer
whilst having it automatically escaped. This is mainly intended for e.g.
path/file names to ensure they are always correctly named.

As such, it is possible to use the special constant ESC as strings for
put() to toggle this escaping, also processing this constant as a double
quote.

doc/buffer_putescall.3.md +94 -0
doc/put.3.md +10 -0
include/limb/buffer.h +10 -0
include/limb/output.h +2 -0
meta/bins/mkrabintables +3 -0
meta/libs/limb +4 -0
src/buffer_putesc.c +11 -0
src/buffer_putescall.c +83 -0
src/buffer_putescs.c +8 -0
src/put.c +13 -3

diff --git a/doc/buffer_putescall.3.md b/doc/buffer_putescall.3.md
new file mode 100644
index 0000000..9eef985
--- /dev/null
+++ b/doc/buffer_putescall.3.md
@@ -0,0 +1,94 @@
+% limb manual
+% buffer_putescall(3)
+
+# NAME
+
+buffer\_putescall, buffer\_putesc, buffer\_putescs - write escaped text to a
+buffer
+
+# SYNOPSIS
+
+    #include <limb/buffer.h>
+
+```pre hl
+size_t buffer_putescall(buffer *<em>b</em>, const char *<em>s</em>, size_t <em>len</em>, size_t *<em>pos</em>)
+ssize_t buffer_putesc(buffer *<em>b</em>, const char *<em>s</em>, size_t <em>len</em>)
+ssize_t buffer_putescs(buffer *<em>b</em>, const char *<em>s</em>)
+```
+
+# DESCRIPTION
+
+The `buffer_putescall`() function will write the content of `s` of length `len`
+starting at the position pointed to by `pos` (usually 0) into buffer `b`, whilst
+taking care of escaping characters as needed. This means more characters might
+be written into `b` than present in `s`.
+
+The value pointed to by `pos` will be updated to reflect the position inside
+`s` afterwards.
+
+The `buffer_putesc`() function does the same, always processing `s` from the
+start.
+
+The `buffer_putescs`() function does the same as `buffer_putesc`() but expects
+`s` to be a NUL-terminated string.
+
+# ESCAPING
+
+The escaping performed is intended to have the written value inside
+double-quotes. Characters with special escaping are as follows :
+
+: double-quote (`"`)
+:: Escaped by prefixing with a backslash (`\`)
+
+: backslash (`\`)
+:: Escaped by prefixing with a backslash (`\`)
+
+: bell (`0x07`)
+:: Escaped as `\a`
+
+: backspace (`0x08`)
+:: Escaped as `\b`
+
+: tabulation (`0x09`)
+:: Escaped as `\t`
+
+: line feed (`0x0a`)
+:: Escaped as `\n`
+
+: vertical tabulation (`0x0b`)
+:: Escaped as `\v`
+
+: form feed (`0x0c`)
+:: Escaped as `\f`
+
+: carriage return (`0x0d`)
+:: Escaped as `\r`
+
+Anything else will either be written as-is if recognized as a printable
+character, else escaped in hex-mode, that is `\x` followed by the hexadecimal
+code of the character. (For example, character 127 would be escaped as `\x7f`)
+
+# RETURN VALUE
+
+The `buffer_putescall`() function returns the number of bytes written into `b`
+on success, which can be more than processed from `s` due to the escaping.
+Otherwise, it returns 0 and sets `errno` to indicate the error.
+
+The `buffer_putesc`() and `buffer_putescs`() functions return the number of
+bytes processed from `s` on success. Otherwise they return -1 and set `errno`
+to indicate the error.
+
+# ERRORS
+
+The `buffer_putescall`() function may fail if :
+
+: *EINVAL*
+:: `pos` was too high
+
+All of these may also fail and set `errno` for any of the errors specified for
+[buffer_flush](3).
+
+# NOTES
+
+The behavior of these functions depends on the *LC_CTYPE* category of the
+current locale.
diff --git a/doc/put.3.md b/doc/put.3.md
index af5a4ce..34cb41d 100644
--- a/doc/put.3.md
+++ b/doc/put.3.md
@@ -29,6 +29,16 @@ is flushed.
 If `PUT_DIE` was specified, the program then ends - calling [\_exit](3) with
 the value of `ret`. Note that `PUT_DIE` implies `PUT_LFF`.
 
+# ESCAPING
+
+It is possible to pass the special constant `ESC` as one of the strings, so that
+following strings will be escaped using [buffer_putesc](3). It can be specified
+again to stop escaping and return to "normal" processing.
+
+Additionally, a double quote (`"`) will be put in place of each `ESC` constant.
+
+This escaping can be enabled as many times as needed.
+
 # FLAGS
 
 Values for `opts` are constructed by a bitwise-inclusive OR of flags from the
diff --git a/include/limb/buffer.h b/include/limb/buffer.h
new file mode 100644
index 0000000..fec9e7c
--- /dev/null
+++ b/include/limb/buffer.h
@@ -0,0 +1,10 @@
+/* Escaping writers built on top of skalibs' buffer. */
+#ifndef LIMB_BUFFER_H
+#define LIMB_BUFFER_H
+
+#include <skalibs/buffer.h>
+
+/* Write s (of length len) into b starting at offset *pos, escaping characters
+ * as needed; *pos is updated to the position reached inside s.
+ * Returns the number of bytes written into b, or 0 with errno set on error. */
+extern size_t buffer_putescall(buffer *b, const char *s, size_t len, size_t *pos);
+/* Same as buffer_putescall() but always processing s from its start.
+ * Returns the number of bytes of s processed, or -1 on error. */
+extern ssize_t buffer_putesc(buffer *b, const char *s, size_t len);
+/* Same as buffer_putesc() for a NUL-terminated string s. */
+extern ssize_t buffer_putescs(buffer *b, const char *s);
+
+#endif /* LIMB_BUFFER_H */
diff --git a/include/limb/output.h b/include/limb/output.h
index 335d74e..a34c3bd 100644
--- a/include/limb/output.h
+++ b/include/limb/output.h
@@ -9,6 +9,8 @@ enum {
     PUT_LFF     = (1 << 2),
 };
 
+/* Special value to pass as one of the strings given to put(): a double quote
+ * is written in its place and escaping of the following strings is toggled. */
+#define ESC                 ((void *) 1)
+
 extern void put(buffer *b, int e, unsigned int opts, const char * const *as, unsigned int n);
 
 extern const char *PROG;
diff --git a/meta/bins/mkrabintables b/meta/bins/mkrabintables
index 6e3e1ca..f86cb1c 100644
--- a/meta/bins/mkrabintables
+++ b/meta/bins/mkrabintables
@@ -1,4 +1,7 @@
 obj/tools/mkrabintables.o
 obj/msb64.o
+obj/buffer_putescall.o
+obj/buffer_putescs.o
+obj/buffer_putesc.o
 obj/put.o
 skalibs
diff --git a/meta/libs/limb b/meta/libs/limb
index e90f061..a5f43aa 100644
--- a/meta/libs/limb
+++ b/meta/libs/limb
@@ -7,6 +7,10 @@ obj/openc_exclat.o
 obj/openc_createat.o
 # djbunix.h
 obj/sareadlinkat.o
+# buffer
+obj/buffer_putescall.o
+obj/buffer_putesc.o
+obj/buffer_putescs.o
 # find msb
 obj/msb64.o
 # {,un}pack u64
diff --git a/src/buffer_putesc.c b/src/buffer_putesc.c
new file mode 100644
index 0000000..b56c12e
--- /dev/null
+++ b/src/buffer_putesc.c
@@ -0,0 +1,11 @@
+#include <errno.h>
+#include "limb/buffer.h"
+
+/*
+ * Write the len bytes of s into b, escaped, always starting from the beginning
+ * of s.
+ *
+ * Returns the number of bytes of s that were processed (0 when len is 0), or
+ * -1 if buffer_putescall() reported a failure.
+ */
+ssize_t
+buffer_putesc(buffer *b, const char *s, size_t len)
+{
+    /* Nothing to write. This must be handled here: buffer_putescall() returns 0
+     * to signal an error, so it has no way to report "success, 0 bytes", and
+     * calling it with len == 0 would have it look at s[0], which is not part of
+     * the input. */
+    if (!len)
+        return 0;
+
+    size_t w = 0;
+    if (!buffer_putescall(b, s, len, &w))
+        return -1;
+    /* w is the position reached inside s, i.e. the number of bytes processed */
+    return w;
+}
diff --git a/src/buffer_putescall.c b/src/buffer_putescall.c
new file mode 100644
index 0000000..370cafc
--- /dev/null
+++ b/src/buffer_putescall.c
@@ -0,0 +1,83 @@
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <wctype.h>
+#include <wchar.h>
+#include <skalibs/fmtscan.h>
+#include "limb/buffer.h"
+
+/*
+ * Write the bytes of s from offset *pos up to len into b, escaped so that the
+ * result can be placed inside double quotes:
+ * - '"' and '\' are prefixed with a backslash;
+ * - bytes 7 to 13 are written as \a \b \t \n \v \f \r respectively;
+ * - characters recognized as printable are written as-is;
+ * - anything else is written as \x followed by two characters produced by
+ *   ucharn_fmt() for that byte.
+ *
+ * *pos is advanced past every byte of s that has been processed.
+ *
+ * Returns the number of bytes written into b, or 0 on failure. errno is set to
+ * EINVAL when *pos does not designate a byte of s (*pos >= len).
+ */
+size_t
+buffer_putescall(buffer *b, const char *s, size_t len, size_t *pos)
+{
+    /* 0 is also the error return, so an empty remainder (*pos == len) cannot be
+     * reported as a success; refusing it also guarantees s[*pos] below is always
+     * within the len bytes of s */
+    if (*pos >= len) return (errno = EINVAL, 0);
+
+    static const char direct[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    /* scratch for escape sequences: backslash, one char, up to 2 more for hex */
+    char tmp[4] = "\\";
+    size_t written = 0;
+
+    for (;;) {
+        /* number of bytes of s consumed during this iteration */
+        size_t w;
+
+        /* common chars we can directly put */
+        if (memchr(direct, s[*pos], sizeof(direct) - 1)) {
+            /* NOTE(review): presumably yields 0 when b has no room left; *pos
+             * then stays put and the byte is retried after the flush below */
+            w = buffer_putnoflush(b, s + *pos, 1);
+            written += w;
+        } else {
+            const char *t;
+            size_t l;
+
+            /* basic backslash escaping */
+            if (s[*pos] == '\\' || s[*pos] == '"') {
+                tmp[1] = s[*pos];
+                t = tmp;
+                l = 2;
+                w = 1;
+            /* simple backslash escaping */
+            } else if (s[*pos] >= 7 && s[*pos] <= 13) {
+                const char esc[7] = "abtnvfr";
+                tmp[1] = esc[s[*pos] - 7];
+                t = tmp;
+                l = 2;
+                w = 1;
+            } else {
+                mbstate_t state = { 0 };
+                /* try to get a multibyte char */
+                wchar_t wc;
+                w = mbrtowc(&wc, s + *pos, len - *pos, &state);
+                /* if it is one and is printable, put the bytes */
+                if (w && w != (size_t) -2 && w != (size_t) -1 && iswprint(wc)) {
+                    t = s + *pos;
+                    l = w;
+                /* just a single-byte char; the cast is required, passing a
+                 * negative char to isprint() is undefined behavior */
+                } else if (isprint((unsigned char) s[*pos])) {
+                    w = buffer_putnoflush(b, s + *pos, 1);
+                    written += w;
+                    goto next;
+                /* hexa-escaping */
+                } else {
+                    tmp[1] = 'x';
+                    ucharn_fmt(tmp + 2, s + *pos, 1);
+                    t = tmp;
+                    l = 4;
+                    w = 1;
+                }
+            }
+
+            size_t bw = 0;
+            if (!buffer_putall(b, t, l, &bw))
+                return 0;
+            written += bw;
+        }
+
+next:
+        *pos += w;
+        if (*pos >= len)
+            return written;
+
+        /* make room before the next byte; give up if flushing freed nothing */
+        if (buffer_isfull(b)) {
+            buffer_flush(b);
+            if (buffer_isfull(b))
+                return 0;
+        }
+    }
+}
diff --git a/src/buffer_putescs.c b/src/buffer_putescs.c
new file mode 100644
index 0000000..c1f6671
--- /dev/null
+++ b/src/buffer_putescs.c
@@ -0,0 +1,8 @@
+#include <string.h>
+#include "limb/buffer.h"
+
+/*
+ * Same as buffer_putesc() for a NUL-terminated string: the length handed over
+ * is the one reported by strlen().
+ */
+ssize_t
+buffer_putescs(buffer *b, const char *s)
+{
+    size_t len = strlen(s);
+
+    return buffer_putesc(b, s, len);
+}
diff --git a/src/put.c b/src/put.c
index 5e66fc8..2c03351 100644
--- a/src/put.c
+++ b/src/put.c
@@ -1,5 +1,6 @@
 #include <errno.h>
 #include <unistd.h> /* _exit() */
+#include "limb/buffer.h"
 #include "limb/output.h"
 
 void
@@ -7,9 +8,18 @@ put(buffer *b, int r, unsigned int opts, const char * const *as, unsigned int n)
 {
     int e = errno;
 
-    unsigned int i;
-    for (i = 0; i < n; ++i)
-        if (as[i] && as[i][0]) buffer_puts(b, as[i]);
+    ssize_t (*puts) (buffer *b, const char *s) = buffer_puts;
+    for (unsigned i = 0; i < n; ++i) {
+        if (as[i] == ESC) {
+            buffer_put(b, "\"", 1);
+            if (puts == buffer_puts)
+                puts = buffer_putescs;
+            else
+                puts = buffer_puts;
+        } else if (as[i] && as[i][0]) {
+            puts(b, as[i]);
+        }
+    }
 
     if (opts & PUT_SYS) {
         buffer_put(b, ": ", 2);