author | Olivier Brunel
<jjk@jjacky.com> 2023-02-22 15:36:48 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-02-22 15:36:48 UTC |
parent | b12f4701fbbead12f55e0cb1e8c1616c41135143 |
doc/buffer_putescall.3.md | +94 | -0 |
doc/put.3.md | +10 | -0 |
include/limb/buffer.h | +10 | -0 |
include/limb/output.h | +2 | -0 |
meta/bins/mkrabintables | +3 | -0 |
meta/libs/limb | +4 | -0 |
src/buffer_putesc.c | +11 | -0 |
src/buffer_putescall.c | +83 | -0 |
src/buffer_putescs.c | +8 | -0 |
src/put.c | +13 | -3 |
diff --git a/doc/buffer_putescall.3.md b/doc/buffer_putescall.3.md new file mode 100644 index 0000000..9eef985 --- /dev/null +++ b/doc/buffer_putescall.3.md @@ -0,0 +1,94 @@ +% limb manual +% buffer_putescall(3) + +# NAME + +buffer\_putescall, buffer\_putesc, buffer\_putescs - write escaped text to a +buffer + +# SYNOPSIS + + #include <limb/buffer.h> + +```pre hl +size_t buffer_putescall(buffer *<em>b</em>, const char *<em>s</em>, size_t <em>len</em>, size_t *<em>pos</em>) +ssize_t buffer_putesc(buffer *<em>b</em>, const char *<em>s</em>, size_t <em>len</em>) +ssize_t buffer_putescs(buffer *<em>b</em>, const char *<em>s</em>) +``` + +# DESCRIPTION + +The `buffer_putescall`() function will write the content of `s` of length `len` +starting at position pointed by `pos` (usually 0) into buffer `b`, whilst +taking care of escaping characters as needed. This means more characters might +be written into `b` than present in `s`. + +The value pointed by `pos` will be updated to reflect the position inside +`s` afterwards. + +The `buffer_putesc`() function does the same always processing `s` from the +start. + +The `buffer_putescs`() function does the same as `buffer_putesc`() but expects +`s` to be a NUL-terminated string. + +# ESCAPING + +The escaping performed is intended to have the written value inside +double-quotes. 
Characters with special escaping are as follows : + +: double-quote (`"`) +:: Escaped by prefixing with a backslash (`\`) + +: backslash (`\`) +:: Escaped by prefixing with a backslash (`\`) + +: bell (`0x07`) +:: Escaped as `\a` + +: backspace (`0x08`) +:: Escaped as `\b` + +: tabulation (`0x09`) +:: Escaped as `\t` + +: line feed (`0x0a`) +:: Escaped as `\n` + +: vertical tabulation (`0x0b`) +:: Escaped as `\v` + +: form feed (`0x0c`) +:: Escaped as `\f` + +: carriage return (`0x0d`) +:: Escaped as `\r` + +Anything else will either be written as-is if recognized as a printable +character, else escaped in hex-mode, that is `\x` followed by the hexadecimal +code of the character. (For example, character 127 would be escaped as `\x7f`) + +# RETURN VALUE + +The `buffer_putescall`() function returns the number of bytes written into `b` +on success, which can be more than processed from `s` due to the escaping. +Otherwise, it returns 0 and sets `errno` to indicate the error. + +The `buffer_putesc`() and `buffer_putescs`() functions return the number of +bytes processed from `s` on success. Otherwise they return -1 and set `errno` +to indicate the error. + +# ERRORS + +The `buffer_putescall`() function may fail if : + +: *EINVAL* +:: `pos` was too high + +All of these may also fail and set `errno` for any of the errors specified for +[buffer_flush](3). + +# NOTES + +The behavior of these functions depends on the *LC_CTYPE* category of the current +locale. diff --git a/doc/put.3.md b/doc/put.3.md index af5a4ce..34cb41d 100644 --- a/doc/put.3.md +++ b/doc/put.3.md @@ -29,6 +29,16 @@ is flushed. If `PUT_DIE` was specified, the program then ends - calling [\_exit](3) with the value of `ret`. Note that `PUT_DIE` implies `PUT_LFF`. +# ESCAPING + +It is possible to pass the special constant `ESC` as one of the strings, so that +following strings will be escaped using [buffer_putesc](3). It can be specified +again to stop escaping and return to "normal" processing. 
+ +Additionally, a double quote (`"`) will be put in place of each `ESC` constant. + +This escaping can be enabled as many times as needed. + # FLAGS Values for `opts` are constructed by a bitwise-inclusive OR of flags from the diff --git a/include/limb/buffer.h b/include/limb/buffer.h new file mode 100644 index 0000000..fec9e7c --- /dev/null +++ b/include/limb/buffer.h @@ -0,0 +1,10 @@ +#ifndef LIMB_BUFFER_H +#define LIMB_BUFFER_H + +#include <skalibs/buffer.h> + +extern size_t buffer_putescall(buffer *b, const char *s, size_t len, size_t *pos); +extern ssize_t buffer_putesc(buffer *b, const char *s, size_t len); +extern ssize_t buffer_putescs(buffer *b, const char *s); + +#endif /* LIMB_BUFFER_H */ diff --git a/include/limb/output.h b/include/limb/output.h index 335d74e..a34c3bd 100644 --- a/include/limb/output.h +++ b/include/limb/output.h @@ -9,6 +9,8 @@ enum { PUT_LFF = (1 << 2), }; +#define ESC ((void *) 1) + extern void put(buffer *b, int e, unsigned int opts, const char * const *as, unsigned int n); extern const char *PROG; diff --git a/meta/bins/mkrabintables b/meta/bins/mkrabintables index 6e3e1ca..f86cb1c 100644 --- a/meta/bins/mkrabintables +++ b/meta/bins/mkrabintables @@ -1,4 +1,7 @@ obj/tools/mkrabintables.o obj/msb64.o +obj/buffer_putescall.o +obj/buffer_putescs.o +obj/buffer_putesc.o obj/put.o skalibs diff --git a/meta/libs/limb b/meta/libs/limb index e90f061..a5f43aa 100644 --- a/meta/libs/limb +++ b/meta/libs/limb @@ -7,6 +7,10 @@ obj/openc_exclat.o obj/openc_createat.o # djbunix.h obj/sareadlinkat.o +# buffer +obj/buffer_putescall.o +obj/buffer_putesc.o +obj/buffer_putescs.o # find msb obj/msb64.o # {,un}pack u64 diff --git a/src/buffer_putesc.c b/src/buffer_putesc.c new file mode 100644 index 0000000..b56c12e --- /dev/null +++ b/src/buffer_putesc.c @@ -0,0 +1,11 @@ +#include <errno.h> +#include "limb/buffer.h" + +ssize_t +buffer_putesc(buffer *b, const char *s, size_t len) +{ + size_t w = 0; + if (!buffer_putescall(b, s, len, &w)) + return -1; 
+ return w; +} diff --git a/src/buffer_putescall.c b/src/buffer_putescall.c new file mode 100644 index 0000000..370cafc --- /dev/null +++ b/src/buffer_putescall.c @@ -0,0 +1,83 @@ +#include <errno.h> +#include <string.h> +#include <ctype.h> +#include <wctype.h> +#include <wchar.h> +#include <skalibs/fmtscan.h> +#include "limb/buffer.h" + +size_t +buffer_putescall(buffer *b, const char *s, size_t len, size_t *pos) +{ + if (*pos > len) return (errno = EINVAL, 0); + + const char direct[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + char tmp[4] = "\\"; + size_t written = 0; + + for (;;) { + size_t w; + + /* common chars we can directly put */ + if (memchr(direct, s[*pos], sizeof(direct) - 1)) { + w = buffer_putnoflush(b, s + *pos, 1); + written += w; + } else { + const char *t; + size_t l; + + /* basic backslash escaping */ + if (s[*pos] == '\\' || s[*pos] == '"') { + tmp[1] = s[*pos]; + t = tmp; + l = 2; + w = 1; + /* simple backslash escaping */ + } else if (s[*pos] >= 7 && s[*pos] <= 13) { + const char esc[7] = "abtnvfr"; + tmp[1] = esc[s[*pos] - 7]; + t = tmp; + l = 2; + w = 1; + } else { + mbstate_t state = { 0 }; + /* try to get a multibyte char */ + wchar_t wc; + w = mbrtowc(&wc, s + *pos, len - *pos, &state); + /* if it is one and is printable, put the bytes */ + if (w && w != (size_t) -2 && w != (size_t) -1 && iswprint(wc)) { + t = s + *pos; + l = w; + /* just a single-byte char */ + } else if (isprint(s[*pos])) { + w = buffer_putnoflush(b, s + *pos, 1); + written += w; + goto next; + /* hexa-escaping */ + } else { + tmp[1] = 'x'; + ucharn_fmt(tmp + 2, s + *pos, 1); + t = tmp; + l = 4; + w = 1; + } + } + + size_t bw = 0; + if (!buffer_putall(b, t, l, &bw)) + return 0; + written += bw; + } + +next: + *pos += w; + if (*pos >= len) + return written; + + if (buffer_isfull(b)) { + buffer_flush(b); + if (buffer_isfull(b)) + return 0; + } + } +} diff --git a/src/buffer_putescs.c b/src/buffer_putescs.c new file mode 100644 index 
0000000..c1f6671 --- /dev/null +++ b/src/buffer_putescs.c @@ -0,0 +1,8 @@ +#include <string.h> +#include "limb/buffer.h" + +ssize_t +buffer_putescs(buffer *b, const char *s) +{ + return buffer_putesc(b, s, strlen(s)); +} diff --git a/src/put.c b/src/put.c index 5e66fc8..2c03351 100644 --- a/src/put.c +++ b/src/put.c @@ -1,5 +1,6 @@ #include <errno.h> #include <unistd.h> /* _exit() */ +#include "limb/buffer.h" #include "limb/output.h" void @@ -7,9 +8,18 @@ put(buffer *b, int r, unsigned int opts, const char * const *as, unsigned int n) { int e = errno; - unsigned int i; - for (i = 0; i < n; ++i) - if (as[i] && as[i][0]) buffer_puts(b, as[i]); + ssize_t (*puts) (buffer *b, const char *s) = buffer_puts; + for (unsigned i = 0; i < n; ++i) { + if (as[i] == ESC) { + buffer_put(b, "\"", 1); + if (puts == buffer_puts) + puts = buffer_putescs; + else + puts = buffer_puts; + } else if (as[i] && as[i][0]) { + puts(b, as[i]); + } + } if (opts & PUT_SYS) { buffer_put(b, ": ", 2);