author | Olivier Brunel
<jjk@jjacky.com> 2023-04-17 17:20:53 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-05-20 18:06:37 UTC |
parent | 3aa5cac08ad0974b0741d84bc8753d04adcc3ad1 |
src/doc/buffer.h/buffer_putescall.3.md | +1 | -35 |
src/doc/esc.h.0.md | +32 | -0 |
src/doc/esc.h/escall_fmt.3.md | +128 | -0 |
src/liblimb/buffer.h/buffer_putescall.c | +6 | -64 |
src/liblimb/esc.h/esc_fmt.c | +14 | -0 |
src/liblimb/esc.h/esc_scan.c | +14 | -0 |
src/liblimb/esc.h/escall_fmt.c | +76 | -0 |
src/liblimb/esc.h/escall_scan.c | +45 | -0 |
src/liblimb/include/limb/esc.h | +15 | -0 |
src/mkrabintables/escall_fmt.o | +1 | -0 |
diff --git a/src/doc/buffer.h/buffer_putescall.3.md b/src/doc/buffer.h/buffer_putescall.3.md index 9eef985..2bbabee 100644 --- a/src/doc/buffer.h/buffer_putescall.3.md +++ b/src/doc/buffer.h/buffer_putescall.3.md @@ -32,41 +32,7 @@ start. The `buffer_putescs`() function does the same as `buffer_putesc`() but expects `s` to be a NUL-terminated string. -# ESCAPING - -The escaping performed is intended to have the written value inside -double-quotes. Characters with special escaping are as follow : - -: double-quote (`"`) -:: Escaped by prefixing with a backslash (`\`) - -: backslash (`\`) -:: Escaped by prefixing with a backslash (`\`) - -: bell (`0x07`) -:: Escaped as `\a` - -: backspace (`0x08`) -:: Escaped as `\b` - -: tabulation (`0x09`) -:: Escaped as `\t` - -: line feed (`0x0a`) -:: Escaped as `\n` - -: vertical tabulation (`0x0b`) -:: Escaped as `\v` - -: form feed (`0x0c`) -:: Escaped as `\f` - -: carriage return (`0x0d`) -:: Escaped as `\r` - -Anything else will either be written as-in if recognized as a printable -character, else escaped in hex-mode, that is `\x` followed by the hexadecimal -code of the character. (For example, character 127 would be escaped as `\x7f`) +For more about the escaping performed, refer to [esc_fmt](3). # RETURN VALUE diff --git a/src/doc/esc.h.0.md b/src/doc/esc.h.0.md new file mode 100644 index 0000000..76ae796 --- /dev/null +++ b/src/doc/esc.h.0.md @@ -0,0 +1,32 @@ +% limb manual +% esc.h(0) + +# NAME + +esc.h - escaping/unescaping data + + +# SYNOPSIS + + #include <limb/esc.h> + + +# DESCRIPTION + +The header defines functions needed to escape/unescape data. + +## Functions + +The following functions are defined : + +: [escall_fmt](3) +:: To escape given text/data. + +: [escall_scan](3) +:: To unescape data previously escaped via [escall_fmt](3). + +: [esc_fmt](3) +:: Similar to [escall_fmt](3) but doesn't handle partial processing. + +: [esc_scan](3) +:: Similar to [escall_scan](3) but doesn't handle partial processing. 
diff --git a/src/doc/esc.h/escall_fmt.3.md b/src/doc/esc.h/escall_fmt.3.md new file mode 100644 index 0000000..20a4168 --- /dev/null +++ b/src/doc/esc.h/escall_fmt.3.md @@ -0,0 +1,128 @@ +% limb manual +% escall_fmt(3) + +# NAME + +escall\_fmt, esc\_fmt - escape given text/data + +# SYNOPSIS + + #include <limb/esc.h> + +```pre hl +int escall_fmt(char *<em>dst</em>, size_t <em>dlen</em>, const char *<em>sce</em>, size_t <em>slen</em>, size_t *<em>w</em>, size_t *<em>r</em>) +int escall_scan(char *<em>dst</em>, size_t <em>dlen</em>, const char *<em>sce</em>, size_t <em>slen</em>, size_t *<em>w</em>, size_t *<em>r</em>) + +ssize_t esc_fmt(char *<em>dst</em>, size_t <em>dlen</em>, const char *<em>sce</em>, size_t <em>slen</em>) +ssize_t esc_scan(char *<em>dst</em>, size_t <em>dlen</em>, const char *<em>sce</em>, size_t <em>slen</em>) +``` + +# DESCRIPTION + +The `escall_fmt`() function will write the content of `sce` of length `slen` +bytes, starting at offset pointed by `r` (usually 0), into the memory area +pointed by `dst` starting at position `w` (usually 0) and never going past +`dlen`, whilst taking care of escaping characters as needed. This means more +characters might be written into `dst` than present in `sce`. + +The values pointed to by `r` and `w` are updated accordingly to reflect the +positions of the last read in `sce` and write in `dst`, respectively. +Specifically, if a byte couldn't be escaped for lack of space in `dst`, the +value pointed to by `r` would remain on the last successfully processed byte, +and no "partial write" would have occurred in `dst`. + +If `dst` is *NULL* then nothing is written, the data in `sce` is still processed +and both `r` and `w` (/both/ mandatory) updated accordingly. + +The `esc_fmt`() function is similar, only without the `r` and `w` arguments, and +different return values. 
+ +The `escall_scan`() function will read the data pointed to by `sce` of length +`slen` bytes, starting at offset pointed by `r` (usually 0), and write in the +memory area pointed by `dst` (up to `dlen` bytes) starting at offset pointed by +`w` the result of unescaping said data, expected to have been previously +escaped via `escall_fmt`(). + +Similarly to `escall_fmt`() the values pointed to by `r` and `w` are updated +accordingly. + +The `esc_scan`() function is similar to `escall_scan`() only without the `r` and +`w` arguments, and different return values. + +# ESCAPING + +The escaping performed is intended to have the written value inside +double-quotes. Characters with special escaping are as follows : + +: double-quote (`"`) +:: Escaped by prefixing with a backslash (`\`) + +: backslash (`\`) +:: Escaped by prefixing with a backslash (`\`) + +: bell (`0x07`) +:: Escaped as `\a` + +: backspace (`0x08`) +:: Escaped as `\b` + +: tabulation (`0x09`) +:: Escaped as `\t` + +: line feed (`0x0a`) +:: Escaped as `\n` + +: vertical tabulation (`0x0b`) +:: Escaped as `\v` + +: form feed (`0x0c`) +:: Escaped as `\f` + +: carriage return (`0x0d`) +:: Escaped as `\r` + +Anything else will either be written as-is if recognized as a printable +character in the current locale, else escaped in hex-mode, that is `\x` followed +by the hexadecimal code of the character. (For example, character 127 would be +escaped as `\x7f`) + +# RETURN VALUE + +The `escall_fmt`() and `escall_scan`() functions return 1 on success. Otherwise +they return 0 and set `errno` to indicate the error. + +In either case, values pointed to by `r` and `w` will have been updated to +reflect the positions of the last read in `sce` and write in `dst`, +respectively. + +The `esc_fmt`() and `esc_scan`() functions return the number of bytes written +into `dst` on success. Otherwise they return -1 and set `errno` to indicate the +error. 
+ +# ERRORS + +The `escall_fmt`(), `esc_fmt`(), `escall_scan`() and `esc_scan`() function may +fail if : + +: *ENOBUFS* +:: Not enough space in `dst`. + +The `escall_fmt`() and `escall_scan`() functions may also fail if : + +: *EINVAL* +:: Either `r` or `w` was too high (more than `slen` or `dlen`, respectively). + +The `escall_scan`() and `esc_scan`() functions may also fail if : + +: *EINVAL* +:: Data in `sce` is malformed/invalid. E.g. a backslash followed by other than +:: an allowed byte. + +# NOTES + +The behavior of the `escall_fmt`() and `esc_fmt`() functions depend on the +*LC_CTYPE* category of the current locale. + +# SEE ALSO + +[buffer_putesc](3) diff --git a/src/liblimb/buffer.h/buffer_putescall.c b/src/liblimb/buffer.h/buffer_putescall.c index 69a2ed9..a8681db 100644 --- a/src/liblimb/buffer.h/buffer_putescall.c +++ b/src/liblimb/buffer.h/buffer_putescall.c @@ -2,80 +2,22 @@ * Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */ /* SPDX-License-Identifier: GPL-2.0-only */ #include <errno.h> -#include <string.h> -#include <ctype.h> -#include <wctype.h> -#include <wchar.h> -#include <skalibs/fmtscan.h> #include <limb/buffer.h> +#include <limb/esc.h> size_t buffer_putescall(buffer *b, const char *s, size_t len, size_t *pos) { if (*pos > len) return (errno = EINVAL, 0); - - const char direct[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - char tmp[4] = "\\"; size_t written = 0; for (;;) { - size_t w; - - /* common chars we can directly put */ - if (memchr(direct, s[*pos], sizeof(direct) - 1)) { - w = buffer_putnoflush(b, s + *pos, 1); - written += w; - } else { - const char *t; - size_t l; - - /* basic backslash escaping */ - if (s[*pos] == '\\' || s[*pos] == '"') { - tmp[1] = s[*pos]; - t = tmp; - l = 2; - w = 1; - /* simple backslash escaping */ - } else if (s[*pos] >= 7 && s[*pos] <= 13) { - const char esc[7] = "abtnvfr"; - tmp[1] = esc[s[*pos] - 7]; - t = tmp; - l = 2; - w = 1; - } else { - mbstate_t state = { 0 }; - /* try to 
get a multibyte char */ - wchar_t wc; - w = mbrtowc(&wc, s + *pos, len - *pos, &state); - /* if it is one and is printable, put the bytes */ - if (w && w != (size_t) -2 && w != (size_t) -1 && iswprint(wc)) { - t = s + *pos; - l = w; - /* just a single-byte char */ - } else if (isprint(s[*pos])) { - w = buffer_putnoflush(b, s + *pos, 1); - written += w; - goto next; - /* hexa-escaping */ - } else { - tmp[1] = 'x'; - ucharn_fmt(tmp + 2, s + *pos, 1); - t = tmp; - l = 4; - w = 1; - } - } - - size_t bw = 0; - if (!buffer_putall(b, t, l, &bw)) - return 0; - written += bw; - } + char buf[64]; + size_t w = 0; -next: - *pos += w; - if (*pos >= len) - return written; + escall_fmt(buf, sizeof(buf), s, len, &w, pos); + written += buffer_putnoflush(b, buf, w); + if (*pos >= len) return written; if (buffer_isfull(b)) { buffer_flush(b); diff --git a/src/liblimb/esc.h/esc_fmt.c b/src/liblimb/esc.h/esc_fmt.c new file mode 100644 index 0000000..e179ef3 --- /dev/null +++ b/src/liblimb/esc.h/esc_fmt.c @@ -0,0 +1,14 @@ +/* This file is part of limb https://lila.oss/limb + * Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <limb/esc.h> + +ssize_t +esc_fmt(char *dst, size_t dlen, const char *sce, size_t slen) +{ + size_t w = 0, r = 0; + if (escall_fmt(dst, dlen, sce, slen, &w, &r)) + return w; + else + return -1; +} diff --git a/src/liblimb/esc.h/esc_scan.c b/src/liblimb/esc.h/esc_scan.c new file mode 100644 index 0000000..5b5e661 --- /dev/null +++ b/src/liblimb/esc.h/esc_scan.c @@ -0,0 +1,14 @@ +/* This file is part of limb https://lila.oss/limb + * Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <limb/esc.h> + +ssize_t +esc_scan(char *dst, size_t dlen, const char *sce, size_t slen) +{ + size_t w = 0, r = 0; + if (escall_scan(dst, dlen, sce, slen, &w, &r)) + return w; + else + return -1; +} diff --git a/src/liblimb/esc.h/escall_fmt.c b/src/liblimb/esc.h/escall_fmt.c new 
file mode 100644 index 0000000..a95e1bc --- /dev/null +++ b/src/liblimb/esc.h/escall_fmt.c @@ -0,0 +1,76 @@ +/* This file is part of limb https://lila.oss/limb + * Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <ctype.h> +#include <errno.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> +#include <skalibs/fmtscan.h> +#include <limb/esc.h> + +int +escall_fmt(char *dst, size_t dlen, const char *sce, size_t slen, size_t *w, size_t *r) +{ + if (*w > dlen || *r > slen) return (errno = EINVAL, 0); + + while ((!dst || *w < dlen) && *r < slen) { + /* printable ASCII chars we can directly put */ + if (sce[*r] != '\\' && sce[*r] != '"' && sce[*r] >= 32 && sce[*r] <= 126) { + if (dst) dst[*w] = sce[*r]; + ++*w; + ++*r; + } else { + /* basic backslash escaping */ + if (sce[*r] == '\\' || sce[*r] == '"') { + if (*w + 2 > dlen) return (errno = ENOBUFS, 0); + if (dst) { + dst[*w] = '\\'; + dst[*w + 1] = sce[*r]; + } + *w += 2; + *r += 1; + /* simple backslash escaping */ + } else if (sce[*r] >= 7 && sce[*r] <= 13) { + if (*w + 2 > dlen) return (errno = ENOBUFS, 0); + if (dst) { + const char esc[7] = "abtnvfr"; + dst[*w] = '\\'; + dst[*w + 1] = esc[sce[*r] - 7]; + } + *w += 2; + *r += 1; + } else { + mbstate_t state = { 0 }; + /* try to get a multibyte char */ + wchar_t wc; + size_t l = mbrtowc(&wc, sce + *r, slen - *r, &state); + /* if it is one and is printable, put the bytes */ + if (l && l != (size_t) -2 && l != (size_t) -1 && iswprint(wc)) { + if (*w + l > dlen) return (errno = ENOBUFS, 0); + if (dst) memcpy(dst + *w, sce + *r, l); + *w += l; + *r += l; + /* just a single-byte char */ + } else if (isprint(sce[*r])) { + if (dst) dst[*w] = sce[*r]; + ++*w; + ++*r; + /* hexa-escaping */ + } else { + if (*w + 4 > dlen) return (errno = ENOBUFS, 0); + if (dst) { + dst[*w] = '\\'; + dst[*w + 1] = 'x'; + ucharn_fmt(dst + *w + 2, sce + *r, 1); + } + *w += 4; + *r += 1; + } + } + } + } + if (*r < slen) return 
(errno = ENOBUFS, 0); + + return 1; +} diff --git a/src/liblimb/esc.h/escall_scan.c b/src/liblimb/esc.h/escall_scan.c new file mode 100644 index 0000000..abe1000 --- /dev/null +++ b/src/liblimb/esc.h/escall_scan.c @@ -0,0 +1,45 @@ +/* This file is part of limb https://lila.oss/limb + * Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <errno.h> +#include <skalibs/fmtscan.h> +#include <limb/bytestr.h> +#include <limb/esc.h> + +int +escall_scan(char *dst, size_t dlen, const char *sce, size_t slen, size_t *w, size_t *r) +{ + if (*w > dlen || *r > slen) return (errno = EINVAL, 0); + + while ((!dst || *w < dlen) && *r < slen) { + if (sce[*r] == '\\') { + ++*r; + if (sce[*r] == '\\' || sce[*r] == '"') { + if (dst) dst[*w] = sce[*r]; + } else if (sce[*r] == 'x') { + ++*r; + if (dst) { + char c = fmtscan_num(sce[*r], 16); + if (c >= 16) return (errno = EINVAL, 0); + dst[*w] = c << 4; + c = fmtscan_num(sce[*r + 1], 16); + if (c >= 16) return (errno = EINVAL, 0); + dst[*w] += c; + } + ++*r; + } else { + const char esc[7] = "abtnvfr"; + size_t n = byte_chr(esc, 7, sce[*r]); + if (n == 7) return (errno = EINVAL, 0); + if (dst) dst[*w] = 7 + n; + } + } else { + if (dst) dst[*w] = sce[*r]; + } + ++*w; + ++*r; + } + if (*r < slen) return (errno = ENOBUFS, 0); + + return 1; +} diff --git a/src/liblimb/include/limb/esc.h b/src/liblimb/include/limb/esc.h new file mode 100644 index 0000000..56e1456 --- /dev/null +++ b/src/liblimb/include/limb/esc.h @@ -0,0 +1,15 @@ +/* This file is part of limb https://lila.oss/limb + * Copyright (C) 2023 Olivier Brunel jjk@jjacky.com */ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef LIMB_ESC_H +#define LIMB_ESC_H + +#include <sys/types.h> /* {,s}size_t */ + +extern int escall_fmt(char *dst, size_t dlen, const char *sce, size_t slen, size_t *w, size_t *r); +extern int escall_scan(char *dst, size_t dlen, const char *sce, size_t slen, size_t *w, size_t *r); + +extern ssize_t esc_fmt(char 
*dst, size_t dlen, const char *sce, size_t slen); +extern ssize_t esc_scan(char *dst, size_t dlen, const char *sce, size_t slen); + +#endif /* LIMB_ESC_H */ diff --git a/src/mkrabintables/escall_fmt.o b/src/mkrabintables/escall_fmt.o new file mode 120000 index 0000000..45e3e20 --- /dev/null +++ b/src/mkrabintables/escall_fmt.o @@ -0,0 +1 @@ +liblimb/esc.h/escall_fmt.o \ No newline at end of file