Welcome to little lamb

Code » limb » commit 8dfdc46

escall_scan: Fix quote handling & support "stream mode"

author Olivier Brunel
2023-12-16 21:10:52 UTC
committer Olivier Brunel
2024-01-01 19:10:12 UTC
parent 7da00c17bf301ae7f596c3412d35a719d71318a1

escall_scan: Fix quote handling & support "stream mode"

- While double-quotes are expected to be escaped, this wasn't actually
  checked/enforced; fixed.
- Also, support use in a "stream mode", that is when there's not enough
  data in sce to fully unescape something, report ENODATA to give the
  caller a chance to fill its buffer and try again.
  So obviously positions in r & w are updated properly to allow such use.

src/doc/esc.h/escall_fmt.3.md +12 -1
src/liblimb/esc.h/escall_scan.c +7 -5

diff --git a/src/doc/esc.h/escall_fmt.3.md b/src/doc/esc.h/escall_fmt.3.md
index eabfcc2..5b38bbc 100644
--- a/src/doc/esc.h/escall_fmt.3.md
+++ b/src/doc/esc.h/escall_fmt.3.md
@@ -46,6 +46,13 @@ escaped via `escall_fmt`().
 Similarly to `escall_fmt`() the values pointed to by `r` and `w` are updated
 accordingly.
 
+! INFO: Stream mode
+! It is possible to use `escall_scan`() without having the full data stream yet.
+! That is, if it is not possible to perform an unescaping, it will fail with
+! *ENODATA* and both `r` and `w` will be updated as expected.
+! Notably, `r` will be set to the offset of the last byte actually read
+! and successfully processed, and `w` to that of the corresponding last byte written.
+
 The `esc_scan`() function is similar to `escall_scan`() only without the `r` and
 `w` arguments, and with different return values.
 
@@ -122,7 +129,11 @@ The `escall_scan`() and `esc_scan`() functions may also fail if :
 
 : *EINVAL*
 :: Data in `sce` is malformed/invalid. E.g. a backslash followed by other than
-:: an allowed byte.
+:: an allowed byte, or a non-escaped quote.
+
+: *ENODATA*
+:: Not enough data available in `sce`. E.g. a backslash as last byte, or
+:: followed by an `x` and fewer than 2 bytes.
 
 # NOTES
 
diff --git a/src/liblimb/esc.h/escall_scan.c b/src/liblimb/esc.h/escall_scan.c
index edbd9dd..d03c5b8 100644
--- a/src/liblimb/esc.h/escall_scan.c
+++ b/src/liblimb/esc.h/escall_scan.c
@@ -12,7 +12,7 @@ escall_scan(char *dst, size_t dlen, const char *sce, size_t slen, size_t *w, siz
     if (*w > dlen || *r > slen) return (errno = EINVAL, 0);
 
     while ((!dst || *w < dlen) && *r < slen) {
-        size_t n = byte_chr(sce + *r, slen - *r, '\\');
+        size_t n = byte_in(sce + *r, slen - *r, "\\\"", 2);
         if (dst) {
             if (n > dlen - *w)
                 n = dlen - *w;
@@ -23,26 +23,28 @@ escall_scan(char *dst, size_t dlen, const char *sce, size_t slen, size_t *w, siz
 
         if ((dst && *w == dlen) || *r == slen)
             break;
+        if (sce[*r] != '\\') return (errno = EINVAL, 0);
 
-        /* sce[*r] == '\\' */
+        if (*r + 1 == slen) return (errno = ENODATA, 0);
         ++*r;
         if (sce[*r] == '\\' || sce[*r] == '"') {
             if (dst) dst[*w] = sce[*r];
         } else if (sce[*r] == 'x') {
+            if (slen - *r < 2) return (--*r, errno = ENODATA, 0);
             ++*r;
             if (dst) {
                 char c = fmtscan_num(sce[*r], 16);
-                if (c >= 16) return (errno = EINVAL, 0);
+                if (c >= 16) return (*r -= 2, errno = EINVAL, 0);
                 dst[*w] = c << 4;
                 c = fmtscan_num(sce[*r + 1], 16);
-                if (c >= 16) return (errno = EINVAL, 0);
+                if (c >= 16) return (*r -= 2, errno = EINVAL, 0);
                 dst[*w] += c;
             }
             ++*r;
         } else {
             const char esc[7] = "abtnvfr";
             size_t n = byte_chr(esc, 7, sce[*r]);
-            if (n == 7) return (errno = EINVAL, 0);
+            if (n == 7) return (--*r, errno = EINVAL, 0);
             if (dst) dst[*w] = 7 + n;
         }
         ++*w;