Welcome to little lamb

Code » qmdoc » commit f6f196c

Make TOC actually work

author Olivier Brunel
2022-12-27 19:19:15 UTC
committer Olivier Brunel
2022-12-27 19:19:15 UTC
parent ff81ff2a54ba79d074c7b3caca108359a9eba9a8

Make TOC actually work

main.c +95 -16

diff --git a/main.c b/main.c
index cb37f88..7020258 100644
--- a/main.c
+++ b/main.c
@@ -65,8 +65,8 @@ struct ctx {
     int options;
     stralloc sa;
     size_t otoc;
+    size_t otitle;
     stralloc sa_out;
-    size_t ootoc;
     int toc_lvl;
     struct css *css;
     struct page *pages;
@@ -77,6 +77,7 @@ struct ctx {
         int from;
     } code;
     struct {
+        stralloc sa;
         size_t salen;
         int state;
     } buf;
@@ -99,11 +100,11 @@ raw_text(struct ctx *ctx, const char *text, size_t size)
 
     switch (ctx->buf.state) {
         case BUF_WAITING:
-            ctx->buf.salen = ctx->sa.len;
+            ctx->buf.salen = ctx->buf.sa.len;
             ctx->buf.state = BUF_ON;
             /* fall through */
         case BUF_ON:
-            sa = &ctx->sa;
+            sa = &ctx->buf.sa;
             break;
     }
 
@@ -142,6 +143,55 @@ escape_text(struct ctx *ctx, const char *text, size_t size)
     return 1;
 }
 
+/* Emit, via raw_text(), an anchor name built from text[0..size): anything
+ * from '<' through the next '>' is dropped, ASCII letters are lowercased,
+ * digits are kept, and every other byte becomes '_'.
+ * Returns whatever raw_text() returns. */
+static int
+anchor(struct ctx *ctx, const char *text, size_t size)
+{
+    char s[size ? size : 1]; /* a zero-length VLA is undefined behavior */
+    size_t len = 0;          /* bytes written to s so far */
+    int in_tag = 0;
+    for (size_t i = 0; i < size; ++i) {
+        char c = text[i];
+        if (c == '>') {
+            in_tag = 0;
+        } else if (in_tag) {
+            continue;
+        } else if (c == '<') {
+            in_tag = 1;
+        } else if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
+            s[len++] = c;
+        } else if (c >= 'A' && c <= 'Z') {
+            s[len++] = 'a' + c - 'A';
+        } else {
+            s[len++] = '_';
+        }
+    }
+    return raw_text(ctx, s, len);
+}
+
+/* Pass text[0..size) to escape_text(), leaving out everything from a '<'
+ * through the next '>'; a '<' with no later '>' is dropped on its own.
+ * Returns 0 once an escape_text() call returns 0, else the last call's result. */
+static int
+strip_tags(struct ctx *ctx, const char *text, size_t size)
+{
+    const char *lt;
+    while ((lt = memchr(text, '<', size))) {
+        if (!escape_text(ctx, text, lt - text))
+            return 0;
+        size -= lt + 1 - text;
+        text = lt + 1;
+        const char *gt = memchr(text, '>', size);
+        if (!gt) continue; /* unterminated: resume right after the '<' */
+        size -= gt + 1 - text;
+        text = gt + 1;
+    }
+    return escape_text(ctx, text, size);
+}
+
 static int
 highlight_escape_text(struct ctx *ctx, const char *text, size_t size)
 {
@@ -244,9 +294,7 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_)
                     /* remember positions for TOC */
                     if (i == ctx->cur_page) {
                         /* where to include it */
-                        ctx->ootoc = ctx->sa_out.len;
-                        /* where it is being created */
-                        ctx->otoc = ctx->sa.len;
+                        ctx->otoc = ctx->sa_out.len;
 
                         /* open it */
                         ctx->buf.state = BUF_ON;
@@ -298,6 +346,19 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_)
                 buf[uint32_fmt(buf, (uint32) d->level)] = '\0';
                 if (!raw_str(ctx, "<h") || !raw_str(ctx, buf) || !raw_str(ctx, ">"))
                     return ERR_PARSER_ENTER_BLOCK;
+
+                /* TOC */
+                ctx->buf.state = BUF_ON;
+                for ( ; ctx->toc_lvl < d->level; ++ctx->toc_lvl) {
+                    if (!raw_str(ctx, "<ul>"))
+                        return ERR_PARSER_TOC;
+                }
+                for ( ; ctx->toc_lvl > d->level; --ctx->toc_lvl) {
+                    if (!raw_str(ctx, "</ul>"))
+                        return ERR_PARSER_TOC;
+                }
+                ctx->buf.state = BUF_OFF;
+                ctx->otitle = ctx->sa_out.len;
             }
             break;
 
@@ -416,11 +477,28 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_)
 
         case MD_BLOCK_H:
             {
+                /* TOC */
+                const char *s = ctx->sa_out.s + ctx->otitle;
+                size_t l = ctx->sa_out.len - ctx->otitle;
+
                 MD_BLOCK_H_DETAIL *d = details;
                 char buf[UINT32_FMT];
                 buf[uint32_fmt(buf, (uint32) d->level)] = '\0';
-                if (!raw_str(ctx, "</h") || !raw_str(ctx, buf) || !raw_str(ctx, ">"))
+                if (!raw_str(ctx, "<a name=\"")
+                        || !anchor(ctx, s, l)
+                        || !raw_str(ctx, "\"></a>")
+                        || !raw_str(ctx, "</h") || !raw_str(ctx, buf) || !raw_str(ctx, ">"))
                     return ERR_PARSER_LEAVE_BLOCK;
+
+                /* TOC */
+                ctx->buf.state = BUF_ON;
+                if (!raw_str(ctx, "<li><a href=\"#")
+                        || !anchor(ctx, s, l)
+                        || !raw_str(ctx, "\">")
+                        || !strip_tags(ctx, s, l)
+                        || !raw_str(ctx, "</a></li>"))
+                    return ERR_PARSER_TOC;
+                ctx->buf.state = BUF_OFF;
             }
             break;
 
@@ -431,9 +509,9 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_)
                 if (!(ctx->code.flags & CODE_BUFFERED)) {
                     return (raw_str(ctx, "</pre>")) ? 0 : ERR_PARSER_LEAVE_BLOCK;
                 } else {
-                    const char *buf = ctx->sa.s + ctx->buf.salen;
-                    size_t blen = ctx->sa.len - ctx->buf.salen;
-                    ctx->sa.len = ctx->buf.salen;
+                    const char *buf = ctx->buf.sa.s + ctx->buf.salen;
+                    size_t blen = ctx->buf.sa.len - ctx->buf.salen;
+                    ctx->buf.sa.len = ctx->buf.salen;
                     ctx->buf.state = BUF_OFF;
 
                     if (ctx->code.flags & CODE_LINES) {
@@ -730,19 +808,18 @@ convert_page(struct ctx *ctx, int fddest)
 
     /* write output : */
     if (    /* up to TOC position */
-            allwrite(fd, ctx->sa_out.s, ctx->ootoc) != ctx->ootoc
+            allwrite(fd, ctx->sa_out.s, ctx->otoc) != ctx->otoc
             /* then the actual TOC */
-            || allwrite(fd, ctx->sa.s + ctx->otoc, ctx->sa.len - ctx->otoc)
-            != ctx->sa.len - ctx->otoc
+            || allwrite(fd, ctx->buf.sa.s, ctx->buf.sa.len) != ctx->buf.sa.len
             /* and the rest of the page */
-            || allwrite(fd, ctx->sa_out.s + ctx->ootoc, ctx->sa_out.len - ctx->ootoc)
-            != ctx->sa_out.len - ctx->ootoc
+            || allwrite(fd, ctx->sa_out.s + ctx->otoc, ctx->sa_out.len - ctx->otoc)
+            != ctx->sa_out.len - ctx->otoc
        )
         ret_strerr_warnwu1sys(ERR_IO, "write destination");
     fd_close(fd);
 
     /* reset TOC/buffer positions */
-    ctx->ootoc = ctx->otoc = ctx->sa_out.len = 0;
+    ctx->otoc = ctx->sa_out.len = ctx->buf.sa.len = 0;
 
     fd_close(p->fd);
 
@@ -888,6 +965,7 @@ main (int argc, char *argv[])
         .css = css,
         .pages = pages,
         .nb_pages = sizeof(pages) / sizeof(*pages),
+        .buf.sa = STRALLOC_ZERO,
     };
 
     outse("Scanning files...");
@@ -942,6 +1020,7 @@ main (int argc, char *argv[])
                                   ctx.sa.s + pages[i - optind].fileoff, "'");
     }
 
+    stralloc_free(&ctx.buf.sa);
     stralloc_free(&ctx.sa_out);
     stralloc_free(&ctx.sa);