author | Olivier Brunel
<jjk@jjacky.com> 2022-12-27 19:19:15 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2022-12-27 19:19:15 UTC |
parent | ff81ff2a54ba79d074c7b3caca108359a9eba9a8 |
main.c | +95 | -16 |
diff --git a/main.c b/main.c index cb37f88..7020258 100644 --- a/main.c +++ b/main.c @@ -65,8 +65,8 @@ struct ctx { int options; stralloc sa; size_t otoc; + size_t otitle; stralloc sa_out; - size_t ootoc; int toc_lvl; struct css *css; struct page *pages; @@ -77,6 +77,7 @@ struct ctx { int from; } code; struct { + stralloc sa; size_t salen; int state; } buf; @@ -99,11 +100,11 @@ raw_text(struct ctx *ctx, const char *text, size_t size) switch (ctx->buf.state) { case BUF_WAITING: - ctx->buf.salen = ctx->sa.len; + ctx->buf.salen = ctx->buf.sa.len; ctx->buf.state = BUF_ON; /* fall through */ case BUF_ON: - sa = &ctx->sa; + sa = &ctx->buf.sa; break; } @@ -142,6 +143,55 @@ escape_text(struct ctx *ctx, const char *text, size_t size) return 1; } +static int +anchor(struct ctx *ctx, const char *text, size_t size) +{ + char s[size]; + size_t skipped = 0; + int skip = 0; + for (int i = 0; i < size; ++i) { + if (text[i] == '>') { + ++skipped; + skip = 0; + } else if (skip) { + ++skipped; + } + else if (text[i] >= 'a' && text[i] <= 'z') { + s[i - skipped] = text[i]; + } else if (text[i] >= 'A' && text[i] <= 'Z') { + s[i - skipped] = 'a' + text[i] - 'A'; + } else if (text[i] >= '0' && text[i] <= '9') { + s[i - skipped] = text[i]; + } else if (text[i] == '<') { + ++skipped; + skip = 1; + } else { + s[i - skipped] = '_'; + } + } + return raw_text(ctx, s, size - skipped); +} + +static int +strip_tags(struct ctx *ctx, const char *text, size_t size) +{ + const char *s; + for(;;) { + s = memchr(text, '<', size); + if (!s) break; + if (!escape_text(ctx, text, s - text)) + return 0; + size -= ++s - text; + text = s; + s = memchr(text, '>', size); + if (s) { + size -= ++s - text; + text = s; + } + } + return escape_text(ctx, text, size); +} + static int highlight_escape_text(struct ctx *ctx, const char *text, size_t size) { @@ -244,9 +294,7 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) /* remember positions for TOC */ if (i == ctx->cur_page) { /* where to include it */ - ctx->ootoc = ctx->sa_out.len; - /* where it is being created */ - ctx->otoc = ctx->sa.len; + ctx->otoc = ctx->sa_out.len; /* open it */ ctx->buf.state = BUF_ON; @@ -298,6 +346,19 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) buf[uint32_fmt(buf, (uint32) d->level)] = '\0'; if (!raw_str(ctx, "<h") || !raw_str(ctx, buf) || !raw_str(ctx, ">")) return ERR_PARSER_ENTER_BLOCK; + + /* TOC */ + ctx->buf.state = BUF_ON; + for ( ; ctx->toc_lvl < d->level; ++ctx->toc_lvl) { + if (!raw_str(ctx, "<ul>")) + return ERR_PARSER_TOC; + } + for ( ; ctx->toc_lvl > d->level; --ctx->toc_lvl) { + if (!raw_str(ctx, "</ul>")) + return ERR_PARSER_TOC; + } + ctx->buf.state = BUF_OFF; + ctx->otitle = ctx->sa_out.len; } break; @@ -416,11 +477,28 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_) case MD_BLOCK_H: { + /* TOC */ + const char *s = ctx->sa_out.s + ctx->otitle; + size_t l = ctx->sa_out.len - ctx->otitle; + MD_BLOCK_H_DETAIL *d = details; char buf[UINT32_FMT]; buf[uint32_fmt(buf, (uint32) d->level)] = '\0'; - if (!raw_str(ctx, "</h") || !raw_str(ctx, buf) || !raw_str(ctx, ">")) + if (!raw_str(ctx, "<a name=\"") + || !anchor(ctx, s, l) + || !raw_str(ctx, "\"></a>") + || !raw_str(ctx, "</h") || !raw_str(ctx, buf) || !raw_str(ctx, ">")) return ERR_PARSER_LEAVE_BLOCK; + + /* TOC */ + ctx->buf.state = BUF_ON; + if (!raw_str(ctx, "<li><a href=\"#") + || !anchor(ctx, s, l) + || !raw_str(ctx, "\">") + || !strip_tags(ctx, s, l) + || !raw_str(ctx, "</a></li>")) + return ERR_PARSER_TOC; + ctx->buf.state = BUF_OFF; } break; @@ -431,9 +509,9 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_) if (!(ctx->code.flags & CODE_BUFFERED)) { return (raw_str(ctx, "</pre>")) ? 0 : ERR_PARSER_LEAVE_BLOCK; } else { - const char *buf = ctx->sa.s + ctx->buf.salen; - size_t blen = ctx->sa.len - ctx->buf.salen; - ctx->sa.len = ctx->buf.salen; + const char *buf = ctx->buf.sa.s + ctx->buf.salen; + size_t blen = ctx->buf.sa.len - ctx->buf.salen; + ctx->buf.sa.len = ctx->buf.salen; ctx->buf.state = BUF_OFF; if (ctx->code.flags & CODE_LINES) { @@ -730,19 +808,18 @@ convert_page(struct ctx *ctx, int fddest) /* write output : */ if ( /* up to TOC position */ - allwrite(fd, ctx->sa_out.s, ctx->ootoc) != ctx->ootoc + allwrite(fd, ctx->sa_out.s, ctx->otoc) != ctx->otoc /* then the actual TOC */ - || allwrite(fd, ctx->sa.s + ctx->otoc, ctx->sa.len - ctx->otoc) - != ctx->sa.len - ctx->otoc + || allwrite(fd, ctx->buf.sa.s, ctx->buf.sa.len) != ctx->buf.sa.len /* and the rest of the page */ - || allwrite(fd, ctx->sa_out.s + ctx->ootoc, ctx->sa_out.len - ctx->ootoc) - != ctx->sa_out.len - ctx->ootoc + || allwrite(fd, ctx->sa_out.s + ctx->otoc, ctx->sa_out.len - ctx->otoc) + != ctx->sa_out.len - ctx->otoc ) ret_strerr_warnwu1sys(ERR_IO, "write destination"); fd_close(fd); /* reset TOC/buffer positions */ - ctx->ootoc = ctx->otoc = ctx->sa_out.len = 0; + ctx->otoc = ctx->sa_out.len = ctx->buf.sa.len = 0; fd_close(p->fd); @@ -888,6 +965,7 @@ main (int argc, char *argv[]) .css = css, .pages = pages, .nb_pages = sizeof(pages) / sizeof(*pages), + .buf.sa = STRALLOC_ZERO, }; outse("Scanning files..."); @@ -942,6 +1020,7 @@ main (int argc, char *argv[]) ctx.sa.s + pages[i - optind].fileoff, "'"); } + stralloc_free(&ctx.buf.sa); stralloc_free(&ctx.sa_out); stralloc_free(&ctx.sa);