author | Olivier Brunel
<jjk@jjacky.com> 2023-06-03 07:57:34 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-07-07 16:59:36 UTC |
parent | ac602a3f39e854fc82913110e6b3adf7d6472383 |
src/doc/qmdoc.1.md | +184 | -6 |
src/qmdoc/qmdoc.c | +764 | -224 |
diff --git a/src/doc/qmdoc.1.md b/src/doc/qmdoc.1.md index c9c278e..4049a30 100644 --- a/src/doc/qmdoc.1.md +++ b/src/doc/qmdoc.1.md @@ -9,7 +9,7 @@ qmdoc - Quick markdown documentation generator # SYNOPSIS -*qmdoc* [`OPTION`..] `FILE`.. +*qmdoc* [`OPTION`..] `FILE|DIR`.. # DESCRIPTION @@ -56,6 +56,15 @@ featured inside a meta tag as well as in the footer of generated pages. :: directory. +: *--dir* `TYPE` + +:: When a directory was given as argument, start either a new group (`group`) +:: or sorting group (`sort`) with the files from said directory. Defaults to +:: `sort`. + +:: See [[GROUPS AND SORTING GROUPS]] for more. + + : *-F* `FILE`, *--footer* `FILE` :: Insert `FILE` as footer on every generated page. `FILE` is expected to @@ -147,6 +156,37 @@ table of contents for the entire generated documentation. :: title (see `--title`) on the left-side of each pages (above its TOC). +: *--subdir* `TYPE` + +:: When reading a directory and encountering a sub-directory, start either a new +:: group (`group`) or sorting group (`sort`) with the files from said +:: sub-directory. Defaults to `group`. + +:: See [[GROUPS AND SORTING GROUPS]] for more. + + +: *--sharedir* `DIR` + +:: Use `DIR` as directory to look for *qmdoc*'s own CSS files. + + +: *--sort-group* `SORT` + +:: Use `SORT` as sort order when sorting groups. Can be either `title` (default) +:: to sort using the page's title, or `file` to use the file's name. + +:: You can also prefix it with `d:` to use descending sort order, instead of the +:: default ascending sort order. + +:: ! HINT: +:: ! You can simply use `d:` to only set descending direction. +:: ! +:: ! It is also possible to abbreviate the possible values, so one could use +:: ! e.g. `--sort-group d:fi` to set descending sorting by file names. + +:: See [[GROUPS AND SORTING GROUPS]] for more. + + : *-T*, *--no-toc* :: Don't write a table of contents on each page. 
The entire left column of @@ -158,7 +198,7 @@ table of contents for the entire generated documentation. : *-t* `TITLE`, *--title* `TITLE` :: Set `TITLE` as general title. It will be featured on top of the TOC of every -:: generated pages. +:: generated pages. Defaults to "Documentation". : *-W*, *--wide-include* @@ -191,7 +231,7 @@ effectively include as many header line as you need : the first four will be processed as described below, while others will simply be ignored. The first line is used to set the page's title. Said title will be used for -the ``<title>`` tag of the page, as well as in the Table of Contents. +the `<title>` tag of the page, as well as in the Table of Contents. If not specified, the file name (including the `.html` extension) is used instead. @@ -236,7 +276,7 @@ NAME TITLE NAME VERSION DATE NAME ``` -Lastly, the page title used in the ``<title>`` tag will consist of the name +Lastly, the page title used in the `<title>` tag will consist of the name field and the title field (given on first header line), concatenated with a dash. E.g: `NAME - TITLE` @@ -268,7 +308,8 @@ This `index.html` will also be linked from every page, as if it was the first generated page, titled "Table of Contents". You can disable index mode by specifying `--no-index`. Each page will still -feature its own TOC with links to other generated pages. +feature its own TOC with links to other generated pages. You can also disable +such TOCs using the `--no-toc` option. ## Customize the global TOC @@ -288,6 +329,143 @@ of course processed as usual. ! Of course if none was given, *qmdoc*'s internal one will be used as usual. + +# GROUPS AND SORTING GROUPS + +In addition to files, it is possible to specify directories as arguments for +*qmdoc* to process. 
In such a case, it will read the directory's content and +process every file it contains whose name ends in `.md`. + +Directories are scanned recursively, meaning that any subdirectory will also be +scanned and its files be processed. + +Because when reading a directory the order in which entries (files) are read is +undetermined (aka random), it is necessary to sort them. To do so, *qmdoc* uses +the notion of "groups". + +## Groups + +Files can be grouped together, and sorting files will be done group by group. +Every time a directory was specified on command line, a new group is created in +which all the directory's files will be put. + +Similarly, when a subdirectory is encountered a new group is created as well. + +By default, files will be sorted by their page's title in ascending order. +This can be changed using the `--sort-group` option. Possible values are `title` +(default) and `file`, to use file names. One can also prefix the value with `d:` +to have them sorted in descending order. + +## Sorting groups + +Two kinds of groups are actually supported : regular groups, and sorting groups. +The difference between the two only comes with regard to each page's table of +content (TOC), i.e. regular groups are an extension over sorting groups. + +By default, all pages are featured on the TOC found on every generated page. +However, when regular groups are involved, only pages from the same group will be +featured. + +! INFO: +! Note that this only applies to page's TOC, the global TOC generated via the +! `<TOC>` tag in the index includes all pages from all groups. + +## Manually grouping pages + +No group exists by default, allowing one to define the order in which pages will +be processed/generated via the order they're given on command-line. + +However, it is possible to group files without using directories, by using +special arguments : + +- Using `+sg` as argument will start a new sorting group; +- Using `+g` as argument will start a new (regular) group. 
+ +That way, one can easily specify a bunch of files to be processed, and have them be +sorted. + +You can use as many such arguments as needed, in whatever order needed. Note +that, however, once a (sorting) group has been added (manually or by specifying +a directory), it is not possible to specify a manual order of processing, since +every file from then on will be in a (sorting) group. + + +Consider the following example : + +```sh +qmdoc first.md +g *.md +``` + +The TOC on the page from `first.md` will not include links to any other files, +similarly all other pages' TOC will not include links to the `first.md` page. + +While the shell is likely to expand `*.md` in a sorted fashion, it is +worth noting here that because files are in a group, they will be sorted by +*qmdoc* and, by default, using their titles (not their file names). + + +Another example : + +```sh +qmdoc first.md +sg foo*.md bar*.md +sg *.md +``` + +All files whose names begin with either `foo` or `bar` will be sorted/listed +together after `first.md`, all the other files will be sorted/listed on their +own afterwards. + +! HINT: +! As the observant reader might have noticed, it is possible to have the same +! file appear multiple times. *qmdoc* will simply ignore any repeated +! occurrence. + + + +# SYMLINKS + +Symbolic links are handled in a special manner, in order to allow one page to +have multiple names linking to it. + +## Restrictions + +First off, such special handling only applies to symlinks that do /not/ contain +any slashes (`/`) in their content. In other words, they must point to a +different file, or name, within the same directory. + +! HINT: +! It is therefore possible to "disable" this feature for a link by having it +! point to `./target` instead of simply `target`. + +In addition, this only applies to [[MAN-PAGE LINKS]] and [[INTRA-LINKS]], not +"regular" links. This is simply because in the latter case you specify a target +for the link, and that target is used as-is. 
It is up to you to ensure it will +actually point to something that does exist. + +## Handling + +When a symlink without slashes is found, instead of opening/processing the file +pointed to, *qmdoc* will simply remember that any link (in any of the processed +pages) to it shall be made to the pointed page instead. + +As a result, no page is actually processed/generated from such symlinks, but any +link /to/ the symlink will be properly placed, pointing to the page it points +to. + +For this reason, one last limitation is in effect: If a symlink points to a +non-existent page, it will have no effect. + +## Example + +So, imagine you've written a small library and you want one single page +documenting two functions, because things are simpler/easier that way. What you +can do then, is have your page written under e.g. `foobar.3.md` and then have a +symlink `barfoo.3.md` pointing to it. + +As a result, *qmdoc* will only generate a single file - namely `foobar.3.html` - +but if in any other pages you have a link such as `[barfoo](3)` then a link +will be made, only pointing to `foobar.3.html`, as per the symlink. + + +# MARKDOWN SYNTAX *qmdoc* is using [md4c] as parser, and as such is mostly compliant with @@ -358,7 +536,7 @@ The first line is a special line, it must contain the /type/ of box to create Supported types are : * `WARNING` : For warnings, default title: "Warning" * `INFO` : For informations, default title: "Information" -* `HINT` ! 
For hints, default title: "Hint" +* `HINT` : For hints, default title: "Hint" * `NOTE` : For notes, default title: "Note" If no supported type is found, the entire line is used as title for a `WARNING` diff --git a/src/qmdoc/qmdoc.c b/src/qmdoc/qmdoc.c index 2491b7d..937b652 100644 --- a/src/qmdoc/qmdoc.c +++ b/src/qmdoc/qmdoc.c @@ -1,10 +1,15 @@ +#define _GNU_SOURCE /* qsort_r() */ #include <stdlib.h> #include <unistd.h> #include <time.h> #include <limb/buffer.h> #include <limb/bytestr.h> +#include <limb/direntry.h> #include <limb/djbunix.h> #include <limb/exitcode.h> +#include <limb/genalloc.h> +#include <limb/hlookup.h> +#include <limb/hmap.h> #include <limb/loadopt.h> #include <limb/output.h> #include <limb/posixplz.h> @@ -17,15 +22,23 @@ const char *PROG = "qmdoc"; +enum idx { + IDX_UNKNOWN = 0, + IDX_SET, + IDX_DISABLED +}; + enum { - OPT_NO_CSS = (1 << 0), - OPT_INLINE_CSS = (1 << 1), - OPT_OVERWRITE = (1 << 2), - OPT_NO_TOC = (1 << 3), - OPT_BUTTONS = (1 << 4), - OPT_NO_INDEX = (1 << 5), - OPT_INDEX = (1 << 6), - OPT_WIDE_INC = (1 << 7), + OPT_NO_CSS = (1 << 0), + OPT_INLINE_CSS = (1 << 1), + OPT_OVERWRITE = (1 << 2), + OPT_NO_TOC = (1 << 3), + OPT_BUTTONS = (1 << 4), + OPT_NO_INDEX = (1 << 5), + OPT_INDEX = (1 << 6), + OPT_WIDE_INC = (1 << 7), + OPT_DIR_GROUP = (1 << 8), + OPT_SUBDIR_GROUP= (1 << 9), }; enum { @@ -42,8 +55,8 @@ enum { }; struct page { - size_t sceoff; size_t fileoff; + size_t dstoff; size_t titleoff; size_t nameoff; size_t veroff; @@ -60,10 +73,23 @@ enum { DOC_BUFFERED_A = (1 << 4), }; +struct entry { + size_t noff; + int page; + u32 dkey; + u8 has_page : 1; /* page member is set */ + u8 is_page : 1; /* is the page, not just a link to it */ + u8 _unused : 6; +}; + struct qmdoc { stralloc sa; stralloc sa_out; - size_t opages; + genalloc pages; + genalloc entries; + genalloc ga_idx; + genalloc ga_grp; + hmap hmap; size_t otoc; /* where to include to page's TOC */ size_t css[NB_CSS]; struct { @@ -89,12 +115,42 @@ struct qmdoc { int from; } 
code; int toc_lvl; - int nb_pages; int cur_page; + int cur_grp_idx; int options; }; -#define PAGE(ctx, n) ((struct page *) ((ctx)->sa.s + (ctx)->opages))[n] +struct parse { + struct qmdoc *qmdoc; + stralloc sa; + size_t destdir; + size_t footer; + size_t header; + size_t ffile; + size_t sharedir; + int sort_group_desc; + int sort_group; +}; + +struct scan { + stralloc *sa; + size_t off; + int ndirs; +}; + +struct cmp { + struct qmdoc *ctx; + stralloc *sa; + int desc; + int sort; +}; + + +#define PAGES(ctx) genalloc_s(struct page, &(ctx)->pages) +#define NB_PAGES(ctx) genalloc_len(struct page, &(ctx)->pages) + +#define ENTRY(ctx) genalloc_s(struct entry, &(ctx)->entries) +#define NB_ENTRIES(ctx) genalloc_len(struct entry, &(ctx)->entries) #define BUFFERING_ON() \ ctx->doc.flags |= DOC_BUFFERING; \ @@ -118,6 +174,24 @@ enum { ERR_TOC = -106, }; +static int +get_page(u32 key, struct qmdoc *ctx) +{ + u32 okey = key; + for (;;) { + int *i = hmap_get(key, &ctx->hmap); + /* we don't know of such a page */ + if (!i) return -1; + /* found it */ + struct entry *e = &ENTRY(ctx)[*i]; + if (e->has_page) return e->page; + /* points to another entry, follow it */ + key = e->dkey; + /* loop? */ + if (key == okey) return -1; + } +} + static int raw_text(struct qmdoc *ctx, const char *text, size_t size) { @@ -333,9 +407,9 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) switch (type) { case MD_BLOCK_DOC: { -#define offset(i) ((PAGE(ctx, i).nameoff) ? PAGE(ctx, i).nameoff : PAGE(ctx, i).titleoff) +#define offset(i) ((PAGES(ctx)[i].nameoff) ? 
PAGES(ctx)[i].nameoff : PAGES(ctx)[i].titleoff) #define str_title(i) ctx->sa.s + offset(i) -#define str_file(i) ctx->sa.s + PAGE(ctx, i).fileoff +#define str_file(i) ctx->sa.s + PAGES(ctx)[i].dstoff if (!raw_str(ctx, "<!DOCTYPE html>\n<html lang=\"") || !escape_text(ctx, ctx->doc.lang, strlen(ctx->doc.lang)) || !raw_str(ctx, "\"><head>") @@ -347,14 +421,14 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) || !raw_str(ctx, "\">"))) || !raw_str(ctx, "<title>")) return ERR_ENTER_BLOCK; - if (PAGE(ctx, ctx->cur_page).nameoff) { - if (!escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff)) + if (PAGES(ctx)[ctx->cur_page].nameoff) { + if (!escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff)) || !raw_str(ctx, " - ")) return ERR_ENTER_BLOCK; } - if (!escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).titleoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).titleoff)) + if (!escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].titleoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].titleoff)) || !raw_str(ctx, "</title>")) return ERR_ENTER_BLOCK; if (ctx->options & OPT_INLINE_CSS) { @@ -401,18 +475,52 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) || !raw_str(ctx, "</section><nav><ul class=\"toc page\">")) return ERR_ENTER_BLOCK; - for (int i = 0; i < ctx->nb_pages; ++i) { + int from, to; + if (!genalloc_len(int, &ctx->ga_grp)) { + from = 1; + to = NB_ENTRIES(ctx); + } else { + from = genalloc_s(int, &ctx->ga_grp)[ctx->cur_grp_idx]; + if (genalloc_len(int, &ctx->ga_grp) > ctx->cur_grp_idx + 1) + to = genalloc_s(int, &ctx->ga_grp)[ctx->cur_grp_idx + 1]; + else + to = NB_ENTRIES(ctx); + } + + /* start the loop w/ i=0 in case of index mode, to always + * include it first in every pages */ + for (int i = (ctx->doc.flags & DOC_FULL_TOC) ? 
0 : from; i < to; ++i) { + struct entry *entry = &ENTRY(ctx)[genalloc_s(int, &ctx->ga_idx)[i]]; + int pg; + if (!entry->has_page) { + pg = get_page(entry->dkey, ctx); + if (pg < 0) continue; + } else { + pg = entry->page; + } + + /* don't include symlinks to the current page */ + if (!entry->is_page && pg == ctx->cur_page) + continue; + if (!raw_str(ctx, "<li><a href=\"") - || !escape_text(ctx, str_file(i), strlen(str_file(i))) + || !escape_text(ctx, str_file(pg), strlen(str_file(pg))) || !raw_str(ctx, "\" title=\"") - || !escape_text(ctx, str_title(i), strlen(str_title(i))) + || !escape_text(ctx, str_title(pg), strlen(str_title(pg))) || !raw_str(ctx, "\">") - || !escape_text(ctx, str_title(i), strlen(str_title(i))) + || (entry->is_page + && !escape_text(ctx, str_title(pg), strlen(str_title(pg)))) + || (!entry->is_page + && !escape_text(ctx, ctx->sa.s + entry->noff, strlen(ctx->sa.s + entry->noff))) || !raw_str(ctx, "</a>")) return ERR_ENTER_BLOCK; + /* don't include symlinks in the TOC, only "real" pages */ + if (!entry->is_page) + continue; + /* remember positions for TOC */ - if (i == ctx->cur_page) { + if (pg == ctx->cur_page) { ctx->doc.flags |= DOC_BUFFERING; /* full toc */ if (ctx->doc.flags & DOC_FULL_TOC) { @@ -424,11 +532,11 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) /* adding page title */ if (i > 0 && (!raw_str(ctx, "<li><a href=\"") - || !escape_text(ctx, str_file(i), strlen(str_file(i))) + || !escape_text(ctx, str_file(pg), strlen(str_file(pg))) || !raw_str(ctx, "\" title=\"") - || !escape_text(ctx, str_title(i), strlen(str_title(i))) + || !escape_text(ctx, str_title(pg), strlen(str_title(pg))) || !raw_str(ctx, "\">") - || !escape_text(ctx, str_title(i), strlen(str_title(i))) + || !escape_text(ctx, str_title(pg), strlen(str_title(pg))) || !raw_str(ctx, "</a>"))) return ERR_TOC; } @@ -445,6 +553,12 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) ctx->toc_lvl = 1; ctx->doc.flags &= ~DOC_BUFFERING; } + + /* if we just added 
the index w/ index mode enabled, we + * want to move to the first page in the group, i.e. + * from, so set i to move there */ + if (i == 0) + i = from - 1; } if (!raw_str(ctx, "</ul></nav></header>")) return ERR_ENTER_BLOCK; @@ -455,16 +569,16 @@ enter_block(MD_BLOCKTYPE type, void *details, void *ctx_) && !raw_str(ctx, ctx->sa.s + ctx->doc.oheader))) return ERR_ENTER_BLOCK; - if (PAGE(ctx, ctx->cur_page).nameoff) { + if (PAGES(ctx)[ctx->cur_page].nameoff) { if (!raw_str(ctx, "<header class=\"manpage\"><div class=\"left\">") - || !escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff)) + || !escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff)) || !raw_str(ctx, "</div><div class=\"middle\">") - || !escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).titleoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).titleoff)) + || !escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].titleoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].titleoff)) || !raw_str(ctx, "</div><div class=\"right\">") - || !escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff)) + || !escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff)) || !raw_str(ctx, "</div></header>")) return ERR_ENTER_BLOCK; } @@ -665,25 +779,25 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_) year[u32_fmt(year, (u32) 1900 + tm.tm_year)] = '\0'; if ((ctx->doc.flags & DOC_HAS_TITLE) && !raw_str(ctx, "</section>")) return ERR_LEAVE_BLOCK; - if (PAGE(ctx, ctx->cur_page).nameoff) { + if (PAGES(ctx)[ctx->cur_page].nameoff) { if (!raw_str(ctx, "<footer class=\"manpage\"><div class=\"left\">") - || !escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).veroff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).veroff)) + || !escape_text(ctx, ctx->sa.s + 
PAGES(ctx)[ctx->cur_page].veroff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].veroff)) || !raw_str(ctx, "</div><div class=\"middle\">") - || !escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).dateoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).dateoff)) + || !escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].dateoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].dateoff)) || !raw_str(ctx, "</div><div class=\"right\">") - || !escape_text(ctx, ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff, - strlen(ctx->sa.s + PAGE(ctx, ctx->cur_page).nameoff)) + || !escape_text(ctx, ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff, + strlen(ctx->sa.s + PAGES(ctx)[ctx->cur_page].nameoff)) || !raw_str(ctx, "</div></footer>")) return ERR_LEAVE_BLOCK; } if (ctx->options & OPT_BUTTONS) { -#define str_title(i) ctx->sa.s + PAGE(ctx, i).titleoff -#define str_file(i) ctx->sa.s + PAGE(ctx, i).fileoff +#define str_title(i) ctx->sa.s + PAGES(ctx)[i].titleoff +#define str_file(i) ctx->sa.s + PAGES(ctx)[i].dstoff if (!raw_str(ctx, "<section id=\"navbuttons\">")) return ERR_LEAVE_BLOCK; - if (ctx->cur_page > 0 + if (ctx->cur_page > !(ctx->doc.flags & DOC_FULL_TOC) && (!raw_str(ctx, "<a class=\"prev\" href=\"") || !escape_text(ctx, str_file(ctx->cur_page - 1), strlen(str_file(ctx->cur_page - 1))) @@ -692,7 +806,7 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_) strlen(str_title(ctx->cur_page - 1))) || !raw_str(ctx, "\">Previous</a>"))) return ERR_LEAVE_BLOCK; - if (ctx->cur_page < ctx->nb_pages - 1 + if (0/* FIXME ctx->cur_page < ctx->nb_pages - 1 */ && (!raw_str(ctx, "<a class=\"next\" href=\"") || !escape_text(ctx, str_file(ctx->cur_page + 1), strlen(str_file(ctx->cur_page + 1))) @@ -786,7 +900,7 @@ leave_block(MD_BLOCKTYPE type, void *details, void *ctx_) /* TOC */ char toc[l]; memcpy(toc, s, l); - const char *file = ctx->sa.s + PAGE(ctx, ctx->cur_page).fileoff; + const char *file = ctx->sa.s + PAGES(ctx)[ctx->cur_page].dstoff; ctx->doc.flags |= DOC_BUFFERING; if 
(!raw_str(ctx, "<li><a href=\"") || !escape_text(ctx, file, strlen(file)) @@ -957,17 +1071,11 @@ enter_span(MD_SPANTYPE type, void *details, void *ctx_) * - else we assume there is/will be a title named as such and * link to its anchor */ - for (int i = 0; i < ctx->nb_pages; ++i) { - if (!memcmp(s, ctx->sa.s + PAGE(ctx, i).fileoff, l) - && !strcmp(".html", ctx->sa.s + PAGE(ctx, i).fileoff + l)) { - page = i; - break; - } - } + page = get_page(hlookup32(s, l), ctx); if (!raw_str(ctx, "<a href=\"") || (page >= 0 && ( - !raw_str(ctx, ctx->sa.s + PAGE(ctx, page).fileoff) + !raw_str(ctx, ctx->sa.s + PAGES(ctx)[page].dstoff) )) || (page < 0 && ( !raw_text(ctx, "#", 1) @@ -1019,31 +1127,34 @@ leave_span(MD_SPANTYPE type, void *details, void *ctx_) if (section < 0) return ERR_LEAVE_SPAN; - char buf[U32_FMT + 2]; - int e; + char buf[4]; buf[0] = '('; - e = u32_fmt(buf + 1, (u32) section); - buf[1 + e] = ')'; - buf[2 + e] = '\0'; - - const char *file; - for (int i = 0; i < ctx->nb_pages; ++i) { - file = ctx->sa.s + PAGE(ctx, i).fileoff; - if (!memcmp(file, s, l) - && file[l] == '.' 
- && file[l + 1] == section + '0' - && !memcmp(file + l + 2, ".html", 6)) - break; - file = NULL; + buf[1] = section + '0'; + buf[2] = ')'; + buf[3] = '\0'; + + const char *file = NULL; + { + char tmp[l + 2]; + memcpy(tmp, s, l); + tmp[l] = '.'; + tmp[l + 1] = section + '0'; + int page = get_page(hlookup32(tmp, sizeof(tmp)), ctx); + if (page >= 0) + file = ctx->sa.s + PAGES(ctx)[page].dstoff; } if (file || ctx->omanurl != (size_t) -1) { if (!raw_str(ctx, "<a href=\"") - || (!file && !raw_str(ctx, ctx->sa.s + ctx->omanurl)) - || !raw_text(ctx, s, l) - || !raw_text(ctx, ".", 1) - || !raw_text(ctx, buf + 1, 1) - || !raw_text(ctx, ".html", 5) + /* no file means an URL */ + || (!file && (!raw_str(ctx, ctx->sa.s + ctx->omanurl) + || !raw_text(ctx, s, l) + || !raw_text(ctx, ".", 1) + || !raw_text(ctx, buf + 1, 1) + || !raw_text(ctx, ".html", 5))) + /* use file as the linked page might be under a + * different name, in case of symlinks */ + || (file && !raw_str(ctx, file)) || !raw_str(ctx, "\">") || !raw_str(ctx, "<strong>") || !raw_text(ctx, s, l) @@ -1192,13 +1303,13 @@ static int load_source(struct qmdoc *ctx, size_t *salen) { *salen = ctx->sa.len; - if (!stralloc_readyplus(&ctx->sa, PAGE(ctx, ctx->cur_page).size + 1)) + if (!stralloc_readyplus(&ctx->sa, PAGES(ctx)[ctx->cur_page].size + 1)) return EX_TEMPFAIL; - if (allread(PAGE(ctx, ctx->cur_page).fd, ctx->sa.s + *salen, - PAGE(ctx, ctx->cur_page).size) != PAGE(ctx, ctx->cur_page).size) + if (allread(PAGES(ctx)[ctx->cur_page].fd, ctx->sa.s + *salen, + PAGES(ctx)[ctx->cur_page].size) != PAGES(ctx)[ctx->cur_page].size) retwusys(EX_NOINPUT, "read source file"); - ctx->sa.len += PAGE(ctx, ctx->cur_page).size; + ctx->sa.len += PAGES(ctx)[ctx->cur_page].size; /* ending on a new line allows parser optimization */ if (ctx->sa.s[ctx->sa.len - 1] != '\n') @@ -1210,7 +1321,7 @@ load_source(struct qmdoc *ctx, size_t *salen) static int convert_page(struct qmdoc *ctx, int fddest) { - const char *dst = ctx->sa.s + PAGE(ctx, 
ctx->cur_page).fileoff; + const char *dst = ctx->sa.s + PAGES(ctx)[ctx->cur_page].dstoff; int fd; if (ctx->options & OPT_OVERWRITE) fd = open_truncat(fddest, dst); @@ -1221,7 +1332,7 @@ convert_page(struct qmdoc *ctx, int fddest) size_t salen; const char *sce; - if (PAGE(ctx, ctx->cur_page).fd >= 0) { + if (PAGES(ctx)[ctx->cur_page].fd >= 0) { int r = load_source(ctx, &salen); if (r) return r; sce = ctx->sa.s + salen; @@ -1245,7 +1356,7 @@ convert_page(struct qmdoc *ctx, int fddest) .leave_span = leave_span, .text = text, }; - int r = md_parse(sce, PAGE(ctx, ctx->cur_page).size, &parser, ctx); + int r = md_parse(sce, PAGES(ctx)[ctx->cur_page].size, &parser, ctx); if (r) retw(EX_DATA_ERR, "parser error ", PMINT(r)); @@ -1278,7 +1389,8 @@ convert_page(struct qmdoc *ctx, int fddest) if (!(ctx->doc.flags & DOC_FULL_TOC)) ctx->buf.sa.len = 0; - fd_close(PAGE(ctx, ctx->cur_page).fd); + if (PAGES(ctx)[ctx->cur_page].fd >= 0) + fd_close(PAGES(ctx)[ctx->cur_page].fd); ctx->sa.len = salen; return 0; @@ -1292,34 +1404,57 @@ empty(const char *s) return 1; } +static const char * +real_path_at(stralloc *sa, int bfd, const char *file) +{ + size_t off = sa->len; + + /* in case file actually points inside sa */ + size_t fileoff; + if (file >= sa->s && file < sa->s + sa->len) + fileoff = file - sa->s; + else + fileoff = (size_t) -1; + + char buf[strlen(file) + 1]; + memcpy(buf, file, sizeof(buf)); + + if (sarealpathat(sa, bfd, buf) < 0) + return (fileoff == (size_t) -1) ? 
file : sa->s + fileoff; + return sa->s + off; +} + + +#define REALPATH(bfd,file) real_path_at(&ctx->sa, bfd, file) + static int -load_page_from_file(stralloc *sa, size_t fileoff, size_t flen, int pgn, struct qmdoc *ctx) +load_page_from_file(struct page *page, int bfd, size_t fileoff, size_t flen, struct qmdoc *ctx) { -#define file() (sa->s + fileoff) - PAGE(ctx, pgn).fd = open_read(file()); - if (PAGE(ctx, pgn).fd < 0) retwusys(EX_NOINPUT, "open ", ESC, file(), ESC); - - PAGE(ctx, pgn).sceoff = byte_rchr(file(), flen, '/'); - if (PAGE(ctx, pgn).sceoff == flen) PAGE(ctx, pgn).sceoff = 0; - else ++PAGE(ctx, pgn).sceoff; - - /* l = strlen(file() + PAGE(ctx, pgn).sceoff) */ - size_t l = flen - PAGE(ctx, pgn).sceoff; - PAGE(ctx, pgn).sceoff += fileoff; - PAGE(ctx, pgn).fileoff = sa->len; - if (!stralloc_readyplus(sa, l - 2 + 5)) - retwusys(EX_TEMPFAIL, "load page title from ", ESC, file(), ESC); - stralloc_catb(sa, sa->s + PAGE(ctx, pgn).sceoff, l - 2); - stralloc_catb(sa, "html", 5); +#define file() (ctx->sa.s + fileoff) + page->fd = open_readat(bfd, file()); + if (page->fd < 0) retwusys(EX_NOINPUT, "open ", ESC, REALPATH(bfd, file()), ESC); + + page->fileoff = fileoff; + size_t sceoff = byte_rchr(file(), flen, '/'); + if (sceoff == flen) sceoff = 0; + else ++sceoff; + + size_t dstlen = flen - sceoff + 2; + sceoff += fileoff; + page->dstoff = ctx->sa.len; + if (!stralloc_readyplus(&ctx->sa, dstlen + 1)) + retwusys(EX_TEMPFAIL, "load page title from ", ESC, REALPATH(bfd, file()), ESC); + stralloc_catb(&ctx->sa, ctx->sa.s + sceoff, dstlen - 4); + stralloc_cats0(&ctx->sa, "html"); char buf_[256], buf[sizeof(buf_)]; - buffer buffer = BUFFER_INIT(&fd_readv, PAGE(ctx, pgn).fd, buf_, sizeof(buf_)); + buffer buffer = BUFFER_INIT(&fd_readv, page->fd, buf_, sizeof(buf_)); ssize_t left = buffer_get(&buffer, buf, sizeof(buf)); if (left <= 0) - retwu(EX_NOINPUT, "load page title from ", ESC, file(), ESC); + retwu(EX_NOINPUT, "load page title from ", ESC, REALPATH(bfd, file()), 
ESC); - PAGE(ctx, pgn).titleoff = PAGE(ctx, pgn).fileoff; + page->titleoff = page->dstoff; char *b = buf; int line = 1, begin = 1, is_hdr = 0; @@ -1332,31 +1467,33 @@ load_page_from_file(stralloc *sa, size_t fileoff, size_t flen, int pgn, struct q size_t *offset = NULL; switch (line) { case 1: - offset = &PAGE(ctx, pgn).titleoff; + offset = &page->titleoff; break; case 2: - offset = &PAGE(ctx, pgn).nameoff; + offset = &page->nameoff; break; case 3: - offset = &PAGE(ctx, pgn).veroff; + offset = &page->veroff; break; case 4: - offset = &PAGE(ctx, pgn).dateoff; + offset = &page->dateoff; break; default: if (begin && is_hdr && !warned) { - err("warning: header too long in ", ESC, file(), ESC, ": ", + err("warning: header too long in ", + ESC, REALPATH(bfd, file()), ESC, ": ", "Only 4 lines supported"); warned = 1; } } if (offset) { - if (begin && is_hdr) *offset = sa->len; + if (begin && is_hdr) *offset = ctx->sa.len; if (is_hdr - && (!stralloc_catb(sa, b + ((is_hdr) ? 2 : 0), + && (!stralloc_catb(&ctx->sa, b + ((is_hdr) ? 2 : 0), ((e) ? e - b : left) - ((is_hdr) ? 
2 : 0)) - || (e && !stralloc_0(sa)))) - retwusys(EX_TEMPFAIL, "load page title from ", ESC, file(), ESC); + || (e && !stralloc_0(&ctx->sa)))) + retwusys(EX_TEMPFAIL, "load page title from ", + ESC, REALPATH(bfd, file()), ESC); } if (e) { int l = e - b + 1; @@ -1372,44 +1509,346 @@ load_page_from_file(stralloc *sa, size_t fileoff, size_t flen, int pgn, struct q b = buf; left = buffer_get(&buffer, buf, sizeof(buf)); if (left <= 0) - retwusys(EX_DATA_ERR, "load page title from ", ESC, file(), ESC); + retwusys(EX_DATA_ERR, "load page title from ", + ESC, REALPATH(bfd, file()), ESC); if (!e) begin = 0; } } - if (empty(sa->s + PAGE(ctx, pgn).titleoff)) - PAGE(ctx, pgn).titleoff = PAGE(ctx, pgn).fileoff; + if (empty(ctx->sa.s + page->titleoff)) + page->titleoff = page->dstoff; - if (PAGE(ctx, pgn).nameoff && empty(sa->s + PAGE(ctx, pgn).nameoff)) - PAGE(ctx, pgn).nameoff = 0; + if (page->nameoff && empty(ctx->sa.s + page->nameoff)) + page->nameoff = 0; - if (PAGE(ctx, pgn).nameoff) { - if (!PAGE(ctx, pgn).veroff || empty(sa->s + PAGE(ctx, pgn).veroff)) - PAGE(ctx, pgn).veroff = PAGE(ctx, pgn).nameoff; - if (!PAGE(ctx, pgn).dateoff || empty(sa->s + PAGE(ctx, pgn).dateoff)) - PAGE(ctx, pgn).dateoff = PAGE(ctx, pgn).titleoff; + if (page->nameoff) { + if (!page->veroff || empty(ctx->sa.s + page->veroff)) + page->veroff = page->nameoff; + if (!page->dateoff || empty(ctx->sa.s + page->dateoff)) + page->dateoff = page->titleoff; } - PAGE(ctx, pgn).size = lseek(PAGE(ctx, pgn).fd, 0, SEEK_END); - if (PAGE(ctx, pgn).size == (off_t) -1 || lseek(PAGE(ctx, pgn).fd, done, SEEK_SET) < 0) - retwusys(EX_IOERR, "seek into ", ESC, file(), ESC); - PAGE(ctx, pgn).size -= done; + page->size = lseek(page->fd, 0, SEEK_END); + if (page->size == (off_t) -1 || lseek(page->fd, done, SEEK_SET) < 0) + retwusys(EX_IOERR, "seek into ", ESC, REALPATH(bfd, file()), ESC); + page->size -= done; + return 0; #undef file } -struct parse { - struct qmdoc *qmdoc; - size_t destdir; - size_t footer; - size_t 
header; - size_t ffile; - size_t sharedir; +static ssize_t +scan_dir(stralloc *sa, direntry *de, int bfd, void *ctx_) +{ + struct scan *ctx = ctx_; + int isdir = 0; + + if (de->d_type == DT_DIR) { + isdir = 1; + } else if (de->d_type == DT_UNKNOWN) { + struct stat st; + if (fstatat(bfd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) /* stat failed: report it rather than read an unset st */ + return -1; + if (S_ISDIR(st.st_mode)) + isdir = 1; + } + + /* put directories in a separate list */ + if (isdir) sa = ctx->sa; + + if (!stralloc_cats0(sa, de->d_name)) + return -1; + + if (!isdir) return 1; + + ++ctx->ndirs; + return 0; +} + +static int +add_idx(genalloc *ga, struct qmdoc *ctx) +{ + int i = NB_ENTRIES(ctx); + if ((!genalloc_len(int, ga) + || genalloc_s(int, ga)[genalloc_len(int, ga) - 1] != i) + /* only add a new (sorting) group if there are none or the + * last/current one isn't empty */ + && !genalloc_append(int, ga, &i)) + return 0; + return 1; +} + +static int +cmp_off(const void *o1_, const void *o2_, void *cmp_) +{ + struct cmp *cmp = cmp_; + const size_t *o1 = o1_; + const size_t *o2 = o2_; + + int r = strcoll(cmp->sa->s + *o1, cmp->sa->s + *o2); + return (!r) ? r : (cmp->desc) ? -r : r; +} + +static int +scan_pages(enum idx *idx_mode, int bfd, stralloc *filesa, size_t fileoff, int nfiles, + int isroot, genalloc *ga_sg, struct parse *parse) +{ +#define file() (filesa->s + fileoff) + struct qmdoc *ctx = parse->qmdoc; + int err = 0; + const char *file; + size_t flen = -1; + for (int i = 0; i < nfiles; ++i) { + struct page page = { 0 }; + size_t salen; + + fileoff += flen + 1; + flen = strlen(file()); + + if (flen == 2 && !strcmp(file(), "+g")) { + if (!add_idx(&ctx->ga_grp, ctx) || !add_idx(ga_sg, ctx)) + return EX_TEMPFAIL; + continue; + } + + if (flen == 3 && !strcmp(file(), "+sg")) { + if (!add_idx(ga_sg, ctx)) + return EX_TEMPFAIL; + continue; + } + + struct stat st; + if (fstatat(bfd, file(), &st, AT_SYMLINK_NOFOLLOW) < 0) + retwusys((errno == ENOENT) ? 
EX_NOINPUT : EX_IOERR, + "stat ", ESC, REALPATH(bfd, file()), ESC); + + if (S_ISDIR(st.st_mode)) { + int dirfd; + salen = ctx->sa.len; + + dirfd = open2_at(bfd, file(), O_RDONLY | O_DIRECTORY); + if (dirfd < 0) retwusys(EX_IOERR, "open ", ESC, REALPATH(bfd, file()), ESC); + + struct scan scan = { + .sa = &parse->sa, + .off = parse->sa.len, + .ndirs = 0, + }; + + int r = sascandirat(&ctx->sa, bfd, file(), scan_dir, &scan); + if (r < 0) { + scan.sa->len = scan.off; + fd_close(dirfd); + retwusys(EX_IOERR, "scan ", ESC, REALPATH(bfd, file()), ESC); + } + + /* process files */ + if (r) { + if (((ctx->options & OPT_DIR_GROUP) && !add_idx(&ctx->ga_grp, ctx)) + || !add_idx(ga_sg, ctx)) + return EX_TEMPFAIL; + r = scan_pages(idx_mode, dirfd, &ctx->sa, salen, r, 0, ga_sg, parse); + if (r) err = r; + } + + /* process subdirs */ + if (scan.ndirs) { + size_t aoff = scan.sa->len; + /* make an array of names('s offsets) */ + int n = sacoloff(scan.sa, scan.off, aoff); + /* sort said array, to process subdirs in order */ + struct cmp cmp = { + .sa = scan.sa, + .desc = parse->sort_group_desc, + }; + qsort_r(scan.sa->s + aoff, n, sizeof(size_t), cmp_off, &cmp); + + /* now process subdirs one by one in order */ + for(int i = 0; i < n; ++i) { + if (((ctx->options & OPT_SUBDIR_GROUP) && !add_idx(&ctx->ga_grp, ctx)) + || !add_idx(ga_sg, ctx)) + return EX_TEMPFAIL; + size_t *arr = (size_t *) (scan.sa->s + aoff); + r = scan_pages(idx_mode, dirfd, scan.sa, arr[i], 1, 0, ga_sg, parse); + if (r) err = r; + } + } + + scan.sa->len = scan.off; + fd_close(dirfd); + continue; + } + + /* only process *.md files */ + if (strcmp(file() + flen - 3, ".md")) { + warn("File ", ESC, REALPATH(bfd, file()), ESC, " not a markdown file (*.md)"); + err = EX_DATA_ERR; + continue; + } + + /* special handling for symlinks.. 
*/ + salen = ctx->buf.sa.len; + if (S_ISLNK(st.st_mode) && !sareadlinkat0(&ctx->buf.sa, bfd, file())) { + char *dst = ctx->buf.sa.s + salen; + size_t dlen = ctx->buf.sa.len - salen - 1; + size_t off = byte_chr(dst, dlen, '/'); + ctx->buf.sa.len = salen; + /* ..if they don't contain a slash and point to a *.md file */ + if (off == dlen && dlen > 3 && !strcmp(dst + dlen - 3, ".md")) { + /* in this case we treat a symlink as being another name for a + * page, so we want to remember that name in case there's a link + * to it, so we can link to the actual page. However, only a + * single page will be converted, and that's the actual file. */ + struct entry entry; + entry.is_page = 0; + + /* compute the destination key, i.e. the key of the page pointed + * to. Either we get the page, or we don't know it (yet) and + * we'll simply put that key as destination. */ + u32 key = hlookup32(dst, dlen - 3); + int pg = get_page(key, ctx); + if (pg < 0) { + entry.has_page = 0; + entry.dkey = key; + } else { + entry.has_page = 1; + entry.page = pg; + } + + /* compute our own key, for the file/symlink name */ + off = byte_rchr(file(), flen, '/'); + if (off == flen) off = 0; + else ++off; + key = hlookup32(file() + off, flen - off - 3); + + /* we remember this link's name.. */ + entry.noff = fileoff + off; + /* ..but turn it from "foobar.n.md" to "foober(n)" */ + if (file()[flen - 5] == '.' 
+ && (file()[flen - 4] >= '0' && file()[flen - 4] <= '9')) { + file()[flen - 5] = '('; + file()[flen - 3] = ')'; + file()[flen - 2] = 0; + } else { + /* or just drop the ".md" */ + file()[flen - 3] = 0; + } + + int *i = hmap_get(key, &ctx->hmap); + if (i) { + struct entry *entry = &ENTRY(ctx)[*i]; + if (entry->is_page) + diefu(EX_DATA_ERR, "process link ", ESC, REALPATH(bfd, file()), ESC, + ": ", "Already a page generating ", + ESC, ctx->sa.s + PAGES(ctx)[entry->page].dstoff, ESC); + continue; + } + + int n = NB_ENTRIES(ctx); + if (!genalloc_append(struct entry, &ctx->entries, &entry) + || !hmap_set(key, &n, &ctx->hmap)) + diefusys(EX_TEMPFAIL, "prepare page ", ESC, REALPATH(bfd, file()), ESC); + continue; + } + } + + salen = ctx->sa.len; + int r = load_page_from_file(&page, bfd, fileoff, flen, ctx); + if (r) { + ctx->sa.len = salen; + if (page.fd >= 0) fd_close(page.fd); + err = r; + } else { + /* add the page */ + if (!genalloc_append(struct page, &ctx->pages, &page)) + diefusys(EX_TEMPFAIL, "prepare page ", ESC, REALPATH(bfd, file()), ESC); + + /* compute the key for this page, stripping the ".html" */ + u32 key = hlookup32(ctx->sa.s + page.dstoff, strlen(ctx->sa.s + page.dstoff) - 5); + int *i = hmap_get(key, &ctx->hmap); + if (i) { + struct entry *entry = &ENTRY(ctx)[*i]; + if (!entry->is_page) + diefu(EX_DATA_ERR, "prepare page ", ESC, REALPATH(bfd, file()), ESC, + ": ", "Already a symlink by that name"); + if (fd_same(page.fd, PAGES(ctx)[entry->page].fd)) { + /* same file given twice, just ignore it */ + ctx->pages.len -= sizeof(struct page); + fd_close(page.fd); + continue; + } + diefu(EX_DATA_ERR, "prepare page ", ESC, REALPATH(bfd, file()), ESC, ": ", + "Already a page generating ", + ESC, ctx->sa.s + page.dstoff, ESC); + } + + /* entry for this page */ + struct entry e = { + .has_page = 1, + .is_page = 1, + .page = NB_PAGES(ctx) - 1 + }; + + if (*idx_mode == IDX_UNKNOWN + && !strcmp(ctx->sa.s + page.dstoff, "index.html")) { + /* OPT_INDEX or this is the 
first page we're adding means we're + * enabling index mode, and putting this entry in the reserved + * index spot. + * Else index mode is disabled (index wasn't first) and the + * entry will be added as any others. */ + if ((ctx->options & OPT_INDEX) || !NB_PAGES(ctx)) { + ENTRY(ctx)[0] = e; + r = 0; + *idx_mode = IDX_SET; + } else { + *idx_mode = IDX_DISABLED; + r = 1; + } + } else { + r = 1; + } + + if (r) { + r = NB_ENTRIES(ctx); + if (!genalloc_append(struct entry, &ctx->entries, &e)) + diefusys(EX_TEMPFAIL, "prepare page ", ESC, REALPATH(bfd, file()), ESC); + } + + if (!hmap_set(key, &r, &ctx->hmap)) + diefusys(EX_TEMPFAIL, "prepare page ", ESC, REALPATH(bfd, file()), ESC); + } + } + + return err; +#undef file +} + +enum { + SORT_NONE = 0, + SORT_TITLE, + SORT_FILE }; +static int +parse_sort(int *order, const char *arg) +{ + const char *list[] = { "title", "file", NULL }; + int desc = 0; + if (arg[0] == 'd' && arg[1] == ':') { + desc = 1; + arg += 2; + } + /* allow just "d:" to only set descending */ + if (*arg) *order = 1 + byte_get_match(NULL, arg, strlen(arg), list); + return desc; +} + enum { OPTID_VERSION = OPTID_FIRST, OPTID_SHAREDIR, + OPTID_SORT_GROUP, + OPTID_DIR, + OPTID_SUBDIR, ARGID_FILE }; @@ -1422,6 +1861,7 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct OPTION_ARG_NONE('C', "no-css", 0, OPTID_SHORTOPT), OPTION_ARG_REQ( 'c', "css", OPT_PATH, OPTID_SHORTOPT), OPTION_ARG_REQ( 'd', "destdir", OPT_PATH, OPTID_SHORTOPT), + OPTION_ARG_REQ( 0 , "dir", 0, OPTID_DIR), OPTION_ARG_REQ( 'F', "footer", OPT_PATH, OPTID_SHORTOPT), OPTION_ARG_REQ( 'H', "header", OPT_PATH, OPTID_SHORTOPT), OPTION_ARG_NONE('h', "help", 0, OPTID_SHORTOPT), @@ -1431,12 +1871,14 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct OPTION_ARG_REQ( 'M', "man-url", OPT_PATH, OPTID_SHORTOPT), OPTION_ARG_NONE('o', "overwrite", 0, OPTID_SHORTOPT), OPTION_ARG_REQ( 's', "subtitle", OPT_PATH, OPTID_SHORTOPT), + 
OPTION_ARG_REQ( 0 , "subdir", 0, OPTID_SUBDIR), + OPTION_ARG_REQ( 0 , "sharedir", OPT_PATH, OPTID_SHAREDIR), + OPTION_ARG_REQ( 0 , "sort-group", 0, OPTID_SORT_GROUP), OPTION_ARG_NONE('T', "no-toc", 0, OPTID_SHORTOPT), OPTION_ARG_REQ( 't', "title", OPT_PATH, OPTID_SHORTOPT), OPTION_ARG_NONE( 0 , "version", 0, OPTID_VERSION), OPTION_ARG_NONE('W', "wide-includes", 0, OPTID_SHORTOPT), OPTION_ARG_NONE('X', "no-index", 0, OPTID_SHORTOPT), - OPTION_ARG_REQ( 0 , "sharedir", OPT_PATH, OPTID_SHAREDIR), LOADOPT_ARGUMENTS, ARGUMENT_REQ( "file", OPT_PATH | OPT_RPT, ARGID_FILE), LOADOPT_DONE @@ -1461,6 +1903,24 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct case 'd': ctx->destdir = LO_OFF(&lo); break; + case OPTID_DIR: + c = OPT_DIR_GROUP; + /* fall through */ + case OPTID_SUBDIR: + { + if (c != OPT_DIR_GROUP) + c = OPT_SUBDIR_GROUP; + + const char *list[] = { "sort", "group", NULL }; + int i = byte_get_match(NULL, LO_ARG(&lo), strlen(LO_ARG(&lo)), list); + if (i < 0) + dief(EX_USAGE, "invalid argument to option --", options[LO_IDX(&lo)].longopt, ": ", LO_ARG(&lo)); + if (i) + ctx->qmdoc->options |= c; + else + ctx->qmdoc->options &= ~c; + } + break; case 'F': ctx->footer = LO_OFF(&lo); break; @@ -1473,19 +1933,22 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct " -b, --buttons Put Previous & Next buttons on pages\n" " -C, --no-css Do not use CSS (still process --css if any)\n" " -c, --css FILE Add FILE as additional CSS\n" -" -d, --destdir DIR Write files into DIR\n" +" -d, --destdir DIR Write files into DIR [.]\n" +" --dir TYPE Starting a new TYPE when reading directories [sort]\n" " -F, --footer FILE Insert FILE as common footer\n" " -H, --header FILE Insert FILE as common header\n" " -h, --help Show this help screen and exit\n" " -I, --inline-css Use inline CSS instead of external files\n" " -i, --index Force index mode\n" -" -l, --lang LNG Set LNG as language attribute\n" +" -l, --lang LNG Set LNG as 
language attribute [en]\n" " -M, --man-url URL Use URL as prefix for external man-page links\n" " -o, --overwrite Overwrite destination files if already exist\n" -" --sharedir DIR Use DIR as source for qmdoc's CSS files\n" " -s, --subtitle TEXT Set TEXT as general subtitle\n" +" --subdir TYPE Starting a new TYPE when reading sub-directories [group]\n" +" --sharedir DIR Use DIR as source for qmdoc's CSS files [" QMDOC_SHAREDIR "]\n" +" --sort-group SORT Use SORT as sort order for sorting groups [title]\n" " -T, --no-toc Don't write a TOC on each page. Implies --no-index\n" -" -t, --title TITLE Set TITLE as general (across all pages) title\n" +" -t, --title TITLE Set TITLE as general (across all pages) title [Documentation]\n" " -V, --version Show version screen and exit\n" " -W, --wide-include Include header/footer right within <body>\n" " -X, --no-index Disable index mode\n" @@ -1508,6 +1971,14 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct case 's': ctx->qmdoc->doc.osubtitle = LO_OFF(&lo); break; + case OPTID_SHAREDIR: + ctx->sharedir = LO_OFF(&lo); + break; + case OPTID_SORT_GROUP: + ctx->sort_group_desc = parse_sort(&ctx->sort_group, LO_ARG(&lo)); + if (ctx->sort_group < 0) + dief(EX_USAGE, "invalid sort order for --", "sort-group", ": ", LO_ARG(&lo)); + break; case 'T': ctx->qmdoc->options |= OPT_NO_TOC | OPT_NO_INDEX; break; @@ -1520,9 +1991,6 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct case 'X': ctx->qmdoc->options |= OPT_NO_INDEX; break; - case OPTID_SHAREDIR: - ctx->sharedir = LO_OFF(&lo); - break; case OPTID_VERSION: dieversion(QMDOC_VERSION, "2023", QMDOC_CURYEAR, QMDOC_AUTHOR, QMDOC_URL, NULL); break; @@ -1541,6 +2009,29 @@ parse_cmdline(int argc, const char *argv[], const char usage[], struct parse *ct return nfile; } +static int +cmp_page(const void *i1, const void *i2, void *cmp_) +{ + struct cmp *cmp = cmp_; + struct qmdoc *ctx = cmp->ctx; + const int * const i[2] = { i1, i2 
}; + size_t off[2]; + + for (int j = 0; j < 2; ++j) { + struct entry *e = &ENTRY(ctx)[*i[j]]; + if (e->is_page) { + struct page *p = &PAGES(ctx)[e->page]; + off[j] = (cmp->sort == SORT_FILE) ? p->dstoff : + (p->nameoff) ? p->nameoff : p->titleoff; + } else { + off[j] = e->noff; + } + } + + int r = strcoll(cmp->sa->s + off[0], cmp->sa->s + off[1]); + return (!r) ? r : (cmp->desc) ? -r : r; +} + int main (int argc, const char *argv[]) { @@ -1549,9 +2040,12 @@ main (int argc, const char *argv[]) else PROG = argv[0]; struct qmdoc ctx = { - .options = 0, + .options = OPT_SUBDIR_GROUP, .sa = STRALLOC_ZERO, .sa_out = STRALLOC_ZERO, + .pages = GENALLOC_ZERO, + .ga_grp = GENALLOC_ZERO, + .hmap = HMAP_ZERO, .css[CSS_CUSTOM] = (size_t) -1, .doc.otitle = (size_t) -1, .doc.oauthor = (size_t) -1, @@ -1560,12 +2054,20 @@ main (int argc, const char *argv[]) .omanurl = (size_t) -1, .buf.sa = STRALLOC_ZERO, }; - struct parse parse = { .qmdoc = &ctx, .destdir = (size_t) -1, - .header = (size_t) -1, .footer = (size_t) -1, .ffile = (size_t) -1, - .sharedir = (size_t) -1 }; - const char usage[] = "[OPTION..] FILE.."; + struct parse parse = { + .qmdoc = &ctx, + .sa = STRALLOC_ZERO, + .destdir = (size_t) -1, + .header = (size_t) -1, + .footer = (size_t) -1, + .ffile = (size_t) -1, + .sharedir = (size_t) -1, + .sort_group_desc = 0, + .sort_group = SORT_TITLE + }; + const char usage[] = "[OPTION..] 
FILE|DIR.."; - ctx.nb_pages = parse_cmdline(argc, argv, usage, &parse); + int nb_pages = parse_cmdline(argc, argv, usage, &parse); size_t lfile = ctx.sa.len; if (ctx.doc.otitle == (size_t) -1) { @@ -1588,90 +2090,100 @@ main (int argc, const char *argv[]) if (!(ctx.options & OPT_NO_TOC)) ctx.css[CSS_NO_TOC] = (size_t) -1; - int fddest = open(ctx.sa.s + parse.destdir, O_RDONLY | O_DIRECTORY | O_CLOEXEC); + int fddest = open(ctx.sa.s + parse.destdir, O_RDONLY | O_DIRECTORY); if (fddest < 0) diefusys(EX_IOERR, "open ", ESC, ctx.sa.s + parse.destdir, ESC); - { - /* +1 in case we'll need to add our internal index */ - size_t len = (ctx.nb_pages + 1) * sizeof(struct page); + int r = 1; + enum idx idx_mode = (ctx.options & OPT_NO_INDEX) ? IDX_DISABLED : IDX_UNKNOWN; + if (!hmap_init(sizeof(int), nb_pages + 1, &ctx.hmap) + || !genalloc_ready(struct page, &ctx.pages, nb_pages + 1) + || !genalloc_ready(struct entry, &ctx.entries, nb_pages + 1) + || !genalloc_append(int, &ctx.ga_grp, &r)) + diefusys(EX_TEMPFAIL, "initialize"); + /* reserve index 0 for the index, in case we need it. This will simplify + * things a bit. 
*/ + genalloc_setlen(struct entry, &ctx.entries, 1); - ctx.opages = ctx.sa.len; - if (!stralloc_readyplus(&ctx.sa, len)) - diefusys(EX_TEMPFAIL, "initialize"); - ctx.sa.len += len; - memset(ctx.sa.s + ctx.opages, 0, len); - } + genalloc ga_sg = GENALLOC_ZERO; - int err = 0; - int idx_page = -1; out("Scanning pages..."); - size_t off = ctx.sa.len; - ctx.nb_pages = sacoloff(&ctx.sa, parse.ffile, lfile); - for (int i = 0; i < ctx.nb_pages; ++i) { - size_t *fileoff = (size_t *) (ctx.sa.s + off); - const char *file = ctx.sa.s + fileoff[i]; - size_t flen = strlen(file); - - if (strcmp(file + flen - 3, ".md")) { - warn("File ", ESC, file, ESC, " not a markdown file (*.md)"); - PAGE(&ctx, i).fd = -1; - err = EX_DATA_ERR; - continue; + r = scan_pages(&idx_mode, AT_FDCWD, &ctx.sa, parse.ffile, nb_pages, 1, &ga_sg, &parse); + if (r) diefu(r, "prepage pages"); + + /* we create an array of indices for entries, so we can sort the array and + * leave entries in their actual indices, so that re-ordering won't mess up + * the symlinks handling */ + + /* re-use memory */ + ctx.ga_idx = parse.sa; + ctx.ga_idx.len = 0; + parse.sa = stralloc_zero; + + r = NB_ENTRIES(&ctx); + if (!genalloc_ready(int, &ctx.ga_idx, r)) + diefusys(EX_TEMPFAIL, "prepare pages"); + genalloc_setlen(int, &ctx.ga_idx, r); + for (int i = 0; i < r; ++i) + genalloc_s(int, &ctx.ga_idx)[i] = i; + + /* sort entries (indicies) each sorting group at a time */ + if (genalloc_len(int, &ga_sg)) { + int i = NB_ENTRIES(&ctx); + genalloc_append(int, &ga_sg, &i); + + for (int i = 0, n = genalloc_len(int, &ga_sg); i + 1 < n; ++i) { + int from, to; + from = genalloc_s(int, &ga_sg)[i]; + to = genalloc_s(int, &ga_sg)[i + 1] - 1; + + struct cmp cmp; + cmp.ctx = &ctx; + cmp.sa = &ctx.sa; + cmp.desc = parse.sort_group_desc; + cmp.sort = parse.sort_group; + + qsort_r(genalloc_s(int, &ctx.ga_idx) + from, to - from + 1, + sizeof(int), cmp_page, &cmp); } - - int r = load_page_from_file(&ctx.sa, fileoff[i], flen, i, &ctx); - if (r) err = 
r; - - if (!(ctx.options & OPT_NO_INDEX) - && !strcmp(ctx.sa.s + PAGE(&ctx, i).fileoff, "index.html")) - idx_page = i - optind; + genalloc_free(int, &ga_sg); } - if (err) diefu(err, "load pages"); - - /* enable FULL TOC unless disabled (OPT_NO_INDEX) if: - * - index was given as first page, or none given (add our internal tpl), - * - OPT_INDEX was given, in which case we'll move index to first place - */ - if (!(ctx.options & OPT_NO_INDEX) && (idx_page <= 0 || (ctx.options & OPT_INDEX))) { + if (idx_mode != IDX_DISABLED) { ctx.doc.flags |= DOC_FULL_TOC; - if (idx_page < 0) { /* no index, add our internal page first */ - /* move all pages up by one */ - memmove(&PAGE(&ctx, 1), &PAGE(&ctx, 0), ctx.nb_pages * sizeof(struct page)); + /* no index yet, add our internal page first */ + if (idx_mode == IDX_UNKNOWN) { + struct page page = { 0 }; /* add our internal index */ - PAGE(&ctx, 0).sceoff = ctx.sa.len; - if (!stralloc_cats0(&ctx.sa, "<internal index>")) - diefusys(EX_TEMPFAIL, "set internal page title"); - - PAGE(&ctx, 0).fileoff = ctx.sa.len; + page.dstoff = ctx.sa.len; if (!stralloc_cats0(&ctx.sa, "index.html")) - diefusys(EX_TEMPFAIL, "load page title from ", - ESC, ctx.sa.s + PAGE(&ctx, 0).sceoff, ESC); + diefusys(EX_TEMPFAIL, "setup internal index"); - PAGE(&ctx, 0).titleoff = ctx.sa.len; + page.titleoff = ctx.sa.len; if (!stralloc_cats0(&ctx.sa, index_title)) - diefusys(EX_TEMPFAIL, "load page title from ", - ESC, ctx.sa.s + PAGE(&ctx, 0).sceoff, ESC); + diefusys(EX_TEMPFAIL, "setup internal index"); /* fd == -1 means use index_md instead of reading from fd */ - PAGE(&ctx, 0).fd = -1; - PAGE(&ctx, 0).size = strlen(index_md); - PAGE(&ctx, 0).nameoff = PAGE(&ctx, 0).veroff = PAGE(&ctx, 0).dateoff = 0; - - ++ctx.nb_pages; - } else if (idx_page > 0) { /* move index's page to first */ - struct page pg; - /* "extract" the index page */ - pg = PAGE(&ctx, idx_page); - /* move everything before up by one */ - memmove(&PAGE(&ctx, 1), &PAGE(&ctx, 0), idx_page * 
sizeof(struct page)); - /* put index page first */ - PAGE(&ctx, 0) = pg; + page.fd = -1; + page.size = strlen(index_md); + + if (!genalloc_append(struct page, &ctx.pages, &page)) + diefusys(EX_TEMPFAIL, "setup internal index"); + + /* set the reserved entry */ + ENTRY(&ctx)[0] = (struct entry) { + .has_page = 1, + .is_page = 1, + .page = NB_PAGES(&ctx) - 1 + }; + + idx_mode = IDX_SET; } } + if (NB_PAGES(&ctx) == 0) dief(EX_NOINPUT, "nothing to do"); + if (parse.header != (size_t) -1 || parse.footer != (size_t) -1) { out("Loading files..."); @@ -1755,33 +2267,61 @@ main (int argc, const char *argv[]) if (dirfd != AT_FDCWD) fd_close(dirfd); } - ctx.cur_page = 0; + int i = 1; + ctx.cur_grp_idx = 0; for (;;) { - if ((ctx.doc.flags & (DOC_FULL_TOC | DOC_IS_INDEX)) == DOC_FULL_TOC - && ctx.cur_page == 0) - ++ctx.cur_page; + int cur_entry = genalloc_s(int, &ctx.ga_idx)[i]; + struct entry *e = &ENTRY(&ctx)[cur_entry]; - out("Converting ", ESC, ctx.sa.s + PAGE(&ctx, ctx.cur_page).sceoff, ESC, "..."); + if (genalloc_len(int, &ctx.ga_grp) > ctx.cur_grp_idx + 1 + && genalloc_s(int, &ctx.ga_grp)[ctx.cur_grp_idx + 1] == i) + ++ctx.cur_grp_idx; - int r = convert_page(&ctx, fddest); - if (r) - diefu(r, "convert ", ESC, ctx.sa.s + PAGE(&ctx, ctx.cur_page).sceoff, ESC, - " to ", ESC, ctx.sa.s + parse.destdir, "/", - ctx.sa.s + PAGE(&ctx, ctx.cur_page).fileoff, ESC); + if (!e->is_page) { + struct page *p; + if (e->has_page) { + p = &PAGES(&ctx)[e->page]; + } else { + int pg = get_page(e->dkey, &ctx); + if (pg < 0) p = NULL; + else p = &PAGES(&ctx)[pg]; + } - if (++ctx.cur_page == ctx.nb_pages) { + verb("Link ", ESC, ctx.sa.s + e->noff, ESC, " -> ", + ESC, (p) ? 
ctx.sa.s + p->dstoff : "<unknown>", ESC); + } else { + ctx.cur_page = e->page; + out("Generating ", ESC, ctx.sa.s + PAGES(&ctx)[ctx.cur_page].dstoff, ESC, "..."); + + int r = convert_page(&ctx, fddest); + if (r) + diefu(r, "convert ", ESC, ctx.sa.s + PAGES(&ctx)[ctx.cur_page].fileoff, ESC, + " to ", ESC, ctx.sa.s + parse.destdir, "/", + ctx.sa.s + PAGES(&ctx)[ctx.cur_page].dstoff, ESC); + } + + if (++i == genalloc_len(int, &ctx.ga_idx)) { if (!(ctx.doc.flags & DOC_FULL_TOC)) break; - ctx.cur_page = 0; + i = 0; + ctx.cur_grp_idx = 0; ctx.doc.flags |= DOC_IS_INDEX; } else if (ctx.doc.flags & DOC_IS_INDEX) break; } + add(PMUINT(NB_PAGES(&ctx) - !(ctx.doc.flags & DOC_FULL_TOC)), " files written"); + r = NB_ENTRIES(&ctx) - NB_PAGES(&ctx) - !(ctx.doc.flags & DOC_FULL_TOC); + if (r) add(" (", PMUINT(r), " links processed)"); + out("."); + stralloc_free(&ctx.buf.sa); stralloc_free(&ctx.sa_out); stralloc_free(&ctx.sa); - - out("done."); + genalloc_free(struct page, &ctx.pages); + genalloc_free(struct entry, &ctx.entries); + genalloc_free(int, &ctx.ga_idx); + genalloc_free(int, &ctx.ga_grp); + hmap_free(&ctx.hmap); return 0; }