author | Olivier Brunel
<jjk@jjacky.com> 2022-12-29 09:25:33 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2023-07-19 12:45:28 UTC |
parent | 38c9577dac0910ebb1b23adcc8a04f1c67ddce9b |
src/md4c.c | +156 | -12 |
src/md4c.h | +10 | -0 |
diff --git a/src/md4c.c b/src/md4c.c index 54783df..d000393 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -246,7 +246,8 @@ enum MD_LINETYPE_tag { MD_LINE_HTML, MD_LINE_TEXT, MD_LINE_TABLE, - MD_LINE_TABLEUNDERLINE + MD_LINE_TABLEUNDERLINE, + MD_LINE_BOX_INFO }; typedef enum MD_LINETYPE_tag MD_LINETYPE; @@ -4907,11 +4908,55 @@ abort: return ret; } +static int +md_setup_box_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_BOX_DETAIL* det, + MD_ATTRIBUTE_BUILD* type_build, MD_ATTRIBUTE_BUILD* title_build) +{ + const MD_LINE* line = (const MD_LINE*)(block + 1); + + OFF beg = line->beg; + OFF end = line->end; + OFF sep; + int ret = 0; + + /* Trim initial spaces. */ + while(beg < ctx->size && ISBLANK(beg)) + beg++; + + /* Trim trailing spaces. */ + while(end > beg && ISBLANK(end-1)) + end--; + + /* Look for type. */ + sep = beg; + while(sep < end && !ISBLANK(sep) && CH(sep) != ':') + sep++; + if (sep < end && CH(sep) == ':') { + /* Build type string attribute. */ + MD_CHECK(md_build_attribute(ctx, STR(beg), sep - beg, 0, &det->type, type_build)); + beg = sep + 1; + while(beg < end && ISBLANK(beg)) + beg++; + } else { + /* Build empty type string attribute. */ + MD_CHECK(md_build_attribute(ctx, NULL, 0, 0, &det->type, type_build)); + } + + /* Build info string attribute. */ + MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->title, title_build)); + +abort: + return ret; +} + static int md_process_all_blocks(MD_CTX* ctx) { int byte_off = 0; int ret = 0; + MD_ATTRIBUTE_BUILD type_build; + MD_ATTRIBUTE_BUILD title_build; + int clean_box_builds = FALSE; /* ctx->containers now is not needed for detection of lists and list items * so we reuse it for tracking what lists are loose or tight. 
We rely @@ -4925,6 +4970,7 @@ md_process_all_blocks(MD_CTX* ctx) MD_BLOCK_UL_DETAIL ul; MD_BLOCK_OL_DETAIL ol; MD_BLOCK_LI_DETAIL li; + MD_BLOCK_BOX_DETAIL box; } det; switch(block->type) { @@ -4945,6 +4991,16 @@ md_process_all_blocks(MD_CTX* ctx) det.li.task_mark_offset = (OFF) block->n_lines; break; + case MD_BLOCK_BOX: + memset(&det.box, 0, sizeof(MD_BLOCK_BOX_DETAIL)); + if(block->flags & MD_BLOCK_CONTAINER_OPENER) { + memset(&type_build, 0, sizeof(MD_ATTRIBUTE_BUILD)); + memset(&title_build, 0, sizeof(MD_ATTRIBUTE_BUILD)); + clean_box_builds = TRUE; + MD_CHECK(md_setup_box_detail(ctx, block, &det.box, &type_build, &title_build)); + } + break; + default: /* noop */ break; @@ -4954,7 +5010,7 @@ md_process_all_blocks(MD_CTX* ctx) if(block->flags & MD_BLOCK_CONTAINER_CLOSER) { MD_LEAVE_BLOCK(block->type, &det); - if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE) + if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE || block->type == MD_BLOCK_BOX) ctx->n_containers--; } @@ -4964,12 +5020,16 @@ md_process_all_blocks(MD_CTX* ctx) if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) { ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST); ctx->n_containers++; - } else if(block->type == MD_BLOCK_QUOTE) { + } else if(block->type == MD_BLOCK_QUOTE || block->type == MD_BLOCK_BOX) { /* This causes that any text in a block quote, even if * nested inside a tight list item, is wrapped with * <p>...</p>. 
*/ ctx->containers[ctx->n_containers].is_loose = TRUE; ctx->n_containers++; + + /* box block actually contains an info string line */ + if(block->type == MD_BLOCK_BOX) + byte_off += block->n_lines * sizeof(MD_LINE); } } } else { @@ -4987,6 +5047,10 @@ md_process_all_blocks(MD_CTX* ctx) ctx->n_block_bytes = 0; abort: + if(clean_box_builds) { + md_free_attribute(ctx, &type_build); + md_free_attribute(ctx, &title_build); + } return ret; } @@ -5223,6 +5287,35 @@ abort: return ret; } +static int +md_push_box_bytes(MD_CTX* ctx, MD_LINE_ANALYSIS* analysis, unsigned flags) +{ + MD_BLOCK* block; + MD_LINE* line; + int ret = 0; + + MD_CHECK(md_end_current_block(ctx)); + + block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK)); + if(block == NULL) + return -1; + + block->type = MD_BLOCK_BOX; + block->flags = flags; + block->data = 0; + block->n_lines = 0; + + line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE)); + if(line == NULL) + return -1; + line->beg = analysis->beg; + line->end = analysis->end; + block->n_lines++; + +abort: + return ret; +} + /*********************** @@ -5622,6 +5715,9 @@ md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* contai if(container->ch == _T('>')) return FALSE; + if(container->ch == _T('!')) + return FALSE; + if(container->ch != pivot->ch) return FALSE; if(container->mark_indent > pivot->contents_indent) @@ -5653,7 +5749,7 @@ md_push_container(MD_CTX* ctx, const MD_CONTAINER* container) } static int -md_enter_child_containers(MD_CTX* ctx, int n_children) +md_enter_child_containers(MD_CTX* ctx, int n_children, MD_LINE_ANALYSIS* line) { int i; int ret = 0; @@ -5689,6 +5785,10 @@ md_enter_child_containers(MD_CTX* ctx, int n_children) MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER)); break; + case _T('!'): + MD_CHECK(md_push_box_bytes(ctx, line, MD_BLOCK_CONTAINER_OPENER)); + break; + default: MD_UNREACHABLE(); break; @@ -5730,6 +5830,11 @@ 
md_leave_child_containers(MD_CTX* ctx, int n_keep) 0, MD_BLOCK_CONTAINER_CLOSER)); break; + case _T('!'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_BOX, 0, + 0, MD_BLOCK_CONTAINER_CLOSER)); + break; + default: MD_UNREACHABLE(); break; @@ -5763,6 +5868,18 @@ md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTA return TRUE; } + /* Check for block box mark. */ + if((ctx->parser.flags & MD_FLAG_BOX) && CH(off) == _T('!')) { + off++; + p_container->ch = _T('!'); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + 1; + *p_end = off; + return TRUE; + } + /* Check for list item bullet mark. */ if(ISANYOF(off, _T("-+*")) && (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) { p_container->ch = CH(off); @@ -5845,8 +5962,11 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, while(n_parents < ctx->n_containers) { MD_CONTAINER* c = &ctx->containers[n_parents]; - if(c->ch == _T('>') && line->indent < ctx->code_indent_offset && - off < ctx->size && CH(off) == _T('>')) + if((c->ch == _T('>') && line->indent < ctx->code_indent_offset && + off < ctx->size && CH(off) == _T('>')) + || + (c->ch == _T('!') && line->indent < ctx->code_indent_offset && + off < ctx->size && CH(off) == _T('!'))) { /* Block quote mark. */ off++; @@ -5860,7 +5980,8 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, line->beg = off; - } else if(c->ch != _T('>') && line->indent >= c->contents_indent) { + } else if(c->ch != _T('>') && c->ch != _T('!') + && line->indent >= c->contents_indent) { /* List. */ line->indent -= c->contents_indent; } else { @@ -5874,7 +5995,9 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, /* Blank line does not need any real indentation to be nested inside * a list. 
*/ if(n_brothers + n_children == 0) { - while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T('>')) + while(n_parents < ctx->n_containers + && ctx->containers[n_parents].ch != _T('>') + && ctx->containers[n_parents].ch != _T('!')) n_parents++; } } @@ -5949,7 +6072,8 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, line->type = MD_LINE_BLANK; ctx->last_line_has_list_loosening_effect = (n_parents > 0 && n_brothers + n_children == 0 && - ctx->containers[n_parents-1].ch != _T('>')); + ctx->containers[n_parents-1].ch != _T('>') && + ctx->containers[n_parents-1].ch != _T('!')); #if 1 /* See https://github.com/mity/md4c/issues/6 @@ -5964,6 +6088,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, * item can begin with at most one blank line." */ if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && + ctx->containers[n_parents-1].ch != _T('!') && n_brothers + n_children == 0 && ctx->current_block == NULL && ctx->n_block_bytes > (int) sizeof(MD_BLOCK)) { @@ -5983,6 +6108,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, ctx->last_line_has_list_loosening_effect = FALSE; if(ctx->last_list_item_starts_with_two_blank_lines) { if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && + ctx->containers[n_parents-1].ch != _T('!') && n_brothers + n_children == 0 && ctx->current_block == NULL && ctx->n_block_bytes > (int) sizeof(MD_BLOCK)) { @@ -6074,7 +6200,8 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, md_is_container_mark(ctx, line->indent, off, &off, &container)) { if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers && - (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>')) + (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>') + && container.ch != _T('!')) { /* Noop. List mark followed by a blank line cannot interrupt a paragraph. 
*/ } else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers && @@ -6089,6 +6216,10 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, line->beg = off; line->data = container.ch; + /* First line in a BOX is an info string */ + if(container.ch == _T('!')) + line->type = MD_LINE_BOX_INFO; + /* Some of the following whitespace actually still belongs to the mark. */ if(off >= ctx->size || ISNEWLINE(off)) { container.contents_indent++; @@ -6178,6 +6309,12 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, } } + /* box info string */ + if(line->type == MD_LINE_BOX_INFO) { + while(off < ctx->size && !ISNEWLINE(off)) off++; + break; + } + /* By default, we are normal text line. */ line->type = MD_LINE_TEXT; if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) { @@ -6272,7 +6409,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, /* If we belong to a list after seeing a blank line, the list is loose. */ if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > 0) { MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1]; - if(c->ch != _T('>')) { + if(c->ch != _T('>') && c->ch != _T('!')) { MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off); block->flags |= MD_BLOCK_LOOSE_LIST; } @@ -6298,7 +6435,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, } if(n_children > 0) - MD_CHECK(md_enter_child_containers(ctx, n_children)); + MD_CHECK(md_enter_child_containers(ctx, n_children, line)); abort: return ret; @@ -6364,6 +6501,13 @@ md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANAL if(line->type != pivot_line->type) MD_CHECK(md_end_current_block(ctx)); + /* reset things and eat the MD_LINE_BOX_INFO line, aka the info string for + * the box block */ + if(line->type == MD_LINE_BOX_INFO) { + line->type = MD_LINE_BLANK; + return 0; + } + /* The current line may start a new block. 
*/ if(ctx->current_block == NULL) { MD_CHECK(md_start_new_block(ctx, line)); diff --git a/src/md4c.h b/src/md4c.h index 80176d4..dd633e8 100644 --- a/src/md4c.h +++ b/src/md4c.h @@ -58,6 +58,9 @@ typedef enum MD_BLOCKTYPE { /* <blockquote>...</blockquote> */ MD_BLOCK_QUOTE, + /* <div class="box">...</div> */ + MD_BLOCK_BOX, + /* <ul>...</ul> * Detail: Structure MD_BLOCK_UL_DETAIL. */ MD_BLOCK_UL, @@ -280,6 +283,12 @@ typedef struct MD_BLOCK_TD_DETAIL { MD_ALIGN align; } MD_BLOCK_TD_DETAIL; +/* Detailed info for MD_BLOCK_BOX. */ +typedef struct MB_BLOCK_BOX_DETAIL { + MD_ATTRIBUTE type; + MD_ATTRIBUTE title; +} MD_BLOCK_BOX_DETAIL; + /* Detailed info for MD_SPAN_A. */ typedef struct MD_SPAN_A_DETAIL { MD_ATTRIBUTE href; @@ -318,6 +327,7 @@ typedef struct MD_SPAN_WIKILINK { #define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */ #define MD_FLAG_ITALIC 0x8000 /* Enable italic (/foo/) extension. */ #define MD_FLAG_BOLD 0x10000 /* Enable bold extension */ +#define MD_FLAG_BOX 0x20000 /* Enable box extension */ #define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) #define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)