commit d8917c8a1ec08fa4149be14ff53255c29112f5f6
parent daee3740fae28764ffe9507a64f89c1f9998c3a8
Author: Michael Skec
Date: Wed, 22 Nov 2023 17:44:42 +1100
Initial (experimental) support for footnotes
The implementation is a bit hacky, but it does work. There are undoubtedly
some cases that are not handled, so the system is fragile and prone to
breakage and bugs if not used as intended.
Only auto-numbered footnotes are supported. The \*[*] escape sequence
should be used to place a footnote marker; markers are numbered
automatically, starting from 1 and incrementing with each use. To define
the footnote text, the .FS and .FE macros should be used. There is
currently a strong likelihood of breakage if other macros are used
between .FS and .FE.
The implementation works by redirecting output temporarily to a
singly-linked list of footnote text buffers when processing text between
the .FS and .FE macros.
There is also a bit of code restructuring (the main function moved to
the bottom of the file).
Diffstat:
M | broff.c | | | 656 | ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
1 file changed, 415 insertions(+), 241 deletions(-)
diff --git a/broff.c b/broff.c
@@ -5,22 +5,19 @@
* the 'ms' macros.
*/
+#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <stdarg.h>
// Output indentation
-#if 0
-# define INDENT " "
-# define INDENT_BASE
-#else
-# define INDENT "\t"
-# define INDENT_BASE INDENT INDENT
-#endif
-
-static enum command
+#define INDENT "\t"
+#define INDENT_BASE INDENT INDENT
+
+enum command
{
CMD_NONE = 0,
CMD_NH,
@@ -31,23 +28,117 @@ static enum command
// Non-standard
CMD_LI,
-} cmd = CMD_NONE;
+};
+struct footnote
+{
+ char *content;
+ int content_length;
+ int content_cap;
+ struct footnote *next;
+};
+
+static enum command cmd = CMD_NONE, cmd_fn;
+static bool is_footnote = false;
static int heading_level;
-static bool is_sentence = false;
+static bool is_sentence = false, is_sentence_fn;
static char *line;
static ssize_t len;
static char date_str[32] = { 0 };
+static int fn_index = 1;
+static struct footnote *fn_head = NULL, *fn_tail = NULL;
static bool check_font(
const char *restrict const, const char *restrict const, bool);
static bool check_link(void);
static bool check_img(void);
+static int
+stream_printf(const char *restrict format, ...)
+{
+ va_list args;
+
+ int n;
+
+ if (is_footnote)
+ {
+ int needed;
+ struct footnote *fn = fn_tail;
+ assert(fn && "no footnotes generated yet");
+
+ va_start(args, format);
+ n = vsnprintf(NULL, 0, format, args);
+ va_end(args);
+
+ needed = n + fn->content_length + 1;
+
+ if (needed >= fn->content_cap)
+ {
+ /* grow */
+ fn->content_cap = (needed * 3) / 2;
+ fn->content = realloc(fn->content, fn->content_cap);
+ assert(fn->content);
+ }
+
+ va_start(args, format);
+ n = vsnprintf(fn->content + fn->content_length,
+ fn->content_cap - fn->content_length,
+ format, args);
+
+ fn->content_length += n;
+ fn->content[fn->content_length] = '\0';
+ }
+ else
+ {
+ va_start(args, format);
+ n = vfprintf(stdout, format, args);
+ va_end(args);
+ }
+
+ return n;
+}
+
+static int
+stream_puts(const char *s)
+{
+ /* If we are currently writing a footnote */
+ if (is_footnote)
+ {
+ int n, needed;
+ struct footnote *fn = fn_tail;
+ assert(fn && "no footnotes generated yet");
+
+ n = strlen(s);
+
+ needed = n + fn->content_length + 1;
+
+ if (needed >= fn->content_cap)
+ {
+ /* grow */
+ fn->content_cap = (needed * 3) / 2;
+ fn->content = realloc(fn->content, fn->content_cap);
+ assert(fn->content);
+ }
+
+ strncpy(fn->content + fn->content_length,
+ s,
+ fn->content_cap - fn->content_length);
+
+ fn->content_length += n;
+ fn->content[fn->content_length] = '\0';
+
+ return n;
+ }
+
+ return fputs(s, stdout);
+}
+
static inline void
end_sentence(void)
{
- if (is_sentence) printf("</span>\n");
+ if (is_sentence)
+ stream_puts("</span>\n");
+
is_sentence = false;
}
@@ -81,29 +172,25 @@ end_last_cmd(void)
switch(cmd)
{
case CMD_NH:
- printf(INDENT_BASE INDENT
- "</h%d>\n", heading_level);
+ stream_printf(INDENT_BASE INDENT
+ "</h%d>\n", heading_level);
break;
case CMD_LP:
case CMD_PP:
- printf(INDENT_BASE INDENT
- "</p>\n");
+ stream_puts(INDENT_BASE INDENT "</p>\n");
break;
case CMD_LI:
- printf(INDENT_BASE INDENT
- "</li>\n");
- printf(INDENT_BASE INDENT
- "</ul>\n");
+ stream_puts(INDENT_BASE INDENT "</li>\n");
+ stream_puts(INDENT_BASE INDENT "</ul>\n");
break;
case CMD_TL:
// Also print the date if we parsed one
if (*date_str)
{
- printf(INDENT_BASE INDENT INDENT
+ stream_printf(INDENT_BASE INDENT INDENT
"<span style=\"float: right\">%s</span>\n", date_str);
}
- printf(INDENT_BASE INDENT
- "</header>\n");
+ stream_puts(INDENT_BASE INDENT "</header>\n");
break;
default: break;
}
@@ -112,10 +199,6 @@ end_last_cmd(void)
static void
print_escaped(const char *line, int l)
{
-#if 0
- // Verbatim print of content
- printf("%s", line);
-#else
/*
* Check for any in-text escape sequences to parse, and print.
* Probably quite slow, but we really need to do this.
@@ -129,11 +212,16 @@ print_escaped(const char *line, int l)
if (len_remain >= strlen((esc)) && \
strncmp(c, (esc), strlen((esc))) == 0) \
{ \
- if ((sub)) printf((sub)); \
+ if ((sub)) \
+ stream_puts((sub)); \
+ \
c += strlen((esc)) - 1; \
continue; \
}
+ /* Future optimisation: we could possibly use some kind of table here
+ * instead of all these if-conditions. */
+
// Roff escapes
HANDLE_ESC("\\&", NULL);
HANDLE_ESC("\\~", " ");
@@ -168,229 +256,44 @@ print_escaped(const char *line, int l)
for (; c < line + l && *c != ']'; ++c);
// Print the character as HTML escape.
- printf("&#x%.*s;",
- (int)(c - c_prev),
- c_prev);
+ stream_printf("&#x%.*s;", (int)(c - c_prev), c_prev);
continue;
}
- if (ispunct(*c) && strchr(",.!?;:'\"", *c) == NULL)
+ /* roff supports auto-numbered footnotes via the \*[*] escape sequence,
+ * which states where the footnote is to be placed, and will refer to
+ * the "nearest" (i.e. subsequent) footnote written between .FS and
+ * .FE. We don't care about manual numbering and hence don't currently
+ * support it.
+ */
+ static const char *const FOOTNOTE_ESC = "\\*[*]";
+ if (len_remain >= strlen(FOOTNOTE_ESC) &&
+ strncmp(c, FOOTNOTE_ESC, strlen(FOOTNOTE_ESC)) == 0)
{
- // Escape characters that aren't alphanumeric and are not a
- // certain set of punctuation
- printf("&#%d;", *c);
- continue;
- }
+ c += strlen(FOOTNOTE_ESC) - 1;
- // No escape here, just print out this content normally
- printf("%c", *c);
- }
-#endif
-}
-
-int
-main(int argc, char *argv[])
-{
- FILE *fp = NULL;
+ /* Print out the damn footnote. */
+ stream_printf("<sup class=\"footnote\">"
+ "<a href=\"#fn%d\">%d</a>"
+ "</sup>",
+ fn_index, fn_index);
+ ++fn_index;
- if (argc > 1)
- {
- // Read from file
- fp = fopen(argv[1], "r");
- if (!fp)
- {
- fprintf(stderr, "broff: no such file\n");
- exit(1);
- }
- }
- else
- {
- // Read from stdin
- fp = stdin;
- }
-
- // Begin article
- printf(INDENT_BASE "<article>\n");
-
- // Parse
- size_t len_tmp;
- for (line = NULL; (len = getline(&line, &len_tmp, fp)) != -1;)
- {
- // Strip newline
- line[--len] = '\0';
- if (len <= 0) continue;
-
- if (len == 1 &&
- *line == '.') continue;
-
- if (strncmp(line, ".\\\"", strlen(".\\\"")) == 0) continue;
-
- // .DE end display
- if (cmd == CMD_DS &&
- len >= strlen(".DE") &&
- strncmp(line, ".DE", strlen(".DE")) == 0)
- {
- printf(INDENT_BASE INDENT "</pre>\n");
- cmd = CMD_NONE;
continue;
}
- // We are in a preformatted block; just keep printing as-is
- if (cmd == CMD_DS)
- {
- //printf("\n%s", line);
- printf("\n", line);
-
- for (const char *c = line; *c; ++c)
- {
- if (ispunct(*c) && strchr(",.!?;:'\"", *c) == NULL)
- {
- // Escape characters that aren't alphanumeric and are not a
- // certain set of punctuation
- printf("&#%d;", *c);
- continue;
- }
- printf("%c", *c);
- }
- continue;
- }
-
- // .TL title
- if (len >= strlen(".TL") &&
- strncmp(line, ".TL", strlen(".TL")) == 0)
- {
- end_last_cmd();
- cmd = CMD_TL;
-
- printf(INDENT_BASE INDENT
- "<header>\n");
- continue;
- }
- // .NH Section headings
- if (len >= strlen(".NH") &&
- strncmp(line, ".NH", strlen(".NH")) == 0)
- {
- end_last_cmd();
- cmd = CMD_NH;
-
- // Integer that follows is heading level
- heading_level = 1;
- if (len > strlen(".NH"))
- {
- heading_level = atoi(line + strlen(".NH"));
- }
- printf(INDENT_BASE INDENT
- "<h%d>\n", heading_level);
- continue;
- }
- // .PP indented paragraph
- if (len >= strlen(".PP") &&
- strncmp(line, ".PP", strlen(".PP")) == 0)
- {
- end_last_cmd();
- cmd = CMD_PP;
-
- printf(INDENT_BASE INDENT "<p class=\"sentspc\">\n");
- continue;
- }
- // .LP unindented paragraph
- if (len >= strlen(".LP") &&
- strncmp(line, ".LP", strlen(".LP")) == 0)
- {
- end_last_cmd();
- cmd = CMD_LP;
-
- printf(INDENT_BASE INDENT "<p class=\"sentspc noindent\">\n");
- continue;
- }
- // .DS begin display
- if (len >= strlen(".DS") &&
- strncmp(line, ".DS", strlen(".DS")) == 0)
- {
- end_last_cmd();
- cmd = CMD_DS;
- printf(INDENT_BASE INDENT "<pre>");
- continue;
- }
- // .LI unordered list item
- if (len >= strlen(".LI") &&
- strncmp(line, ".LI", strlen(".LI")) == 0)
- {
- if (cmd != CMD_LI)
- {
- end_last_cmd();
- printf(INDENT_BASE INDENT "<ul>\n");
- }
- else
- {
- end_sentence();
- printf(INDENT_BASE INDENT "</li>\n");
- }
- cmd = CMD_LI;
-
- printf(INDENT_BASE INDENT "<li class=\"sentspc\">\n");
- continue;
- }
-
- // .DA date
- if (len > strlen(".DA") &&
- strncmp(line, ".DA", strlen(".DA")) == 0)
+ if (ispunct(*c) && strchr(",.!?;:'\"", *c) == NULL)
{
- strncpy(date_str, line + strlen(".DA") + 1, sizeof(date_str));
+ // Escape characters that aren't alphanumeric and are not a
+ // certain set of punctuation
+ stream_printf("&#%d;", *c);
continue;
}
- // Print the content on per-sentence basis; in spans for nice sentence
- // spacing.
- if (!is_sentence)
- {
- printf(INDENT_BASE INDENT INDENT "<span class=\"sntc\">");
- is_sentence = true;
- }
- else
- {
- // Put whitespace between words of the sentence
- printf(" ");
- }
-
- // .IM image check
- if (check_img()) continue;
-
- // .B bold font
- // .I italic font
- // .F fixed font
- // .ST/DL strikethrough/delete font extensions
- // .IN ins extensions
- if (check_font(".B", "b", true)) continue;
- if (check_font(".I", "i", true)) continue;
- if (check_font(".F", "code", false)) continue;
- if (check_font(".ST", "s", true)) continue;
- if (check_font(".DL", "del", true)) continue;
- if (check_font(".IN", "ins", true)) continue;
-
- // .H link check
- if (check_link()) continue;
-
- // Print out the text content
- print_escaped(line, len);
-
- // Detect end of sentence
- if (is_sentence_end(line, len))
- {
- end_sentence();
- }
+ // No escape here, just print out this content normally
+ stream_printf("%c", *c);
}
- // End last command
- end_last_cmd();
-
- // Close article
- printf(INDENT_BASE "</article>\n");
-
- // Close file
- if (argc > 1) fclose(fp);
-
- return 0;
}
static bool
@@ -456,9 +359,9 @@ check_font(
if (args[2].s) print_escaped(args[2].s, (int)(args[2].e - args[2].s));
// Print the actual content within the tags
- printf("<%s>", tag);
+ stream_printf("<%s>", tag);
print_escaped(args[0].s, (int)(args[0].e - args[0].s));
- printf("</%s>", tag);
+ stream_printf("</%s>", tag);
// Print the immediate suffix, if any
if (args[1].s)
@@ -537,9 +440,9 @@ check_link(void)
if (args[3].s) print_escaped(args[3].s, (int)(args[3].e - args[3].s));
// Print the actual content within the tags
- printf("<a href=\"%.*s\">", (int)(args[0].e - args[0].s), args[0].s);
+ stream_printf("<a href=\"%.*s\">", (int)(args[0].e - args[0].s), args[0].s);
print_escaped(args[1].s, (int)(args[1].e - args[1].s));
- printf("</a>");
+ stream_puts("</a>");
// Print the immediate suffix, if any
if (args[2].s)
@@ -585,8 +488,279 @@ check_img(void)
alt += strspn(alt, " \"");
int alt_len = strcspn(alt, "\"");
- printf(INDENT_BASE INDENT
+ stream_printf(INDENT_BASE INDENT
"<img src=\"%.*s\" alt=\"%.*s\"/>\n", uri_len, uri, alt_len, alt);
return true;
}
+
+int
+main(int argc, char *argv[])
+{
+ FILE *fp = NULL;
+
+ if (argc > 1)
+ {
+ // Read from file
+ fp = fopen(argv[1], "r");
+ if (!fp)
+ {
+ fprintf(stderr, "broff: no such file\n");
+ exit(1);
+ }
+ }
+ else
+ {
+ // Read from stdin
+ fp = stdin;
+ }
+
+ // Begin article
+ stream_puts(INDENT_BASE "<article>\n");
+
+ // Parse
+ size_t len_tmp;
+ for (line = NULL; (len = getline(&line, &len_tmp, fp)) != -1;)
+ {
+ // Strip newline
+ line[--len] = '\0';
+ if (len <= 0) continue;
+
+ if (len == 1 &&
+ *line == '.') continue;
+
+ if (strncmp(line, ".\\\"", strlen(".\\\"")) == 0) continue;
+
+ // .DE end display
+ if (cmd == CMD_DS &&
+ len >= strlen(".DE") &&
+ strncmp(line, ".DE", strlen(".DE")) == 0)
+ {
+ stream_puts(INDENT_BASE INDENT "</pre>\n");
+ cmd = CMD_NONE;
+ continue;
+ }
+
+ // .FE end footnote
+ if (is_footnote &&
+ len >= strlen(".FE") &&
+ strncmp(line, ".FE", strlen(".FE")) == 0)
+ {
+ is_footnote = false;
+
+ /* restore old state */
+ cmd = cmd_fn;
+ is_sentence = is_sentence_fn;
+ continue;
+ }
+
+ // We are in a preformatted block; just keep printing as-is
+ if (cmd == CMD_DS)
+ {
+ stream_puts("\n");
+
+ for (const char *c = line; *c; ++c)
+ {
+ if (ispunct(*c) && strchr(",.!?;:'\"", *c) == NULL)
+ {
+ // Escape characters that aren't alphanumeric and are not a
+ // certain set of punctuation
+ stream_printf("&#%d;", *c);
+ continue;
+ }
+ stream_printf("%c", *c);
+ }
+ continue;
+ }
+
+ // .TL title
+ if (len >= strlen(".TL") &&
+ strncmp(line, ".TL", strlen(".TL")) == 0)
+ {
+ end_last_cmd();
+ cmd = CMD_TL;
+
+ stream_puts(INDENT_BASE INDENT "<header>\n");
+ continue;
+ }
+ // .NH Section headings
+ if (len >= strlen(".NH") &&
+ strncmp(line, ".NH", strlen(".NH")) == 0)
+ {
+ end_last_cmd();
+ cmd = CMD_NH;
+
+ // Integer that follows is heading level
+ heading_level = 1;
+ if (len > strlen(".NH"))
+ {
+ heading_level = atoi(line + strlen(".NH"));
+ }
+ stream_printf(INDENT_BASE INDENT "<h%d>\n", heading_level);
+ continue;
+ }
+ // .PP indented paragraph
+ if (len >= strlen(".PP") &&
+ strncmp(line, ".PP", strlen(".PP")) == 0)
+ {
+ end_last_cmd();
+ cmd = CMD_PP;
+
+ stream_puts(INDENT_BASE INDENT "<p class=\"sentspc\">\n");
+ continue;
+ }
+ // .LP unindented paragraph
+ if (len >= strlen(".LP") &&
+ strncmp(line, ".LP", strlen(".LP")) == 0)
+ {
+ end_last_cmd();
+ cmd = CMD_LP;
+
+ stream_puts(INDENT_BASE INDENT "<p class=\"sentspc noindent\">\n");
+ continue;
+ }
+ // .DS begin display
+ if (len >= strlen(".DS") &&
+ strncmp(line, ".DS", strlen(".DS")) == 0)
+ {
+ end_last_cmd();
+ cmd = CMD_DS;
+ stream_puts(INDENT_BASE INDENT "<pre>");
+ continue;
+ }
+ // .FS begin footnote
+ if (len >= strlen(".FS") &&
+ strncmp(line, ".FS", strlen(".FS")) == 0)
+ {
+ // end_last_cmd(); /* don't end commands */
+
+ /* store state before the footnote */
+ cmd_fn = cmd;
+ is_sentence_fn = is_sentence;
+
+ is_footnote = true;
+
+ /* Create the footnote node */
+ struct footnote *fn = calloc(1, sizeof(*fn));
+ fn->content_cap = 512;
+ fn->content = malloc(fn->content_cap);
+ fn->content[0] = '\0';
+ fn->content_length = 0;
+ fn->next = NULL;
+
+ /* Update list head/tail */
+ if (!fn_tail)
+ {
+ fn_tail = fn_head = fn;
+ }
+ else
+ {
+ fn_tail->next = fn;
+ fn_tail = fn;
+ }
+
+ /* All output will now be written to the footnote tail's content
+ * until .FE is encountered. */
+
+ /* Start a new paragraph in the footnote. */
+ is_sentence = false;
+
+ continue;
+ }
+ // .LI unordered list item
+ if (len >= strlen(".LI") &&
+ strncmp(line, ".LI", strlen(".LI")) == 0)
+ {
+ if (cmd != CMD_LI)
+ {
+ end_last_cmd();
+ stream_puts(INDENT_BASE INDENT "<ul>\n");
+ }
+ else
+ {
+ end_sentence();
+ stream_puts(INDENT_BASE INDENT "</li>\n");
+ }
+ cmd = CMD_LI;
+
+ stream_puts(INDENT_BASE INDENT "<li class=\"sentspc\">\n");
+ continue;
+ }
+
+ // .DA date
+ if (len > strlen(".DA") &&
+ strncmp(line, ".DA", strlen(".DA")) == 0)
+ {
+ strncpy(date_str, line + strlen(".DA") + 1, sizeof(date_str));
+ continue;
+ }
+
+ // Print the content on per-sentence basis; in spans for nice sentence
+ // spacing.
+ if (!is_sentence)
+ {
+ stream_puts(INDENT_BASE INDENT INDENT "<span class=\"sntc\">");
+ is_sentence = true;
+ }
+ else
+ {
+ // Put whitespace between words of the sentence
+ stream_puts(" ");
+ }
+
+ // .IM image check
+ if (check_img()) continue;
+
+ // .B bold font
+ // .I italic font
+ // .F fixed font
+ // .ST/DL strikethrough/delete font extensions
+ // .IN ins extensions
+ if (check_font(".B", "b", true)) continue;
+ if (check_font(".I", "i", true)) continue;
+ if (check_font(".F", "code", false)) continue;
+ if (check_font(".ST", "s", true)) continue;
+ if (check_font(".DL", "del", true)) continue;
+ if (check_font(".IN", "ins", true)) continue;
+
+ // .H link check
+ if (check_link()) continue;
+
+ // Print out the text content
+ print_escaped(line, len);
+
+ // Detect end of sentence
+ if (is_sentence_end(line, len))
+ {
+ end_sentence();
+ }
+ }
+ // End last command
+ end_last_cmd();
+
+ /* Append footnotes if we have any */
+ if (fn_head)
+ {
+ puts("<hr class=\"footnotes-sep\">");
+ puts("<table class=\"footnotes\">");
+ int fn_id = 1;
+ struct footnote *fn;
+ for (fn = fn_head; fn; fn = fn->next, ++fn_id)
+ {
+ printf("<tr>"
+ "<td class=\"footnote-id\"><sup id=\"fn%d\">%d.</sup></td>"
+ "<td class=\"sentspc\">%s</td>"
+ "</tr>",
+ fn_id, fn_id, fn->content);
+ }
+ puts("</table>");
+ }
+
+ // Close article
+ stream_puts(INDENT_BASE "</article>\n");
+
+ // Close file
+ if (argc > 1) fclose(fp);
+
+ return 0;
+}