commit 1b80b30b4dabc105183bf1e4e024aac8379f3e92
parent 39b651135bb7843c551f84bde3c9a3c0764b152f
Author: Michael Skec
Date: Wed, 22 Nov 2023 17:44:41 +1100
unicode escape parsing
Now parses roff \[uXXXX] Unicode escape codes, and reverts the previous
commit, which added non-standard superscript escapes.
Diffstat:
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/broff.c b/broff.c
@@ -144,9 +144,6 @@ print_escaped(const char *line, int l)
HANDLE_ESC("\\(aq", "'");
HANDLE_ESC("\\(dq", "\"");
- // Superscripts (not actual Roff escapes)
- HANDLE_ESC("\\(Sc", "ᶜ");
-
// TeX style typographer quotes; must be in this order
HANDLE_ESC("``", "“");
HANDLE_ESC("''", "”");
@@ -159,6 +156,24 @@ print_escaped(const char *line, int l)
HANDLE_ESC("&", "&");
HANDLE_ESC("...", "…");
+ // Check for unicode escapes, which can appear like \[uXXXX]
+ static const char *const UNICODE_ESC_PREFIX = "\\[u";
+ if (len_remain > strlen(UNICODE_ESC_PREFIX) &&
+ strncmp(c, UNICODE_ESC_PREFIX, strlen(UNICODE_ESC_PREFIX)) == 0)
+ {
+ // Find end of escape (denoted by right-square bracket)
+ c += strlen(UNICODE_ESC_PREFIX);
+ const char *c_prev = c;
+ for (; c < line + l && *c != ']'; ++c);
+
+ // Print the character as HTML escape.
+ printf("&#x%.*s;",
+ (int)(c - c_prev),
+ c_prev);
+
+ continue;
+ }
+
if (ispunct(*c) && strchr(",.!?;:'\"", *c) == NULL)
{
// Escape characters that aren't alphanumeric and are not a