commit 2355d6228d687d215f91211718f945a7836822ac
parent 3c638adaca34deca26218990356dd291fc5ae7d3
Author: Michael Skec
Date: Wed, 22 Nov 2023 17:44:40 +1100
add .F for fixed font. also removes broff-escapes script
escaping is now handled in the program itself
Diffstat:
M | Makefile | | | 6 | +----- |
D | broff-escapes | | | 19 | ------------------- |
M | broff.c | | | 115 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------ |
3 files changed, 82 insertions(+), 58 deletions(-)
diff --git a/Makefile b/Makefile
@@ -3,8 +3,6 @@ SRC=broff.c
#CFLAGS=-Og -g
CFLAGS=-O2
-ESCAPES_PROGRAM=broff-escapes
-
.PHONY: all clean test install uninstall
all: $(OUT)
@@ -13,18 +11,16 @@ run: $(OUT)
./$(OUT)
test: $(OUT)
- ./$(OUT) test.ms | ./$(ESCAPES_PROGRAM)
+ ./$(OUT) test.ms
clean:
rm -f $(OUT)
install: $(OUT)
install $(OUT) /usr/local/bin/$(OUT)
- install $(ESCAPES_PROGRAM) /usr/local/bin/$(ESCAPES_PROGRAM)
uninstall: $(OUT)
rm /usr/local/bin/$(OUT)
- rm /usr/local/bin/$(ESCAPES_PROGRAM)
$(OUT): $(SRC) Makefile
gcc $(CFLAGS) $(SRC) -o $(OUT)
diff --git a/broff-escapes b/broff-escapes
@@ -1,19 +0,0 @@
-#!/bin/sed -f
-
-# Basic escapes
-s#\&#\&#g
-s#\.\.\.#\…#g
-
-# Roff escapes (man groff_char)
-s#\\(em#\—#g
-s#\\(lq#\“#g
-s#\\(rq#\”#g
-s#\\(oq#\‘#g
-s#\\(cq#\’#g
-s#\\~#\ #g
-
-# TeX-style fancy quotes
-s#``#\“#g
-s#''#\”#g
-s#`#\‘#g
-s#'#\’#g
diff --git a/broff.c b/broff.c
@@ -38,7 +38,8 @@ static char *line;
static ssize_t len;
static char date_str[32] = { 0 };
-static bool check_font(const char *restrict const, const char *restrict const);
+static bool check_font(
+ const char *restrict const, const char *restrict const, bool);
static bool check_link(void);
static inline void
@@ -51,27 +52,23 @@ end_sentence(void)
static inline bool
is_sentence_end(const char *s, int len)
{
-#if 0
- // Old method
- return c == '.' ||
- c == '!' ||
- c == '?';
-#endif
-
- static const char *const SENTENCE_END_CHARS = ".?!";
+#define SENTENCE_END_CHARS ".?!"
// First simply check thelast character
const char *c = &s[len - 1];
if (strchr(SENTENCE_END_CHARS, *c) != NULL) return true;
- // Check all punctuation that ends the line, and if there is a full-stop in
- // it (e.g. 'end.)' or 'end."') then we will call it the end of a sentence.
- for (; c >= s && ispunct(*c); --c)
+ // If full stop preceeds a certain set of punctuation, then we can call it
+ // a sentence.
+ for (; c >= s &&
+ strchr(SENTENCE_END_CHARS "()[]`'\"", *c) != NULL;
+ --c)
{
if (strchr(SENTENCE_END_CHARS, *c) == NULL) return true;
}
return false;
+#undef SENTENCE_END_CHARS
}
static void
@@ -110,6 +107,58 @@ end_last_cmd(void)
}
}
+static void
+print_escaped(const char *line, int l)
+{
+#if 0
+ // Verbatim print of content
+ printf("%s", line);
+#else
+ /*
+ * Check for any in-text escape sequences to parse, and print.
+ * Probably quite slow, but we really need to do this.
+ */
+ int len_remain;
+ for (const char *c = line; c < line + l; ++c)
+ {
+ len_remain = line + l - c;
+
+ #define HANDLE_ESC(esc, sub) \
+ if (len_remain >= strlen((esc)) && \
+ strncmp(c, (esc), strlen((esc))) == 0) \
+ { \
+ if ((sub)) printf((sub)); \
+ c += strlen((esc)) - 1; \
+ continue; \
+ }
+
+ // Roff escapes
+ HANDLE_ESC("\\&", NULL);
+ HANDLE_ESC("\\~", " ");
+ HANDLE_ESC("\\(em", "—");
+ HANDLE_ESC("\\(lq", "“");
+ HANDLE_ESC("\\(rq", "”");
+ HANDLE_ESC("\\(oq", "‘");
+ HANDLE_ESC("\\(cq", "’");
+
+ // TeX style typographer quotes; must be in this order
+ HANDLE_ESC("``", "“");
+ HANDLE_ESC("''", "”");
+ HANDLE_ESC("`", "‘");
+ HANDLE_ESC("'", "’");
+
+ // HTML escapes (must run last because of the ampersand one)
+ HANDLE_ESC("<", "<");
+ HANDLE_ESC(">", ">");
+ HANDLE_ESC("&", "&");
+ HANDLE_ESC("...", "…");
+
+ // No escape here, just print out this content normally
+ printf("%c", *c);
+ }
+#endif
+}
+
int
main(int argc, char *argv[])
{
@@ -254,15 +303,16 @@ main(int argc, char *argv[])
// .B bold font
// .I italic font
- if (check_font(".B", "b")) continue;
- if (check_font(".I", "i")) continue;
-
+ // .F fixed font
+ if (check_font(".B", "b", true)) continue;
+ if (check_font(".I", "i", true)) continue;
+ if (check_font(".F", "code", false)) continue;
// .LNK link
if (check_link()) continue;
- // Print out the text
- printf("%s", line);
+ // Print out the text content
+ print_escaped(line, len);
// Detect end of sentence
if (is_sentence_end(line, len))
@@ -285,7 +335,8 @@ main(int argc, char *argv[])
static bool
check_font(
const char *restrict const roff_cmd,
- const char *restrict const tag)
+ const char *restrict const tag,
+ bool sentspc)
{
if (len < strlen(roff_cmd) ||
strncmp(line, roff_cmd, strlen(roff_cmd)) != 0) return false;
@@ -341,27 +392,26 @@ check_font(
}
// Print the immediate prefix, if any
- if (args[2].s)
- {
- printf("%.*s", (int)(args[2].e - args[2].s), args[2].s);
- }
+ if (args[2].s) print_escaped(args[2].s, (int)(args[2].e - args[2].s));
// Print the actual content within the tags
- printf("<%s>%.*s</%s>", tag, (int)(args[0].e - args[0].s), args[0].s, tag);
+ printf("<%s>", tag);
+ print_escaped(args[0].s, (int)(args[0].e - args[0].s));
+ printf("</%s>", tag);
// Print the immediate suffix, if any
if (args[1].s)
{
- printf("%.*s", (int)(args[1].e - args[1].s), args[1].s);
+ print_escaped(args[1].s, (int)(args[1].e - args[1].s));
// If the suffix ends on sentence
- if (is_sentence_end(args[1].s, (int)(args[1].e - args[1].s)))
+ if (sentspc && is_sentence_end(args[1].s, (int)(args[1].e - args[1].s)))
end_sentence();
}
else if (args[0].e)
{
// If the content itself ends on sentence
- if (is_sentence_end(args[0].s, (int)(args[0].e - args[0].s)))
+ if (sentspc && is_sentence_end(args[0].s, (int)(args[0].e - args[0].s)))
end_sentence();
}
@@ -423,20 +473,17 @@ check_link(void)
}
// Print the immediate prefix, if any
- if (args[3].s)
- {
- printf("%.*s", (int)(args[3].e - args[3].s), args[3].s);
- }
+ if (args[3].s) print_escaped(args[3].s, (int)(args[3].e - args[3].s));
// Print the actual content within the tags
- printf("<a href=\"%.*s\">%.*s</a>",
- (int)(args[0].e - args[0].s), args[0].s,
- (int)(args[1].e - args[1].s), args[1].s);
+ printf("<a href=\"%.*s\">", (int)(args[0].e - args[0].s), args[0].s);
+ print_escaped(args[1].s, (int)(args[1].e - args[1].s));
+ printf("</a>");
// Print the immediate suffix, if any
if (args[2].s)
{
- printf("%.*s", (int)(args[2].e - args[2].s), args[2].s);
+ print_escaped(args[2].s, (int)(args[2].e - args[2].s));
// If the suffix ends on sentence
if (is_sentence_end(args[2].s, *(args[2].e - 1))) end_sentence();