roff-like markup to HTML with additional niceties.
git clone git://git.skec.site/pub/broff.git
log | files | refs | readme | license

commit 3c638adaca34deca26218990356dd291fc5ae7d3
parent 002330256ae3337db625ebcafa777eb4231b6459
Author: Michael Skec
Date:   Wed, 22 Nov 2023 17:44:40 +1100

better sentence end detection

Diffstat:
M broff.c | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/broff.c b/broff.c @@ -5,10 +5,11 @@ * the 'ms' macros. */ +#include <ctype.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <stdbool.h> // Output indentation #if 0 @@ -48,11 +49,29 @@ end_sentence(void) } static inline bool -is_sentence_end(char c) +is_sentence_end(const char *s, int len) { +#if 0 + // Old method return c == '.' || c == '!' || c == '?'; +#endif + + static const char *const SENTENCE_END_CHARS = ".?!"; + + // First simply check the last character + const char *c = &s[len - 1]; + if (strchr(SENTENCE_END_CHARS, *c) != NULL) return true; + + // Check all punctuation that ends the line, and if there is a full-stop in + // it (e.g. 'end.)' or 'end."') then we will call it the end of a sentence. + for (; c >= s && ispunct(*c); --c) + { + if (strchr(SENTENCE_END_CHARS, *c) == NULL) return true; + } + + return false; } static void @@ -246,7 +265,7 @@ main(int argc, char *argv[]) printf("%s", line); // Detect end of sentence - if (is_sentence_end(line[len - 1])) + if (is_sentence_end(line, len)) { end_sentence(); } @@ -336,12 +355,14 @@ check_font( printf("%.*s", (int)(args[1].e - args[1].s), args[1].s); // If the suffix ends on sentence - if (is_sentence_end(*(args[1].e - 1))) end_sentence(); + if (is_sentence_end(args[1].s, (int)(args[1].e - args[1].s))) + end_sentence(); } else if (args[0].e) { // If the content itself ends on sentence - if (is_sentence_end(*(args[0].e - 1))) end_sentence(); + if (is_sentence_end(args[0].s, (int)(args[0].e - args[0].s))) + end_sentence(); } return true; @@ -418,12 +439,12 @@ check_link(void) printf("%.*s", (int)(args[2].e - args[2].s), args[2].s); // If the suffix ends on sentence - if (is_sentence_end(*(args[2].e - 1))) end_sentence(); + if (is_sentence_end(args[2].s, *(args[2].e - 1))) end_sentence(); } else if (args[1].e) { // If the content itself ends on sentence - if (is_sentence_end(*(args[1].e - 1))) end_sentence(); + if (is_sentence_end(args[1].s, 
*(args[1].e - 1))) end_sentence(); } return true;