A Gemini and Gopher client for your terminal.
git clone git://git.skec.site/pub/sr71.git
log | files | refs | readme | license

commit 129652abd587f40d96ca9b735d8c552cd2b8ee64
parent 8a96dc9a94618d9a7606d7f5006955bf43b66aba
Author: Michael Skec
Date:   Fri, 24 Nov 2023 09:00:55 +1100

Write a proper Makefile.  Begin URI parsing code.

The Makefile contains a simple 'test' target (test.c) which is being
used to test the URI code as it is developed.  The main program is being
ignored until the URI code, logging code, etc. are complete.

uri_str is still unimplemented.  uri_parse still needs more testing.

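String utility functions are declared in str.h and include functions that
wrap around the standard string library.  Our str_copy function is an
implementation of strlcpy and therefore behaves differently to strncpy:
it always NUL-terminates the destination (unless n is 0) and returns
strlen(src), so a return value >= n means the copy was truncated.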

Diffstat:
M.gitignore | 1+
AMakefile | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MREADME | 4++--
Aasserts.h | 6++++++
Aconfig.h | 10++++++++++
Aconfig.mk | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mmain.c | 43++++++++++++++++++++++++++++++++++++++++---
Apch.h | 17+++++++++++++++++
Astr.c | 228+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astr.h | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest.c | 342+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Auri.c | 313+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Auri.h | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Autil.h | 10++++++++++
14 files changed, 1304 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -4,3 +4,4 @@ valgrind-out* *.o patches/* *.gch +bin/* diff --git a/Makefile b/Makefile @@ -0,0 +1,79 @@ +# Include the configuration +-include config.mk + +# Directories needed for build +DIRS = bin bin/release bin/debug + +# Main source code, objects, and dependencies +SRCS = $(shell ls *.c) +OBJS = $(shell ls *.c | \ + sed -e '/test.c/d' -e 's/.c$$/.o/' | \ + awk '{print "bin/$(MODE)/" $$0}') +DEPS = $(shell ls *.c | \ + sed -e '/test.c/d' -e 's/.c$$/.d/' | \ + awk '{print "bin/$(MODE)/" $$0}') + +# PCH source +PCH_SRC = pch.h +PCH = bin/$(MODE)/pch.h.gch +PCH_DEPS = bin/$(MODE)/pch.h.d + +TEST_TARGET = test +TEST_OBJS = $(shell ls *.c | sed -e '/main.c/d' -e 's/.c$$/.o/' | awk '{print "bin/$(MODE)/" $$0}') + +all: dirs bin/$(PROGRAM_TARGET) + +test: dirs bin/$(TEST_TARGET) + +runtest: test + @bin/$(TEST_TARGET) + +debugtest: test + @gdb bin/$(TEST_TARGET) + +clean: + $(RM) $(PROGRAM_TARGET) $(OBJS) $(TEST_OBJS) $(TEST_TARGET) + +run: all + @bin/$(PROGRAM_TARGET) + +debug: all + @gdb bin/$(PROGRAM_TARGET) + +mem: all + @valgrind $(VALGRIND_ARGS) --log-file=$(VALGRIND_LOG) bin/$(PROGRAM_TARGET) + +mem_nolog: + @valgrind $(VALGRIND_ARGS) bin/$(PROGRAM_TARGET) + +# Link the executable +bin/$(PROGRAM_TARGET): $(OBJS) + @echo " [link] $@" + @$(CC) $(OBJS) -o $@ $(CFLAGS) $(LDFLAGS) + +# Include dependency files +-include $(DEPS) +-include $(PCH_DEPS) + +# Compile C sources with dependencies +bin/$(MODE)/%.o: %.c $(PCH) + @echo " [cc] $<" + @$(CC) -MMD -MP -c $< -o $@ $(CFLAGS) + +# Compile precompiled header +$(PCH): $(PCH_SRC) + @echo " [pch] $<" + @$(CC) -MMD -MP -x c-header -c $< -o $@ $(CFLAGS) + +# Link test executable +bin/$(TEST_TARGET): $(TEST_OBJS) + @echo " [link] $@" + @$(CC) $(TEST_OBJS) -o $@ $(CFLAGS) $(LDFLAGS) + +dirs: $(DIRS) + +# Create prerequisite directories +$(DIRS): + @$(MKDIR) $(DIRS) + +.PHONY: all dirs clean run debug mem mem_nolog test diff --git a/README b/README @@ -1,7 +1,7 @@ sr71 ---- -A terminal 
small-Internet client. This repository contains the re-write of -this software. The original version can be found at +A terminal small-Internet client. This repository contains a work-in-progress +re-write of this software. The original version can be found at https://github.com/mikejzx/sr71.git diff --git a/asserts.h b/asserts.h @@ -0,0 +1,6 @@ +#ifndef ASSERTS_H_ +#define ASSERTS_H_ + +#define ASSERT(condition) assert((condition)) + +#endif diff --git a/config.h b/config.h @@ -0,0 +1,10 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* + * config.h + * + * Compile-time program configuration options. + */ + +#endif /* CONFIG_H_ */ diff --git a/config.mk b/config.mk @@ -0,0 +1,79 @@ +# +# Makefile configuration +# + +PROGRAM_VERSION = 0.0.1 + +PROGRAM_NAME = sr71 + +# Build mode/configuration +MODE ?= debug + +# Set to 0 to disable logging. +LOG_ENABLED ?= 1 + +# Base compiler flags +CFLAGS += -DPROGRAM_NAME=\"$(PROGRAM_NAME)\" \ + -DPROGRAM_VERSION_STR=\"$(PROGRAM_VERSION)\" \ + -DLOG_ENABLED=$(LOG_ENABLED) + -std=c11 \ + -Wall \ + -Werror-implicit-function-declaration \ + -Wshadow \ + -Winvalid-pch \ + -Wstrict-aliasing \ + -Wpacked \ + -Wfloat-equal \ + -Wstring-compare \ + -Wconversion \ + -Wstrict-prototypes \ + -Wmissing-prototypes \ + -Wmissing-field-initializers \ + -Wredundant-decls \ + -Wnested-externs \ + -Wvla \ + -Wno-sign-conversion \ + -Wunsuffixed-float-constants \ + -Wduplicated-cond \ + -Wduplicated-branches \ + -Wpacked-not-aligned \ + -Wrestrict \ + -Wjump-misses-init + +# Compiler flags for build modes +ifeq ($(MODE), debug) + CFLAGS += -Og -g -DDEBUG -fstrict-aliasing + PROGRAM_TARGET = $(PROGRAM_NAME)-debug +else ifeq ($(MODE), release) + CFLAGS += -O2 -DNDEBUG + PROGRAM_TARGET = $(PROGRAM_NAME) +endif + +# Linux-specific (we support no other OS at the moment. Perhaps FreeBSD, etc. 
+# in future) +ifeq ($(shell uname), Linux) + + CC = gcc + AR = ar + RM = rm -f + RMDIRS = rm -rf + MKDIR = mkdir -p + + CFLAGS += -DPLATFORM_LINUX -D_GNU_SOURCE + LDFLAGS += -lm -lpthread + +else + + #$(error Unsupported OS) + +endif + +# Arguments for Valgrind +VALGRIND_ARGS = --leak-check=full \ + --show-leak-kinds=all \ + --track-origins=yes \ + --fair-sched=no + +# Valgrind log file +VALGRIND_LOG = valgrind-out.txt + diff --git a/main.c b/main.c @@ -1,10 +1,47 @@ -#include <stdio.h> -#include <stdlib.h> +#include "pch.h" +#include "uri.h" + +//static struct browser _browser; + +static void +atexit_handler(void) +{ + //log_deinit(); +} int main(int argc, char **argv) { - printf("Hello, world!\n"); + atexit(atexit_handler); + + //_browser = (struct browser) + //{ + // .buffers_head = NULL, + // .buffer_count = 0, + //}; + + //log_init(); + + //tui_init(); + + ++argv; + for (; *argv; ++argv) + { + /* Check for command-line arguments */ + if (strz_equal(*argv, "--version")) + { + printf(PROGRAM_NAME " " PROGRAM_VERSION_STR "\n"); + exit(EXIT_SUCCESS); + } + + /* Attempt to parse command as a URI */ + //struct uri uri = uri_from_str(*argv, URI_FALLBACK_PROTOCOL_BIT); + //if (uri.is_valid) + //{ + + //} + } + exit(EXIT_SUCCESS); return 0; } diff --git a/pch.h b/pch.h @@ -0,0 +1,17 @@ +#ifndef PCH_H_ +#define PCH_H_ + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "asserts.h" +#include "config.h" +//#include "log.h" + +#endif diff --git a/str.c b/str.c @@ -0,0 +1,228 @@ +#include "pch.h" +#include "str.h" + +/* + * Copyright (c) 1998, 2015 Todd C. Miller <millert@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * strlcpy implementation + * + * Copies string src to buffer dest of size n. At most n-1 chars will be + * copied. Always NUL terminates (unless n == 0). + * + * Returns strlen(src). If retval >= n, then truncation occurred. + */ +size_t +str_copy(char *restrict dest, const char *restrict src, size_t n) +{ + const char *osrc; + size_t nleft; + + osrc = src; + nleft = n; + + /* Copy as many bytes as will fit. */ + if (nleft != 0) + { + while (--nleft != 0) + { + if ((*dest++ = *src++) == '\0') + break; + } + } + + /* Not enough room in dest, add NUL and traverse rest of src. */ + if (nleft == 0) + { + if (n != 0) + *dest = '\0'; /* NUL-terminate dest */ + + while (*src++) + ; + } + + return (src - osrc - 1); /* Returned count does not include NUL */ +} + +/* strlcpy adapted to be similar to stpncpy + * + * Returns pointer to the null-termination character. */ +char * +str_pcopy(char *restrict dest, const char *restrict src, size_t n) +{ + const char *osrc; + size_t nleft; + + osrc = src; + nleft = n; + + /* Copy as many bytes as will fit. */ + if (nleft != 0) + { + while (--nleft != 0) + { + if ((*dest++ = *src++) == '\0') + break; + } + } + + /* Not enough room in dest, add NUL and traverse rest of src. 
*/ + if (nleft == 0) + { + if (n != 0) + *dest = '\0'; /* NUL-terminate dest */ + + while (*src++) + ; + } + + return dest; +} + +/* Calculates length (in bytes) of initial segment of s up to n bytes which + * consists entirely of bytes in accept */ +size_t +str_spn(const char *restrict s, const char *restrict accept, size_t n) +{ + ASSERT(accept[0] != '\0' && "stupid use of str_spn"); + + const char *a; + size_t count; + bool found; + + for (count = 0; count < n && *s != '\0'; ++count) + { + found = false; + + for (a = accept; *a != '\0'; ++a) + { + if (s[count] == *a) + { + found = true; /* found an accepted char */ + break; + } + } + + if (!found) + break; + } + + return count; +} + +/* Calculates length (in bytes) of initial segment of s up to n bytes which + * consists entirely of bytes not in reject */ +size_t +str_cspn(const char *restrict s, const char *restrict reject, size_t n) +{ + ASSERT(reject[0] != '\0' && "stupid use of str_cspn"); + ASSERT(reject[1] != '\0' && "consider str_ichr instead"); + + const char *r; + size_t count; + bool found; + + for (count = 0; count < n && *s != '\0'; ++count) + { + found = false; + + for (r = reject; *r != '\0'; ++r) + { + if (s[count] != *r) + { + found = true; /* found a rejected char */ + break; + } + } + + if (!found) + break; + } + + return count; +} + +int +strz_ichr(const char *s, char c) +{ + int count; + + for (count = 0; s[count] != c; ++count) + ; + + return count; +} + +char * +strz_chr(char *s, char c) +{ + for (; *s != c; ++s) + ; + + return s; +} + +int +str_ichr(const char *s, char c, size_t n) +{ + int count; + + for (count = 0; count < n && s[count] != c; ++count) + ; + + return count; +} + +char * +str_chr(char *s, char c, size_t n) +{ + int count; + + for (count = 0; count < n && s[count] != c; ++count) + ; + + return s + count; +} + +long +str_tol(const char *buf, size_t n) /* spaces, signs, and digits */ +{ + /* https://stackoverflow.com/a/17002450 */ + + long x = 0, sign = +1; + + /* Move over 
any preceding spaces */ + for (; n > 0 && isspace(*buf); --n, ++buf); + + /* Parse sign */ + if (n > 0) + { + switch(*buf) + { + case '-': + sign = -1; + /* fall through */ + case '+': + --n; + ++buf; + } + } + + /* Parse digits */ + for (; n && isdigit(*buf); --n, ++buf) + x = x * 10 + *buf - '0'; + + return x * sign; +} diff --git a/str.h b/str.h @@ -0,0 +1,71 @@ +#ifndef STR_H_ +#define STR_H_ + +/* + * str.h + * + * String utility functions. + */ + +/* length of given string of size n */ +static inline size_t +str_len(const char *s, size_t n) +{ + return strnlen(s, n); +} + +/* length of given null-terminated string */ +static inline size_t +strz_len(const char *s) +{ + return strlen(s); +} + +/* length of given constant string literal */ +#define strz_len_lit(x) (sizeof((x)) - 1) + +/* string catenation */ +static inline void +strz_cat(char *restrict dst, const char *restrict src) +{ + strcat(dst, src); +} + +/* string equality test, testing at most n characters */ +static inline bool +str_equal(const char *restrict s1, const char *restrict s2, size_t n) +{ + return strncmp(s1, s2, n) == 0; +} +#define str_equal_fixed(s1, s2) str_equal((s1), (s2), sizeof((s1))) + +/* null-terminated string equality test (avoid using this) */ +static inline bool +strz_equal(const char *restrict s1, const char *restrict s2) +{ + return strcmp(s1, s2); +} + +/* string copy (implemented as strlcpy) */ +size_t str_copy (char *restrict dst, const char *restrict src, size_t n); +char *str_pcopy(char *restrict dst, const char *restrict src, size_t n); +#define str_copy_fixed(dst, src) str_copy((dst), (src), sizeof((dst))) + +/* replacements for strspn and strcspn (because they are shit and don't allow + * us to specify string length). */ +size_t str_spn (const char *restrict s, + const char *restrict accept, size_t n); +size_t str_cspn (const char *restrict s, + const char *restrict reject, size_t n); + +/* locate char in string. + * (May rename to something else?) 
*/ +int strz_ichr(const char *s, char c); +char *strz_chr (char *s, char c); +int str_ichr (const char *s, char c, size_t n); +char *str_chr (char *s, char c, size_t n); + +/* Parse integer from string of at most n bytes (safer atoi/strtol) */ +long str_tol (const char *buf, size_t n); + +#endif diff --git a/test.c b/test.c @@ -0,0 +1,342 @@ +#include "pch.h" +#include "uri.h" + +#define VERBOSE_TEST_RESULTS 0 + +static bool +test_str_spn(const char *s, const char *a, size_t n, size_t expected) +{ + size_t actual = str_spn(s, a, n); + bool equal = (actual == expected); + + fprintf(stdout, "test %s:\n" + "\t'%.*s'\n" + "\t(n=%d, a='%s') str_spn=%d, expected=%d\n", + equal ? "passed" : "FAILED", + (int)n, s, (int)n, a, + (int)actual, (int)expected); + + if (equal) + fprintf(stdout, "\t'%.*s'\n", (int)actual, s); + + return equal; +} + +static bool +test_str_cspn(const char *s, const char *r, size_t n, size_t expected) +{ + size_t actual = str_cspn(s, r, n); + bool equal = (actual == expected); + + fprintf(stdout, "test %s:\n" + "\t'%.*s'\n" + "\t(n=%d, r='%s') str_cspn=%d, expected=%d\n", + equal ? "passed" : "FAILED", + (int)n, s, (int)n, r, + (int)actual, (int)expected); + + if (equal) + fprintf(stdout, "\t'%.*s'\n", (int)actual, s); + + return equal; +} + +static bool +test_str_ichr(const char *s, const char c, size_t n, size_t expected) +{ + size_t actual = str_ichr(s, c, n); + bool equal = (actual == expected); + + fprintf(stdout, "test %s:\n" + "\t'%.*s'\n" + "\t(n=%d, c='%c') str_ichr=%d, expected=%d\n", + equal ? "passed" : "FAILED", + (int)n, s, (int)n, c, + (int)actual, (int)expected); + + if (equal) + fprintf(stdout, "\t'%.*s'\n", (int)actual, s); + + return equal; +} + +static bool +test_str_tol(const char *buf, size_t n, long expected) +{ + long actual = str_tol(buf, n); + bool equal = (actual == expected); + + fprintf(stdout, "test %s:\n" + "\t'%.*s' (n=%ld) str_tol=%ld, expected=%ld\n", + equal ? 
"passed" : "FAILED", + (int)n, buf, n, actual, expected); + + return equal; +} + +static bool +test_str(void) +{ + bool result; + + const char TEST_STRING[] = "This is a test string. " + "This is another sentence."; + const char TEST_URI[] = "gopher://test.com:70/"; + + fprintf(stdout, "testing string functions...\n"); + + if (!test_str_spn(TEST_STRING, "This ", sizeof(TEST_STRING), 8)) + return false; + if (!test_str_spn(TEST_STRING, "This ", 7, 7)) + return false; + if (!test_str_spn(TEST_STRING + 10, "test ", 11, 7)) + return false; + + + if (!test_str_ichr(TEST_URI, ':', sizeof(TEST_URI), 6)) + return false; + + if (!test_str_tol(" 23 ", 5, 23)) + return false; + if (!test_str_tol(" +23 ", 5, +23)) + return false; + if (!test_str_tol(" -23 ", 5, -23)) + return false; + if (!test_str_tol("-18", 4, -18)) + return false; + if (!test_str_tol("+19", 4, +19)) + return false; + if (!test_str_tol("19", 3, +19)) + return false; + if (!test_str_tol("1", 2, +1)) + return false; + if (!test_str_tol("100", 4, +100)) + return false; + if (!test_str_tol("100", 1, +1)) + return false; + if (!test_str_tol("100", 2, +10)) + return false; + if (!test_str_tol("100", 3, +100)) + return false; + if (!test_str_tol("-200", 1, 0)) + return false; + if (!test_str_tol("-200", 2, -2)) + return false; + if (!test_str_tol("-200", 3, -20)) + return false; + if (!test_str_tol("-200", 4, -200)) + return false; + if (!test_str_tol("", 1, 0)) + return false; + if (!test_str_tol(" ", 3, 0)) + return false; + + return true; +} + +static bool +test_uri(const char *string, struct uri expected) +{ + char uristring[512]; + bool equal; + struct uri parsed; + + parsed = uri_parse(string, strz_len(string)); + equal = uri_equal(parsed, expected, 0); + + uri_str(&expected, uristring, sizeof(uristring), 0); + + fprintf(stdout, "test %s:\n" + "\t'%s' vs '%s'\n", + equal ? 
"passed" : "FAILED", + string, uristring); + +#if !VERBOSE_TEST_RESULTS + if (!equal) +#endif /* !VERBOSE_TEST_RESULTS */ + { + fprintf(stdout, "\tport %d\t\texpected %d\n", + parsed.port, expected.port); + fprintf(stdout, "\tprotocol %d\t\texpected %d\n", + parsed.protocol, expected.protocol); + fprintf(stdout, "\tprotocol_str '%s'\t\texpected '%s'\n", + parsed.protocol_str, expected.protocol_str); + fprintf(stdout, "\thost '%s'\t\texpected '%s'\n", + parsed.host, expected.host); + fprintf(stdout, "\tpath '%s'\t\texpected '%s'\n", + parsed.path, expected.path); + fprintf(stdout, "\tquery '%s'\t\texpected '%s'\n", + parsed.query, expected.query); + } + + return equal; +} + +static bool +test_uris(void) +{ + fprintf(stdout, "testing URI functions...\n"); + + fprintf(stdout, " - uri_str...\n"); + + char uristring_actual[512]; + char uristring_expect[512]; + struct uri testuri; + +#if 0 + str_copy_fixed(uristring_expect, "gopher://"); + memset(&testuri, 0, sizeof(testuri)); + testuri.port = 0; + testuri.protocol = PROTOCOL_GOPHER; + str_copy_fixed(testuri.protocol_str, "gopher"); + uri_str(&testuri, uristring_actual, sizeof(uristring_actual), 0); + if (!str_equal_fixed(uristring_expect, uristring_actual)) + { + fprintf(stdout, "test failed:\n" + "\t'%s' != expected '%s'\n", + uristring_actual, uristring_expect); + return false; + } + fprintf(stdout, "test passed:\n" + "\t'%s' == expected '%s'\n", + uristring_actual, uristring_expect); +#endif + + /* TODO: oversized hostnames, oversized paths, test truncation, etc. 
*/ + + fprintf(stdout, " - uri_parse...\n"); + + struct uri expected; + + memset(&expected, 0, sizeof(expected)); + if (!test_uri("", expected)) + return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_FILE; + str_copy_fixed(expected.protocol_str, "file"); + str_copy_fixed(expected.host, ""); + str_copy_fixed(expected.path, "README/"); + expected.query[0] = '\0'; + if (!test_uri("file://README/", expected)) + return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_FILE; + str_copy_fixed(expected.protocol_str, "file"); + str_copy_fixed(expected.host, ""); + str_copy_fixed(expected.path, "/home/mike/src/c/sr71/README"); + expected.query[0] = '\0'; + if (!test_uri("file:///home/mike/src/c/sr71/README", expected)) + return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_GOPHER; + str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, ""); + expected.query[0] = '\0'; + if (!test_uri("gopher://example.com", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_GOPHER; + str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, "/test.txt"); + expected.query[0] = '\0'; + if (!test_uri("gopher://example.com/test.txt", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 70; + expected.protocol = PROTOCOL_GOPHER; + str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, "/test.txt"); + expected.query[0] = '\0'; + if (!test_uri("gopher://example.com:70/test.txt", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 70; + expected.protocol = PROTOCOL_GOPHER; + 
str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, "/1/test.txt"); + expected.query[0] = '\0'; + if (!test_uri("gopher://example.com:70/1/test.txt", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_GOPHER; + str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, "/1/test.txt"); + expected.query[0] = '\0'; + if (!test_uri("gopher://example.com/1/test.txt", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_NONE; + str_copy_fixed(expected.protocol_str, ""); + str_copy_fixed(expected.host, ""); + str_copy_fixed(expected.path, "test.txt"); + expected.query[0] = '\0'; + if (!test_uri("test.txt", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 0; + expected.protocol = PROTOCOL_NONE; + str_copy_fixed(expected.protocol_str, ""); + str_copy_fixed(expected.host, ""); + str_copy_fixed(expected.path, "/test.txt"); + expected.query[0] = '\0'; + if (!test_uri("/test.txt", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 70; + expected.protocol = PROTOCOL_GOPHER; + str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, "/"); + expected.query[0] = '\0'; + if (!test_uri("gopher://example.com:70/", expected)) return false; + + memset(&expected, 0, sizeof(expected)); + expected.port = 70; + expected.protocol = PROTOCOL_GOPHER; + str_copy_fixed(expected.protocol_str, "gopher"); + str_copy_fixed(expected.host, "example.com"); + str_copy_fixed(expected.path, "/"); + expected.query[0] = '\0'; /* TODO: query */ + if (!test_uri("gopher://example.com:70/?q=test", expected)) return false; + + return true; +} + +/* testing entry point */ +int +main(void) 
+{ + int i; + + /* Make sure all protocols are named */ + for (i = 0; i < SUPPORTED_PROTOCOL_COUNT; ++i) + { + if (!SUPPORTED_PROTOCOL_NAMES[i]) + { + fprintf(stdout, "test failed: protocol %d --> (NULL)\n", i); + return -1; + } + + fprintf(stdout, "test passed: protocol %d --> %s\n", + i, SUPPORTED_PROTOCOL_NAMES[i]); + } + + if (!test_str()) return -1; + if (!test_uris()) return -1; + + return 0; +} diff --git a/uri.c b/uri.c @@ -0,0 +1,313 @@ +#include "pch.h" +#include "str.h" +#include "uri.h" +#include "util.h" + +struct uri +uri_parse(const char *uristr, int uristr_len) +{ + struct uri uri = { 0 }; + + int colon = 0, + hostname_start = 0, + hostname_len = 0, + protocol_name_len = 0; + + /* + * Look for a colon. We expect a maximum of two colons in a URI: + * + * 1 colon -- gopher://example.com/ + * 1 colon -- example.com:70/ + * 2 colons -- gopher://example.com:70/ + * + * Note that we consider the second case to be an invalid URI, as no + * protocol is specified, we cannot make any assumption of what protocol to + * use. + */ + colon = str_ichr(uristr, ':', uristr_len); + + if (colon < uristr_len) + { + char nextchar; + + if (colon + 1 < uristr_len) + nextchar = uristr[colon + 1]; + else + nextchar = 0; + + /* Check if this is a port on a URI without a specified scheme */ + if (isdigit(nextchar)) + { + /* TODO how should we handle this? 
*/ + ASSERT(0 && "port parse without scheme is unimplemented"); + } + else /* this is a scheme */ + { + int protocol_name_size; + const char *protocol_name; + + protocol_name = uristr; + protocol_name_len = colon; + protocol_name_size = colon + 1; /* +1 for null-terminator */ + + /* Set the URI protocol */ + ASSERT(protocol_name_size < sizeof(uri.protocol_str) && + "big protocol name"); + + /* Truncate protocol name */ + protocol_name_size = min(sizeof(uri.protocol_str), + protocol_name_size); + /* TODO: log truncation */ + + str_copy(uri.protocol_str, protocol_name, protocol_name_size); + uri.protocol = uri_protocol_lookup(protocol_name, + protocol_name_len); + + /* + * We handle two types of schemes. Those with two slashes '//' and + * those without. For example: + * + * No slashes -- mailto:info@example.com + * No slashes -- internal:about + * Two slashes -- gopher://example.com/ + */ + if (nextchar != '/') /* no slashes */ + uri.flags |= URI_NO_PROTOCOL_SLASHES_BIT; + } + + /* Look for a second colon (for optional port) */ + if (uri.port == 0) + { + int colon2, port_start, port_len; + + ++colon; + colon2 = colon + str_ichr(uristr + colon, ':', uristr_len - colon); + + port_start = colon2 + 1; + + for (port_len = 0; + (port_start + port_len < uristr_len) && + isdigit(uristr[port_start + port_len]); + ++port_len) + ; + + if (port_len > 0) + uri.port = str_tol(uristr + port_start, port_len); + } + } + + + /* Determine hostname, which can appear only in non-relative URIs. + * We assume that the hostname can only appear when a protocol is + * specified, e.g. the following URI is considered invalid: + * + * example.com/index.html + * + * But the following is valid and has a hostname of example.com: + * + * gopher://example.com/index.html + * + * Note that the LOCAL protocols (file://) are a special case. There is + * never a hostname for these protocols and instead we always assume that + * anything following the scheme is the path. 
+ */ + if (uri.protocol != PROTOCOL_FILE && + (protocol_name_len > 0 && + protocol_name_len + 1 < uristr_len)) /* check +1 for colon */ + { + char c; + int hostname_size; + + ASSERT(uristr[protocol_name_len] == ':'); + + /* Hostname starts after the scheme. We start just past the colon and + * skip over any slashes */ + for (hostname_start = protocol_name_len + 1; + hostname_start < uristr_len && + (uristr[hostname_start] == '/'); + ++hostname_start) + ; + + /* Hostname can end at either a colon, a slash (the beginning of the + * path), or the end of the string */ + for (hostname_len = 0; + hostname_start + hostname_len < uristr_len; + ++hostname_len) + { + c = uristr[hostname_len + hostname_start]; + + if (c == '\0' || c == ':' || c == '/') + break; + } + + hostname_size = hostname_len + 1; + ASSERT(hostname_size < sizeof(uri.host) && "big hostname"); + hostname_size = min(sizeof(uri.host), hostname_size); /* truncate */ + /* TODO: log truncation */ + + str_copy(uri.host, uristr + hostname_start, hostname_size); + } + + /* Determine path. For URIs with a scheme and hostname, this path should + * always begin with a leading slash. For relative URIs, the path actually + * starts from the beginning of the string. For local schemes (file://) + * this is anything the follows the scheme. + * + * e.g.: + * URI PATH + * 0. gopher://example.com --> (empty) + * 1. gopher://example.com:70/ --> / + * 2. gopher://example.com/index --> /index + * 3. /index --> /index + * 4. index --> index + * 5. ../index --> ../index + * 6. file://index --> index + * 7. file:///index --> /index + * + * Note that in this parsing function for examples 4, 5, and 6 the paths + * are stored in the relative form as shown. These paths should be + * normalised with the current URI at some point to compute the real path, + * perhaps when the page is displayed to the user. 
+ * + * URI ACTUAL PATH + * index --> <current path>/index + * ../index --> <current path>/../index + * file://index --> <current path>/index + */ + if (uri.protocol == PROTOCOL_FILE) /* local URI */ + { + int n_slashes, path_start, path_len, path_size; + + ASSERT(protocol_name_len > 0); + ASSERT(uristr[protocol_name_len] == ':'); + + /* Everything after scheme is the path. We move over the colon and + * *maximum of two* slashes (to allow absolute paths) */ + n_slashes = 0; + for (path_start = protocol_name_len + 1; + path_start < uristr_len && n_slashes < 2; + ++path_start) + { + if (uristr[path_start] == '/') + ++n_slashes; + } + + /* Find length of path. Generally ends at the end of the string. */ + for (path_len = 0; + (path_start + path_len < uristr_len) && + (uristr[path_start + path_len] != '\0'); + ++path_len) + ; + + path_size = path_len + 1; + ASSERT(path_size < sizeof(uri.path) && "big path"); + path_size = min(sizeof(uri.path), path_size); /* truncate */ + /* TODO: log truncation */ + + /* Copy path */ + str_copy(uri.path, uristr + path_start, path_size); + } + else if (hostname_len > 0) /* URI with a hostname */ + { + int path_start, path_len, path_size; + + /* Start at the end of hostname and after port. */ + for (path_start = hostname_start + hostname_len; + path_start < uristr_len && + uristr[path_start] != '\0' && + uristr[path_start] != '/'; + ++path_start) + { + /* Skip past the port if there is one */ + if (uristr[path_start] == ':') + { + for (; + path_start < uristr_len && isdigit(uristr[path_start]); + ++path_start) + ; + } + } + + /* Length is to end of the string or to the query. 
*/ + for (path_len = 0; + path_start + path_len < uristr_len && + uristr[path_start + path_len] != '\0' && + uristr[path_start + path_len] != '?'; + ++path_len) + ; + + path_size = path_len + 1; + ASSERT(path_size < sizeof(uri.path) && "big path"); + path_size = min(sizeof(uri.path), path_size); /* truncate */ + /* TODO: log truncation */ + + str_copy(uri.path, uristr + path_start, path_size); + } + else /* relative URI--we use beginning section as path. */ + { + int path_start = 0, path_len, path_size; + + ASSERT(protocol_name_len == 0); + + /* Find length of path. Goes until the end of the string or at + * query. */ + for (path_len = 0; + (path_start + path_len < uristr_len) && + (uristr[path_start + path_len] != '\0') && + (uristr[path_start + path_len] != '?'); + ++path_len) + ; + + path_size = path_len + 1; + ASSERT(path_size < sizeof(uri.path) && "big path"); + path_size = min(sizeof(uri.path), path_size); /* truncate */ + /* TODO: log truncation */ + + /* Copy path */ + str_copy(uri.path, uristr + path_start, path_size); + } + + + return uri; +} + +size_t +uri_str(const struct uri *const u, + char *buffer, + size_t buffer_size, + uint32_t flags) +{ + ASSERT(u); + ASSERT(buffer); + ASSERT(buffer_size > 0); + + memset(buffer, 0, buffer_size); + + return str_copy(buffer, "(uri_str is unimplemented)", buffer_size); +} + +bool +uri_equal(struct uri u1, struct uri u2, uint32_t flags) +{ + size_t l1 = strz_len(u1.path); + size_t l2 = strz_len(u2.path); + + /* Strip trailing path slashes */ + if (flags & URIEQ_IGNORE_TRAILING_SLASH_BIT) + { + if (u1.path[l1 - 1] == '/') --l1; + if (u2.path[l2 - 1] == '/') --l2; + } + + /* Check that query matches */ + if ((flags & URIEQ_QUERY_BIT) && + !str_equal(u1.query, u2.query, sizeof(u1.query))) + { + return false; + } + + return l1 == l2 && + str_equal(u1.host, u2.host, sizeof(u1.host)) && + str_equal(u1.path, u2.path, l1) && + u1.protocol == u2.protocol; +} diff --git a/uri.h b/uri.h @@ -0,0 +1,106 @@ +#ifndef URI_H_ 
+#define URI_H_ + +#include "str.h" + +enum uri_protocol +{ + /* The protocol is not one thatwe understand. */ + PROTOCOL_UNSUPPORTED = -1, + + PROTOCOL_NONE = 0, + + /* + * Protocols that have slashes: + * gopher://example.com + */ + PROTOCOL_GOPHER, + //PROTOCOL_GEMINI, + PROTOCOL_FILE, + + /* + * Protocols that do not have slashes: + * internal:history + * mailto:info@example.com + */ +#define PROTOCOL_NO_SLASHES_FIRST SUPPORTED_PROTOCOL_COUNT //PROTOCOL_INTERNAL + //PROTOCOL_INTERNAL, + //PROTOCOL_MAILTO, + //PROTOCOL_MAGNET, + + SUPPORTED_PROTOCOL_COUNT +}; + +static const char *const SUPPORTED_PROTOCOL_NAMES[SUPPORTED_PROTOCOL_COUNT] = +{ + [PROTOCOL_NONE] = "", + [PROTOCOL_GOPHER] = "gopher", + [PROTOCOL_FILE] = "file", +}; + +/* Lookup supported protocol by name */ +static inline enum uri_protocol +uri_protocol_lookup(const char *s, size_t n) +{ + int i; + + for (i = 0; i < SUPPORTED_PROTOCOL_COUNT; ++i) + { + ASSERT(SUPPORTED_PROTOCOL_NAMES[i] && "unnamed protocol"); + + if (str_equal(s, SUPPORTED_PROTOCOL_NAMES[i], n)) + return (enum uri_protocol)i; + } + + return PROTOCOL_UNSUPPORTED; +} + +static inline bool +uri_protocol_has_slashes(enum uri_protocol p) +{ + ASSERT(p != PROTOCOL_UNSUPPORTED && "unsupported protocol"); + ASSERT(p > PROTOCOL_NONE && + p < SUPPORTED_PROTOCOL_COUNT && "invalid protocol"); + + return p < PROTOCOL_NO_SLASHES_FIRST; +} + +#define URI_NO_PROTOCOL_SLASHES_BIT 0x01 /* URI protocol has no slashes. + * e.g. mailto: and internal: URIs. 
*/ + +struct uri +{ + uint32_t flags; + int port; + enum uri_protocol protocol; + char protocol_str[16]; /* protocol (no colon or slashes) */ + char host[256]; /* includes at-sign (@) for mailto */ + char path[512]; + char query[256]; +}; + +/* Parse URI from string */ +struct uri uri_parse(const char *uristr, int uristr_len); + +/* uristr flags */ +#define URISTR_NO_PORT_BIT 0x01 /* exclude port from the string */ +#define URISTR_FANCY_BIT 0x02 /* fancy URI styling/colouring */ +#define URISTR_NOSCHEME_BIT 0x04 /* omit protocol/scheme */ +#define URISTR_NOSLASH_BIT 0x08 /* omit trailing slash from path */ +#define URISTR_NOGITEM_BIT 0x10 /* omit gopher item type */ +#define URISTR_NOQUERY_BIT 0x20 /* omit query */ + +/* Convert URI to string */ +size_t uri_str(const struct uri *const u, + char *b, + size_t b_size, + uint32_t flags); + +/* urieq flags */ +#define URIEQ_IGNORE_TRAILING_SLASH_BIT 0x01 +#define URIEQ_QUERY_BIT 0x02 + +/* @return true if URIs are equal */ +bool uri_equal(struct uri u1, struct uri u2, uint32_t flags); + +#endif diff --git a/util.h b/util.h @@ -0,0 +1,10 @@ +#ifndef UTIL_H_ +#define UTIL_H_ + +#define min(x, y) ((x) < (y) ? (x) : (y)) + +#define max(x, y) ((x) > (y) ? (x) : (y)) + +#define sign(x) ((x) > 0 ? 1 : -1) + +#endif // UTIL_H_