diff options
author | George Abbott <george@gabbott.dev> | 2025-01-26 11:38:37 +0000 |
---|---|---|
committer | George Abbott <george@gabbott.dev> | 2025-01-26 11:38:37 +0000 |
commit | f6372e58ad54560919a061d33689c81281aa902c (patch) | |
tree | 576113f09e6cb2ceed53af1a3dff298cdce2424d | |
parent | 52c4d3793e09bb95f154880fd1372333f94f66cd (diff) |
misc
-rw-r--r-- | scripts/Makefile | 7 | ||||
-rw-r--r-- | scripts/jezup.c | 413 | ||||
-rw-r--r-- | scripts/neostr.h | 178 | ||||
l--------- | scripts/target/kmd | 1 |
4 files changed, 599 insertions, 0 deletions
diff --git a/scripts/Makefile b/scripts/Makefile new file mode 100644 index 0000000..10989c3 --- /dev/null +++ b/scripts/Makefile @@ -0,0 +1,7 @@ +jezup: + cd kmd + zig build + cd .. + +bar: + clang++ -Wall -Wpedantic -O3 -std=c99 bar.c -o target/bar diff --git a/scripts/jezup.c b/scripts/jezup.c new file mode 100644 index 0000000..b09e8d5 --- /dev/null +++ b/scripts/jezup.c @@ -0,0 +1,413 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include "neostr.h" + +#define owned /* needs to be freed */ +#define borrowed /* does not need to be freed */ + +/* Global Variables */ +bool debug = 0; + +/* Structures */ +enum result +{ + result_success, + result_error, +}; + +typedef struct char_ptr +{ + borrowed char *ptr; + size_t size; +} char_ptr; + +struct Jezup +{ + borrowed const char *registered_input_file; + borrowed const char *registered_template_file; + owned char *input_str; + owned char *template_str; +}; + +/* Functions */ +owned char *char_ptr_copy_alloc(char_ptr cp); +bool char_ptr_is_null(char_ptr cp); +char_ptr char_ptr_new(borrowed char *ptr, size_t size); +char_ptr char_ptr_new_null(void); +owned char *freadall(const char *path); +void jezup_free(struct Jezup *jz); +struct Jezup jezup_new(void); +enum result jezup_register_input_file(struct Jezup *jz, const char *file); +enum result jezup_register_template_file(struct Jezup *jz, const char *file); +char_ptr jezup_get_date(const struct Jezup *jz, bool *has_date); +char_ptr jezup_get_title(const struct Jezup *jz); +char_ptr jezup_get_contents(const struct Jezup *jz); +owned char *jezup_substitute(struct Jezup *jz); +owned char *jezup_substitute_into_html(const struct Jezup *jz); +void dbg(const char *fmt, ...); +void usage(void); + +/* Function Implementations */ + +/* Given a char_ptr, copy the pointed to segment as a new allocation and + * return the pointer to the newly allocated memory. Caller to free(). 
*/ +owned char * +char_ptr_copy_alloc(char_ptr cp) +{ + char *buf = (char*)malloc(cp.size); + if (!buf) + return NULL; + + strncpy(buf, cp.ptr, cp.size); + buf[cp.size] = '\0'; + return buf; +} + +bool +char_ptr_is_null(char_ptr cp) +{ + return (!cp.ptr && cp.size == 0); +} + +char_ptr +char_ptr_new(borrowed char *ptr, size_t size) +{ + return (char_ptr) { ptr, size }; +} + +char_ptr +char_ptr_new_null(void) +{ + return (char_ptr) { (char*)NULL, 0 }; +} + + +owned char * /* caller to free */ +freadall(const char *path) +{ + FILE *f; + long fsize; + char *buf; + + f = fopen(path, "rb"); + if (!f) + goto error; + fseek(f, 0, SEEK_END); + fsize = ftell(f); + fseek(f, 0, SEEK_SET); + + buf = (char*)malloc(fsize + 1); + if (!buf) + goto error; + + fread(buf, fsize, 1, f); + fclose(f); + + buf[fsize] = 0; + return buf; + +error: + fclose(f); + return NULL; +} + +struct Jezup +jezup_new(void) +{ + return (struct Jezup) { NULL, NULL, NULL, NULL }; +} + +void +jezup_free(struct Jezup *jz) +{ + free(jz->input_str); + free(jz->template_str); +} + +enum result +jezup_register_input_file(struct Jezup *jz, const char *file) +{ + if (!file) + return result_error; + + jz->registered_input_file = file; + + /* read contents of file */ + jz->input_str = freadall(file); + + if (!jz->input_str) + return result_error; + else + return result_success; +} + +enum result +jezup_register_template_file(struct Jezup *jz, const char *file) +{ + if (!file) + return result_error; + + jz->registered_template_file = file; + + /* read contents of file */ + jz->template_str = freadall(file); + + if (!jz->template_str) + return result_error; + else + return result_success; +} + +char_ptr +jezup_get_date(const struct Jezup *jz, bool *has_date) +{ + char_ptr p; + + /* Iterate just until the first newline ... 
*/ + p = (char_ptr) { jz->input_str, 0 }; + for (; *(p.ptr + p.size) != '\n'; p.size++); + p.size++; /* account for the newline */ + p.ptr = p.ptr + p.size; /* set current position to date pos */ + p.size = 0; /* reset size to be changed lower down */ + + /* If the next character is a newline, there is no date and we return NULL. */ + if (*p.ptr == '\n') + { + *has_date = false; + return char_ptr_new_null(); + } + + /* We have content on the second line, so return this. */ + for (; *(p.ptr + p.size) != NULL; p.size++) + if (*(p.ptr + p.size) == '\n') + break; + + *has_date = true; + return p; + + +} + +char_ptr +jezup_get_title(const struct Jezup *jz) +{ + /* The title guaranteed to be the contents of the first line, and + * must be populated. As such, we scan until the first newline. */ + char_ptr p; + p = (char_ptr) { jz->input_str, 0 }; + + for (; *(p.ptr + p.size) != NULL; p.size++) + if (*(p.ptr + p.size) == '\n') + break; + + return p; +} + +char_ptr +jezup_get_contents(const struct Jezup *jz) +{ + char_ptr p; + bool first; + size_t tmp; + + p = char_ptr_new(jz->input_str, 0); + first = false; + + for (; *(p.ptr + p.size) != NULL; p.size++) + { + if (*(p.ptr + p.size) == '\n' && first) + goto found; + else if (*(p.ptr + p.size) == '\n' && !first) + first = true; + else + first = false; + } + + return char_ptr_new_null(); + +found: + tmp = p.size; /* we need to swap p.size into p.ptr */ + p.ptr += p.size + 1; + p.size = strlen(jz->input_str) - tmp; + return p; +} + +/* Takes a char_ptr to the contents, and allocates a new owned string + * with the contents converted to HTML. + */ +owned char * +jezup_to_html(const struct Jezup *jz, char_ptr cp) +{ + /* This function should substitute each part of a Jezup syntax into the + * equivalent structure in HTML. Additionally, each paragraph should be + * placed within <p></p> tags. + * HTML Jezup + * <p> Any string of text separated by at most one newline, and not + * falling under any other tag. 
+ * <hN> #, ##, ###, etc. at the beginning of the line. + * + */ + + /* Placing each paragraph within <p></p>. */ + // 1) identify paragraph, surround with <p></p> + // A <p> tag is placed at the start of a sequence, and following each + // newline, if followed by: + // 1. (</p><ol>), - (</p><ul>), \n (</p>) + char *tmp = char_ptr_copy_alloc(cp); + char *result0 = neostr_linewise_replace_prefix_with_circumfix(tmp, "# ", "<h1>", "</h1>"); + char *result1 = neostr_linewise_replace_prefix_with_circumfix(result0, "## ", "<h2>", "</h2>"); + char *result2 = neostr_linewise_replace_prefix_with_circumfix(result1, "### ", "<h3>", "</h3>"); + + free(tmp); + free(result0); + free(result1); + + return result2; + +} + +owned char * +jezup_substitute_into_html(const struct Jezup *jz) +{ + char_ptr title, date, contents; + owned const char *contents_as_html, *result0, *result1, *result2; + bool has_date; + + title, date, contents = char_ptr_new_null(); + + title = jezup_get_title(jz); + if (char_ptr_is_null(title)) + return NULL; + dbg("jezup_substitute_into_html: title nonnull"); + + date = jezup_get_date(jz, &has_date); + dbg("jezup_substitute_into_html: date nonnull"); + + contents = jezup_get_contents(jz); + if (char_ptr_is_null(contents)) + return NULL; + dbg("jezup_substitute_into_html: contents nonnull"); + + contents_as_html = jezup_to_html(jz, contents); + if (!contents_as_html) + return NULL; + dbg("jezup_substitute_into_html: contents as html nonnull"); + + + /* Substitute */ + char *alloc_title = NULL, *alloc_date = NULL, *alloc_contents = NULL; + + /* substitute title */ + alloc_title = char_ptr_copy_alloc(title); + result0 = neostr_replace_all(jz->template_str, "$$TITLE$$", alloc_title); + + /* date */ + if (!char_ptr_is_null(date)) + { + alloc_date = char_ptr_copy_alloc(date); + result1 = neostr_replace_all(result0, "$$DATE$$", alloc_date); + } + else /* so result1 is valid */ + { + result1 = result0; + } + + /* content */ + result2 = neostr_replace_all(result1, 
"$$CONTENT$$", contents_as_html); + + free(result0); + free(alloc_title); + free(alloc_contents); + free(contents_as_html); + + if (!char_ptr_is_null(date)) /* to stop double-free if date not subbed */ + free(result1); + else + free(alloc_date); + + return result2; + // freelist: result0, result1, contents_as_html, alloc_title, _date, _contents +} + +void +dbg(const char *fmt, ...) +{ + if (!debug) + return; + + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + printf("%s\n", fmt); +} + +void +usage(void) +{ + printf("jezup: convert Jezup to HTML\n"); + printf(" Jezup is a Markdown-like language with a specific syntax to \n" + " provide a title and date. The first line must be the title; \n" + " the second line can optionally contain a date, and the \n" + " contents follow after this with a Markdown syntax. \n" + "The syntax is: \n" + " jezup -i INPUT_FILE -o OUTPUT_FILE -t HTML_TEMPLATE \n" + ); + +} + + +int main(int argc, char **argv) +{ + struct Jezup jezup; + int i; + char *arg = NULL, *output = NULL, *input = NULL, *template = NULL; + enum result res = result_error; + + for (i = 0; i < argc; ++i) + { + arg = argv[i]; + if (!strcmp(arg, "-o")) + output = argv[++i]; + else if (!strcmp(arg, "-i")) + input = argv[++i]; + else if (!strcmp(arg, "-t")) + template = argv[++i]; + else if (!strcmp(arg, "-d")) + debug = true; + else if (!strcmp(arg, "-h")) + { + usage(); return 0; + } + else + ; + } + + jezup = jezup_new(); + + res = jezup_register_input_file(&jezup, input); + if (res != result_success) + goto error; + dbg("registered input"); + + jezup_register_template_file(&jezup, template); + if (res != result_success) + goto error; + dbg("registered template"); + + output = jezup_substitute_into_html(&jezup); + dbg("substituted into html"); + + printf("%s\n", output); + + return 0; + error: + jezup_free(&jezup); + fprintf(stderr, "error\n"); + return -1; +} diff --git a/scripts/neostr.h b/scripts/neostr.h new file mode 100644 index 
/* 0000000..68991ed --- /dev/null +++ b/scripts/neostr.h @@ -0,0 +1,178 @@ */
/* neostr: various functions for manipulating strings.
 * char* is the main type that is manipulated, and there is no expectation of
 * Unicode, though I could implement support at some point.
 */

#define owned
#define borrowed
#define str_discard_const (char*)
void dbg(const char *fmt, ...);

/* Given a string, replace all instances of orig with with, ensuring that only
 * one allocation is made.
 *
 * Matches are found left to right and do not overlap. An empty orig matches
 * nothing, so the result is then a plain copy of haystack.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if any argument is NULL or the allocation fails.
 */
owned const char *
neostr_replace_all(const char *haystack, const char *orig, const char *with)
{
    size_t len_haystack, len_orig, len_with, count, out_len, chunk;
    const char *from, *hit;
    char *buf, *to;

    if (!haystack || !orig || !with)
        return NULL;

    len_haystack = strlen(haystack);
    len_orig = strlen(orig);
    len_with = strlen(with);

    /* Count matches first so the output can be sized exactly. An empty
     * needle is skipped: strstr() would match without ever advancing. */
    count = 0;
    if (len_orig > 0)
    {
        for (from = haystack; (hit = strstr(from, orig)) != NULL; from = hit + len_orig)
            count++;
    }

    /* Allocate the new buffer, including room for the terminator. */
    out_len = len_haystack - (count * len_orig) + (count * len_with);
    buf = malloc(out_len + 1);
    if (!buf)
        return NULL;

    /* Copy until the text to substitute is found, then copy the new text
     * into buf and skip len_orig bytes of the haystack. */
    from = haystack;
    to = buf;
    while (len_orig > 0 && (hit = strstr(from, orig)) != NULL)
    {
        chunk = (size_t)(hit - from);
        memcpy(to, from, chunk);
        to += chunk;
        memcpy(to, with, len_with);
        to += len_with;
        from = hit + len_orig;
    }

    /* Finally, copy the unsubstituted tail together with its terminator. */
    memcpy(to, from, len_haystack - (size_t)(from - haystack) + 1);

    return buf;
}
/* In a single allocation, replace all instances of the prefix at the start of
 * the line with a circumfix, at the start and end of the line.
 *
 * Lines are separated by '\n'. A line that starts with `prefix` is emitted
 * as circumfix_start + (line without prefix) + circumfix_end; every other
 * line, including blank ones, is copied unchanged. Newlines are preserved,
 * and a final line without a trailing newline is treated like any other.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if any argument is NULL or the allocation fails.
 */
#ifndef owned
#define owned /* marks a pointer the caller must free */
#endif
owned char *
neostr_linewise_replace_prefix_with_circumfix(const char *haystack,
                                              const char *prefix,
                                              const char *circumfix_start,
                                              const char *circumfix_end)
{
    size_t prefix_len, start_len, end_len, line_len, out_len;
    const char *line, *nl;
    char *buf, *out;

    if (!haystack || !prefix || !circumfix_start || !circumfix_end)
        return NULL;

    prefix_len = strlen(prefix);
    start_len = strlen(circumfix_start);
    end_len = strlen(circumfix_end);

    /* Pass 1: measure the output exactly. Only lines that really carry the
     * prefix change size. */
    out_len = 0;
    for (line = haystack; ; line = nl + 1)
    {
        nl = strchr(line, '\n');
        line_len = nl ? (size_t)(nl - line) : strlen(line);

        if (line_len >= prefix_len && memcmp(line, prefix, prefix_len) == 0)
            out_len += start_len + (line_len - prefix_len) + end_len;
        else
            out_len += line_len;

        if (!nl)
            break; /* last line: no newline to account for */
        out_len++;
    }

    buf = malloc(out_len + 1); /* +1 for the terminator */
    if (!buf)
        return NULL;

    /* Pass 2: emit, using the same line walk and the same prefix test. */
    out = buf;
    for (line = haystack; ; line = nl + 1)
    {
        nl = strchr(line, '\n');
        line_len = nl ? (size_t)(nl - line) : strlen(line);

        if (line_len >= prefix_len && memcmp(line, prefix, prefix_len) == 0)
        {
            memcpy(out, circumfix_start, start_len);
            out += start_len;

            memcpy(out, line + prefix_len, line_len - prefix_len);
            out += line_len - prefix_len;

            memcpy(out, circumfix_end, end_len);
            out += end_len;
        }
        else
        {
            memcpy(out, line, line_len);
            out += line_len;
        }

        if (!nl)
            break;
        *out++ = '\n';
    }

    *out = '\0';
    return buf;
}
/* diff --git a/scripts/target/kmd b/scripts/target/kmd new file mode 120000 index 0000000..8f14555 --- /dev/null +++ b/scripts/target/kmd @@ -0,0 +1 @@ +../kmd/zig-out/bin/kmd */
\ No newline at end of file |