diff options
| author | George Abbott <george@gabbott.dev> | 2025-01-26 11:38:37 +0000 | 
|---|---|---|
| committer | George Abbott <george@gabbott.dev> | 2025-01-26 11:38:37 +0000 | 
| commit | f6372e58ad54560919a061d33689c81281aa902c (patch) | |
| tree | 576113f09e6cb2ceed53af1a3dff298cdce2424d | |
| parent | 52c4d3793e09bb95f154880fd1372333f94f66cd (diff) | |
misc
| -rw-r--r-- | scripts/Makefile | 7 | ||||
| -rw-r--r-- | scripts/jezup.c | 413 | ||||
| -rw-r--r-- | scripts/neostr.h | 178 | ||||
| l--------- | scripts/target/kmd | 1 | 
4 files changed, 599 insertions, 0 deletions
| diff --git a/scripts/Makefile b/scripts/Makefile
new file mode 100644
index 0000000..10989c3
--- /dev/null
+++ b/scripts/Makefile
@@ -0,0 +1,6 @@
+# Each recipe line runs in its own shell, so cd and the build share one line.
+jezup:
+	cd kmd && zig build
+
+bar:
+	clang -Wall -Wpedantic -O3 -std=c99 bar.c -o target/bar
diff --git a/scripts/jezup.c b/scripts/jezup.c
new file mode 100644
index 0000000..b09e8d5
--- /dev/null
+++ b/scripts/jezup.c
@@ -0,0 +1,455 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include "neostr.h"
+
+#define owned /* needs to be freed */
+#define borrowed /* does not need to be freed */
+
+/* Global Variables */
+bool debug = false; /* set by -d; enables dbg() output */
+
+/* Structures */
+enum result
+{
+    result_success,
+    result_error,
+};
+
+/* A non-owning view of `size` bytes starting at `ptr`; not NUL-terminated. */
+typedef struct char_ptr
+{
+    borrowed char *ptr;
+    size_t        size;
+} char_ptr;
+
+/* State for one conversion: the registered paths and the file contents. */
+struct Jezup
+{
+    borrowed const char *registered_input_file;
+    borrowed const char *registered_template_file;
+    owned char *input_str;
+    owned char *template_str;
+};
+
+/* Functions */
+owned char   *char_ptr_copy_alloc(char_ptr cp);
+bool         char_ptr_is_null(char_ptr cp);
+char_ptr     char_ptr_new(borrowed char *ptr, size_t size);
+char_ptr     char_ptr_new_null(void);
+owned char   *freadall(const char *path);
+void         jezup_free(struct Jezup *jz);
+struct Jezup jezup_new(void);
+enum result  jezup_register_input_file(struct Jezup *jz, const char *file);
+enum result  jezup_register_template_file(struct Jezup *jz, const char *file);
+char_ptr     jezup_get_date(const struct Jezup *jz, bool *has_date);
+char_ptr     jezup_get_title(const struct Jezup *jz);
+char_ptr     jezup_get_contents(const struct Jezup *jz);
+owned char   *jezup_to_html(const struct Jezup *jz, char_ptr cp);
+owned char   *jezup_substitute(struct Jezup *jz);
+owned char   *jezup_substitute_into_html(const struct Jezup *jz);
+void         dbg(const char *fmt, ...);
+void         usage(void);
+
+/* Function Implementations */
+
+/* Copy the segment viewed by cp into a new NUL-terminated allocation.
+ * Returns NULL if cp has no pointer or the allocation fails. Caller to
+ * free(). */
+owned char *
+char_ptr_copy_alloc(char_ptr cp)
+{
+    char *buf;
+
+    if (!cp.ptr)
+        return NULL;
+
+    /* One extra byte for the terminator written below. */
+    buf = malloc(cp.size + 1);
+    if (!buf)
+        return NULL;
+
+    memcpy(buf, cp.ptr, cp.size);
+    buf[cp.size] = '\0';
+    return buf;
+}
+
+/* True when cp is the null view produced by char_ptr_new_null(). */
+bool
+char_ptr_is_null(char_ptr cp)
+{
+    return (!cp.ptr && cp.size == 0);
+}
+
+/* Build a view of `size` bytes starting at `ptr`. */
+char_ptr
+char_ptr_new(borrowed char *ptr, size_t size)
+{
+    return (char_ptr) { ptr, size };
+}
+
+/* Build the null view (NULL pointer, zero size). */
+char_ptr
+char_ptr_new_null(void)
+{
+    return (char_ptr) { NULL, 0 };
+}
+
+/* Read the whole file at path into a new NUL-terminated buffer.
+ * Returns NULL if the file cannot be opened, sized or fully read, or if
+ * the allocation fails. Caller to free(). */
+owned char *
+freadall(const char *path)
+{
+    FILE *f;
+    long fsize;
+    char *buf = NULL;
+
+    if (!path)
+        return NULL;
+
+    /* Return directly: there is no stream to close yet. */
+    f = fopen(path, "rb");
+    if (!f)
+        return NULL;
+
+    if (fseek(f, 0, SEEK_END) != 0)
+        goto error;
+    fsize = ftell(f);
+    if (fsize < 0)
+        goto error;
+    if (fseek(f, 0, SEEK_SET) != 0)
+        goto error;
+
+    buf = malloc((size_t)fsize + 1);
+    if (!buf)
+        goto error;
+
+    if (fread(buf, 1, (size_t)fsize, f) != (size_t)fsize)
+        goto error;
+    fclose(f);
+
+    buf[fsize] = '\0';
+    return buf;
+
+error:
+    free(buf);
+    fclose(f);
+    return NULL;
+}
+
+/* A Jezup with nothing registered. */
+struct Jezup
+jezup_new(void)
+{
+    return (struct Jezup) { NULL, NULL, NULL, NULL };
+}
+
+/* Release the file contents owned by jz. Safe to call more than once. */
+void
+jezup_free(struct Jezup *jz)
+{
+    if (!jz)
+        return;
+
+    free(jz->input_str);
+    free(jz->template_str);
+    jz->input_str = NULL;
+    jz->template_str = NULL;
+}
+
+/* Record file as the input path and load its contents into jz->input_str.
+ * Returns result_error if file is NULL or cannot be read. */
+enum result
+jezup_register_input_file(struct Jezup *jz, const char *file)
+{
+    if (!file)
+        return result_error;
+
+    jz->registered_input_file = file;
+
+    /* Drop any previously loaded contents before replacing them. */
+    free(jz->input_str);
+    jz->input_str = freadall(file);
+
+    return jz->input_str ? result_success : result_error;
+}
+
+/* Record file as the template path and load its contents into
+ * jz->template_str. Returns result_error if file is NULL or cannot be read. */
+enum result
+jezup_register_template_file(struct Jezup *jz, const char *file)
+{
+    if (!file)
+        return result_error;
+
+    jz->registered_template_file = file;
+
+    /* Drop any previously loaded contents before replacing them. */
+    free(jz->template_str);
+    jz->template_str = freadall(file);
+
+    return jz->template_str ? result_success : result_error;
+}
+
+/* Return a view of the second line of the input (the date), or the null
+ * view when the input has no second line or that line is empty.
+ * *has_date, if has_date is non-NULL, is set to whether a date was found. */
+char_ptr
+jezup_get_date(const struct Jezup *jz, bool *has_date)
+{
+    char *line;
+
+    if (has_date)
+        *has_date = false;
+    if (!jz || !jz->input_str)
+        return char_ptr_new_null();
+
+    /* Skip the title line; without a newline there is no second line. */
+    line = strchr(jz->input_str, '\n');
+    if (!line)
+        return char_ptr_new_null();
+    line++;
+
+    /* An empty second line means there is no date. */
+    if (*line == '\n' || *line == '\0')
+        return char_ptr_new_null();
+
+    if (has_date)
+        *has_date = true;
+    return char_ptr_new(line, strcspn(line, "\n"));
+}
+
+/* Return a view of the title: the first line of the input, without its
+ * newline. Returns the null view if no input has been loaded. */
+char_ptr
+jezup_get_title(const struct Jezup *jz)
+{
+    if (!jz || !jz->input_str)
+        return char_ptr_new_null();
+
+    return char_ptr_new(jz->input_str, strcspn(jz->input_str, "\n"));
+}
+
+/* Return a view of the contents: everything after the first blank line
+ * (two consecutive newlines) up to the end of the input. Returns the null
+ * view if there is no blank line or no input has been loaded. */
+char_ptr
+jezup_get_contents(const struct Jezup *jz)
+{
+    char *body;
+
+    if (!jz || !jz->input_str)
+        return char_ptr_new_null();
+
+    body = strstr(jz->input_str, "\n\n");
+    if (!body)
+        return char_ptr_new_null();
+
+    body += 2; /* step over both newlines */
+    return char_ptr_new(body, strlen(body));
+}
+
+/* Takes a char_ptr to the contents, and allocates a new owned string
+ * with the contents converted to HTML. Returns NULL on allocation
+ * failure. jz is currently unused.
+ *
+ * Only headings are converted so far:
+ * HTML     Jezup
+ * <hN>     #, ##, ### followed by a space at the beginning of the line.
+ *
+ * TODO: each paragraph (text separated by at most one newline and not
+ * falling under any other tag) should be placed within <p></p> tags.
+ */
+owned char *
+jezup_to_html(const struct Jezup *jz, char_ptr cp)
+{
+    char *text, *h1 = NULL, *h2 = NULL, *h3 = NULL;
+
+    (void)jz;
+
+    text = char_ptr_copy_alloc(cp);
+    if (text)
+        h1 = neostr_linewise_replace_prefix_with_circumfix(text, "# ", "<h1>", "</h1>");
+    if (h1)
+        h2 = neostr_linewise_replace_prefix_with_circumfix(h1, "## ", "<h2>", "</h2>");
+    if (h2)
+        h3 = neostr_linewise_replace_prefix_with_circumfix(h2, "### ", "<h3>", "</h3>");
+
+    /* Only the last stage is handed to the caller. */
+    free(text);
+    free(h1);
+    free(h2);
+
+    return h3;
+}
+
+/* Substitute the title, date and HTML contents of the input into the
+ * template's $$TITLE$$, $$DATE$$ and $$CONTENT$$ placeholders.
+ * When the input has no date, $$DATE$$ is left in place.
+ * Returns a new string (caller to free()), or NULL if the input or template
+ * is missing, the input has no contents section, or an allocation fails. */
+owned char *
+jezup_substitute_into_html(const struct Jezup *jz)
+{
+    char_ptr title, date, contents;
+    char *contents_as_html = NULL, *alloc_title = NULL, *alloc_date = NULL;
+    const char *with_title = NULL, *with_date = NULL, *result = NULL;
+    bool has_date = false;
+
+    if (!jz || !jz->input_str || !jz->template_str)
+        return NULL;
+
+    title = jezup_get_title(jz);
+    if (char_ptr_is_null(title))
+        return NULL;
+    dbg("jezup_substitute_into_html: title nonnull");
+
+    date = jezup_get_date(jz, &has_date);
+    dbg("jezup_substitute_into_html: date %s", has_date ? "nonnull" : "absent");
+
+    contents = jezup_get_contents(jz);
+    if (char_ptr_is_null(contents))
+        return NULL;
+    dbg("jezup_substitute_into_html: contents nonnull");
+
+    contents_as_html = jezup_to_html(jz, contents);
+    if (!contents_as_html)
+        return NULL;
+    dbg("jezup_substitute_into_html: contents as html nonnull");
+
+    /* substitute title */
+    alloc_title = char_ptr_copy_alloc(title);
+    if (!alloc_title)
+        goto out;
+    with_title = neostr_replace_all(jz->template_str, "$$TITLE$$", alloc_title);
+    if (!with_title)
+        goto out;
+
+    /* date */
+    if (has_date)
+    {
+        alloc_date = char_ptr_copy_alloc(date);
+        if (!alloc_date)
+            goto out;
+        with_date = neostr_replace_all(with_title, "$$DATE$$", alloc_date);
+        if (!with_date)
+            goto out;
+    }
+
+    /* content: continue from whichever stage ran last */
+    result = neostr_replace_all(with_date ? with_date : with_title,
+                                "$$CONTENT$$", contents_as_html);
+
+out:
+    /* Every intermediate is freed exactly once; unused ones are NULL. */
+    free((char *)with_date);
+    free((char *)with_title);
+    free(alloc_date);
+    free(alloc_title);
+    free(contents_as_html);
+
+    return (char *)result;
+}
+
+/* When debugging is enabled (-d), print a printf-style message to stderr,
+ * terminated by a newline if the format does not already end in one. */
+void
+dbg(const char *fmt, ...)
+{
+    va_list ap;
+    size_t len;
+
+    if (!debug || !fmt)
+        return;
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    /* Keep diagnostics off stdout, which carries the generated HTML. */
+    len = strlen(fmt);
+    if (len == 0 || fmt[len - 1] != '\n')
+        fputc('\n', stderr);
+}
+
+/* Print the command-line help to stdout. */
+void
+usage(void)
+{
+    printf("jezup: convert Jezup to HTML\n");
+    printf("  Jezup is a Markdown-like language with a specific syntax to \n"
+           "  provide a title and date. The first line must be the title; \n"
+           "  the second line can optionally contain a date, and the      \n"
+           "  contents follow after this with a Markdown syntax.          \n"
+           "The syntax is:                                                \n"
+           "  jezup -i INPUT_FILE -o OUTPUT_FILE -t HTML_TEMPLATE         \n"
+          );
+}
+
+/* Entry point: jezup -i INPUT_FILE -t HTML_TEMPLATE [-o OUTPUT_FILE] [-d] [-h].
+ * The result is written to OUTPUT_FILE, or to stdout when -o is not given.
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE after printing "error" to stderr. */
+int main(int argc, char **argv)
+{
+    struct Jezup jezup = jezup_new();
+    char *output = NULL, *input = NULL, *template = NULL, *html = NULL;
+    FILE *out = stdout;
+    int status = EXIT_FAILURE;
+    int i;
+
+    /* argv[0] is the program name, so options start at index 1. An option
+     * that needs a value is ignored when it is the last argument. */
+    for (i = 1; i < argc; ++i)
+    {
+        const char *arg = argv[i];
+
+        if (!strcmp(arg, "-o") && i + 1 < argc)
+            output = argv[++i];
+        else if (!strcmp(arg, "-i") && i + 1 < argc)
+            input = argv[++i];
+        else if (!strcmp(arg, "-t") && i + 1 < argc)
+            template = argv[++i];
+        else if (!strcmp(arg, "-d"))
+            debug = true;
+        else if (!strcmp(arg, "-h"))
+        {
+            usage();
+            return EXIT_SUCCESS;
+        }
+    }
+
+    if (jezup_register_input_file(&jezup, input) != result_success)
+        goto done;
+    dbg("registered input");
+
+    if (jezup_register_template_file(&jezup, template) != result_success)
+        goto done;
+    dbg("registered template");
+
+    html = jezup_substitute_into_html(&jezup);
+    if (!html)
+        goto done;
+    dbg("substituted into html");
+
+    if (output)
+    {
+        out = fopen(output, "w");
+        if (!out)
+            goto done;
+    }
+
+    if (fprintf(out, "%s\n", html) >= 0)
+        status = EXIT_SUCCESS;
+    if (out != stdout && fclose(out) != 0)
+        status = EXIT_FAILURE;
+
+done:
+    if (status != EXIT_SUCCESS)
+        fprintf(stderr, "error\n");
+    free(html);
+    jezup_free(&jezup);
+    return status;
+}
diff --git a/scripts/neostr.h b/scripts/neostr.h
new file mode 100644
index 0000000..68991ed
--- /dev/null
+++ b/scripts/neostr.h
@@ -0,0 +1,147 @@
+/* neostr: various functions for manipulating strings.
+ * char* is the main type that is manipulated, and there is no expectation of
+ * Unicode, though I could implement support at some point.
+ */
+
+#ifndef NEOSTR_H
+#define NEOSTR_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#define owned
+#define borrowed
+#define str_discard_const (char*)
+
+/* Debug logger; defined by the program that includes this header. */
+void        dbg(const char *fmt, ...);
+
+/* Given a string, replace all instances of orig with with, ensuring that only
+ * one allocation is made. An empty orig matches nothing, so the result is
+ * then a plain copy of haystack.
+ * Returns a new NUL-terminated string (caller to free()), or NULL if any
+ * argument is NULL or the allocation fails.
+ */
+owned const char *
+neostr_replace_all(const char *haystack, const char *orig, const char *with)
+{
+    size_t len_haystack, len_orig, len_with, count, chunk;
+    const char *from, *hit;
+    char *buf, *out;
+
+    if (!haystack || !orig || !with)
+        return NULL;
+
+    len_haystack = strlen(haystack);
+    len_orig     = strlen(orig);
+    len_with     = strlen(with);
+
+    /* Count the matches so the buffer can be sized exactly. Skipped for an
+     * empty orig, which strstr() would match forever at the same position. */
+    count = 0;
+    if (len_orig > 0)
+        for (from = haystack; (hit = strstr(from, orig)) != NULL; from = hit + len_orig)
+            count++;
+
+    /* Allocate the new buffer, including room for the terminator. */
+    buf = malloc(len_haystack - (count * len_orig) + (count * len_with) + 1);
+    if (!buf)
+        return NULL;
+
+    /* Copy until the text to substitute is found, then copy the new text into
+     * buf, advancing in the haystack by len_orig.
+     */
+    from = haystack;
+    out  = buf;
+    while (len_orig > 0 && (hit = strstr(from, orig)) != NULL)
+    {
+        chunk = (size_t)(hit - from);
+        memcpy(out, from, chunk);
+        out += chunk;
+        memcpy(out, with, len_with);
+        out += len_with;
+        from = hit + len_orig;
+    }
+
+    /* Finally, copy any remaining non-substituted contents. */
+    chunk = len_haystack - (size_t)(from - haystack);
+    memcpy(out, from, chunk);
+    out[chunk] = '\0';
+
+    return buf;
+}
+
+/* In a single allocation, replace all instances of the prefix at the start of
+ * the line with a circumfix, at the start and end of the line. Lines that do
+ * not begin with prefix are copied unchanged, as are empty lines. The last
+ * line is handled whether or not it ends in a newline.
+ * Returns a new NUL-terminated string (caller to free()), or NULL if any
+ * argument is NULL or the allocation fails.
+ */
+owned char *
+neostr_linewise_replace_prefix_with_circumfix(const char *haystack,
+                                              const char *prefix,
+                                              const char *circumfix_start,
+                                              const char *circumfix_end)
+{
+    size_t haystack_len, prefix_len, start_len, end_len, line_len, lines;
+    const char *line, *nl, *end;
+    char *buf, *out;
+
+    if (!haystack || !prefix || !circumfix_start || !circumfix_end)
+        return NULL;
+
+    haystack_len = strlen(haystack);
+    prefix_len   = strlen(prefix);
+    start_len    = strlen(circumfix_start);
+    end_len      = strlen(circumfix_end);
+    end          = haystack + haystack_len;
+
+    /* Every newline ends a line, and one more line may follow the last. */
+    lines = 1;
+    for (line = haystack; (nl = strchr(line, '\n')) != NULL; line = nl + 1)
+        lines++;
+    dbg("count :: %zu\n", lines);
+
+    /* Worst case: every line gains both halves of the circumfix. Dropping
+     * the prefix only shrinks the output, so this is always enough. */
+    buf = malloc(haystack_len + lines * (start_len + end_len) + 1);
+    if (!buf)
+        return NULL;
+
+    out  = buf;
+    line = haystack;
+    while (line < end)
+    {
+        nl       = strchr(line, '\n');
+        line_len = (size_t)((nl ? nl : end) - line);
+
+        if (line_len > 0 && line_len >= prefix_len
+            && memcmp(line, prefix, prefix_len) == 0)
+        {
+            /* Swap the prefix for the opening half, then close the line. */
+            memcpy(out, circumfix_start, start_len);
+            out += start_len;
+            memcpy(out, line + prefix_len, line_len - prefix_len);
+            out += line_len - prefix_len;
+            memcpy(out, circumfix_end, end_len);
+            out += end_len;
+        }
+        else
+        {
+            memcpy(out, line, line_len);
+            out += line_len;
+        }
+
+        /* The final line has no newline to copy. */
+        if (!nl)
+            break;
+        *out++ = '\n';
+        line = nl + 1;
+    }
+
+    *out = '\0';
+    return buf;
+}
+
+#endif /* NEOSTR_H */
diff --git a/scripts/target/kmd b/scripts/target/kmd
new file mode 120000
index 0000000..8f14555
--- /dev/null
+++ b/scripts/target/kmd
@@ -0,0 +1 @@
+../kmd/zig-out/bin/kmd
\ No newline at end of file | 
