From bac748dbe8c28cf1ed3b387b24f89ffe5a58ffc9 Mon Sep 17 00:00:00 2001 From: George Abbott Date: Sun, 26 Jan 2025 11:37:12 +0000 Subject: kmd --- scripts/kmd/src/kmd.zig | 490 +++++++++++++++++++++++++++++++++++++++++++++++ scripts/kmd/src/main.zig | 196 +++++++++++++++++++ scripts/kmd/src/plan | 35 ++++ scripts/kmd/src/root.zig | 10 + scripts/kmd/src/tags.zig | 7 + scripts/kmd/src/util.zig | 50 +++++ 6 files changed, 788 insertions(+) create mode 100644 scripts/kmd/src/kmd.zig create mode 100644 scripts/kmd/src/main.zig create mode 100644 scripts/kmd/src/plan create mode 100644 scripts/kmd/src/root.zig create mode 100644 scripts/kmd/src/tags.zig create mode 100644 scripts/kmd/src/util.zig (limited to 'scripts/kmd/src') diff --git a/scripts/kmd/src/kmd.zig b/scripts/kmd/src/kmd.zig new file mode 100644 index 0000000..38cb96e --- /dev/null +++ b/scripts/kmd/src/kmd.zig @@ -0,0 +1,490 @@ +const std = @import("std"); +const util = @import("./util.zig"); +const Allocator = std.mem.Allocator; +const expect = std.testing.expect; + +// Kmd = Kinda Markdown + +// Supported Elements +//

\n"); + } + pos += 6; + }, + .ol => { + if (buf) |b| { + std.mem.copyForwards(u8, b.*[pos..], "\n\n"); + } + pos += 6; + }, + else => {}, + } + break; + } + }) { + // Handle . + // cannot accept any other formatting, e.g. links, bold, etc. It is plaintext. + if (std.mem.startsWith(u8, line, "# ")) { + append(&linebuf, &linepos, "

"); + append(&linebuf, &linepos, line[2..]); + append(&linebuf, &linepos, "

"); + continue; + } + + if (std.mem.startsWith(u8, line, "## ")) { + append(&linebuf, &linepos, "

"); + append(&linebuf, &linepos, line[3..]); + append(&linebuf, &linepos, "

"); + continue; + } + + if (std.mem.startsWith(u8, line, "### ")) { + append(&linebuf, &linepos, "

"); + append(&linebuf, &linepos, line[4..]); + append(&linebuf, &linepos, "

"); + continue; + } + + if (std.mem.startsWith(u8, line, "#### ")) { + append(&linebuf, &linepos, "

"); + append(&linebuf, &linepos, line[5..]); + append(&linebuf, &linepos, "

"); + continue; + } + + if (std.mem.startsWith(u8, line, "#[raw]#")) { + append(&linebuf, &linepos, line[7..]); + continue; + } + + if (mode == .none) { + if (std.mem.startsWith(u8, line, "- ")) { + mode = .ul; + append(&linebuf, &linepos, "
    \n
  • \n"); + line = line[2..]; + } else if (std.mem.startsWith(u8, line, "~ ")) { + mode = .ol; + append(&linebuf, &linepos, "
      \n
    1. \n"); + line = line[2..]; + } else { + mode = .p; + append(&linebuf, &linepos, "

      \n"); + } + } else { + // Handle empty line + if (std.mem.eql(u8, line, "")) { + switch (mode) { + .p => append(&linebuf, &linepos, "

      "), + .ul => append(&linebuf, &linepos, "
    2. \n
"), + .ol => append(&linebuf, &linepos, "\n"), + .none => {}, // do nothing, + } + mode = .none; + continue; + } + + if ((mode == .ul or mode == .ol) and + (std.mem.startsWith(u8, line, "- ") or std.mem.startsWith(u8, line, "~ "))) + { + append(&linebuf, &linepos, "\n
  • \n"); + line = line[2..]; + } + + if (mode == .p and std.mem.startsWith(u8, line, "- ") or std.mem.startsWith(u8, line, "~ ")) { + append(&linebuf, &linepos, "

    \n"); + if (std.mem.startsWith(u8, line, "- ")) { + mode = .ul; + append(&linebuf, &linepos, "
      \n
    • \n"); + } else { + mode = .ol; + append(&linebuf, &linepos, "
        \n
      1. \n"); + } + line = line[2..]; + } + } + + const vals: [11][]const u8 = .{ "@@[", "_", "*", ";;", "<", ">", "#[", "]#", "@@img[", "##[", "~[" }; + while (true) { + const found = util.indexOfAnyArray(u8, line, 0, &vals) orelse { + // Copy the entire rest of line + append(&linebuf, &linepos, line[0..]); + break; + }; + + if (opts.debug) { + std.debug.print("[{d}] Iterating: found {any} at index {any}.\n", .{ linenum, found.value, found.index }); + } + + // Copy up until the token + append(&linebuf, &linepos, line[0..found.index]); + + // Handle the particular token + switch (found.value) { + 0 => { // @@[desc][href] + // These constants describe indexes into line. + const desc_begin = found.index + 3; + const desc_end = std.mem.indexOfPos(u8, line, desc_begin, "][") orelse @panic("could not find ][ to terminate the description of anchor tag"); + const href_begin = desc_end + 2; + const href_end = std.mem.indexOfPos(u8, line, href_begin, "]") orelse @panic("could not find ] to terminate href of anchor tag"); + + const desc = line[desc_begin..desc_end]; + const href = line[href_begin..href_end]; + + append(&linebuf, &linepos, ""); + append(&linebuf, &linepos, desc); + append(&linebuf, &linepos, ""); + + line = line[href_end + "]".len ..]; + }, + + 1 => { // _ + append(&linebuf, &linepos, if (is_strong) "" else ""); + is_strong = !is_strong; + line = line[found.index + "_".len ..]; + }, + + 2 => { // * + append(&linebuf, &linepos, if (is_emphatic) "" else ""); + is_emphatic = !is_emphatic; + line = line[found.index + "*".len ..]; + }, + + 3 => { // ;; + append(&linebuf, &linepos, "
        "); + line = line[found.index + ";;".len ..]; + }, + + 4 => { // < + append(&linebuf, &linepos, "<"); + line = line[found.index + "<".len ..]; + }, + + 5 => { // > + append(&linebuf, &linepos, ">"); + line = line[found.index + ">".len ..]; + }, + + 6 => { // #[ + append(&linebuf, &linepos, ""); + line = line[found.index + "]#".len ..]; + }, + + 8 => { // @@img[src][alt] + const src_begin = found.index + 6; + const src_end = std.mem.indexOfPos(u8, line, src_begin, "][").?; + const alt_begin = src_end + 2; + const alt_end = std.mem.indexOfPos(u8, line, alt_begin, "]").?; + + const src = line[src_begin..src_end]; + const alt = line[alt_begin..alt_end]; + + append(&linebuf, &linepos, "\"");"); + + line = line[alt_end + "]".len ..]; + }, + + 9 => { // ##[ - only output if show_hidden_comments passed. + const hc_begin = found.index + 3; + const hc_end = std.mem.indexOfPos(u8, line, hc_begin, "]##") orelse @panic("not yet implemented support for multiline hidden comments"); + + if (opts.show_hidden_comments) { + const hc = line[hc_begin..hc_end]; + append(&linebuf, &linepos, hc); + } + + line = line[hc_end + "]##".len ..]; + }, + + 10 => { // ~[attr][text] for spans with attributes + // These constants describe indexes into line. + const attr_begin = found.index + 2; + const attr_end = std.mem.indexOfPos(u8, line, attr_begin, "][") orelse @panic("could not find ][ to terminate the description of attr span"); + const text_begin = attr_end + 2; + const text_end = std.mem.indexOfPos(u8, line, text_begin, "]") orelse @panic("could not find ] to terminate text of attr span"); + + const attr = line[attr_begin..attr_end]; + const text = line[text_begin..text_end]; + + append(&linebuf, &linepos, ""); + append(&linebuf, &linepos, text); + append(&linebuf, &linepos, ""); + line = line[text_end + "]".len ..]; + }, + + else => unreachable, + } + } + } + if (opts.debug) { + std.debug.print("sizeofHtml :: return = {any}\n", .{pos}); + } + return pos; + } + + fn sizeofHtml(self: Kmd, opts: KmdHtmlOptions) usize { + const append = struct { + fn append(unused: []u8, pos: *usize, data: []const u8) void { + _ = unused; + pos.* += data.len; + } + }.append; + + return htmlBackingFn(self, opts, append, null); + } + + pub fn html(self: Kmd, allocator: Allocator, opts: KmdHtmlOptions, req_capacity_if_known: ?usize) ![]const u8 { + const append = struct { + fn append(buf: []u8, pos: *usize, data: []const u8) void { + std.mem.copyForwards(u8, buf[pos.*..], data); + pos.* += data.len; + } + }.append; + + const req_capacity = req_capacity_if_known orelse sizeofHtml(self, opts); + var buf: []u8 = try allocator.alloc(u8, req_capacity); + + _ = htmlBackingFn(self, opts, append, &buf); + return buf; + } +}; + +test "kmd" { + const allocator = std.testing.allocator; + var foo: Kmd = undefined; + + const s1 = + \\A good title + \\2024-10-31 + \\This is a paragraph. + \\ + \\This is another: it *has* _formatting_ and @@[hyperlinks][https://example.com]. + \\In fact, it even spans multiple lines! + \\ + \\A list would be like this: + \\- I like big butts + \\- and I cannot lie. + \\ + \\~ Entry eins + \\~ Entry zwei + \\ + \\Yayy! + \\ + \\Bye. + \\ + ; + + const r1 = + \\

        + \\This is a paragraph. + \\

        + \\

        + \\This is another: it has formatting and hyperlinks. + \\In fact, it even spans multiple lines! + \\

        + \\

        + \\A list would be like this: + \\

        + \\
          + \\
        • + \\I like big butts + \\
        • + \\
        • + \\and I cannot lie. + \\
        • + \\
        + \\
          + \\
        1. + \\Entry eins + \\
        2. + \\
        3. + \\Entry zwei + \\
        4. + \\
        + \\

        + \\Yayy! + \\

        + \\

        + \\Bye. + \\

        + \\ + ; + foo.init(s1); + const res1 = try foo.html(allocator, .{ .br_after_newline = false }, null); + defer allocator.free(res1); + try std.testing.expectEqualStrings(r1, res1); + + // Test br_after_newline + const s2 = + \\My poetry is amazing + \\2024-11-01 + \\I am such a good poet + \\Truly fantastic + \\So great + \\Lol xD + ; + + const r2 = + \\

        + \\I am such a good poet
        + \\Truly fantastic
        + \\So great
        + \\Lol xD
        + \\

        + \\ + ; + + foo.init(s2); + const res2 = try foo.html(allocator, .{ .br_after_newline = true }, null); + defer allocator.free(res2); + try std.testing.expectEqualStrings(r2, res2); +} diff --git a/scripts/kmd/src/main.zig b/scripts/kmd/src/main.zig new file mode 100644 index 0000000..fdc033e --- /dev/null +++ b/scripts/kmd/src/main.zig @@ -0,0 +1,196 @@ +const std = @import("std"); +const kmd = @import("kmd.zig"); +const Allocator = std.mem.Allocator; + +// Work to do +// - Implement --io which takes an input and an output so +// save the need to spawn the program multiple times. + +// Globals +const stdout_file = std.io.getStdOut().writer(); +var bw = std.io.bufferedWriter(stdout_file); +const stdout = bw.writer(); + +var output: ?[]const u8 = null; +var input: ?[]const u8 = null; +var template: ?[]const u8 = null; +var debug = false; +var insert_br = false; +var show_hidden_comments = false; + +fn print(comptime fmt: []const u8, args: anytype) !void { + try stdout.print(fmt, args); + try bw.flush(); +} + +fn usage() noreturn { + print( + \\kmd: a set of utilities relating to the kmd format + \\`kmd convert-to-html`: + \\ -i INPUT The input Markdown file. + \\ Alternatively specify multiple with --io. + \\ -o OUTPUT The file to output to. Alternatively, + \\ specify multiple with --io. + \\ -t TEMPLATE The template to substitute into. $$CONTENTS$$, + \\ $$DATE$$, $$TITLE$$, $$WC$$, $$TTR$$ are substituted. + \\ -h Display this help message. + \\ kmd wc + \\ -w, -l, -b + \\ Retrieves the word, line, or byte count ignoring the header-seq. + \\ -t + \\ Allows passing multiple files, and the total is retrieved for these. + \\ -d + \\ Pass a directory and (non-recursively) get total count. Assumes all files in the directory are kmd. + \\ EXAMPLE kmd wc -wd $WEBSITE/blog + \\ kmd generate-index DIRECTORIES + \\ -s [title|date] [asc|dsc] + \\ Sort mode. + , .{}) catch { + std.process.exit(1); + }; + std.process.exit(0); +} + +pub fn convertToHtml(allocator: Allocator, args: anytype) !void { + var i: usize = 1; + while (i < args.len) : (i += 1) { + if (std.mem.eql(u8, args[i], "-o")) { + i += 1; + output = args[i]; + } else if (std.mem.eql(u8, args[i], "-i")) { + i += 1; + input = args[i]; + } else if (std.mem.eql(u8, args[i], "-t")) { + i += 1; + template = args[i]; + } else if (std.mem.eql(u8, args[i], "-d")) { + debug = true; + } else if (std.mem.eql(u8, args[i], "-h")) { + usage(); + } else if (std.mem.eql(u8, args[i], "-b")) { + insert_br = true; + } else if (std.mem.eql(u8, args[i], "-p")) { + show_hidden_comments = true; + } else { + std.debug.print("not a valid arg\n", .{}); + } + } + + if (input == null) { + std.debug.print("Did not pass input parameter -i\n", .{}); + std.process.exit(4); + } + const dir = std.fs.cwd(); + // const file = try std.fs.openFileAbsolute(input.?, .{}); + const file = try dir.openFile(input.?, .{}); + const bytes = try file.readToEndAlloc(allocator, std.math.maxInt(usize)); + defer allocator.free(bytes); + var md: kmd.Kmd = undefined; + md.init(bytes); + const html = try md.html(allocator, .{ + .br_after_newline = insert_br, + .show_hidden_comments = show_hidden_comments, + .debug = debug, + }, null); + defer allocator.free(html); + + if (template) |t| { + // read template + // const template_file = try std.fs.openFileAbsolute(t, .{}); + const template_file = try dir.openFile(t, .{}); + const template_bytes = try template_file.readToEndAlloc(allocator, std.math.maxInt(usize)); + defer allocator.free(template_bytes); + + // many allocations version + const r1 = try std.mem.replaceOwned(u8, allocator, template_bytes, "$$CONTENT$$", html); + const r2 = try std.mem.replaceOwned(u8, allocator, r1, "$$CONTENTS$$", html); + const r3 = try std.mem.replaceOwned(u8, allocator, r2, "$$DATE$$", md.date); + const r4 = try std.mem.replaceOwned(u8, allocator, r3, "$$TITLE$$", md.title); + const r5 = try std.mem.replaceOwned(u8, allocator, r4, "$$TAGS$$", if (std.mem.eql(u8, md.tags, "")) "none" else md.tags); + + const wc_ = md.wc(.words); + const wc_str = try std.fmt.allocPrint(allocator, "{}", .{wc_}); + defer allocator.free(wc_str); + const r6 = try std.mem.replaceOwned(u8, allocator, r5, "$$WC$$", wc_str); + + const ttr_str = try std.fmt.allocPrint(allocator, "{}", .{md.averageMinutesTakenToRead(wc_)}); + defer allocator.free(ttr_str); + const r7 = try std.mem.replaceOwned(u8, allocator, r6, "$$TTR$$", ttr_str); + + defer allocator.free(r1); + defer allocator.free(r2); + defer allocator.free(r3); + defer allocator.free(r4); + defer allocator.free(r5); + defer allocator.free(r6); + try print("{s}\n", .{r7}); + allocator.free(r7); + } else { + // output `html` to either stdout or a file + try print("{s}", .{html}); + } +} + +pub fn wc(allocator: Allocator, args: anytype) !void { + var mode: ?kmd.WcMode = null; + var file_str: ?[]const u8 = null; + var direc = false; + var total = false; + + var i: usize = 1; + while (i < args.len) : (i += 1) { + if (std.mem.eql(u8, args[i], "-h")) { + usage(); + } else if (args[i][0] == '-') { + // process flags + for (args[i][1..]) |f| switch (f) { + 'w' => mode = .words, + 'l' => mode = .lines, + 'b' => mode = .bytes, + 'd' => direc = true, + 't' => total = true, + else => { + std.debug.print("invalid flag {}\n", .{f}); + std.process.exit(5); + }, + }; + } else { + file_str = args[i]; + } + } + + if (file_str == null) { + std.debug.print("Did not pass file to run wc over\n", .{}); + std.process.exit(4); + } + + const dir = std.fs.cwd(); + const file = try dir.openFile(file_str.?, .{}); + const bytes = try file.readToEndAlloc(allocator, std.math.maxInt(usize)); + defer allocator.free(bytes); + var md: kmd.Kmd = undefined; + md.init(bytes); + try print("{any}\n", .{md.wc(mode orelse .words)}); + std.process.exit(0); +} + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + + const args = try std.process.argsAlloc(allocator); + + if (args.len <= 1) { + usage(); + } + + if (std.mem.eql(u8, args[1], "convert-to-html")) { + const rest_of_args = args[1..]; + try convertToHtml(allocator, rest_of_args); + } else if (std.mem.eql(u8, args[1], "wc")) { + const rest_of_args = args[1..]; + try wc(allocator, rest_of_args); + } else { + usage(); + } +} diff --git a/scripts/kmd/src/plan b/scripts/kmd/src/plan new file mode 100644 index 0000000..27e3040 --- /dev/null +++ b/scripts/kmd/src/plan @@ -0,0 +1,35 @@ + + + +linebuf : [256]u8 = uninitialized +linepos : size = 0; + +append :: action capture(linebuf, linepos) receive(s: str) +do + linebuf.append( +end + +define reset(linebuf) :: clear_to_zero(linebuf); +define reset(linepos) :: linepos = 0; + +foreach (line in lines) +do + reset linebuf, linepos; + + if line.starts_with("# ") + append "

        "; + append line[2..]; + append "

        "; + ... + + # handle @@[][] + i : size = 0; + while (line.locate_next("@@[")) |pos| + do + std.mem.copy(dest: linebuf, src: line, from: i, to: pos); + append line[from:pos]; + + end + + +end diff --git a/scripts/kmd/src/root.zig b/scripts/kmd/src/root.zig new file mode 100644 index 0000000..ecfeade --- /dev/null +++ b/scripts/kmd/src/root.zig @@ -0,0 +1,10 @@ +const std = @import("std"); +const testing = std.testing; + +export fn add(a: i32, b: i32) i32 { + return a + b; +} + +test "basic add functionality" { + try testing.expect(add(3, 7) == 10); +} diff --git a/scripts/kmd/src/tags.zig b/scripts/kmd/src/tags.zig new file mode 100644 index 0000000..30d1f18 --- /dev/null +++ b/scripts/kmd/src/tags.zig @@ -0,0 +1,7 @@ +// For implementation of tags. + +// It should have the following features: +// - ability to search tags within the files in the directories passed +// - ability to provide index pages (based on a template?) for each tag (under /tag/XXX) +// (this will also go hand in hand with generating index pages - replacing wv ls-html for instance) +// - then, when generating a page, I need to transform the $$TAGS$$ into a set of TAG diff --git a/scripts/kmd/src/util.zig b/scripts/kmd/src/util.zig new file mode 100644 index 0000000..32400c8 --- /dev/null +++ b/scripts/kmd/src/util.zig @@ -0,0 +1,50 @@ +const std = @import("std"); + +// A max of 16 values. +pub fn indexOfAnyArray( + comptime T: type, + slice: []const T, + start: usize, + values: []const []const T, +) ?struct { index: usize, value: usize } { + var lowest_index: ?usize = null; + var value_index: ?usize = null; + + for (values, 0..) |v, i| { + const f = std.mem.indexOfPos(T, slice, start, v); + if (f) |f_| { + if (lowest_index == null) { + // First found: unconditionally set. + lowest_index = f_; + value_index = i; + } else { + // Check whether lower. + if (f_ < lowest_index.?) { + lowest_index = f_; + value_index = i; + } + } + } + } + if (lowest_index) |li_| { + return .{ .index = li_, .value = value_index.? }; + } else { + return null; + } +} + +test "indexOfAnyArray" { + const vals: [2][]const u8 = .{ "Gri", "Sli" }; + const wrong_vals: [2][]const u8 = .{ "Tri", "Pri" }; + const a = indexOfAnyArray(u8, "HiGriSliMi", 0, &vals); + try std.testing.expect(a.index == 2); + try std.testing.expect(a.value == 0); + + const b = indexOfAnyArray(u8, "HiGriSliMi", 3, &vals); + try std.testing.expect(b.index == 5); + try std.testing.expect(b.value == 1); + + const c = indexOfAnyArray(u8, "HiGriSliMi", 0, &wrong_vals); + try std.testing.expect(c.index == null); + try std.testing.expect(c.value == null); +} -- cgit v1.2.1