diff options
-rw-r--r-- | scripts/kmd/build.zig | 91 | ||||
-rw-r--r-- | scripts/kmd/build.zig.zon | 72 | ||||
-rw-r--r-- | scripts/kmd/kmd.close | 34 | ||||
-rw-r--r-- | scripts/kmd/old_main.zig | 297 | ||||
-rw-r--r-- | scripts/kmd/src/kmd.zig | 490 | ||||
-rw-r--r-- | scripts/kmd/src/main.zig | 196 | ||||
-rw-r--r-- | scripts/kmd/src/plan | 35 | ||||
-rw-r--r-- | scripts/kmd/src/root.zig | 10 | ||||
-rw-r--r-- | scripts/kmd/src/tags.zig | 7 | ||||
-rw-r--r-- | scripts/kmd/src/util.zig | 50 |
10 files changed, 1282 insertions, 0 deletions
diff --git a/scripts/kmd/build.zig b/scripts/kmd/build.zig new file mode 100644 index 0000000..b1d3a61 --- /dev/null +++ b/scripts/kmd/build.zig @@ -0,0 +1,91 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const lib = b.addStaticLibrary(.{ + .name = "kmd", + // In this case the main source file is merely a path, however, in more + // complicated build scripts, this could be a generated file. + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + // This declares intent for the library to be installed into the standard + // location when the user invokes the "install" step (the default step when + // running `zig build`). + b.installArtifact(lib); + + const exe = b.addExecutable(.{ + .name = "kmd", + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). + b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. 
The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. + const lib_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); + + const exe_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. 
+ const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_lib_unit_tests.step); + test_step.dependOn(&run_exe_unit_tests.step); +} diff --git a/scripts/kmd/build.zig.zon b/scripts/kmd/build.zig.zon new file mode 100644 index 0000000..4d172b1 --- /dev/null +++ b/scripts/kmd/build.zig.zon @@ -0,0 +1,72 @@ +.{ + // This is the default name used by packages depending on this one. For + // example, when a user runs `zig fetch --save <url>`, this field is used + // as the key in the `dependencies` table. Although the user can choose a + // different name, most users will stick with this provided value. + // + // It is redundant to include "zig" in this name because it is already + // within the Zig package namespace. + .name = "jezup-zig", + + // This is a [Semantic Version](https://semver.org/). + // In a future version of Zig it will be used for package deduplication. + .version = "0.0.0", + + // This field is optional. + // This is currently advisory only; Zig does not yet do anything + // with this value. + //.minimum_zig_version = "0.11.0", + + // This field is optional. + // Each dependency must either provide a `url` and `hash`, or a `path`. + // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. + // Once all dependencies are fetched, `zig build` no longer requires + // internet connectivity. + .dependencies = .{ + // See `zig fetch --save <url>` for a command-line interface for adding dependencies. + //.example = .{ + // // When updating this field to a new URL, be sure to delete the corresponding + // // `hash`, otherwise you are communicating that you expect to find the old hash at + // // the new URL. + // .url = "https://example.com/foo.tar.gz", + // + // // This is computed from the file contents of the directory of files that is + // // obtained after fetching `url` and applying the inclusion rules given by + // // `paths`. 
+ // // + // // This field is the source of truth; packages do not come from a `url`; they + // // come from a `hash`. `url` is just one of many possible mirrors for how to + // // obtain a package matching this `hash`. + // // + // // Uses the [multihash](https://multiformats.io/multihash/) format. + // .hash = "...", + // + // // When this is provided, the package is found in a directory relative to the + // // build root. In this case the package's hash is irrelevant and therefore not + // // computed. This field and `url` are mutually exclusive. + // .path = "foo", + + // // When this is set to `true`, a package is declared to be lazily + // // fetched. This makes the dependency only get fetched if it is + // // actually used. + // .lazy = false, + //}, + }, + + // Specifies the set of files and directories that are included in this package. + // Only files and directories listed here are included in the `hash` that + // is computed for this package. Only files listed here will remain on disk + // when using the zig package manager. As a rule of thumb, one should list + // files required for compilation plus any license(s). + // Paths are relative to the build root. Use the empty string (`""`) to refer to + // the build root itself. + // A directory listed here means that all files within, recursively, are included. + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... + //"LICENSE", + //"README.md", + }, +} diff --git a/scripts/kmd/kmd.close b/scripts/kmd/kmd.close new file mode 100644 index 0000000..569e507 --- /dev/null +++ b/scripts/kmd/kmd.close @@ -0,0 +1,34 @@ +import "std"; +import "util"; + +Kmd :: struct +has + Mode :: enum(none, p, ul, ol); + HtmlOptions :: record(br_after_newline: bool); +is + title : str, + date : ?str, + contents : str, +do + new :: construct fn(text: str) -> ?Kmd + do + iter := text.split_iter("\n"); + return construct Kmd // Kmd is coercible to ?Kmd. 
+ as + title = iter.first(), + date = iter.after(), + contents = iter.rest(), + end; + end + + apart :: deconstruct fn(^self) -> (str, str, str) + do + return (self.title, self.date, self.contents); + end + + html :: fn(self, allocator: Allocator, opts: HtmlOptions, req_cap_if_known: ?usize) -> Kmd + do + mode : Mode = .none; + is_strong, is_emph := false, false; + end +end diff --git a/scripts/kmd/old_main.zig b/scripts/kmd/old_main.zig new file mode 100644 index 0000000..16feb59 --- /dev/null +++ b/scripts/kmd/old_main.zig @@ -0,0 +1,297 @@ +const std = @import("std"); +const kmd = @import("kmd.zig"); +const Allocator = std.mem.Allocator; + +// Globals +const stdout_file = std.io.getStdOut().writer(); +var bw = std.io.bufferedWriter(stdout_file); +const stdout = bw.writer(); + +var output: []const u8 = undefined; +var input: []const u8 = undefined; +var template: []const u8 = undefined; +var debug = false; + +fn print(comptime fmt: []const u8, args: anytype) !void { + try stdout.print(fmt, args); + try bw.flush(); +} + +fn usage() noreturn { + print("TODO: add usage\n", .{}) catch { + std.process.exit(1); + }; + std.process.exit(0); +} + +const Jezup = struct { + const Mode = enum { + none, + p, + ul, + ol, + }; + title: []const u8, + date: []const u8, + contents: []const u8, + + pub fn init(self: *Jezup, text: []const u8) void { + var it = std.mem.splitAny(u8, text, "\n"); + self.title = it.first(); + self.date = it.next().?; + self.contents = it.rest(); + } + + pub fn calculateSizeOfContentsAsHtml(self: *Jezup) usize { + var mode: Mode = .none; + var it = std.mem.splitAny(u8, self.contents, "\n"); + var line: []const u8 = it.first(); + var additional: usize = 0; + var handled = false; // if !handled and not empty, default is to handle as <p> + while (true) { + // handle <h1>, <h2>, etc. 
+ if (std.mem.startsWith(u8, line, "# ")) { + handled = true; + additional += 8; // <h1></h2> = 9 minus 2 from "# " + } + + if (std.mem.startsWith(u8, line, "## ")) { + handled = true; + additional += 7; + } + + if (std.mem.startsWith(u8, line, "### ")) { + handled = true; + additional += 6; + } + if (std.mem.startsWith(u8, line, "- ")) { + handled = true; + additional += if (mode == .ul) 8 else 7; + if (mode != .ul) mode = .ul; + } + + if (std.mem.startsWith(u8, line, "~ ")) { + handled = true; + additional += if (mode == .ol) 8 else 7; + if (mode != .ol) mode = .ol; + } + + if (mode == .none and !handled and !std.mem.eql(u8, line, "")) { + mode = .p; + additional += 4; // <p>\n + } + + // handle <a href="https://link.com"></a> + // @@[][] becomes <a href=""></a> -> 6 bytes becomes 15 -> 9 add bytes + additional += std.mem.count(u8, line, "@@[") * 9; + + // handle > and < - they become > and < so +3 bytes each + additional += std.mem.count(u8, line, "<") * 3; + additional += std.mem.count(u8, line, ">") * 3; + + // peek next and if empty, that means \n\n chain - cancel out cond. 
+ if (it.peek() == null or std.mem.eql(u8, it.peek().?, "")) { + switch (mode) { + .p => { + additional += 6; // \n</p>\n + }, // add </p>\n + .ul => { + additional += 11; + }, // add </li>\n</ul> (11 bytes) + .ol => { + additional += 11; + }, // add </li></ul> + .none => {}, // do nothing + } + mode = .none; + break; + } else { + line = it.next().?; + } + } + // handle <br> + additional += std.mem.count(u8, self.contents, ";;") * 2; // <br> (4) - ;; (2) -> 2 add chars + return additional + self.contents.len; + } + + fn append(buf: *[256]u8, pos: *usize, data: []const u8) void { + std.mem.copyForwards(u8, buf[pos.*..], data); + pos.* += data.len; + } + + pub fn contentsAsHtml(self: *Jezup, allocator: Allocator, required_capacity_param: ?usize) ![]const u8 { + var mode: Mode = .none; + var it = std.mem.splitAny(u8, self.contents, "\n"); + var line: []const u8 = it.first(); + const required_capacity = if (required_capacity_param) |req| req else calculateSizeOfContentsAsHtml(self); + var handled = false; // if !handled and not empty, default is to handle as <p> + + // allocate the new buffer - text + additional_bytes_req + // this will hold the full output + var buf: []u8 = try allocator.alloc(u8, required_capacity - 1); + var pos: usize = 0; // offset to memcpy to + + // hold the buffer for the entire line + var linebuf: [256]u8 = undefined; + var linepos: usize = 0; + + std.debug.print("allocated_capacity: {}\n", .{required_capacity - 1}); + std.debug.print("The length of buf is: {}\n", .{buf.len}); + std.debug.print("buf = {s};\nbuf[pos..] = {s};\n", .{ buf, buf[pos..] }); + + // Part two - actually run the changes + { + // TODO: handle em, b, br, ul, ol, p, <, > + // NOTE: handling hN a + while (true) { + // reset + @memset(&linebuf, 0); + linepos = 0; + + std.debug.print("While iteration with line: {s}\n", .{line}); + // handle <h1>, <h2>, etc. + // these are "interrupting handlers" which disrupt the normal flow of <p> + // included are: hN, ul, ol. 
+ if (std.mem.startsWith(u8, line, "# ")) { + handled = true; + append(&linebuf, &linepos, "<h1>"); + append(&linebuf, &linepos, line[2..]); + append(&linebuf, &linepos, "</h1>\n"); + } + + if (std.mem.startsWith(u8, line, "## ")) { + handled = true; + append(&linebuf, &linepos, "<h2>"); + append(&linebuf, &linepos, line[3..]); + append(&linebuf, &linepos, "</h2>\n"); + } + + if (std.mem.startsWith(u8, line, "### ")) { + handled = true; + append(&linebuf, &linepos, "<h3>"); + append(&linebuf, &linepos, line[4..]); + append(&linebuf, &linepos, "</h3>\n"); + } + + if (std.mem.startsWith(u8, line, "- ") or std.mem.startsWith(u8, line, "~ ")) { + handled = true; + if (mode != .ul and mode != .ol) { + mode = if (line[0] == '-') .ul else .ol; + append(&linebuf, &linepos, if (line[0] == '-') "<ul>\n<li>" else "<ol>\n<li>"); + } else { + append(&linebuf, &linepos, "</li>\n<li>"); + } + append(&linebuf, &linepos, line[2..]); + } + + if (mode == .none and !handled and !std.mem.eql(u8, line, "")) { + std.debug.print("executed p\n", .{}); + mode = .p; + append(&linebuf, &linepos, "<p>\n"); + } + + // handle <a> (@@[][]) + var i: usize = 0; // index into the line (not linebuf) + var found_href = false; + while (std.mem.indexOfPos(u8, line, i, "@@[")) |index_start| { + found_href = true; + std.debug.print("@@[ while clause: i = {}; linebuf = {s}\n", .{ i, linebuf }); + // copy until the link + append(&linebuf, &linepos, line[i..index_start]); + + const desc_start = index_start + 3; + const desc_end = std.mem.indexOfPos(u8, line, desc_start, "]").?; // safe as @@[ must contain desc and href components in form @@[desc][href] + const href_begin = desc_end + 2; + const href_end = std.mem.indexOfPos(u8, line, href_begin, "]").?; + const desc = line[desc_start..desc_end]; + const href = line[href_begin..href_end]; + i = href_end + 1; + + // copy the link + append(&linebuf, &linepos, "<a href=\""); + append(&linebuf, &linepos, href); + append(&linebuf, &linepos, "\">"); + 
append(&linebuf, &linepos, desc); + append(&linebuf, &linepos, "</a>"); + std.debug.print("@@[ while post: i = {}; linebuf = {s}\n----\n", .{ i, linebuf }); + } + if (found_href) { + std.debug.print("@@[ else clause: i = {}; line.len = {}\n", .{ i, line.len }); + append(&linebuf, &linepos, line[i..]); // FIXME panic here! + } + std.debug.print("After @@[: {s}\n", .{linebuf}); + + // handle > and < - they become > and < so +3 bytes each + // TODO + // additional += std.mem.count(u8, line, "<") * 3; + // additional += std.mem.count(u8, line, ">") * 3; + + // TODO: write it all out, get rid of the break above + if (it.peek() == null or std.mem.eql(u8, it.peek().?, "")) { + switch (mode) { + .p => append(&linebuf, &linepos, "\n</p>\n"), + .ul => append(&linebuf, &linepos, "</li>\n</ul>"), + .ol => append(&linebuf, &linepos, "</li>\n</ol>"), + .none => {}, // do nothing + } + mode = .none; + } else {} + + // copy linebuf into buf + std.debug.print("copied linebuf = {s}; buf.len = {}; pos = {}; linebuf.len = {}; linepos = {}\n", .{ linebuf, buf.len, pos, linebuf.len, linepos }); + std.mem.copyForwards(u8, buf[pos..], linebuf[0..linepos]); + pos += linepos; // linebuf.len is always 256 + line = it.next().?; + handled = false; + std.debug.print("---- CLOSE LINE ----\n", .{}); + if (it.peek() == null) { + std.debug.print("---- QUITTING ----\n", .{}); + break; + } + } + } + return buf; + } +}; + +test "jezup.contentsToHtml" { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + var jz1: Jezup = undefined; + const str = "Title\n2024-10-01\n# My header\nThis line has @@[a link][https://gabbott.dev] in it\n\nThis line has @@[two][https://kagi.com] and @@[three][https://boodle.mon] awesome links."; + const out = "<h1>My header</h1>\nThis line has <a href=\"https://gabbott.dev\">a link</a> in it\n\nThis line has <a href=\"https://kagi.com\">two</a> and <a href=\"https://boodle.mon\">three</a> awesome links."; + jz1.init(str); + + const html 
= try jz1.contentsAsHtml(allocator, null); + defer allocator.free(html); + + std.debug.print("HTML\n----\n{s}\n| len = {}\n", .{ html, html.len }); + try std.testing.expect(std.mem.eql(u8, html, out)); +} + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + + const args = try std.process.argsAlloc(allocator); + + var i: usize = 1; + while (i < args.len) : (i += 1) { + if (std.mem.eql(u8, args[i], "-o")) { + i += 1; + output = args[i]; + } else if (std.mem.eql(u8, args[i], "-i")) { + i += 1; + input = args[i]; + } else if (std.mem.eql(u8, args[i], "-t")) { + i += 1; + template = args[i]; + } else if (std.mem.eql(u8, args[i], "-d")) { + debug = true; + } else if (std.mem.eql(u8, args[i], "-h")) { + usage(); + } else { + try print("not a valid arg\n", .{}); + } + } +} diff --git a/scripts/kmd/src/kmd.zig b/scripts/kmd/src/kmd.zig new file mode 100644 index 0000000..38cb96e --- /dev/null +++ b/scripts/kmd/src/kmd.zig @@ -0,0 +1,490 @@ +const std = @import("std"); +const util = @import("./util.zig"); +const Allocator = std.mem.Allocator; +const expect = std.testing.expect; + +// Kmd = Kinda Markdown + +// Supported Elements +// <hN> <p> <ul> <ol> <b> <em> <br> +// # - ~ _ * ;; + +// * will always be turned into <i> (or </i> if already italic). +// Same with _. To prevent this, \* should be used. + +// Also should handle: +// <, > -> <, > + +// Also, allows for comments, with: +// #[ ]# -> <!-- --> +// And ##[]## for hidden comments, that only show if the show_hidden_comments option is passed. 
// Images can be added with:
// @@img[/assets/src.jpg][alttext] -> <img src="/assets/src.jpg" alt="alttext">

// For a line to be raw and unprocessed (useful for writing raw HTML):
// #[raw]# <p> <a href="/foo">this line is just html raw!</a> </p>

// For spans that are given a class attribute, do:
// ~[attr][text] -> <span class="attr">text</span>

/// The unit that `Kmd.wc` counts.
pub const WcMode = enum { words, lines, bytes };

/// A kmd document split into its three header lines (title, date, tags) and
/// the body. Every field is a slice into the text passed to `init`; nothing is
/// copied, so that text has to stay alive for as long as the `Kmd` is used.
pub const Kmd = struct {
    /// The block element the renderer is currently inside.
    const Mode = enum {
        none,
        p,
        ul,
        ol,
    };

    /// Rendering switches. Every field defaults to `false`, so callers only
    /// need to name the options they want to turn on.
    pub const KmdHtmlOptions = struct {
        /// Emit `<br>` in front of the newline that ends every output line.
        br_after_newline: bool = false,
        /// Emit the text inside `##[ ]##` instead of dropping it.
        show_hidden_comments: bool = false,
        /// Print every token found (and the final size) with `std.debug.print`.
        debug: bool = false,
    };

    title: []const u8,
    date: []const u8,
    tags: []const u8,
    contents: []const u8,

    /// Initialise the document from the text of a file.
    ///
    /// Line 1 is the title, line 2 the date, line 3 the tags, and everything
    /// after that is the body. A header line that is missing is stored as an
    /// empty string rather than aborting on a null unwrap.
    pub fn init(self: *Kmd, text: []const u8) void {
        var it = std.mem.splitAny(u8, text, "\n");
        self.title = it.first();
        self.date = it.next() orelse "";
        self.tags = it.next() orelse "";
        self.contents = it.rest();
    }

    /// Get the word count of the title - this is a naive count of spaces.
    pub fn titleWc(self: Kmd) usize {
        return if (self.title.len == 0) 0 else std.mem.count(u8, self.title, " ") + 1;
    }

    /// Get the word, line or byte count of the body.
    ///
    /// Words and lines are naive separator counts plus one; markup such as
    /// `@@[desc][href]` is counted as written.
    pub fn wc(self: Kmd, mode: WcMode) usize {
        if (self.contents.len == 0) return 0;

        // TODO
        // Implement by going line by line, and if we encounter @@[ then count
        // until ][ then skip until ]. The rest of the punctuation shouldn't
        // cause an issue.
        return switch (mode) {
            .bytes => self.contents.len,
            .words => std.mem.count(u8, self.contents, " ") + 1,
            .lines => std.mem.count(u8, self.contents, "\n") + 1,
        };
    }

    /// Estimated reading time in whole minutes at 238 words per minute,
    /// never less than 1.
    /// Pass in the word-count if you already have it to avoid recalculating.
    pub fn averageMinutesTakenToRead(self: Kmd, maybe_wc: ?usize) usize {
        const wc_ = maybe_wc orelse self.wc(.words);
        return @max(@as(usize, 1), wc_ / 238);
    }

    /// Copy `data` to `out` at `pos` when an output buffer is present, and
    /// advance `pos` by exactly `data.len` either way, so the counting pass
    /// and the writing pass cannot disagree about a literal's length.
    fn emit(out: ?*[]u8, pos: *usize, data: []const u8) void {
        if (out) |b| std.mem.copyForwards(u8, b.*[pos.*..], data);
        pos.* += data.len;
    }

    /// Shared renderer behind `sizeofHtml` and `html`.
    ///
    /// Walks the body line by line, building each output line through
    /// `append`, then flushes it (plus an optional `<br>` and a newline).
    /// When `buf` is non-null the output is copied into it; when it is null
    /// only the position is advanced. Returns the total number of bytes the
    /// rendering occupies.
    ///
    /// `append` has a function body type and therefore has to be
    /// comptime-known. A rendered line is staged in a 256-byte buffer, so a
    /// single line may not expand to more than 256 bytes.
    fn htmlBackingFn(
        self: Kmd,
        opts: KmdHtmlOptions,
        comptime append: fn (unused: []u8, pos: *usize, data: []const u8) void,
        buf: ?*[]u8,
    ) usize {
        const Heading = struct { prefix: []const u8, open: []const u8, close: []const u8 };
        const headings = [_]Heading{
            .{ .prefix = "# ", .open = "<h1>", .close = "</h1>" },
            .{ .prefix = "## ", .open = "<h2>", .close = "</h2>" },
            .{ .prefix = "### ", .open = "<h3>", .close = "</h3>" },
            .{ .prefix = "#### ", .open = "<h4>", .close = "</h4>" },
        };
        // The position of a token in this array is the value the switch below dispatches on.
        const vals: [11][]const u8 = .{ "@@[", "_", "*", ";;", "<", ">", "#[", "]#", "@@img[", "##[", "~[" };

        var mode: Mode = .none;
        var is_strong = false;
        var is_emphatic = false;
        var iter = std.mem.splitAny(u8, self.contents, "\n");
        var pos: usize = 0; // position in the output buffer

        // As we iterate linewise, `linebuf` holds the line under construction
        // and is copied to the output once the line is finished.
        var line: []const u8 = iter.first();
        var linenum: usize = 0;
        var linebuf: [256]u8 = undefined;
        var linepos: usize = 0;

        while (true) {
            render: {
                // Handle <hN>.
                // <hN> cannot accept any other formatting, e.g. links, bold, etc. It is plaintext.
                for (headings) |h| {
                    if (std.mem.startsWith(u8, line, h.prefix)) {
                        append(&linebuf, &linepos, h.open);
                        append(&linebuf, &linepos, line[h.prefix.len..]);
                        append(&linebuf, &linepos, h.close);
                        break :render;
                    }
                }

                // Raw lines are passed through untouched.
                if (std.mem.startsWith(u8, line, "#[raw]#")) {
                    append(&linebuf, &linepos, line[7..]);
                    break :render;
                }

                const starts_ul = std.mem.startsWith(u8, line, "- ");
                const starts_ol = std.mem.startsWith(u8, line, "~ ");

                if (mode == .none) {
                    if (starts_ul) {
                        mode = .ul;
                        append(&linebuf, &linepos, "<ul>\n<li>\n");
                        line = line[2..];
                    } else if (starts_ol) {
                        mode = .ol;
                        append(&linebuf, &linepos, "<ol>\n<li>\n");
                        line = line[2..];
                    } else {
                        mode = .p;
                        append(&linebuf, &linepos, "<p>\n");
                    }
                } else {
                    // An empty line closes whatever block is open.
                    if (line.len == 0) {
                        switch (mode) {
                            .p => append(&linebuf, &linepos, "</p>"),
                            .ul => append(&linebuf, &linepos, "</li>\n</ul>"),
                            .ol => append(&linebuf, &linepos, "</li>\n</ol>"),
                            .none => {}, // do nothing
                        }
                        mode = .none;
                        break :render;
                    }

                    if (starts_ul or starts_ol) {
                        if (mode == .p) {
                            // A list marker directly after paragraph text
                            // ends the paragraph and opens the list. The
                            // marker test is evaluated once, before the
                            // prefix is stripped, so item text that itself
                            // begins with "~ " cannot re-trigger this branch.
                            append(&linebuf, &linepos, "</p>\n");
                            if (starts_ul) {
                                mode = .ul;
                                append(&linebuf, &linepos, "<ul>\n<li>\n");
                            } else {
                                mode = .ol;
                                append(&linebuf, &linepos, "<ol>\n<li>\n");
                            }
                        } else {
                            // Already inside a list: start the next item.
                            append(&linebuf, &linepos, "</li>\n<li>\n");
                        }
                        line = line[2..];
                    }
                }

                // Inline tokens: copy text up to the next token, translate
                // the token, and continue with the remainder of the line.
                while (true) {
                    const found = util.indexOfAnyArray(u8, line, 0, &vals) orelse {
                        // Copy the entire rest of line
                        append(&linebuf, &linepos, line[0..]);
                        break;
                    };

                    if (opts.debug) {
                        std.debug.print("[{d}] Iterating: found {any} at index {any}.\n", .{ linenum, found.value, found.index });
                    }

                    // Copy up until the token
                    append(&linebuf, &linepos, line[0..found.index]);

                    // Handle the particular token
                    switch (found.value) {
                        0 => { // @@[desc][href]
                            // These constants describe indexes into line.
                            const desc_begin = found.index + 3;
                            const desc_end = std.mem.indexOfPos(u8, line, desc_begin, "][") orelse @panic("could not find ][ to terminate the description of anchor tag");
                            const href_begin = desc_end + 2;
                            const href_end = std.mem.indexOfPos(u8, line, href_begin, "]") orelse @panic("could not find ] to terminate href of anchor tag");

                            const desc = line[desc_begin..desc_end];
                            const href = line[href_begin..href_end];

                            append(&linebuf, &linepos, "<a href=\"");
                            append(&linebuf, &linepos, href);
                            append(&linebuf, &linepos, "\">");
                            append(&linebuf, &linepos, desc);
                            append(&linebuf, &linepos, "</a>");

                            line = line[href_end + "]".len ..];
                        },

                        1 => { // _ toggles <strong>
                            append(&linebuf, &linepos, if (is_strong) "</strong>" else "<strong>");
                            is_strong = !is_strong;
                            line = line[found.index + "_".len ..];
                        },

                        2 => { // * toggles <em>
                            append(&linebuf, &linepos, if (is_emphatic) "</em>" else "<em>");
                            is_emphatic = !is_emphatic;
                            line = line[found.index + "*".len ..];
                        },

                        3 => { // ;;
                            append(&linebuf, &linepos, "<br>");
                            line = line[found.index + ";;".len ..];
                        },

                        4 => { // < is escaped so it cannot open a tag
                            append(&linebuf, &linepos, "&lt;");
                            line = line[found.index + "<".len ..];
                        },

                        5 => { // > is escaped likewise
                            append(&linebuf, &linepos, "&gt;");
                            line = line[found.index + ">".len ..];
                        },

                        6 => { // #[
                            append(&linebuf, &linepos, "<!--");
                            line = line[found.index + "#[".len ..];
                        },

                        7 => { // ]#
                            append(&linebuf, &linepos, "-->");
                            line = line[found.index + "]#".len ..];
                        },

                        8 => { // @@img[src][alt]
                            const src_begin = found.index + 6;
                            const src_end = std.mem.indexOfPos(u8, line, src_begin, "][") orelse @panic("could not find ][ to terminate the src of img tag");
                            const alt_begin = src_end + 2;
                            const alt_end = std.mem.indexOfPos(u8, line, alt_begin, "]") orelse @panic("could not find ] to terminate alt text of img tag");

                            const src = line[src_begin..src_end];
                            const alt = line[alt_begin..alt_end];

                            append(&linebuf, &linepos, "<img src=\"");
                            append(&linebuf, &linepos, src);
                            append(&linebuf, &linepos, "\" alt=\"");
                            append(&linebuf, &linepos, alt);
                            append(&linebuf, &linepos, "\">");

                            line = line[alt_end + "]".len ..];
                        },

                        9 => { // ##[ - only output if show_hidden_comments passed.
                            const hc_begin = found.index + 3;
                            const hc_end = std.mem.indexOfPos(u8, line, hc_begin, "]##") orelse @panic("not yet implemented support for multiline hidden comments");

                            if (opts.show_hidden_comments) {
                                const hc = line[hc_begin..hc_end];
                                append(&linebuf, &linepos, hc);
                            }

                            line = line[hc_end + "]##".len ..];
                        },

                        10 => { // ~[attr][text] for spans with attributes
                            // These constants describe indexes into line.
                            const attr_begin = found.index + 2;
                            const attr_end = std.mem.indexOfPos(u8, line, attr_begin, "][") orelse @panic("could not find ][ to terminate the description of attr span");
                            const text_begin = attr_end + 2;
                            const text_end = std.mem.indexOfPos(u8, line, text_begin, "]") orelse @panic("could not find ] to terminate text of attr span");

                            const attr = line[attr_begin..attr_end];
                            const text = line[text_begin..text_end];

                            append(&linebuf, &linepos, "<span class=\"");
                            append(&linebuf, &linepos, attr);
                            append(&linebuf, &linepos, "\">");
                            append(&linebuf, &linepos, text);
                            append(&linebuf, &linepos, "</span>");
                            line = line[text_end + "]".len ..];
                        },

                        else => unreachable,
                    }
                }
            }

            // Flush the finished line. `linebuf` is only sliced when there is
            // an output buffer, because the counting pass never fills it.
            if (buf) |b| {
                std.mem.copyForwards(u8, b.*[pos..], linebuf[0..linepos]);
            }
            pos += linepos;

            if (opts.br_after_newline) emit(buf, &pos, "<br>");
            emit(buf, &pos, "\n");

            @memset(&linebuf, 0);
            linepos = 0;
            linenum += 1;

            line = iter.next() orelse {
                // End of input: close whichever block is still open. Going
                // through `emit` advances `pos` by the literal's real length
                // (12 bytes for the list closers, 5 for the paragraph).
                const closer: []const u8 = switch (mode) {
                    .p => "</p>\n",
                    .ul => "</li>\n</ul>\n",
                    .ol => "</li>\n</ol>\n",
                    .none => "",
                };
                emit(buf, &pos, closer);
                break;
            };
        }

        if (opts.debug) {
            std.debug.print("sizeofHtml :: return = {any}\n", .{pos});
        }
        return pos;
    }

    /// Number of bytes `html` produces for the same `opts`, computed without
    /// writing anything.
    fn sizeofHtml(self: Kmd, opts: KmdHtmlOptions) usize {
        const counter = struct {
            fn append(unused: []u8, pos: *usize, data: []const u8) void {
                _ = unused;
                pos.* += data.len;
            }
        };

        return htmlBackingFn(self, opts, counter.append, null);
    }

    /// Render the body to HTML.
    ///
    /// The result is allocated with `allocator`; the caller owns it and frees
    /// it with the same allocator. `req_capacity_if_known` skips the sizing
    /// pass: the returned slice is allocated with exactly that length, so it
    /// has to be at least as large as the rendered output.
    pub fn html(self: Kmd, allocator: Allocator, opts: KmdHtmlOptions, req_capacity_if_known: ?usize) ![]const u8 {
        const writer = struct {
            fn append(buf: []u8, pos: *usize, data: []const u8) void {
                // Fail loudly in every build mode instead of writing past the
                // fixed-size line buffer.
                if (data.len > buf.len - pos.*) {
                    @panic("kmd: a rendered line does not fit in the line buffer");
                }
                std.mem.copyForwards(u8, buf[pos.*..], data);
                pos.* += data.len;
            }
        };

        const req_capacity = req_capacity_if_known orelse sizeofHtml(self, opts);
        var buf: []u8 = try allocator.alloc(u8, req_capacity);

        _ = htmlBackingFn(self, opts, writer.append, &buf);
        return buf;
    }
};

test "kmd" {
    const allocator = std.testing.allocator;
    var foo: Kmd = undefined;

    const s1 =
        \\A good title
        \\2024-10-31
        \\This is a paragraph.
        \\
        \\This is another: it *has* _formatting_ and @@[hyperlinks][https://example.com].
        \\In fact, it even spans multiple lines!
        \\
        \\A list would be like this:
        \\- I like big butts
        \\- and I cannot lie.
        \\
        \\~ Entry eins
        \\~ Entry zwei
        \\
        \\Yayy!
+ \\ + \\Bye. + \\ + ; + + const r1 = + \\<p> + \\This is a paragraph. + \\</p> + \\<p> + \\This is another: it <em>has</em> <strong>formatting</strong> and <a href="https://example.com">hyperlinks</a>. + \\In fact, it even spans multiple lines! + \\</p> + \\<p> + \\A list would be like this: + \\</p> + \\<ul> + \\<li> + \\I like big butts + \\</li> + \\<li> + \\and I cannot lie. + \\</li> + \\</ul> + \\<ol> + \\<li> + \\Entry eins + \\</li> + \\<li> + \\Entry zwei + \\</li> + \\</ol> + \\<p> + \\Yayy! + \\</p> + \\<p> + \\Bye. + \\</p> + \\ + ; + foo.init(s1); + const res1 = try foo.html(allocator, .{ .br_after_newline = false }, null); + defer allocator.free(res1); + try std.testing.expectEqualStrings(r1, res1); + + // Test br_after_newline + const s2 = + \\My poetry is amazing + \\2024-11-01 + \\I am such a good poet + \\Truly fantastic + \\So great + \\Lol xD + ; + + const r2 = + \\<p> + \\I am such a good poet<br> + \\Truly fantastic<br> + \\So great<br> + \\Lol xD<br> + \\</p> + \\ + ; + + foo.init(s2); + const res2 = try foo.html(allocator, .{ .br_after_newline = true }, null); + defer allocator.free(res2); + try std.testing.expectEqualStrings(r2, res2); +} diff --git a/scripts/kmd/src/main.zig b/scripts/kmd/src/main.zig new file mode 100644 index 0000000..fdc033e --- /dev/null +++ b/scripts/kmd/src/main.zig @@ -0,0 +1,196 @@ +const std = @import("std"); +const kmd = @import("kmd.zig"); +const Allocator = std.mem.Allocator; + +// Work to do +// - Implement --io which takes an input and an output so +// save the need to spawn the program multiple times. 
// Globals

// Buffered writer over standard output. `print` flushes it after every call.
const stdout_file = std.io.getStdOut().writer();
var bw = std.io.bufferedWriter(stdout_file);
const stdout = bw.writer();

// Options for `convert-to-html`, filled in by `convertToHtml` from its arguments.
// NOTE(review): `output` is parsed from `-o` but nothing in this file reads it yet;
// the generated HTML always goes to standard output.
var output: ?[]const u8 = null;
var input: ?[]const u8 = null;
var template: ?[]const u8 = null;
var debug = false;
var insert_br = false;
var show_hidden_comments = false;

/// Formats `args` according to `fmt` onto buffered standard output and flushes
/// immediately, so output is visible even if the process exits right afterwards.
fn print(comptime fmt: []const u8, args: anytype) !void {
    try stdout.print(fmt, args);
    try bw.flush();
}

/// Prints the help text and terminates the process.
/// Exits with status 0 when the text was written, 1 when writing it failed.
fn usage() noreturn {
    print(
        \\kmd: a set of utilities relating to the kmd format
        \\`kmd convert-to-html`:
        \\ -i INPUT The input Markdown file.
        \\ Alternatively specify multiple with --io.
        \\ -o OUTPUT The file to output to. Alternatively,
        \\ specify multiple with --io.
        \\ -t TEMPLATE The template to substitute into. $$CONTENTS$$,
        \\ $$DATE$$, $$TITLE$$, $$WC$$, $$TTR$$ are substituted.
        \\ -h Display this help message.
        \\ kmd wc
        \\ -w, -l, -b
        \\ Retrieves the word, line, or byte count ignoring the header-seq.
        \\ -t
        \\ Allows passing multiple files, and the total is retrieved for these.
        \\ -d
        \\ Pass a directory and (non-recursively) get total count. Assumes all files in the directory are kmd.
        \\ EXAMPLE kmd wc -wd $WEBSITE/blog
        \\ kmd generate-index DIRECTORIES
        \\ -s [title|date] [asc|dsc]
        \\ Sort mode.
    , .{}) catch {
        std.process.exit(1);
    };
    std.process.exit(0);
}

/// Returns the argument that follows the flag at `args[i.*]` and advances `i.*`
/// onto it. Prints a diagnostic and exits with status 4 when the flag is the
/// last argument, instead of indexing past the end of `args`.
fn flagValue(args: anytype, i: *usize) []const u8 {
    if (i.* + 1 >= args.len) {
        std.debug.print("Missing value after {s}\n", .{args[i.*]});
        std.process.exit(4);
    }
    i.* += 1;
    return args[i.*];
}

/// Implements `kmd convert-to-html`.
///
/// `args[0]` is the sub-command name and is skipped; the remaining entries are
/// parsed into the file-level option globals. The input file is read relative
/// to the current working directory, converted to HTML, and either printed
/// as-is or substituted into the template given with `-t`.
///
/// Exits with status 4 when `-i` is missing or a flag lacks its value.
/// Returns any file-system, allocation or write error.
pub fn convertToHtml(allocator: Allocator, args: anytype) !void {
    var i: usize = 1;
    while (i < args.len) : (i += 1) {
        const arg = args[i];
        if (std.mem.eql(u8, arg, "-o")) {
            output = flagValue(args, &i);
        } else if (std.mem.eql(u8, arg, "-i")) {
            input = flagValue(args, &i);
        } else if (std.mem.eql(u8, arg, "-t")) {
            template = flagValue(args, &i);
        } else if (std.mem.eql(u8, arg, "-d")) {
            debug = true;
        } else if (std.mem.eql(u8, arg, "-h")) {
            usage();
        } else if (std.mem.eql(u8, arg, "-b")) {
            insert_br = true;
        } else if (std.mem.eql(u8, arg, "-p")) {
            show_hidden_comments = true;
        } else {
            // Unknown arguments are reported but do not abort the conversion.
            std.debug.print("not a valid arg\n", .{});
        }
    }

    const input_path = input orelse {
        std.debug.print("Did not pass input parameter -i\n", .{});
        std.process.exit(4);
    };

    const dir = std.fs.cwd();
    const file = try dir.openFile(input_path, .{});
    defer file.close();
    const bytes = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(bytes);

    var md: kmd.Kmd = undefined;
    md.init(bytes);
    const html = try md.html(allocator, .{
        .br_after_newline = insert_br,
        .show_hidden_comments = show_hidden_comments,
        .debug = debug,
    }, null);
    defer allocator.free(html);

    if (template) |t| {
        const template_file = try dir.openFile(t, .{});
        defer template_file.close();
        const template_bytes = try template_file.readToEndAlloc(allocator, std.math.maxInt(usize));
        defer allocator.free(template_bytes);

        // One allocation per placeholder. Each intermediate buffer is released
        // by a defer registered right after it is created, so a failure in a
        // later step does not leak the earlier ones.
        const r1 = try std.mem.replaceOwned(u8, allocator, template_bytes, "$$CONTENT$$", html);
        defer allocator.free(r1);
        const r2 = try std.mem.replaceOwned(u8, allocator, r1, "$$CONTENTS$$", html);
        defer allocator.free(r2);
        const r3 = try std.mem.replaceOwned(u8, allocator, r2, "$$DATE$$", md.date);
        defer allocator.free(r3);
        const r4 = try std.mem.replaceOwned(u8, allocator, r3, "$$TITLE$$", md.title);
        defer allocator.free(r4);

        // An empty tag list is rendered as the literal word "none".
        const tags = if (std.mem.eql(u8, md.tags, "")) "none" else md.tags;
        const r5 = try std.mem.replaceOwned(u8, allocator, r4, "$$TAGS$$", tags);
        defer allocator.free(r5);

        const wc_ = md.wc(.words);
        const wc_str = try std.fmt.allocPrint(allocator, "{}", .{wc_});
        defer allocator.free(wc_str);
        const r6 = try std.mem.replaceOwned(u8, allocator, r5, "$$WC$$", wc_str);
        defer allocator.free(r6);

        const ttr_str = try std.fmt.allocPrint(allocator, "{}", .{md.averageMinutesTakenToRead(wc_)});
        defer allocator.free(ttr_str);
        const r7 = try std.mem.replaceOwned(u8, allocator, r6, "$$TTR$$", ttr_str);
        defer allocator.free(r7);

        try print("{s}\n", .{r7});
    } else {
        // No template: emit the bare HTML on standard output.
        try print("{s}", .{html});
    }
}

/// Implements `kmd wc`.
///
/// `args[0]` is the sub-command name and is skipped. Arguments starting with
/// `-` are bundles of single-letter flags (`w`, `l`, `b`, `d`, `t`); any other
/// argument is taken as the file to count, the last one winning. The count
/// mode defaults to words.
///
/// NOTE(review): the `d` and `t` flags are accepted but not acted upon here.
///
/// Exits with status 5 on an unknown flag and 4 when no file was given.
pub fn wc(allocator: Allocator, args: anytype) !void {
    var mode: ?kmd.WcMode = null;
    var file_str: ?[]const u8 = null;
    var direc = false;
    var total = false;

    var i: usize = 1;
    while (i < args.len) : (i += 1) {
        const arg = args[i];
        if (std.mem.eql(u8, arg, "-h")) {
            usage();
        } else if (arg.len > 0 and arg[0] == '-') {
            // The length check keeps an empty argument from being indexed.
            for (arg[1..]) |f| switch (f) {
                'w' => mode = .words,
                'l' => mode = .lines,
                'b' => mode = .bytes,
                'd' => direc = true,
                't' => total = true,
                else => {
                    // `{c}` shows the offending character, not its byte value.
                    std.debug.print("invalid flag {c}\n", .{f});
                    std.process.exit(5);
                },
            };
        } else {
            file_str = arg;
        }
    }

    const path = file_str orelse {
        std.debug.print("Did not pass file to run wc over\n", .{});
        std.process.exit(4);
    };

    const dir = std.fs.cwd();
    const file = try dir.openFile(path, .{});
    defer file.close();
    const bytes = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(bytes);

    var md: kmd.Kmd = undefined;
    md.init(bytes);
    // Returning normally (rather than exiting here) lets the defers above run;
    // `main` then returns and the process still ends with status 0.
    try print("{any}\n", .{md.wc(mode orelse .words)});
}

/// Program entry point: dispatches on the first command-line argument to
/// `convert-to-html` or `wc`, and prints the usage text for anything else.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len <= 1) usage();

    // The sub-command handlers expect their own name at index 0.
    const rest_of_args = args[1..];
    if (std.mem.eql(u8, args[1], "convert-to-html")) {
        try convertToHtml(allocator, rest_of_args);
    } else if (std.mem.eql(u8, args[1], "wc")) {
        try wc(allocator, rest_of_args);
    } else {
        usage();
    }
}
/// Finds the earliest occurrence, at or after `start`, of any of the `values`
/// sub-slices within `slice`.
///
/// Returns the position of that occurrence (`index`) together with the
/// position inside `values` of the needle that matched (`value`), or `null`
/// when none of the needles occur. When several needles match at the same
/// position, the one listed first in `values` is reported. The number of
/// needles is not limited.
pub fn indexOfAnyArray(
    comptime T: type,
    slice: []const T,
    start: usize,
    values: []const []const T,
) ?struct { index: usize, value: usize } {
    var lowest_index: ?usize = null;
    var value_index: usize = 0;

    for (values, 0..) |needle, i| {
        const found = std.mem.indexOfPos(T, slice, start, needle) orelse continue;
        // Strict `<` keeps the first-listed needle on ties.
        if (lowest_index == null or found < lowest_index.?) {
            lowest_index = found;
            value_index = i;
        }
    }

    const index = lowest_index orelse return null;
    return .{ .index = index, .value = value_index };
}

test "indexOfAnyArray" {
    const vals: [2][]const u8 = .{ "Gri", "Sli" };
    const wrong_vals: [2][]const u8 = .{ "Tri", "Pri" };

    // The result is optional, so it must be unwrapped before reading fields.
    const a = indexOfAnyArray(u8, "HiGriSliMi", 0, &vals).?;
    try std.testing.expectEqual(@as(usize, 2), a.index);
    try std.testing.expectEqual(@as(usize, 0), a.value);

    const b = indexOfAnyArray(u8, "HiGriSliMi", 3, &vals).?;
    try std.testing.expectEqual(@as(usize, 5), b.index);
    try std.testing.expectEqual(@as(usize, 1), b.value);

    // No needle present: the whole result is null.
    try std.testing.expect(indexOfAnyArray(u8, "HiGriSliMi", 0, &wrong_vals) == null);
}