diff options
Diffstat (limited to 'scripts/kmd/src/kmd.zig')
-rw-r--r-- | scripts/kmd/src/kmd.zig | 490 |
1 files changed, 490 insertions, 0 deletions
// scripts/kmd/src/kmd.zig
const std = @import("std");
const util = @import("./util.zig");
const Allocator = std.mem.Allocator;
const expect = std.testing.expect;

// Kmd = Kinda Markdown
//
// File layout (see `Kmd.init`): line 1 is the title, line 2 the date, line 3
// the tags, and everything after that is the body.
//
// Block-level syntax (decided by how a line starts):
//   "# " .. "#### "  -> <h1> .. <h4>. Plain text only: no links, bold, etc.
//   "- "             -> <ul> item
//   "~ "             -> <ol> item
//   "#[raw]#"        -> the rest of the line is emitted verbatim, e.g.
//                       #[raw]# <p> <a href="/foo">this line is just html raw!</a> </p>
//   anything else    -> paragraph text; an empty line closes the open
//                       <p>, <ul> or <ol>.
//
// Inline syntax:
//   _                -> toggles <strong> / </strong>
//   *                -> toggles <em> / </em>
//   ;;               -> <br>
//   <, >             -> &lt;, &gt;
//   #[ ... ]#        -> <!-- ... -->
//   ##[ ... ]##      -> hidden comment: the text is emitted only when the
//                       show_hidden_comments option is set, otherwise dropped.
//   @@[desc][href]   -> <a href="href">desc</a>
//   @@img[src][alt]  -> <img src="src" alt="alt">
//                       e.g. @@img[/assets/src.jpg][alttext]
//   ~[attr][text]    -> <span class="attr">text</span>
//
// NOTE(review): earlier notes said `\*` / `\_` can be used to escape the
// emphasis markers, but no backslash handling exists in this file - confirm
// whether that is still planned.

pub const WcMode = enum { words, lines, bytes };

pub const Kmd = struct {
    /// Which block-level element is currently open while rendering.
    const Mode = enum {
        none,
        p,
        ul,
        ol,
    };

    /// Rendering options. Every field defaults to `false`, so callers may
    /// set only the ones they care about.
    pub const KmdHtmlOptions = struct {
        /// Emit `<br>` before the newline that ends every rendered line.
        br_after_newline: bool = false,
        /// Emit the text inside `##[ ]##` instead of dropping it.
        show_hidden_comments: bool = false,
        /// Print tracing information to stderr while rendering.
        debug: bool = false,
    };

    /// Mutable state carried from one body line to the next.
    const RenderState = struct {
        mode: Mode = .none,
        is_strong: bool = false,
        is_emphatic: bool = false,
    };

    /// Output target shared by the sizing and the rendering pass. With
    /// `buf == null` nothing is copied and only `pos` advances, so both
    /// passes are guaranteed to agree on the number of bytes produced.
    const Sink = struct {
        buf: ?[]u8,
        pos: usize = 0,

        fn write(self: *Sink, data: []const u8) void {
            if (self.buf) |b| {
                std.mem.copyForwards(u8, b[self.pos..], data);
            }
            self.pos += data.len;
        }
    };

    /// The two bracketed fields of a `TOKEN[first][second]` construct plus
    /// whatever follows the closing `]` on the same line.
    const BracketPair = struct {
        first: []const u8,
        second: []const u8,
        rest: []const u8,
    };

    const Heading = struct {
        prefix: []const u8,
        open: []const u8,
        close: []const u8,
    };

    // The prefixes are mutually exclusive ("## x" does not start with "# "),
    // so the order of this table does not matter.
    const headings = [_]Heading{
        .{ .prefix = "# ", .open = "<h1>", .close = "</h1>" },
        .{ .prefix = "## ", .open = "<h2>", .close = "</h2>" },
        .{ .prefix = "### ", .open = "<h3>", .close = "</h3>" },
        .{ .prefix = "#### ", .open = "<h4>", .close = "</h4>" },
    };

    const raw_prefix = "#[raw]#";

    /// Reading speed used by `averageMinutesTakenToRead`.
    const words_per_minute: usize = 238;

    title: []const u8,
    date: []const u8,
    tags: []const u8,
    contents: []const u8,

    /// Initialise from the text of a file: the first three lines become
    /// `title`, `date` and `tags`, the remainder becomes `contents`.
    /// Missing lines yield empty strings. All fields are slices into `text`,
    /// so `text` must outlive `self`.
    pub fn init(self: *Kmd, text: []const u8) void {
        var it = std.mem.splitAny(u8, text, "\n");
        self.title = it.first();
        self.date = it.next() orelse "";
        self.tags = it.next() orelse "";
        self.contents = it.rest();
    }

    /// Get the word count of the title - this is a naive count of spaces.
    pub fn titleWc(self: Kmd) usize {
        if (self.title.len == 0) return 0;
        return std.mem.count(u8, self.title, " ") + 1;
    }

    /// Get the word count of the article.
    ///
    /// This is a naive count: `.words` is the number of spaces plus one,
    /// `.lines` the number of newlines plus one and `.bytes` the length of
    /// the body. Empty contents always count as 0.
    pub fn wc(self: Kmd, mode: WcMode) usize {
        if (self.contents.len == 0) return 0;

        // TODO
        // Implement by going line by line, and if we encounter @@[ then count
        // until ][ then skip until ]. The rest of the punctuation shouldn't
        // cause an issue.
        return switch (mode) {
            .bytes => self.contents.len,
            .words => std.mem.count(u8, self.contents, " ") + 1,
            .lines => std.mem.count(u8, self.contents, "\n") + 1,
        };
    }

    /// Reading time in whole minutes, never less than 1.
    /// Pass in the word-count if you already have it to avoid recalculating.
    pub fn averageMinutesTakenToRead(self: Kmd, maybe_wc: ?usize) usize {
        const wc_ = maybe_wc orelse self.wc(.words);
        const mins = wc_ / words_per_minute;
        return if (mins == 0) 1 else mins;
    }

    /// Locate `first][second]` starting at `begin` (the index just past the
    /// opening token). Panics with the given message when a terminator is
    /// missing; constructs spanning several lines are not supported.
    fn parseBracketPair(
        line: []const u8,
        begin: usize,
        missing_separator: []const u8,
        missing_terminator: []const u8,
    ) BracketPair {
        const first_end = std.mem.indexOfPos(u8, line, begin, "][") orelse @panic(missing_separator);
        const second_begin = first_end + "][".len;
        const second_end = std.mem.indexOfPos(u8, line, second_begin, "]") orelse @panic(missing_terminator);
        return .{
            .first = line[begin..first_end],
            .second = line[second_begin..second_end],
            .rest = line[second_end + "]".len ..],
        };
    }

    /// Open a list, emitting its first `<li>`.
    fn openList(out: *Sink, state: *RenderState, ordered: bool) void {
        if (ordered) {
            state.mode = .ol;
            out.write("<ol>\n<li>\n");
        } else {
            state.mode = .ul;
            out.write("<ul>\n<li>\n");
        }
    }

    /// Render the inline markup of one line (or of the part of a line that
    /// follows a list marker).
    fn renderInline(
        out: *Sink,
        state: *RenderState,
        opts: KmdHtmlOptions,
        linenum: usize,
        text: []const u8,
    ) void {
        // The position of each token in this array is the value that
        // `util.indexOfAnyArray` reports back, so the switch below must be
        // kept in sync with it.
        const vals: [11][]const u8 = .{ "@@[", "_", "*", ";;", "<", ">", "#[", "]#", "@@img[", "##[", "~[" };

        var line = text;
        while (true) {
            // NOTE(review): assumes indexOfAnyArray returns the earliest match
            // in `line` together with the index of the matching needle -
            // confirm against util.zig.
            const found = util.indexOfAnyArray(u8, line, 0, &vals) orelse {
                // No more tokens: copy the entire rest of the line.
                out.write(line);
                return;
            };

            if (opts.debug) {
                std.debug.print("[{d}] Iterating: found {any} at index {any}.\n", .{ linenum, found.value, found.index });
            }

            // Copy up until the token.
            out.write(line[0..found.index]);

            // Handle the particular token.
            switch (found.value) {
                0 => { // @@[desc][href]
                    const pair = parseBracketPair(
                        line,
                        found.index + "@@[".len,
                        "could not find ][ to terminate the description of anchor tag",
                        "could not find ] to terminate href of anchor tag",
                    );
                    out.write("<a href=\"");
                    out.write(pair.second);
                    out.write("\">");
                    out.write(pair.first);
                    out.write("</a>");
                    line = pair.rest;
                },

                1 => { // _
                    out.write(if (state.is_strong) "</strong>" else "<strong>");
                    state.is_strong = !state.is_strong;
                    line = line[found.index + "_".len ..];
                },

                2 => { // *
                    out.write(if (state.is_emphatic) "</em>" else "<em>");
                    state.is_emphatic = !state.is_emphatic;
                    line = line[found.index + "*".len ..];
                },

                3 => { // ;;
                    out.write("<br>");
                    line = line[found.index + ";;".len ..];
                },

                4 => { // <
                    out.write("&lt;");
                    line = line[found.index + "<".len ..];
                },

                5 => { // >
                    out.write("&gt;");
                    line = line[found.index + ">".len ..];
                },

                6 => { // #[
                    out.write("<!--");
                    line = line[found.index + "#[".len ..];
                },

                7 => { // ]#
                    out.write("-->");
                    line = line[found.index + "]#".len ..];
                },

                8 => { // @@img[src][alt]
                    const pair = parseBracketPair(
                        line,
                        found.index + "@@img[".len,
                        "could not find ][ to terminate the src of img tag",
                        "could not find ] to terminate alt of img tag",
                    );
                    out.write("<img src=\"");
                    out.write(pair.first);
                    out.write("\" alt=\"");
                    out.write(pair.second);
                    out.write("\">");
                    line = pair.rest;
                },

                9 => { // ##[ - only output if show_hidden_comments passed.
                    const hc_begin = found.index + "##[".len;
                    const hc_end = std.mem.indexOfPos(u8, line, hc_begin, "]##") orelse @panic("not yet implemented support for multiline hidden comments");

                    if (opts.show_hidden_comments) {
                        out.write(line[hc_begin..hc_end]);
                    }

                    line = line[hc_end + "]##".len ..];
                },

                10 => { // ~[attr][text] for spans with attributes
                    const pair = parseBracketPair(
                        line,
                        found.index + "~[".len,
                        "could not find ][ to terminate the description of attr span",
                        "could not find ] to terminate text of attr span",
                    );
                    out.write("<span class=\"");
                    out.write(pair.first);
                    out.write("\">");
                    out.write(pair.second);
                    out.write("</span>");
                    line = pair.rest;
                },

                else => unreachable,
            }
        }
    }

    /// Render one body line, without the `<br>` / newline that ends it.
    fn renderLine(
        out: *Sink,
        state: *RenderState,
        opts: KmdHtmlOptions,
        linenum: usize,
        full_line: []const u8,
    ) void {
        var line = full_line;

        // Handle <hN>.
        // <hN> cannot accept any other formatting, e.g. links, bold, etc. It is plaintext.
        for (headings) |h| {
            if (std.mem.startsWith(u8, line, h.prefix)) {
                out.write(h.open);
                out.write(line[h.prefix.len..]);
                out.write(h.close);
                return;
            }
        }

        if (std.mem.startsWith(u8, line, raw_prefix)) {
            out.write(line[raw_prefix.len..]);
            return;
        }

        const starts_ul = std.mem.startsWith(u8, line, "- ");
        const starts_ol = std.mem.startsWith(u8, line, "~ ");

        if (state.mode == .none) {
            // A blank line with nothing open must not start a paragraph,
            // otherwise runs of blank lines produce empty <p> elements.
            if (line.len == 0) return;

            if (starts_ul or starts_ol) {
                openList(out, state, starts_ol);
                line = line[2..];
            } else {
                state.mode = .p;
                out.write("<p>\n");
            }
        } else if (line.len == 0) {
            // An empty line closes whatever is open.
            switch (state.mode) {
                .p => out.write("</p>"),
                .ul => out.write("</li>\n</ul>"),
                .ol => out.write("</li>\n</ol>"),
                .none => {},
            }
            state.mode = .none;
            return;
        } else if (starts_ul or starts_ol) {
            switch (state.mode) {
                // Inside a list either marker starts the next item; the kind
                // of list does not change.
                .ul, .ol => out.write("</li>\n<li>\n"),
                // A marker directly after paragraph text ends the paragraph
                // and opens a list.
                .p => {
                    out.write("</p>\n");
                    openList(out, state, !starts_ul);
                },
                .none => unreachable, // handled by the first branch
            }
            // Strip the marker exactly once. The item text itself may begin
            // with "- " or "~ " and must not be interpreted again.
            line = line[2..];
        }

        renderInline(out, state, opts, linenum, line);
    }

    /// Render `contents`. Writes into `buf` when one is given and always
    /// returns the number of bytes the rendering occupies, so calling it
    /// with `null` computes the required buffer size.
    fn htmlBackingFn(self: Kmd, opts: KmdHtmlOptions, buf: ?[]u8) usize {
        var out: Sink = .{ .buf = buf };
        var state: RenderState = .{};
        var iter = std.mem.splitAny(u8, self.contents, "\n");
        var linenum: usize = 0;

        while (iter.next()) |line| : (linenum += 1) {
            renderLine(&out, &state, opts, linenum, line);

            if (opts.br_after_newline) {
                out.write("<br>");
            }
            out.write("\n");
        }

        // Close whatever is still open at the end of the document.
        switch (state.mode) {
            .p => out.write("</p>\n"),
            .ul => out.write("</li>\n</ul>\n"),
            .ol => out.write("</li>\n</ol>\n"),
            .none => {},
        }

        if (opts.debug) {
            std.debug.print("sizeofHtml :: return = {any}\n", .{out.pos});
        }
        return out.pos;
    }

    /// Number of bytes `html` produces for the given options.
    fn sizeofHtml(self: Kmd, opts: KmdHtmlOptions) usize {
        return htmlBackingFn(self, opts, null);
    }

    /// Render the body as HTML. The caller owns the returned slice and frees
    /// it with `allocator`.
    ///
    /// `req_capacity_if_known` skips the sizing pass. It must equal the
    /// rendered size for the same `opts`: a smaller value makes the
    /// rendering pass write out of bounds (a panic in safe builds), a larger
    /// one leaves uninitialised bytes at the end of the result.
    pub fn html(self: Kmd, allocator: Allocator, opts: KmdHtmlOptions, req_capacity_if_known: ?usize) ![]const u8 {
        const req_capacity = req_capacity_if_known orelse sizeofHtml(self, opts);
        const buf = try allocator.alloc(u8, req_capacity);

        _ = htmlBackingFn(self, opts, buf);
        return buf;
    }
};

test "kmd" {
    const allocator = std.testing.allocator;
    var foo: Kmd = undefined;

    const s1 =
        \\A good title
        \\2024-10-31
        \\test, example
        \\This is a paragraph.
        \\
        \\This is another: it *has* _formatting_ and @@[hyperlinks][https://example.com].
        \\In fact, it even spans multiple lines!
        \\
        \\A list would be like this:
        \\- I like big butts
        \\- and I cannot lie.
        \\
        \\~ Entry eins
        \\~ Entry zwei
        \\
        \\Yayy!
        \\
        \\Bye.
        \\
    ;

    const r1 =
        \\<p>
        \\This is a paragraph.
        \\</p>
        \\<p>
        \\This is another: it <em>has</em> <strong>formatting</strong> and <a href="https://example.com">hyperlinks</a>.
        \\In fact, it even spans multiple lines!
        \\</p>
        \\<p>
        \\A list would be like this:
        \\</p>
        \\<ul>
        \\<li>
        \\I like big butts
        \\</li>
        \\<li>
        \\and I cannot lie.
        \\</li>
        \\</ul>
        \\<ol>
        \\<li>
        \\Entry eins
        \\</li>
        \\<li>
        \\Entry zwei
        \\</li>
        \\</ol>
        \\<p>
        \\Yayy!
        \\</p>
        \\<p>
        \\Bye.
        \\</p>
        \\
    ;
    foo.init(s1);
    try std.testing.expectEqualStrings("A good title", foo.title);
    try std.testing.expectEqualStrings("2024-10-31", foo.date);
    try std.testing.expectEqualStrings("test, example", foo.tags);

    const res1 = try foo.html(allocator, .{ .br_after_newline = false }, null);
    defer allocator.free(res1);
    try std.testing.expectEqualStrings(r1, res1);

    // Test br_after_newline
    const s2 =
        \\My poetry is amazing
        \\2024-11-01
        \\poetry
        \\I am such a good poet
        \\Truly fantastic
        \\So great
        \\Lol xD
    ;

    const r2 =
        \\<p>
        \\I am such a good poet<br>
        \\Truly fantastic<br>
        \\So great<br>
        \\Lol xD<br>
        \\</p>
        \\
    ;

    foo.init(s2);
    const res2 = try foo.html(allocator, .{ .br_after_newline = true }, null);
    defer allocator.free(res2);
    try std.testing.expectEqualStrings(r2, res2);
}

test "kmd document ending inside a list is sized correctly" {
    const allocator = std.testing.allocator;
    var doc: Kmd = undefined;

    doc.init(
        \\Title
        \\2024-11-02
        \\tags
        \\- one
        \\- two
    );
    const expected =
        \\<ul>
        \\<li>
        \\one
        \\</li>
        \\<li>
        \\two
        \\</li>
        \\</ul>
        \\
    ;
    const res = try doc.html(allocator, .{}, null);
    defer allocator.free(res);
    try std.testing.expectEqualStrings(expected, res);
}

test "kmd list item text may start with a list marker" {
    const allocator = std.testing.allocator;
    var doc: Kmd = undefined;

    doc.init(
        \\Title
        \\2024-11-02
        \\tags
        \\- first
        \\- ~ 5 minutes
    );
    const expected =
        \\<ul>
        \\<li>
        \\first
        \\</li>
        \\<li>
        \\~ 5 minutes
        \\</li>
        \\</ul>
        \\
    ;
    const res = try doc.html(allocator, .{}, null);
    defer allocator.free(res);
    try std.testing.expectEqualStrings(expected, res);
}

test "kmd escapes angle brackets" {
    const allocator = std.testing.allocator;
    var doc: Kmd = undefined;

    doc.init(
        \\Title
        \\2024-11-02
        \\tags
        \\a < b > c
    );
    const expected =
        \\<p>
        \\a &lt; b &gt; c
        \\</p>
        \\
    ;
    const res = try doc.html(allocator, .{}, null);
    defer allocator.free(res);
    try std.testing.expectEqualStrings(expected, res);
}

test "kmd init tolerates missing header lines" {
    var doc: Kmd = undefined;

    doc.init("Only a title");
    try std.testing.expectEqualStrings("Only a title", doc.title);
    try std.testing.expectEqualStrings("", doc.date);
    try std.testing.expectEqualStrings("", doc.tags);
    try std.testing.expectEqualStrings("", doc.contents);
    try std.testing.expectEqual(@as(usize, 3), doc.titleWc());
    try std.testing.expectEqual(@as(usize, 0), doc.wc(.words));
    try std.testing.expectEqual(@as(usize, 1), doc.averageMinutesTakenToRead(null));
}