const std = @import("std");
const util = @import("./util.zig");
const Allocator = std.mem.Allocator;
const expect = std.testing.expect;
// Kmd = Kinda Markdown
// Supported Elements
//
// # - ~ _ * ;;
// * will always be turned into <em> (or </em> if already italic).
// Same with _ (which toggles <strong>). To prevent this, \* should be used.
// Also should handle:
// <, > -> &lt;, &gt;
// Also, allows for comments, with:
// #[ ]# -> <!-- --> (an HTML comment)
// And ##[]## for hidden comments, that only show if the show_hidden_comments option is passed.
// Images can be added with:
// @@img[/assets/src.jpg][alttext] -> an <img> tag with that src and alt text
// For a line to be raw and unprocessed (useful for writing raw HTML):
// #[raw]# this line is just html raw!
// For spans that are given attributes, do:
// ~[attr][text]
/// Unit that `Kmd.wc` counts in.
pub const WcMode = enum {
    /// Count words.
    words,
    /// Count lines.
    lines,
    /// Count raw bytes of the contents.
    bytes,
};
pub const Kmd = struct {
/// Block-level element the renderer currently has open:
/// `none` (nothing open), `p` (paragraph), `ul` (list started by a `- ` line)
/// or `ol` (list started by a `~ ` line).
const Mode = enum { none, p, ul, ol };
/// Options that control HTML generation.
///
/// Every field defaults to `false`, so callers only need to spell out the
/// options they want to enable, e.g. `.{ .br_after_newline = true }`.
pub const KmdHtmlOptions = struct {
    /// Emit an extra line-break marker after every rendered line.
    br_after_newline: bool = false,
    /// Emit the text of hidden comments (`##[ ]##`) instead of dropping it.
    show_hidden_comments: bool = false,
    /// Print per-token tracing and the final size via `std.debug.print`.
    debug: bool = false,
};
title: []const u8,
date: []const u8,
tags: []const u8,
contents: []const u8,
/// Initialise the document from the raw text of a file.
///
/// The first three lines are taken as `title`, `date` and `tags` (in that
/// order); everything after the third newline becomes `contents`.
/// A header line that is missing because `text` is too short is set to the
/// empty string rather than panicking.
///
/// All fields are slices into `text`; nothing is copied, so `text` must
/// outlive `self`.
pub fn init(self: *Kmd, text: []const u8) void {
    var lines = std.mem.splitScalar(u8, text, '\n');
    self.title = lines.first();
    self.date = lines.next() orelse "";
    self.tags = lines.next() orelse "";
    // `rest()` yields an empty slice once the iterator is exhausted.
    self.contents = lines.rest();
}
/// Word count of the title.
///
/// Deliberately naive: an empty title has zero words, otherwise the result
/// is the number of space characters plus one.
pub fn titleWc(self: Kmd) usize {
    const title = self.title;
    if (title.len == 0) return 0;
    return 1 + std.mem.count(u8, title, " ");
}
/// Count the article contents in the unit selected by `mode`.
///
/// Empty contents always yield 0. Otherwise:
/// - `.bytes` returns the length of the contents in bytes.
/// - `.lines` returns the number of newline characters plus one (so a
///   trailing newline counts as one extra, empty line).
/// - `.words` returns the number of runs of non-whitespace characters, where
///   whitespace is space, tab, carriage return or newline. Words separated
///   only by a newline are therefore counted separately, and repeated spaces
///   do not inflate the count.
pub fn wc(self: Kmd, mode: WcMode) usize {
    const text = self.contents;
    if (text.len == 0) return 0;

    // TODO
    // Markup is still counted as text. Implement by going line by line, and
    // if we encounter @@[ then count until ][ then skip until ]. The rest of
    // the punctuation shouldn't cause an issue.
    switch (mode) {
        .bytes => return text.len,
        .lines => return std.mem.count(u8, text, "\n") + 1,
        .words => {
            var words = std.mem.tokenizeAny(u8, text, " \t\r\n");
            var total: usize = 0;
            while (words.next()) |_| total += 1;
            return total;
        },
    }
}
/// Estimated reading time in whole minutes, never less than 1.
///
/// Pass the word count in `maybe_wc` if it is already known; with `null` it
/// is computed via `self.wc(.words)`. The estimate is the word count divided
/// (integer division) by 238.
pub fn averageMinutesTakenToRead(self: Kmd, maybe_wc: ?usize) usize {
    const words_per_minute: usize = 238;
    const word_count = if (maybe_wc) |known| known else self.wc(.words);
    return @max(@as(usize, 1), word_count / words_per_minute);
}
// Shared implementation behind `sizeofHtml` and `html`.
//
// Walks `self.contents` line by line. For every line the rendered text is
// produced through the caller-supplied `append` callback, which receives the
// per-line scratch buffer (`linebuf`), the write position inside it
// (`linepos`) and the bytes to add. After each line, `linebuf[0..linepos]` is
// copied into `buf` at `pos` (only when `buf` is non-null) and `pos` is
// advanced.
//
// Parameters:
//   opts   - rendering options (line-break-after-newline, hidden comments, debug tracing).
//   append - how rendered bytes are recorded; `sizeofHtml` passes a callback
//            that only advances the position, `html` passes one that also copies.
//   buf    - destination buffer, or null to only measure.
//
// Returns the final value of `pos`, i.e. the total number of bytes accounted
// for, whether or not `buf` was passed.
//
// Panics (via `@panic` or `.?`) when an anchor, image, hidden comment or
// attribute span is not terminated on the same line.
//
// NOTE(review): `linebuf` is a fixed 256-byte array; a rendered line longer
// than that does not fit when `append` actually copies - confirm the intended limit.
// NOTE(review): in this copy of the file several string literals are empty or
// contain raw line breaks, and some `pos +=` amounts do not match the visible
// literal lengths (e.g. 6 after a 2-byte literal). It looks like markup was
// lost from the literals - confirm against the upstream file.
fn htmlBackingFn(self: Kmd, opts: KmdHtmlOptions, append: fn (unused: []u8, pos: *usize, data: []const u8) void, buf: ?*[]u8) usize {
var mode: Mode = .none;
var is_strong = false;
var is_emphatic = false;
var iter = std.mem.splitAny(u8, self.contents, "\n");
var pos: usize = 0; // position in the buffer
// As we iterate linewise, this holds the constructed line as we go through it,
// and is copied at the end of each line.
var line: []const u8 = iter.first();
var linenum: usize = 0;
var linebuf: [256]u8 = undefined;
var linepos: usize = 0;
// The continue expression below runs after every line (including on `continue`):
// it flushes the line, resets the scratch state and fetches the next line.
while (true) : ({
// Reset `linebuf`, append to `buf`, go to next iteration.
if (buf) |b| {
std.mem.copyForwards(u8, b.*[pos..], linebuf[0..linepos]);
}
pos += linepos;
if (opts.br_after_newline) {
if (buf) |b| {
std.mem.copyForwards(u8, b.*[pos..], "
");
}
pos += 4;
}
if (buf) |b| {
std.mem.copyForwards(u8, b.*[pos..], "\n");
}
pos += 1;
@memset(&linebuf, 0);
linepos = 0;
linenum += 1;
// Advance to the next input line; once the input is exhausted, account for
// closing whichever block is still open and leave the loop.
if (iter.next()) |v| {
line = v;
} else {
switch (mode) {
.p => {
if (buf) |b| {
std.mem.copyForwards(u8, b.*[pos..], "
\n");
}
pos += 5;
},
.ul => {
if (buf) |b| {
std.mem.copyForwards(u8, b.*[pos..], "\n\n");
}
pos += 6;
},
.ol => {
if (buf) |b| {
std.mem.copyForwards(u8, b.*[pos..], "\n\n");
}
pos += 6;
},
else => {},
}
break;
}
}) {
// Handle lines starting with `# `, `## `, `### ` or `#### ` (presumably headings).
// Such a line cannot accept any other formatting, e.g. links, bold, etc. It is plaintext.
if (std.mem.startsWith(u8, line, "# ")) {
append(&linebuf, &linepos, "");
append(&linebuf, &linepos, line[2..]);
append(&linebuf, &linepos, "
");
continue;
}
if (std.mem.startsWith(u8, line, "## ")) {
append(&linebuf, &linepos, "");
append(&linebuf, &linepos, line[3..]);
append(&linebuf, &linepos, "
");
continue;
}
if (std.mem.startsWith(u8, line, "### ")) {
append(&linebuf, &linepos, "");
append(&linebuf, &linepos, line[4..]);
append(&linebuf, &linepos, "
");
continue;
}
if (std.mem.startsWith(u8, line, "#### ")) {
append(&linebuf, &linepos, "");
append(&linebuf, &linepos, line[5..]);
append(&linebuf, &linepos, "
");
continue;
}
// Raw line: everything after the `#[raw]#` prefix is emitted unprocessed.
if (std.mem.startsWith(u8, line, "#[raw]#")) {
append(&linebuf, &linepos, line[7..]);
continue;
}
if (mode == .none) {
// No block is open: this line starts a new list (`- ` or `~ `) or a paragraph.
if (std.mem.startsWith(u8, line, "- ")) {
mode = .ul;
append(&linebuf, &linepos, "\n- \n");
line = line[2..];
} else if (std.mem.startsWith(u8, line, "~ ")) {
mode = .ol;
append(&linebuf, &linepos, "
\n- \n");
line = line[2..];
} else {
mode = .p;
append(&linebuf, &linepos, "
\n");
}
} else {
// Handle empty line
// An empty line ends the currently open block and resets `mode` to `.none`.
if (std.mem.eql(u8, line, "")) {
switch (mode) {
.p => append(&linebuf, &linepos, "
"),
.ul => append(&linebuf, &linepos, " \n
"),
.ol => append(&linebuf, &linepos, "\n"),
.none => {}, // do nothing,
}
mode = .none;
continue;
}
// A further list item while already inside a list; the two-byte marker is dropped.
if ((mode == .ul or mode == .ol) and
(std.mem.startsWith(u8, line, "- ") or std.mem.startsWith(u8, line, "~ ")))
{
append(&linebuf, &linepos, "\n\n");
line = line[2..];
}
// NOTE(review): `and` binds tighter than `or`, so this reads as
// `(mode == .p and starts with "- ") or (starts with "~ ")`; the second
// half is not restricted to paragraph mode - confirm that is intended.
if (mode == .p and std.mem.startsWith(u8, line, "- ") or std.mem.startsWith(u8, line, "~ ")) {
append(&linebuf, &linepos, "\n");
if (std.mem.startsWith(u8, line, "- ")) {
mode = .ul;
append(&linebuf, &linepos, "\n- \n");
} else {
mode = .ol;
append(&linebuf, &linepos, "
\n- \n");
}
line = line[2..];
}
}
// Inline tokens to look for; `found.value` below is the index into this array.
const vals: [11][]const u8 = .{ "@@[", "_", "*", ";;", "<", ">", "#[", "]#", "@@img[", "##[", "~[" };
// Repeatedly find a token in the remaining `line`, emit the text before it,
// handle the token, and continue with what follows it.
while (true) {
const found = util.indexOfAnyArray(u8, line, 0, &vals) orelse {
// Copy the entire rest of line
append(&linebuf, &linepos, line[0..]);
break;
};
if (opts.debug) {
std.debug.print("[{d}] Iterating: found {any} at index {any}.\n", .{ linenum, found.value, found.index });
}
// Copy up until the token
append(&linebuf, &linepos, line[0..found.index]);
// Handle the particular token
switch (found.value) {
0 => { // @@[desc][href]
// These constants describe indexes into line.
const desc_begin = found.index + 3;
const desc_end = std.mem.indexOfPos(u8, line, desc_begin, "][") orelse @panic("could not find ][ to terminate the description of anchor tag");
const href_begin = desc_end + 2;
const href_end = std.mem.indexOfPos(u8, line, href_begin, "]") orelse @panic("could not find ] to terminate href of anchor tag");
const desc = line[desc_begin..desc_end];
const href = line[href_begin..href_end];
append(&linebuf, &linepos, "");
append(&linebuf, &linepos, desc);
append(&linebuf, &linepos, "");
line = line[href_end + "]".len ..];
},
1 => { // _
// Toggles `is_strong`; the state persists across lines.
append(&linebuf, &linepos, if (is_strong) "" else "");
is_strong = !is_strong;
line = line[found.index + "_".len ..];
},
2 => { // *
// Toggles `is_emphatic`; the state persists across lines.
append(&linebuf, &linepos, if (is_emphatic) "" else "");
is_emphatic = !is_emphatic;
line = line[found.index + "*".len ..];
},
3 => { // ;;
append(&linebuf, &linepos, "
");
line = line[found.index + ";;".len ..];
},
4 => { // <
append(&linebuf, &linepos, "<");
line = line[found.index + "<".len ..];
},
5 => { // >
append(&linebuf, &linepos, ">");
line = line[found.index + ">".len ..];
},
// NOTE(review): `vals[7]` ("]#") has no arm in this switch, so finding it
// reaches `else => unreachable`. This arm also skips `"]#".len` bytes
// although it matched "#[" (both are 2 bytes long) - confirm against upstream.
6 => { // #[
append(&linebuf, &linepos, "");
line = line[found.index + "]#".len ..];
},
8 => { // @@img[src][alt]
const src_begin = found.index + 6;
const src_end = std.mem.indexOfPos(u8, line, src_begin, "][").?;
const alt_begin = src_end + 2;
const alt_end = std.mem.indexOfPos(u8, line, alt_begin, "]").?;
const src = line[src_begin..src_end];
const alt = line[alt_begin..alt_end];
append(&linebuf, &linepos, "
");
line = line[alt_end + "]".len ..];
},
9 => { // ##[ - only output if show_hidden_comments passed.
const hc_begin = found.index + 3;
const hc_end = std.mem.indexOfPos(u8, line, hc_begin, "]##") orelse @panic("not yet implemented support for multiline hidden comments");
if (opts.show_hidden_comments) {
const hc = line[hc_begin..hc_end];
append(&linebuf, &linepos, hc);
}
line = line[hc_end + "]##".len ..];
},
10 => { // ~[attr][text] for spans with attributes
// These constants describe indexes into line.
const attr_begin = found.index + 2;
const attr_end = std.mem.indexOfPos(u8, line, attr_begin, "][") orelse @panic("could not find ][ to terminate the description of attr span");
const text_begin = attr_end + 2;
const text_end = std.mem.indexOfPos(u8, line, text_begin, "]") orelse @panic("could not find ] to terminate text of attr span");
const attr = line[attr_begin..attr_end];
const text = line[text_begin..text_end];
append(&linebuf, &linepos, "");
append(&linebuf, &linepos, text);
append(&linebuf, &linepos, "");
line = line[text_end + "]".len ..];
},
else => unreachable,
}
}
}
if (opts.debug) {
std.debug.print("sizeofHtml :: return = {any}\n", .{pos});
}
return pos;
}
/// Measure how many bytes rendering with `opts` needs, without writing anything.
///
/// Runs `htmlBackingFn` with no destination buffer and a callback that only
/// advances the position by the length of the data it is handed.
fn sizeofHtml(self: Kmd, opts: KmdHtmlOptions) usize {
    const Counter = struct {
        fn advance(_: []u8, cursor: *usize, bytes: []const u8) void {
            cursor.* += bytes.len;
        }
    };
    return self.htmlBackingFn(opts, Counter.advance, null);
}
/// Render the contents to HTML in a freshly allocated buffer.
///
/// `req_capacity_if_known` lets the caller supply the buffer size if it is
/// already known; with `null` it is computed via `sizeofHtml` first.
/// The returned slice has exactly that capacity as its length and is owned
/// by the caller, who must free it with `allocator`.
/// Fails only if the allocation fails.
pub fn html(self: Kmd, allocator: Allocator, opts: KmdHtmlOptions, req_capacity_if_known: ?usize) ![]const u8 {
    const Copier = struct {
        fn write(dest: []u8, cursor: *usize, bytes: []const u8) void {
            std.mem.copyForwards(u8, dest[cursor.*..], bytes);
            cursor.* += bytes.len;
        }
    };
    const capacity = if (req_capacity_if_known) |known| known else self.sizeofHtml(opts);
    var out: []u8 = try allocator.alloc(u8, capacity);
    _ = self.htmlBackingFn(opts, Copier.write, &out);
    return out;
}
};
// End-to-end test of `Kmd.init` + `Kmd.html`: renders two documents and
// compares the output with the expected text, once without and once with
// `br_after_newline`.
//
// NOTE(review): both `html` calls pass an options literal that sets only
// `br_after_newline`; that compiles only if the remaining `KmdHtmlOptions`
// fields have default values.
// NOTE(review): `init` consumes the first three lines as title, date and tags,
// but `s1`/`s2` have only two header lines before the body, while `r1`/`r2`
// expect the third line to be rendered - confirm the fixtures match `init`.
// NOTE(review): the expected strings `r1`/`r2` contain no markup in this copy
// of the file; they look like markup was lost - confirm against upstream.
test "kmd" {
const allocator = std.testing.allocator;
var foo: Kmd = undefined;
// Input covering paragraphs, inline formatting, a hyperlink and both list kinds.
const s1 =
\\A good title
\\2024-10-31
\\This is a paragraph.
\\
\\This is another: it *has* _formatting_ and @@[hyperlinks][https://example.com].
\\In fact, it even spans multiple lines!
\\
\\A list would be like this:
\\- I like big butts
\\- and I cannot lie.
\\
\\~ Entry eins
\\~ Entry zwei
\\
\\Yayy!
\\
\\Bye.
\\
;
// Expected rendering of `s1`.
const r1 =
\\
\\This is a paragraph.
\\
\\
\\This is another: it has formatting and hyperlinks.
\\In fact, it even spans multiple lines!
\\
\\
\\A list would be like this:
\\
\\
\\-
\\I like big butts
\\
\\-
\\and I cannot lie.
\\
\\
\\
\\-
\\Entry eins
\\
\\-
\\Entry zwei
\\
\\
\\
\\Yayy!
\\
\\
\\Bye.
\\
\\
;
foo.init(s1);
const res1 = try foo.html(allocator, .{ .br_after_newline = false }, null);
defer allocator.free(res1);
try std.testing.expectEqualStrings(r1, res1);
// Test br_after_newline
const s2 =
\\My poetry is amazing
\\2024-11-01
\\I am such a good poet
\\Truly fantastic
\\So great
\\Lol xD
;
// Expected rendering of `s2` with a line break after every line.
const r2 =
\\
\\I am such a good poet
\\Truly fantastic
\\So great
\\Lol xD
\\
\\
;
foo.init(s2);
const res2 = try foo.html(allocator, .{ .br_after_newline = true }, null);
defer allocator.free(res2);
try std.testing.expectEqualStrings(r2, res2);
}