diff options
Diffstat (limited to 'source/app.d')
-rw-r--r-- | source/app.d | 172 |
1 files changed, 147 insertions, 25 deletions
diff --git a/source/app.d b/source/app.d index 3c913c3..97630d2 100644 --- a/source/app.d +++ b/source/app.d @@ -56,22 +56,30 @@ */ +import std.stdio; +import std.algorithm : canFind, filter, findSplitAfter; +import std.file : dirEntries, DirEntry, readText, SpanMode; +import core.stdc.stdlib : exit; + +import html; +import rssmeta; + void usage() { writeln( - "reassess: create an RSS feed out of HTML documents.\n" - "Usage: \n" - "\treassess DIRECTORY TARGET (FLAGS)\n" - "DIRECTORY: The directory where the HTML files are sourced.\n" - "TARGET: The desired target location of the RSS file.\n" - "Flags:\n" - "\t-h, --help Display this help message.\n" - "\t-m, --metafile The path of the metafile if not in DIRECTORY.\n" - "\t-r, --recursive Whether to recurse DIRECTORY or not.\n" - "\t-f, --follow Whether to follow symlinks in DIRECTORY or not.\n" - "\t-o, --order The order to output the RSS entries.\n" - "\t Options: reverse; forward; alphabetic-by-title.\n" - "\t Default to `reverse` if unspecified.\n" + "reassess: create an RSS feed out of HTML documents.\n" ~ + "Usage: \n" ~ + "\treassess DIRECTORY TARGET (FLAGS)\n" ~ + "DIRECTORY: The directory where the HTML files are sourced.\n" ~ + "TARGET: The desired target location of the RSS file.\n" ~ + "Flags:\n" ~ + "\t-h, --help Display this help message.\n" ~ + "\t-m, --metafile The path of the metafile if not in DIRECTORY.\n" ~ + "\t-r, --recursive Whether to recurse DIRECTORY or not.\n" ~ + "\t-f, --follow Whether to follow symlinks in DIRECTORY or not.\n" ~ + "\t-o, --order The order to output the RSS entries.\n" ~ + "\t Options: reverse; forward; alphabetic-by-title.\n" ~ + "\t Default to `reverse` if unspecified.\n" ~ "\t-c, --config Options to configure functionality. Details TODO\n" ); exit(0); @@ -79,6 +87,7 @@ void usage() +// TODO: make ctor so that pubDateAsUnixTimestamp gets populated. struct Entry { string title; @@ -91,7 +100,8 @@ private: int pubDateAsUnixTimestamp; // So that we can sort easily. } -Entry make_entry(DirEntry dirEntry) + +Entry make_entry(DirEntry dirEntry, RssMeta rssmeta) { // From the DirEntry, we can get all file info, and also read the full file // since we know its path. We just need to remember, when we read the @@ -100,14 +110,115 @@ Entry make_entry(DirEntry dirEntry) // escaped ones. // Or, if <body> does not work, we could try between some comments. // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria. -} + // COMPLETE + string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // Title is either: + // 1) rssmeta:<title> + // 2) <!-- rss-title: TITLE --> comment in text. + // 3) <title>TITLE</title> from text. + string title = rssmeta.title_of(dirEntry.name); + if (title !is null) + return title; + title = find_rss_comment("title", text); + if (title !is null) + return title; + title = find_html_title(text); + return title; // even if it's null. + } -import std.stdio; -import std.algorithm : canFind; -import std.file : dirEntries, DirEntry; + // COMPLETE + string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // Link is either: + // 1) rssmeta:<link> + // 2) <!-- rss-link: link --> comment in text. + + string link = rssmeta.link_of(dirEntry.name); + if (link !is null) + return link; + + link = find_rss_comment("link", text); + return link; + } + + // Output is formatted as ISO-8601. + string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // The date must be formatted YYYY-mm-dd HH:MM. + // pubDate is either: + // 1) rssmeta:<pubDate> + // 2) <!-- rss-pubDate: TITLE --> comment in text. + // 3) the Unix timestamp of the file. + + string pubDate = rssmeta.title_of(dirEntry.name); + if (pubDate !is null) + return pubDate; + + pubDate = find_rss_comment("pubDate", text); + if (pubDate !is null) + return pubDate; + + // TODO: add in getting date from the dirEntry.time. + + return pubDate; // even if it's null. + } + + // COMPLETE + string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // guid is either: + // 1) rssmeta:<guid> + // 2) <!-- rss-guid: guid --> comment in text. + // 3) delegated to get_link as a last resort. + + string guid = rssmeta.guid_of(dirEntry.name); + if (guid !is null) + return guid; + + guid = find_rss_comment("guid", text); + if (guid !is null) + return guid; + + return get_link(dirEntry, text, rssmeta); + } + + void fail(string what) + { + writeln("Error occurred when retrieving attribute ", what, " in make_entry"); + exit(-1); + } + + + string text = readText(dirEntry.name); + string title = get_title(dirEntry, text, rssmeta); + string link = get_link(dirEntry, text, rssmeta); + string pubDate = get_pubdate(dirEntry, text, rssmeta); + string guid = get_guid(dirEntry, text, rssmeta); + + if (text is null) + fail("text"); + + if (title is null) + fail("title"); + + if (link is null) + fail("link"); + + if (pubDate is null) + fail("pubDate"); + + if (guid is null) + fail("guid"); + + Entry ret = Entry(text, title, link, pubDate, guid); + return ret; + + +} // Flags which are passed to -i (--ignore). immutable FILE_WO_ENTRY = "file-wo-entry"; @@ -119,7 +230,7 @@ int main(string[] args) /* Flags */ string metafile; bool entries_set = false; - string entries; + string entries_dir; string target; string[] ignore; bool recursive = false; @@ -137,6 +248,7 @@ int main(string[] args) else if (args[i] == "-f" || args[i] == "--follow") follow_symlinks = true; else if (args[i] == "-o" || args[i] == "--order") + {} else if (args[i] == "-i" || args[i] == "--ignore") { auto param = args[++i]; @@ -154,19 +266,25 @@ int main(string[] args) if (!entries_set) { entries_set = true; - entries = metafile[i]; + entries_dir = args[i]; } else - target = metafile[i]; + target = args[i]; } } + // Get RssMeta. + // TODO: also make it so it can get the metafile from the current dir + // and rssmeta.xml xor rssmeta.json files. + RssMeta rssmeta = RssMeta(metafile); + + // Grab list of files in directory. DirEntry[] objects = []; - foreach (DirEntry file; dirEntries(entries, ".html", + foreach (DirEntry file; dirEntries(entries_dir, ".html", recursive? SpanMode.depth : SpanMode.shallow, follow_symlinks) - .filter!(file => f.isFile()) + .filter!(f => f.isFile()) ) { objects ~= file; @@ -177,15 +295,19 @@ int main(string[] args) Entry[] entries; foreach (DirEntry dirEntry; objects) { - entries ~= make_entry(dirEntry); + entries ~= make_entry(dirEntry, rssmeta); } // Iterate over all entries in the order as described by -o flag, and // create the entry for them. This gets spat back out as a big string. - auto entries = create_entries(...); + // auto entries = create_entries(...); // Now, write all the fluff around the entries, and jobs-almost-agoodun. // Finally, write back the full RSS XML into the file specified by $2. + + + + return 0; } |