diff options
author | George Abbott <george@gabbott.dev> | 2023-10-09 18:11:41 +0100 |
---|---|---|
committer | George Abbott <george@gabbott.dev> | 2023-10-09 18:11:41 +0100 |
commit | c91061498ca9137c0bb2e65765ab15c1439941fd (patch) | |
tree | f42b6377f3b8488051836cb5d891e09237860734 | |
parent | 08cec360e16615b1ae157e4926e7317d16765f7e (diff) |
Another commit with some more progress
-rw-r--r-- | source/app.d | 199 | ||||
-rw-r--r-- | source/entry.d | 145 | ||||
-rw-r--r-- | source/rssmeta.d | 36 | ||||
-rw-r--r-- | source/sort.d | 50 |
4 files changed, 291 insertions, 139 deletions
diff --git a/source/app.d b/source/app.d index 97630d2..cb57d19 100644 --- a/source/app.d +++ b/source/app.d @@ -63,6 +63,8 @@ import core.stdc.stdlib : exit; import html; import rssmeta; +import entry; +import sort; void usage() { @@ -85,141 +87,30 @@ void usage() exit(0); } - - -// TODO: make ctor so that pubDateAsUnixTimestamp gets populated. -struct Entry -{ - string title; - string link; - string pubDate; - string guid; - string description; // The actual entry itself. - -private: - int pubDateAsUnixTimestamp; // So that we can sort easily. -} - - -Entry make_entry(DirEntry dirEntry, RssMeta rssmeta) +// TODO: RssInfo. We need the struct, with the relevant information, and where +// do we get it from? +void create_rss(Entry[] entries, RssInfo info) { - // From the DirEntry, we can get all file info, and also read the full file - // since we know its path. We just need to remember, when we read the - // entry, to only read between the <body> and </body> tags, whilst - // searching the full file for content, and replace all bad characters with - // escaped ones. - // Or, if <body> does not work, we could try between some comments. - // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria. - // COMPLETE - string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta) - { - // Title is either: - // 1) rssmeta:<title> - // 2) <!-- rss-title: TITLE --> comment in text. - // 3) <title>TITLE</title> from text. - - string title = rssmeta.title_of(dirEntry.name); - if (title !is null) - return title; - - title = find_rss_comment("title", text); - if (title !is null) - return title; - - title = find_html_title(text); - return title; // even if it's null. - } - - // COMPLETE - string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta) - { - // Link is either: - // 1) rssmeta:<link> - // 2) <!-- rss-link: link --> comment in text. - - string link = rssmeta.link_of(dirEntry.name); - if (link !is null) - return link; - - link = find_rss_comment("link", text); - return link; - } - - // Output is formatted as ISO-8601. - string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta) - { - // The date must be formatted YYYY-mm-dd HH:MM. - // pubDate is either: - // 1) rssmeta:<pubDate> - // 2) <!-- rss-pubDate: TITLE --> comment in text. - // 3) the Unix timestamp of the file. - - string pubDate = rssmeta.title_of(dirEntry.name); - if (pubDate !is null) - return pubDate; - - pubDate = find_rss_comment("pubDate", text); - if (pubDate !is null) - return pubDate; - - // TODO: add in getting date from the dirEntry.time. - - return pubDate; // even if it's null. - } - - // COMPLETE - string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta) - { - // guid is either: - // 1) rssmeta:<guid> - // 2) <!-- rss-guid: guid --> comment in text. - // 3) delegated to get_link as a last resort. - - string guid = rssmeta.guid_of(dirEntry.name); - if (guid !is null) - return guid; - - guid = find_rss_comment("guid", text); - if (guid !is null) - return guid; - - return get_link(dirEntry, text, rssmeta); - } - - void fail(string what) + string ret; + ret = + "<rss version="2.0">\n" ~ + "\t\t<channel>\n" ~ + "\t\t<title>" ~ info.title ~ "</title>\n" ~ + "\t\t<description>" ~ info.description ~ "</description>\n" ~ + "\t\t<language>" ~ info.language ~ "</language>\n" ~ + "\t\t<link href=\"" ~ info.link ~ "\" rel=\"self\" type=\"application/rss+xml\"/>"; + + for (int i = 0; i < entries.length; ++i) { - writeln("Error occurred when retrieving attribute ", what, " in make_entry"); - exit(-1); + // TODO: add in the rest of the entry RSS info. + auto entry = entry[i]; } - - string text = readText(dirEntry.name); - string title = get_title(dirEntry, text, rssmeta); - string link = get_link(dirEntry, text, rssmeta); - string pubDate = get_pubdate(dirEntry, text, rssmeta); - string guid = get_guid(dirEntry, text, rssmeta); - - if (text is null) - fail("text"); - - if (title is null) - fail("title"); - - if (link is null) - fail("link"); - - if (pubDate is null) - fail("pubDate"); - - if (guid is null) - fail("guid"); - - Entry ret = Entry(text, title, link, pubDate, guid); - return ret; - - + ret ~= "\n\t</channel>"; + ret ~= "\n</rss>"; } + // Flags which are passed to -i (--ignore). immutable FILE_WO_ENTRY = "file-wo-entry"; immutable ENTRY_WO_FILE = "entry-wo-file"; @@ -230,11 +121,12 @@ int main(string[] args) /* Flags */ string metafile; bool entries_set = false; - string entries_dir; - string target; + string entries_dir = null; + string target = null; string[] ignore; bool recursive = false; bool follow_symlinks = false; + bool sort_mode = null; for (int i = 0; i < args.length; ++i) { @@ -248,7 +140,30 @@ int main(string[] args) else if (args[i] == "-f" || args[i] == "--follow") follow_symlinks = true; else if (args[i] == "-o" || args[i] == "--order") - {} + { + auto param = args[++i]; + switch (param) + { + case "title-alphabetic": + sort_mode = SortMode.TitleAlphabetic; + break; + case "date-reverse": + case "date_reverse": + case "reverse": + sort_mode = SortMode.DateReverse; + break; + case "date-forward": case "date_forward": + case "date-forwards": case "date_forwards": + case "forward": case "forwards": + sort_mode = SortMode.DateForward; + break; + default: + writeln("Invalid sort mode inputted - valid are" ~ + " title-alphabetic, reverse, forward"); + exit(1); + + } + } else if (args[i] == "-i" || args[i] == "--ignore") { auto param = args[++i]; @@ -273,6 +188,17 @@ int main(string[] args) } } + if (entries_dir == null) + { + writeln("Please provide a directory containing entries: see --help"); + exit(1); + } + if (target == null) + { + writeln("Please provide a target rss.xml file; see --help"); + exit(1); + } + // Get RssMeta. // TODO: also make it so it can get the metafile from the current dir // and rssmeta.xml xor rssmeta.json files. @@ -298,6 +224,13 @@ int main(string[] args) entries ~= make_entry(dirEntry, rssmeta); } + // Sort entries. + entries2 = sort(entries, sort_mode); + + // Iterate over the sorted entries and create the RSS file contents from + // them. + const result = create_rss(entries2); + // Iterate over all entries in the order as described by -o flag, and // create the entry for them. This gets spat back out as a big string. @@ -307,7 +240,5 @@ int main(string[] args) // Finally, write back the full RSS XML into the file specified by $2. - - return 0; } diff --git a/source/entry.d b/source/entry.d new file mode 100644 index 0000000..01d4e3d --- /dev/null +++ b/source/entry.d @@ -0,0 +1,145 @@ +// TODO: make ctor so that pubDateAsUnixTimestamp gets populated. +struct Entry +{ + string title; + string link; + string pubDate; + string guid; + string description; // The actual entry itself. + + this(string title, string link, string pubDate, string guid, string text) + { + this.title = title; + this.link = link; + this.pubdate = pubDate; + this.guid = guid; + this.description = text; + pubDateAsUnixTimestamp = unix_timestamp_of_date(pubDate, "%Y-%m-%d %H:%M"); + } + +private: + int pubDateAsUnixTimestamp; // So that we can sort easily. +} + +int unix_timstamp_of_date(string date, string format) +{ + // TODO: used in Entry ctor. +} + + +Entry make_entry(DirEntry dirEntry, RssMeta rssmeta) +{ + // From the DirEntry, we can get all file info, and also read the full file + // since we know its path. We just need to remember, when we read the + // entry, to only read between the <body> and </body> tags, whilst + // searching the full file for content, and replace all bad characters with + // escaped ones. + // Or, if <body> does not work, we could try between some comments. + // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria. + // COMPLETE + string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // Title is either: + // 1) rssmeta:<title> + // 2) <!-- rss-title: TITLE --> comment in text. + // 3) <title>TITLE</title> from text. + + string title = rssmeta.title_of(dirEntry.name); + if (title !is null) + return title; + + title = find_rss_comment("title", text); + if (title !is null) + return title; + + title = find_html_title(text); + return title; // even if it's null. + } + + // COMPLETE + string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // Link is either: + // 1) rssmeta:<link> + // 2) <!-- rss-link: link --> comment in text. + + string link = rssmeta.link_of(dirEntry.name); + if (link !is null) + return link; + + link = find_rss_comment("link", text); + return link; + } + + // Output is formatted as ISO-8601. + string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // The date must be formatted YYYY-mm-dd HH:MM. + // pubDate is either: + // 1) rssmeta:<pubDate> + // 2) <!-- rss-pubDate: TITLE --> comment in text. + // 3) the Unix timestamp of the file. + + string pubDate = rssmeta.title_of(dirEntry.name); + if (pubDate !is null) + return pubDate; + + pubDate = find_rss_comment("pubDate", text); + if (pubDate !is null) + return pubDate; + + // TODO: add in getting date from the dirEntry.time. + + return pubDate; // even if it's null. + } + + // COMPLETE + string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta) + { + // guid is either: + // 1) rssmeta:<guid> + // 2) <!-- rss-guid: guid --> comment in text. + // 3) delegated to get_link as a last resort. + + string guid = rssmeta.guid_of(dirEntry.name); + if (guid !is null) + return guid; + + guid = find_rss_comment("guid", text); + if (guid !is null) + return guid; + + return get_link(dirEntry, text, rssmeta); + } + + void fail(string what) + { + writeln("Error occurred when retrieving attribute ", what, " in make_entry"); + exit(-1); + } + + + string text = readText(dirEntry.name); + string title = get_title(dirEntry, text, rssmeta); + string link = get_link(dirEntry, text, rssmeta); + string pubDate = get_pubdate(dirEntry, text, rssmeta); + string guid = get_guid(dirEntry, text, rssmeta); + + if (text is null) + fail("text"); + + if (title is null) + fail("title"); + + if (link is null) + fail("link"); + + if (pubDate is null) + fail("pubDate"); + + if (guid is null) + fail("guid"); + + Entry ret = Entry(text, title, link, pubDate, guid); + return ret; +} diff --git a/source/rssmeta.d b/source/rssmeta.d index 6fec5b4..864503c 100644 --- a/source/rssmeta.d +++ b/source/rssmeta.d @@ -1,13 +1,39 @@ // Represents the RssMeta struct, containing the metadata with some fns to // access it easily. +import dxml; +import entry; + struct RssMeta { - // TODO: add the magic. + // TODO + // The RssMeta should be a lookup table: we have a key (the filename given + // in rssmeta.xml, etc.; and we have a set of values, e.g. the title, link, + // etc. values which form the metadata. + string metafile; + string[Entry] lookup; + + this(string metafile) + { + + this.metafile = metafile; + if (metafile[-4:] == ".json") {} + else if (metafile[-4:] == ".xml") {} + else + { + writeln("Cannot deduce whether metafile is json or xml. Please" ~ + " ensure it is called either rssmeta.xml or rssmeta.json."); + exit(1); + } + } // filename: either just name or fully qualified should work. - string title_of(string filename) const; - string link_of(string filename) const; - string pubdate_of(string filename) const; - string guid_of(string filename) const; + string title_of(string filename) const + { + // TODO: we need to sort out the internal magic of RssMeta first, then + // we can just do a lookup on the filename. + } + string link_of(string filename) const { return ""; } + string pubdate_of(string filename) const { return ""; } + string guid_of(string filename) const { return ""; } } diff --git a/source/sort.d b/source/sort.d new file mode 100644 index 0000000..9baa667 --- /dev/null +++ b/source/sort.d @@ -0,0 +1,50 @@ +/* For sorting the entries as per the -o flag. + * This allows a list of entries to be sorted, + * e.g. title-alphabetic, date-reverse, date-forward. + */ + +enum SortMode +{ + /* For instance: + * TitleAA, TitleZZ, TitleFF + * becomes + * TitleAA, TitleFF, TitleZZ + */ + TitleAlphabetic; + + /* For instance: + * 2023-10-01, 2023-10-05, 2023-10-03 + * becomes + * 2023-10-05, 2023-10-03, 2023-10-01 + */ + DateReverse; + + /* For instance: + * 2023-10-01, 2023-10-05, 2023-10-03 + * becomes + * 2023-10-01, 2023-10-03, 2023-10-05 + */ + DateForwards; +} + +// TODO: write sorting functions. +Entry[] sort_title_alphabetic(Entry[] entries) { return entries; } +Entry[] sort_date_reverse(Entry[] entries) { return entries; } +Entry[] sort_date_forward(Entry[] entries) { return entries; } + + +Entry[] sort(Entry[] entries, SortMode sort_mode) +{ + switch (sort_mode) + { + case SortMode.TitleAlphabetic: + return sort_title_alphabetic(entries); + case SortMode.DateReverse: + return sort_date_reverse(entries); + case SortMode.DateForward: + return sort_date_forward(entries); + } +} + + + |