summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author George Abbott <george@gabbott.dev> 2023-10-09 18:11:41 +0100
committer George Abbott <george@gabbott.dev> 2023-10-09 18:11:41 +0100
commit c91061498ca9137c0bb2e65765ab15c1439941fd (patch)
tree f42b6377f3b8488051836cb5d891e09237860734
parent 08cec360e16615b1ae157e4926e7317d16765f7e (diff)
Another commit with some more progress
-rw-r--r-- source/app.d 199
-rw-r--r-- source/entry.d 145
-rw-r--r-- source/rssmeta.d 36
-rw-r--r-- source/sort.d 50
4 files changed, 291 insertions, 139 deletions
diff --git a/source/app.d b/source/app.d
index 97630d2..cb57d19 100644
--- a/source/app.d
+++ b/source/app.d
@@ -63,6 +63,8 @@ import core.stdc.stdlib : exit;
import html;
import rssmeta;
+import entry;
+import sort;
void usage()
{
@@ -85,141 +87,30 @@ void usage()
exit(0);
}
-
-
-// TODO: make ctor so that pubDateAsUnixTimestamp gets populated.
-struct Entry
-{
- string title;
- string link;
- string pubDate;
- string guid;
- string description; // The actual entry itself.
-
-private:
- int pubDateAsUnixTimestamp; // So that we can sort easily.
-}
-
-
-Entry make_entry(DirEntry dirEntry, RssMeta rssmeta)
+// TODO: RssInfo. We need the struct, with the relevant information, and where
+// do we get it from?
+void create_rss(Entry[] entries, RssInfo info)
{
- // From the DirEntry, we can get all file info, and also read the full file
- // since we know its path. We just need to remember, when we read the
- // entry, to only read between the <body> and </body> tags, whilst
- // searching the full file for content, and replace all bad characters with
- // escaped ones.
- // Or, if <body> does not work, we could try between some comments.
- // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria.
- // COMPLETE
- string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta)
- {
- // Title is either:
- // 1) rssmeta:<title>
- // 2) <!-- rss-title: TITLE --> comment in text.
- // 3) <title>TITLE</title> from text.
-
- string title = rssmeta.title_of(dirEntry.name);
- if (title !is null)
- return title;
-
- title = find_rss_comment("title", text);
- if (title !is null)
- return title;
-
- title = find_html_title(text);
- return title; // even if it's null.
- }
-
- // COMPLETE
- string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta)
- {
- // Link is either:
- // 1) rssmeta:<link>
- // 2) <!-- rss-link: link --> comment in text.
-
- string link = rssmeta.link_of(dirEntry.name);
- if (link !is null)
- return link;
-
- link = find_rss_comment("link", text);
- return link;
- }
-
- // Output is formatted as ISO-8601.
- string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta)
- {
- // The date must be formatted YYYY-mm-dd HH:MM.
- // pubDate is either:
- // 1) rssmeta:<pubDate>
- // 2) <!-- rss-pubDate: TITLE --> comment in text.
- // 3) the Unix timestamp of the file.
-
- string pubDate = rssmeta.title_of(dirEntry.name);
- if (pubDate !is null)
- return pubDate;
-
- pubDate = find_rss_comment("pubDate", text);
- if (pubDate !is null)
- return pubDate;
-
- // TODO: add in getting date from the dirEntry.time.
-
- return pubDate; // even if it's null.
- }
-
- // COMPLETE
- string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta)
- {
- // guid is either:
- // 1) rssmeta:<guid>
- // 2) <!-- rss-guid: guid --> comment in text.
- // 3) delegated to get_link as a last resort.
-
- string guid = rssmeta.guid_of(dirEntry.name);
- if (guid !is null)
- return guid;
-
- guid = find_rss_comment("guid", text);
- if (guid !is null)
- return guid;
-
- return get_link(dirEntry, text, rssmeta);
- }
-
- void fail(string what)
+    // Assemble the channel header. The quotes inside the XML attributes are
+    // escaped so that each piece stays a single D string literal.
+    // NOTE(review): the info.* values are concatenated as-is, without XML
+    // escaping.
+    // NOTE(review): the enclosing function is declared void and never
+    // returns `ret`, although main assigns the result of create_rss.
+    string ret;
+    ret =
+        "<rss version=\"2.0\">\n" ~
+        "\t\t<channel>\n" ~
+        "\t\t<title>" ~ info.title ~ "</title>\n" ~
+        "\t\t<description>" ~ info.description ~ "</description>\n" ~
+        "\t\t<language>" ~ info.language ~ "</language>\n" ~
+        "\t\t<link href=\"" ~ info.link ~ "\" rel=\"self\" type=\"application/rss+xml\"/>";
+
+    // Visit every entry in the order given; nothing is emitted per entry yet.
+    foreach (item; entries)
{
- writeln("Error occurred when retrieving attribute ", what, " in make_entry");
- exit(-1);
+        // TODO: add in the rest of the entry RSS info.
}
-
- string text = readText(dirEntry.name);
- string title = get_title(dirEntry, text, rssmeta);
- string link = get_link(dirEntry, text, rssmeta);
- string pubDate = get_pubdate(dirEntry, text, rssmeta);
- string guid = get_guid(dirEntry, text, rssmeta);
-
- if (text is null)
- fail("text");
-
- if (title is null)
- fail("title");
-
- if (link is null)
- fail("link");
-
- if (pubDate is null)
- fail("pubDate");
-
- if (guid is null)
- fail("guid");
-
- Entry ret = Entry(text, title, link, pubDate, guid);
- return ret;
-
-
+ ret ~= "\n\t</channel>";
+ ret ~= "\n</rss>";
}
+
// Flags which are passed to -i (--ignore).
immutable FILE_WO_ENTRY = "file-wo-entry";
immutable ENTRY_WO_FILE = "entry-wo-file";
@@ -230,11 +121,12 @@ int main(string[] args)
/* Flags */
string metafile;
bool entries_set = false;
- string entries_dir;
- string target;
+ string entries_dir = null;
+ string target = null;
string[] ignore;
bool recursive = false;
bool follow_symlinks = false;
+ bool sort_mode = null;
for (int i = 0; i < args.length; ++i)
{
@@ -248,7 +140,30 @@ int main(string[] args)
else if (args[i] == "-f" || args[i] == "--follow")
follow_symlinks = true;
else if (args[i] == "-o" || args[i] == "--order")
- {}
+ {
+ auto param = args[++i];
+ switch (param)
+ {
+ case "title-alphabetic":
+ sort_mode = SortMode.TitleAlphabetic;
+ break;
+ case "date-reverse":
+ case "date_reverse":
+ case "reverse":
+ sort_mode = SortMode.DateReverse;
+ break;
+ case "date-forward": case "date_forward":
+ case "date-forwards": case "date_forwards":
+ case "forward": case "forwards":
+ sort_mode = SortMode.DateForward;
+ break;
+ default:
+ writeln("Invalid sort mode inputted - valid are" ~
+ " title-alphabetic, reverse, forward");
+ exit(1);
+
+ }
+ }
else if (args[i] == "-i" || args[i] == "--ignore")
{
auto param = args[++i];
@@ -273,6 +188,17 @@ int main(string[] args)
}
}
+ if (entries_dir == null)
+ {
+ writeln("Please provide a directory containing entries: see --help");
+ exit(1);
+ }
+ if (target == null)
+ {
+ writeln("Please provide a target rss.xml file; see --help");
+ exit(1);
+ }
+
// Get RssMeta.
// TODO: also make it so it can get the metafile from the current dir
// and rssmeta.xml xor rssmeta.json files.
@@ -298,6 +224,13 @@ int main(string[] args)
entries ~= make_entry(dirEntry, rssmeta);
}
+ // Sort entries.
+ entries2 = sort(entries, sort_mode);
+
+ // Iterate over the sorted entries and create the RSS file contents from
+ // them.
+ const result = create_rss(entries2);
+
// Iterate over all entries in the order as described by -o flag, and
// create the entry for them. This gets spat back out as a big string.
@@ -307,7 +240,5 @@ int main(string[] args)
// Finally, write back the full RSS XML into the file specified by $2.
-
-
return 0;
}
diff --git a/source/entry.d b/source/entry.d
new file mode 100644
index 0000000..01d4e3d
--- /dev/null
+++ b/source/entry.d
@@ -0,0 +1,145 @@
+// A single RSS item built from one entry file.
+struct Entry
+{
+    string title;
+    string link;
+    string pubDate;     // Parsed by the constructor as "%Y-%m-%d %H:%M".
+    string guid;
+    string description; // The actual entry itself.
+
+    // Fills in every field and derives pubDateAsUnixTimestamp from pubDate.
+    // Note the parameter order: the entry text comes last and is stored in
+    // `description`.
+    this(string title, string link, string pubDate, string guid, string text)
+    {
+        this.title = title;
+        this.link = link;
+        this.pubDate = pubDate;
+        this.guid = guid;
+        this.description = text;
+        pubDateAsUnixTimestamp = unix_timestamp_of_date(pubDate, "%Y-%m-%d %H:%M");
+    }
+
+private:
+    int pubDateAsUnixTimestamp; // So that we can sort easily.
+}
+
+// Converts a date string to seconds since the Unix epoch.
+//
+// Only the layout "%Y-%m-%d %H:%M" (for example "2023-10-09 18:11") is
+// understood; any other `format` is rejected rather than silently misparsed.
+// The date is interpreted as UTC, which is enough for ordering entries
+// relative to one another.
+//
+// Throws: Exception if `format` is unsupported, if `date` does not match the
+//         layout, or if the fields do not form a valid calendar date.
+int unix_timestamp_of_date(string date, string format)
+{
+    import std.datetime : DateTime, SysTime, UTC;
+    import std.exception : enforce;
+    import std.format : formattedRead;
+
+    enforce(format == "%Y-%m-%d %H:%M",
+            "unix_timestamp_of_date: unsupported format '" ~ format ~ "'");
+
+    int year, month, day, hour, minute;
+    auto rest = date; // formattedRead consumes the range it is given.
+    const filled = formattedRead(rest, "%d-%d-%d %d:%d",
+                                 year, month, day, hour, minute);
+    enforce(filled == 5 && rest.length == 0,
+            "unix_timestamp_of_date: cannot parse '" ~ date ~ "'");
+
+    return SysTime(DateTime(year, month, day, hour, minute), UTC())
+        .toUnixTime!int();
+}
+
+// The function was first introduced under this misspelt name; keep that
+// spelling resolvable for any code still using it.
+alias unix_timstamp_of_date = unix_timestamp_of_date;
+
+
+import core.stdc.stdlib : exit;
+import std.file : DirEntry, readText;
+import std.stdio : writeln;
+
+import html;    // NOTE(review): presumably provides find_rss_comment and find_html_title -- confirm.
+import rssmeta; // RssMeta
+
+// Builds the Entry for one entry file.
+//
+// Each attribute is looked up in `rssmeta` first and then in the file text
+// (see the nested helpers for the exact fallback order). If the text or any
+// attribute cannot be determined, an error is printed and the process exits
+// with status -1.
+Entry make_entry(DirEntry dirEntry, RssMeta rssmeta)
+{
+    // From the DirEntry, we can get all file info, and also read the full file
+    // since we know its path. We just need to remember, when we read the
+    // entry, to only read between the <body> and </body> tags, whilst
+    // searching the full file for content, and replace all bad characters with
+    // escaped ones.
+    // Or, if <body> does not work, we could try between some comments.
+    // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria.
+
+    // COMPLETE
+    string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // Title is either:
+        // 1) rssmeta:<title>
+        // 2) <!-- rss-title: TITLE --> comment in text.
+        // 3) <title>TITLE</title> from text.
+
+        string title = rssmeta.title_of(dirEntry.name);
+        if (title !is null)
+            return title;
+
+        title = find_rss_comment("title", text);
+        if (title !is null)
+            return title;
+
+        return find_html_title(text); // even if it's null.
+    }
+
+    // COMPLETE
+    string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // Link is either:
+        // 1) rssmeta:<link>
+        // 2) <!-- rss-link: link --> comment in text.
+
+        string link = rssmeta.link_of(dirEntry.name);
+        if (link !is null)
+            return link;
+
+        return find_rss_comment("link", text); // even if it's null.
+    }
+
+    // The date must be formatted YYYY-mm-dd HH:MM, which is what the Entry
+    // constructor parses.
+    string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // pubDate is either:
+        // 1) rssmeta:<pubDate>
+        // 2) <!-- rss-pubDate: DATE --> comment in text.
+        // 3) the Unix timestamp of the file (not implemented yet).
+
+        // Ask rssmeta for the publication date, not the title.
+        string pubDate = rssmeta.pubdate_of(dirEntry.name);
+        if (pubDate !is null)
+            return pubDate;
+
+        pubDate = find_rss_comment("pubDate", text);
+        if (pubDate !is null)
+            return pubDate;
+
+        // TODO: add in getting date from the dirEntry.time.
+
+        return pubDate; // even if it's null.
+    }
+
+    // COMPLETE
+    string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // guid is either:
+        // 1) rssmeta:<guid>
+        // 2) <!-- rss-guid: guid --> comment in text.
+        // 3) delegated to get_link as a last resort.
+
+        string guid = rssmeta.guid_of(dirEntry.name);
+        if (guid !is null)
+            return guid;
+
+        guid = find_rss_comment("guid", text);
+        if (guid !is null)
+            return guid;
+
+        return get_link(dirEntry, text, rssmeta);
+    }
+
+    // Reports which attribute could not be determined and terminates.
+    void fail(string what)
+    {
+        writeln("Error occurred when retrieving attribute ", what, " in make_entry");
+        exit(-1);
+    }
+
+    // Check the text before handing it to any of the lookups.
+    string text = readText(dirEntry.name);
+    if (text is null)
+        fail("text");
+
+    string title = get_title(dirEntry, text, rssmeta);
+    if (title is null)
+        fail("title");
+
+    string link = get_link(dirEntry, text, rssmeta);
+    if (link is null)
+        fail("link");
+
+    string pubDate = get_pubdate(dirEntry, text, rssmeta);
+    if (pubDate is null)
+        fail("pubDate");
+
+    string guid = get_guid(dirEntry, text, rssmeta);
+    if (guid is null)
+        fail("guid");
+
+    // Argument order follows the Entry constructor: the text goes last.
+    return Entry(title, link, pubDate, guid, text);
+}
diff --git a/source/rssmeta.d b/source/rssmeta.d
index 6fec5b4..864503c 100644
--- a/source/rssmeta.d
+++ b/source/rssmeta.d
@@ -1,13 +1,39 @@
// Represents the RssMeta struct, containing the metadata with some fns to
// access it easily.
+import dxml;
+import entry;
+
struct RssMeta
{
- // TODO: add the magic.
+    // TODO
+    // The RssMeta should be a lookup table: we have a key (the filename given
+    // in rssmeta.xml, etc.; and we have a set of values, e.g. the title, link,
+    // etc. values which form the metadata.
+    string metafile;
+    // NOTE(review): this maps Entry -> string, whereas the description above
+    // reads like filename -> metadata (i.e. Entry[string]) -- confirm before
+    // anything starts using it.
+    string[Entry] lookup;
+
+    // Remembers the metafile path and checks that its extension is one of
+    // the two recognised ones. Nothing is parsed yet. With any other
+    // extension a message is printed and the process exits with status 1.
+    this(string metafile)
+    {
+        import core.stdc.stdlib : exit;
+        import std.algorithm.searching : endsWith;
+        import std.stdio : writeln;
+
+        this.metafile = metafile;
+        if (metafile.endsWith(".json")) {}
+        else if (metafile.endsWith(".xml")) {}
+        else
+        {
+            writeln("Cannot deduce whether metafile is json or xml. Please" ~
+                    " ensure it is called either rssmeta.xml or rssmeta.json.");
+            exit(1);
+        }
+    }
// filename: either just name or fully qualified should work.
- string title_of(string filename) const;
- string link_of(string filename) const;
- string pubdate_of(string filename) const;
- string guid_of(string filename) const;
+    // Until the lookup table exists, every accessor returns null ("no
+    // metadata for this file"). Callers test the result with `!is null` and
+    // fall back to other sources; an empty string would pass that test.
+    string title_of(string filename) const
+    {
+        // TODO: we need to sort out the internal magic of RssMeta first, then
+        // we can just do a lookup on the filename.
+        return null;
+    }
+    string link_of(string filename) const { return null; }
+    string pubdate_of(string filename) const { return null; }
+    string guid_of(string filename) const { return null; }
}
diff --git a/source/sort.d b/source/sort.d
new file mode 100644
index 0000000..9baa667
--- /dev/null
+++ b/source/sort.d
@@ -0,0 +1,50 @@
+/* For sorting the entries as per the -o flag.
+ * This allows a list of entries to be sorted,
+ * e.g. title-alphabetic, date-reverse, date-forward.
+ */
+
+// The orderings that can be selected with the -o flag.
+enum SortMode
+{
+    /* For instance:
+     *   TitleAA, TitleZZ, TitleFF
+     * becomes
+     *   TitleAA, TitleFF, TitleZZ
+     */
+    TitleAlphabetic,
+
+    /* For instance:
+     *   2023-10-01, 2023-10-05, 2023-10-03
+     * becomes
+     *   2023-10-05, 2023-10-03, 2023-10-01
+     */
+    DateReverse,
+
+    /* For instance:
+     *   2023-10-01, 2023-10-05, 2023-10-03
+     * becomes
+     *   2023-10-01, 2023-10-03, 2023-10-05
+     */
+    DateForwards,
+
+    /* Same value as DateForwards; both spellings are in use. */
+    DateForward = DateForwards,
+}
+
+import entry : Entry;
+
+// Returns a copy of `entries` ordered by `less`; the caller's array is left
+// untouched.
+private Entry[] sorted_copy(alias less)(Entry[] entries)
+{
+    // Renamed on import because this module defines its own `sort`.
+    import std.algorithm.sorting : phobos_sort = sort;
+
+    auto result = entries.dup;
+    phobos_sort!less(result);
+    return result;
+}
+
+// Ascending by title, compared code unit by code unit (so case-sensitive).
+Entry[] sort_title_alphabetic(Entry[] entries)
+{
+    return sorted_copy!((a, b) => a.title < b.title)(entries);
+}
+
+// The two date orderings compare the pubDate strings directly: Entry's
+// numeric timestamp is private to its module, and a "YYYY-mm-dd HH:MM" string
+// orders the same way textually as chronologically.
+// NOTE(review): this assumes every field of pubDate is zero-padded -- confirm.
+
+// Newest first.
+Entry[] sort_date_reverse(Entry[] entries)
+{
+    return sorted_copy!((a, b) => a.pubDate > b.pubDate)(entries);
+}
+
+// Oldest first.
+Entry[] sort_date_forward(Entry[] entries)
+{
+    return sorted_copy!((a, b) => a.pubDate < b.pubDate)(entries);
+}
+
+
+// Returns `entries` ordered as requested by `sort_mode`, delegating to the
+// matching sort_* routine.
+// NOTE(review): this function shares its name with its module (sort.d) and
+// with std.algorithm's sort; an unqualified call from an importing module
+// may not resolve to it -- confirm, and consider qualifying calls.
+Entry[] sort(Entry[] entries, SortMode sort_mode)
+{
+    switch (sort_mode)
+    {
+        case SortMode.TitleAlphabetic:
+            return sort_title_alphabetic(entries);
+        case SortMode.DateReverse:
+            return sort_date_reverse(entries);
+        case SortMode.DateForwards:
+            return sort_date_forward(entries);
+        default:
+            // A value outside the enum was passed; there is no ordering to apply.
+            assert(0, "sort: unhandled SortMode");
+    }
+}
+
+
+