summary | refs | log | tree | commit | diff
path: root/source/entry.d
diff options
context:
space:
mode:
Diffstat (limited to 'source/entry.d')
-rw-r--r-- source/entry.d 145
1 files changed, 145 insertions, 0 deletions
diff --git a/source/entry.d b/source/entry.d
new file mode 100644
index 0000000..01d4e3d
--- /dev/null
+++ b/source/entry.d
@@ -0,0 +1,145 @@
+/**
+ * A single feed entry.
+ *
+ * The public fields hold the entry's title, link, publication date, guid
+ * and body text. The constructor additionally derives a numeric timestamp
+ * from pubDate so that entries can be ordered without re-parsing the date.
+ */
+struct Entry
+{
+    string title;
+    string link;
+    string pubDate;     // Parsed by the constructor as "%Y-%m-%d %H:%M".
+    string guid;
+    string description; // The actual entry itself.
+
+    /**
+     * Params:
+     *   title   = entry title
+     *   link    = entry link
+     *   pubDate = publication date, expected to match "%Y-%m-%d %H:%M"
+     *   guid    = entry identifier
+     *   text    = entry body; stored in `description`
+     */
+    this(string title, string link, string pubDate, string guid, string text)
+    {
+        this.title = title;
+        this.link = link;
+        this.pubDate = pubDate; // Field is spelled `pubDate`, not `pubdate`.
+        this.guid = guid;
+        this.description = text;
+        pubDateAsUnixTimestamp = unix_timestamp_of_date(pubDate, "%Y-%m-%d %H:%M");
+    }
+
+private:
+    int pubDateAsUnixTimestamp; // So that we can sort easily.
+}
+
+/**
+ * Converts a textual date into seconds since the Unix epoch.
+ *
+ * The date is parsed with POSIX strptime and interpreted as UTC, so the
+ * result does not depend on the local time zone of the machine.
+ *
+ * Params:
+ *   date   = the date text, e.g. "2021-03-04 12:30"
+ *   format = a strptime format describing `date`, e.g. "%Y-%m-%d %H:%M"
+ *
+ * Returns: seconds since 1970-01-01 00:00 UTC.
+ *
+ * Throws: Exception if `date` does not fully match `format`, or if the
+ *         result does not fit in an int.
+ */
+int unix_timestamp_of_date(string date, string format)
+{
+    import core.stdc.time : tm;
+    import core.sys.posix.time : strptime, timegm;
+    import std.string : toStringz;
+
+    tm parsed; // Default-initialised, so fields the format omits stay zero.
+
+    // strptime returns a pointer just past the last character it consumed,
+    // or null on mismatch; anything left over means `date` had extra text.
+    const(char)* rest = strptime(date.toStringz, format.toStringz, &parsed);
+    if (rest is null || *rest != '\0')
+        throw new Exception("Cannot parse date '" ~ date
+                ~ "' using format '" ~ format ~ "'");
+
+    const seconds = timegm(&parsed);
+    if (seconds < int.min || seconds > int.max)
+        throw new Exception("Date '" ~ date ~ "' does not fit in an int timestamp");
+
+    return cast(int) seconds;
+}
+
+// The function was first introduced under this misspelled name; keep it
+// resolvable for any code that still uses the old spelling.
+alias unix_timstamp_of_date = unix_timestamp_of_date;
+
+
+/**
+ * Builds the Entry for the file described by dirEntry.
+ *
+ * The file is read in full, then each attribute is resolved by one of the
+ * nested helpers below: rssmeta is consulted first, and the file text is
+ * used as a fallback. If any attribute ends up null, an error message is
+ * printed and the program exits with status -1.
+ *
+ * Params:
+ *   dirEntry = the file to turn into an entry; dirEntry.name is the path read
+ *   rssmeta  = per-file overrides, looked up by dirEntry.name
+ *
+ * Returns: the populated Entry.
+ */
+Entry make_entry(DirEntry dirEntry, RssMeta rssmeta)
+{
+    // From the DirEntry, we can get all file info, and also read the full file
+    // since we know its path. We just need to remember, when we read the
+    // entry, to only read between the <body> and </body> tags, whilst
+    // searching the full file for content, and replace all bad characters with
+    // escaped ones.
+    // Or, if <body> does not work, we could try between some comments.
+    // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria.
+
+    // COMPLETE
+    string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // Title is either:
+        // 1) rssmeta:<title>
+        // 2) <!-- rss-title: TITLE --> comment in text.
+        // 3) <title>TITLE</title> from text.
+
+        string title = rssmeta.title_of(dirEntry.name);
+        if (title !is null)
+            return title;
+
+        title = find_rss_comment("title", text);
+        if (title !is null)
+            return title;
+
+        return find_html_title(text); // even if it's null.
+    }
+
+    // COMPLETE
+    string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // Link is either:
+        // 1) rssmeta:<link>
+        // 2) <!-- rss-link: link --> comment in text.
+
+        string link = rssmeta.link_of(dirEntry.name);
+        if (link !is null)
+            return link;
+
+        return find_rss_comment("link", text); // even if it's null.
+    }
+
+    // Output is formatted as ISO-8601.
+    string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // The date must be formatted YYYY-mm-dd HH:MM.
+        // pubDate is either:
+        // 1) rssmeta:<pubDate>
+        // 2) <!-- rss-pubDate: DATE --> comment in text.
+        // 3) the Unix timestamp of the file.
+
+        // Must query the pubDate override, not the title.
+        // NOTE(review): the accessor name pubDate_of is assumed by analogy
+        // with title_of / link_of / guid_of -- confirm against RssMeta.
+        string pubDate = rssmeta.pubDate_of(dirEntry.name);
+        if (pubDate !is null)
+            return pubDate;
+
+        pubDate = find_rss_comment("pubDate", text);
+        if (pubDate !is null)
+            return pubDate;
+
+        // TODO: add in getting date from the dirEntry.time.
+
+        return pubDate; // even if it's null.
+    }
+
+    // COMPLETE
+    string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta)
+    {
+        // guid is either:
+        // 1) rssmeta:<guid>
+        // 2) <!-- rss-guid: guid --> comment in text.
+        // 3) delegated to get_link as a last resort.
+
+        string guid = rssmeta.guid_of(dirEntry.name);
+        if (guid !is null)
+            return guid;
+
+        guid = find_rss_comment("guid", text);
+        if (guid !is null)
+            return guid;
+
+        return get_link(dirEntry, text, rssmeta);
+    }
+
+    // Reports which attribute could not be resolved and terminates.
+    void fail(string what)
+    {
+        writeln("Error occurred when retrieving attribute ", what, " in make_entry");
+        exit(-1);
+    }
+
+
+    // Validate the text before handing it to the helpers that search it.
+    string text = readText(dirEntry.name);
+    if (text is null)
+        fail("text");
+
+    string title = get_title(dirEntry, text, rssmeta);
+    if (title is null)
+        fail("title");
+
+    string link = get_link(dirEntry, text, rssmeta);
+    if (link is null)
+        fail("link");
+
+    string pubDate = get_pubdate(dirEntry, text, rssmeta);
+    if (pubDate is null)
+        fail("pubDate");
+
+    string guid = get_guid(dirEntry, text, rssmeta);
+    if (guid is null)
+        fail("guid");
+
+    // Argument order follows Entry's constructor:
+    // (title, link, pubDate, guid, text).
+    return Entry(title, link, pubDate, guid, text);
+}