// source/entry.d
//
// Origin: commit c91061498ca9137c0bb2e65765ab15c1439941fd
//         George Abbott, Mon, 9 Oct 2023 18:11:41 +0100
//         "Another commit with some more progress"
//
// Builds RSS feed entries from files on disk.

import core.stdc.stdlib : exit;
import std.file : DirEntry, readText;
import std.stdio : writeln;

// NOTE(review): RssMeta, find_rss_comment and find_html_title are not defined
// in this file; the module(s) that declare them still need importing here.

/// One item of the RSS feed.
struct Entry
{
    string title;
    string link;
    string pubDate;     // Expected in the form "YYYY-mm-dd HH:MM".
    string guid;
    string description; // The actual entry itself.

    /// Stores every attribute and derives the sortable timestamp from
    /// `pubDate`.
    ///
    /// Throws: whatever unix_timestamp_of_date throws when `pubDate` does not
    /// match "%Y-%m-%d %H:%M".
    this(string title, string link, string pubDate, string guid, string text)
    {
        this.title = title;
        this.link = link;
        this.pubDate = pubDate;
        this.guid = guid;
        this.description = text;
        pubDateAsUnixTimestamp = unix_timestamp_of_date(pubDate, "%Y-%m-%d %H:%M");
    }

private:
    int pubDateAsUnixTimestamp; // So that we can sort easily.
}

/// Converts a textual date into seconds since the Unix epoch.
///
/// `format` is a strptime-like pattern. Only the numeric specifiers %Y, %m,
/// %d, %H, %M and %S (plus %% for a literal percent sign) are understood;
/// every other character of `format` has to appear verbatim in `date`.
/// Numbers are read greedily, so two specifiers must be separated by a
/// literal character. Fields that `format` does not mention default to
/// 1970-01-01 00:00:00.
///
/// The date is interpreted as UTC. The result is only used for ordering
/// entries, so any fixed time zone gives the same ordering.
///
/// Throws: Exception when `date` does not match `format` or `format` uses an
/// unsupported specifier; std.datetime's DateTimeException when a field is out
/// of range (for example month 13).
int unix_timestamp_of_date(string date, string format)
{
    import std.ascii : isDigit;
    import std.conv : to;
    import std.datetime : DateTime, SysTime, UTC;
    import std.exception : enforce;

    int year = 1970;
    int month = 1;
    int day = 1;
    int hour = 0;
    int minute = 0;
    int second = 0;
    size_t pos = 0; // Read position inside `date`.

    // Consumes a run of decimal digits at `pos` and returns its value.
    int read_number()
    {
        immutable start = pos;
        while (pos < date.length && isDigit(date[pos]))
            ++pos;
        enforce(pos > start, "Expected a number in date '" ~ date ~ "'");
        return date[start .. pos].to!int;
    }

    // Consumes exactly the character `c` at `pos`.
    void expect(char c)
    {
        enforce(pos < date.length && date[pos] == c,
                "Date '" ~ date ~ "' does not match format '" ~ format ~ "'");
        ++pos;
    }

    for (size_t i = 0; i < format.length; ++i)
    {
        if (format[i] != '%' || i + 1 == format.length)
        {
            expect(format[i]);
            continue;
        }

        ++i; // Step onto the specifier letter.
        switch (format[i])
        {
            case 'Y': year = read_number(); break;
            case 'm': month = read_number(); break;
            case 'd': day = read_number(); break;
            case 'H': hour = read_number(); break;
            case 'M': minute = read_number(); break;
            case 'S': second = read_number(); break;
            case '%': expect('%'); break;
            default:
                throw new Exception("Unsupported date format specifier %" ~ format[i]);
        }
    }

    enforce(pos == date.length, "Trailing characters in date '" ~ date ~ "'");

    const stamp = SysTime(DateTime(year, month, day, hour, minute, second), UTC());
    return stamp.toUnixTime!int();
}


/// Reads the file behind `dirEntry` and assembles an Entry from it.
///
/// Each attribute is looked up in `rssmeta` first and then in the file's own
/// text. If any attribute ends up null, an error is printed and the process
/// exits with status -1.
Entry make_entry(DirEntry dirEntry, RssMeta rssmeta)
{
    // From the DirEntry, we can get all file info, and also read the full file
    // since we know its path. We just need to remember, when we read the
    // entry, to only read between the opening and closing tags that delimit
    // the entry body, whilst searching the full file for metadata, and to
    // replace all bad characters with escaped ones.
    // Or, if tags do not work, we could delimit the body with a pair of
    // comments instead.
    // NOTE(review): the tag/comment names were lost from the original comment.
    // TODO: the description is currently the whole file, unescaped.

    // COMPLETE
    string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // Title is either:
        // 1) the rssmeta title for this file.
        // 2) <!-- rss-title: TITLE --> comment in text.
        // 3) <title>TITLE</title> from text.

        string title = rssmeta.title_of(dirEntry.name);
        if (title !is null)
            return title;

        title = find_rss_comment("title", text);
        if (title !is null)
            return title;

        title = find_html_title(text);
        return title; // even if it's null.
    }

    // COMPLETE
    string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // Link is either:
        // 1) the rssmeta link for this file.
        // 2) an rss "link" comment in text.

        string link = rssmeta.link_of(dirEntry.name);
        if (link !is null)
            return link;

        link = find_rss_comment("link", text);
        return link; // even if it's null.
    }

    // Output is formatted as ISO-8601.
    string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // The date must be formatted YYYY-mm-dd HH:MM.
        // pubDate is either:
        // 1) the rssmeta pubDate for this file.
        // 2) an rss "pubDate" comment in text.
        // 3) the modification time of the file.

        // NOTE(review): this used to call rssmeta.title_of, which returned the
        // title as the date. The accessor name pubDate_of is assumed by
        // analogy with title_of / link_of / guid_of -- confirm against RssMeta.
        string pubDate = rssmeta.pubDate_of(dirEntry.name);
        if (pubDate !is null)
            return pubDate;

        pubDate = find_rss_comment("pubDate", text);
        if (pubDate !is null)
            return pubDate;

        // Last resort: the file's modification time, rendered in UTC so that it
        // agrees with how unix_timestamp_of_date reads the string back.
        import std.format : format;

        const modified = dirEntry.timeLastModified.toUTC();
        return format("%04d-%02d-%02d %02d:%02d",
                      modified.year, cast(int) modified.month, modified.day,
                      modified.hour, modified.minute);
    }

    // COMPLETE
    string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // guid is either:
        // 1) the rssmeta guid for this file.
        // 2) an rss "guid" comment in text.
        // 3) delegated to get_link as a last resort.

        string guid = rssmeta.guid_of(dirEntry.name);
        if (guid !is null)
            return guid;

        guid = find_rss_comment("guid", text);
        if (guid !is null)
            return guid;

        return get_link(dirEntry, text, rssmeta);
    }

    // Reports which attribute could not be determined and terminates.
    void fail(string what)
    {
        writeln("Error occurred when retrieving attribute ", what, " in make_entry");
        exit(-1);
    }


    string text = readText(dirEntry.name);

    // Check the text before handing it to the helpers that search it.
    if (text is null)
        fail("text");

    string title = get_title(dirEntry, text, rssmeta);
    string link = get_link(dirEntry, text, rssmeta);
    string pubDate = get_pubdate(dirEntry, text, rssmeta);
    string guid = get_guid(dirEntry, text, rssmeta);

    if (title is null)
        fail("title");

    if (link is null)
        fail("link");

    if (pubDate is null)
        fail("pubDate");

    if (guid is null)
        fail("guid");

    // Argument order must match Entry's constructor:
    // (title, link, pubDate, guid, text).
    Entry ret = Entry(title, link, pubDate, guid, text);
    return ret;
}