1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
/**
 * One item of the generated RSS feed.
 *
 * The public string members are stored exactly as passed to the constructor.
 * The publication date is additionally kept as a Unix timestamp so that
 * entries can be sorted easily.
 */
struct Entry
{
    string title;
    string link;
    string pubDate;
    string guid;
    string description; // The actual entry itself.

    /**
     * Builds an entry and derives the sortable timestamp from `pubDate`.
     *
     * Params:
     *   title   = entry title.
     *   link    = entry link.
     *   pubDate = publication date, parsed with the pattern "%Y-%m-%d %H:%M".
     *   guid    = entry guid.
     *   text    = entry body; stored in `description`.
     */
    this(string title, string link, string pubDate, string guid, string text)
    {
        this.title = title;
        this.link = link;
        // The member is spelled `pubDate`; `this.pubdate` does not exist.
        this.pubDate = pubDate;
        this.guid = guid;
        this.description = text;
        pubDateAsUnixTimestamp = unix_timestamp_of_date(pubDate, "%Y-%m-%d %H:%M");
    }

private:
    int pubDateAsUnixTimestamp; // So that we can sort easily.
}
/**
 * Converts a textual date into seconds since the Unix epoch.
 *
 * Params:
 *   date   = the date text, e.g. "2020-01-31 13:37".
 *   format = strptime-like pattern describing `date`. Supported directives
 *            are %Y, %m, %d, %H, %M, %S and %%; every other character in
 *            the pattern must match the date text literally.
 *
 * Returns: the Unix timestamp of `date`. Fields absent from the pattern
 *          default to 1970-01-01 00:00:00.
 *          NOTE(review): the date is interpreted as UTC; confirm whether
 *          local time is wanted instead.
 *
 * Throws: Exception when `date` does not match `format` or the pattern uses
 *         an unsupported directive; DateTimeException when a parsed field is
 *         out of range (e.g. month 13).
 */
int unix_timestamp_of_date(string date, string format)
{
    import std.ascii : isDigit;
    import std.datetime : DateTime, SysTime, UTC;
    import std.exception : enforce;

    int year = 1970, month = 1, day = 1, hour = 0, minute = 0, second = 0;
    size_t pos = 0; // Read cursor into `date`.

    // Consumes between 1 and maxDigits decimal digits at the cursor.
    int readNumber(size_t maxDigits)
    {
        immutable start = pos;
        int value = 0;
        while (pos < date.length && pos - start < maxDigits && isDigit(date[pos]))
        {
            value = value * 10 + (date[pos] - '0');
            ++pos;
        }
        enforce(pos > start, "Expected a number in date '" ~ date ~ "'");
        return value;
    }

    // Consumes exactly the character `expected` at the cursor.
    void readLiteral(char expected)
    {
        enforce(pos < date.length && date[pos] == expected,
                "Date '" ~ date ~ "' does not match format '" ~ format ~ "'");
        ++pos;
    }

    for (size_t i = 0; i < format.length; ++i)
    {
        if (format[i] != '%')
        {
            readLiteral(format[i]);
            continue;
        }

        enforce(i + 1 < format.length, "Dangling '%' in date format '" ~ format ~ "'");
        ++i;
        switch (format[i])
        {
        case 'Y': year   = readNumber(4); break;
        case 'm': month  = readNumber(2); break;
        case 'd': day    = readNumber(2); break;
        case 'H': hour   = readNumber(2); break;
        case 'M': minute = readNumber(2); break;
        case 'S': second = readNumber(2); break;
        case '%': readLiteral('%');       break;
        default:
            throw new Exception("Unsupported date format directive '%" ~ format[i] ~ "'");
        }
    }
    enforce(pos == date.length, "Trailing characters in date '" ~ date ~ "'");

    // DateTime validates the field ranges; SysTime performs the epoch conversion.
    return SysTime(DateTime(year, month, day, hour, minute, second), UTC()).toUnixTime!int();
}

// The function was originally declared under this misspelled name; keep it
// resolvable for any caller that still uses it.
alias unix_timstamp_of_date = unix_timestamp_of_date;
/**
 * Builds the feed Entry for one file.
 *
 * The file at `dirEntry.name` is read in full, and each attribute is then
 * resolved from `rssmeta` first and from the file's text second (see the
 * nested getters for the exact order). If any attribute cannot be resolved,
 * an error is printed and the program exits with status -1.
 *
 * Params:
 *   dirEntry = the file to turn into an entry; its `name` is used both as the
 *              path to read and as the lookup key into `rssmeta`.
 *   rssmeta  = per-file metadata overrides.
 *
 * Returns: the populated Entry, with the file's full text as its description.
 */
Entry make_entry(DirEntry dirEntry, RssMeta rssmeta)
{
    // From the DirEntry, we can get all file info, and also read the full file
    // since we know its path. We just need to remember, when we read the
    // entry, to only read between the <body> and </body> tags, whilst
    // searching the full file for content, and replace all bad characters with
    // escaped ones.
    // Or, if <body> does not work, we could try between some comments.
    // I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria.

    // COMPLETE
    string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // Title is either:
        // 1) rssmeta:<title>
        // 2) <!-- rss-title: TITLE --> comment in text.
        // 3) <title>TITLE</title> from text.
        string title = rssmeta.title_of(dirEntry.name);
        if (title !is null)
            return title;

        title = find_rss_comment("title", text);
        if (title !is null)
            return title;

        return find_html_title(text); // even if it's null.
    }

    // COMPLETE
    string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // Link is either:
        // 1) rssmeta:<link>
        // 2) <!-- rss-link: link --> comment in text.
        string link = rssmeta.link_of(dirEntry.name);
        if (link !is null)
            return link;

        return find_rss_comment("link", text); // even if it's null.
    }

    // Output is formatted as ISO-8601.
    string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // The date must be formatted YYYY-mm-dd HH:MM.
        // pubDate is either:
        // 1) rssmeta:<pubDate>
        // 2) <!-- rss-pubDate: DATE --> comment in text.
        // 3) the Unix timestamp of the file.
        //
        // This used to call title_of, which handed the title back as the date.
        // NOTE(review): assumes RssMeta exposes pubDate_of alongside
        // title_of/link_of/guid_of -- confirm the accessor's exact name.
        string pubDate = rssmeta.pubDate_of(dirEntry.name);
        if (pubDate !is null)
            return pubDate;

        pubDate = find_rss_comment("pubDate", text);
        if (pubDate !is null)
            return pubDate;

        // TODO: add in getting date from the dirEntry.time.
        return pubDate; // even if it's null.
    }

    // COMPLETE
    string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta)
    {
        // guid is either:
        // 1) rssmeta:<guid>
        // 2) <!-- rss-guid: guid --> comment in text.
        // 3) delegated to get_link as a last resort.
        string guid = rssmeta.guid_of(dirEntry.name);
        if (guid !is null)
            return guid;

        guid = find_rss_comment("guid", text);
        if (guid !is null)
            return guid;

        return get_link(dirEntry, text, rssmeta);
    }

    // Reports which attribute could not be resolved and terminates the program.
    void fail(string what)
    {
        writeln("Error occurred when retrieving attribute ", what, " in make_entry");
        exit(-1);
    }

    string text = readText(dirEntry.name);
    // Validate the text before it is handed to the getters below.
    if (text is null)
        fail("text");

    string title = get_title(dirEntry, text, rssmeta);
    if (title is null)
        fail("title");

    string link = get_link(dirEntry, text, rssmeta);
    if (link is null)
        fail("link");

    string pubDate = get_pubdate(dirEntry, text, rssmeta);
    if (pubDate is null)
        fail("pubDate");

    string guid = get_guid(dirEntry, text, rssmeta);
    if (guid is null)
        fail("guid");

    // Argument order must follow Entry's constructor:
    // (title, link, pubDate, guid, text).
    return Entry(title, link, pubDate, guid, text);
}
|