1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
|
/* reassess: generates an RSS feed given a directory.
To run, simply pass in the directory containing all entries, and the
intended target RSS location. For instance:
reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml
Then, you are set! Remember to re-run this program whenever an update is
made to the website.
The location of the metadata can be derived either from the file itself, or
from an rssmeta.xml file, which should contain all the metadata for the
relevant files.
If not, please provide the metadata within the file.
reassess requires the following bits of metadata to be provided:
TAG derived from?
---------|-------------
- <title> rssmeta:title OR <!-- title: --> OR <title> OR first <h1> tag
- <link> rssmeta:link OR assessrc:global_link OR <!-- link: ... -->
- <pubDate> rssmeta:pubDate OR <!-- pubDate: ... --> OR <!-- written: ... -->
- <guid> rssmeta:guid OR <!-- guid: ... --> OR the same as <link>.
- <description> Everything within <body>.
It is best to provide an rssmeta.xml or rssmeta.json file, but failing this
all information can be derived from the body of the document if correctly
written.
reassess will fail if:
- There is both an rssmeta.xml and an rssmeta.json file in $1.
- The rssmeta file is corrupted or incorrectly formatted.
- There is insufficient information to populate a metadata field.
etc.
In all these cases, the program will print an error to stderr and exit with a
non-zero status.
Optional Flags:
-m Gives the location of the rssmeta.xml or rssmeta.json if it is not
present in the directory to be converted.
`reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml -m $METAFILE`
-i Which errors to ignore (`file-wo-entry` or `entry-wo-file`, see below).
By default, nothing is ignored.
-o Order: reverse; forward; alphabetic-by-title; more TBA.
Default is reverse, which gives most recent entry first.
Format of rssmeta.xml:
<entry>
<filename>...</filename>
... the metadata entries, e.g. <title>, <link>, etc.
</entry>
The <filename> element is used to locate the corresponding file. An error
is thrown if:
- There exists a file in the directory which is not present in the rssmeta
file (You can pass in `-i file-wo-entry` to ignore this check, and skip
said file when outputting RSS.)
- There exists a file in the rssmeta file which is not in the directory.
(You can pass in `-i entry-wo-file` to ignore this.)
*/
/**
 * Print the command-line help text to stdout and terminate the process with
 * exit status 0.
 *
 * This function does not return to its caller.
 */
void usage()
{
    // Function-local import: `exit` is the C library routine and is not
    // provided by any of the module-level imports.
    import core.stdc.stdlib : exit;

    // Adjacent string literals are not implicitly concatenated by current D
    // compilers, so the pieces are joined explicitly with `~`.
    writeln(
        "reassess: create an RSS feed out of HTML documents.\n"
        ~ "Usage: \n"
        ~ "\treassess DIRECTORY TARGET (FLAGS)\n"
        ~ "DIRECTORY: The directory where the HTML files are sourced.\n"
        ~ "TARGET: The desired target location of the RSS file.\n"
        ~ "Flags:\n"
        ~ "\t-h, --help Display this help message.\n"
        ~ "\t-m, --metafile The path of the metafile if not in DIRECTORY.\n"
        ~ "\t-r, --recursive Whether to recurse DIRECTORY or not.\n"
        ~ "\t-f, --follow Whether to follow symlinks in DIRECTORY or not.\n"
        ~ "\t-i, --ignore Checks to skip: file-wo-entry; entry-wo-file.\n"
        ~ "\t-o, --order The order to output the RSS entries.\n"
        ~ "\t Options: reverse; forward; alphabetic-by-title.\n"
        ~ "\t Default to `reverse` if unspecified.\n"
        ~ "\t-c, --config Options to configure functionality. Details TODO\n"
    );
    exit(0);
}
/**
 * One item of the generated RSS feed.
 *
 * The public fields correspond to the RSS tags of the same name.
 */
struct Entry
{
    string title;       // Content of the <title> tag.
    string link;        // Content of the <link> tag.
    string pubDate;     // Content of the <pubDate> tag, kept as text.
    string guid;        // Content of the <guid> tag.
    string description; // The actual entry itself.

private:
    // So that we can sort easily. Stored as a 64-bit value: a 32-bit `int`
    // cannot represent Unix timestamps after January 2038.
    long pubDateAsUnixTimestamp;
}
/**
 * Build an Entry from one HTML file.
 *
 * The whole file is read, and the entry's `description` is taken from the
 * text between the opening `<body ...>` tag and `</body>`. If no such pair
 * is found, the text between the `<!-- bct:beg -->` and `<!-- bct:end -->`
 * comments is used instead. Characters that are special in XML (`&`, `<`,
 * `>`) are replaced by their escaped forms.
 *
 * Params:
 *   dirEntry = the file to convert; only its path (`dirEntry.name`) is used.
 *
 * Returns: an Entry whose `description` is populated. All other fields are
 *   left at their default values.
 *
 * Throws: Exception when neither a body section nor the bct markers are
 *   found; whatever `std.file.readText` throws when the file cannot be read
 *   or is not valid UTF-8.
 */
Entry make_entry(DirEntry dirEntry)
{
    import std.array : replace;
    import std.exception : enforce;
    import std.file : readText;
    import std.string : indexOf;

    enum BODY_OPEN = "<body";
    enum BODY_CLOSE = "</body>";
    enum BCT_BEG = "<!-- bct:beg -->";
    enum BCT_END = "<!-- bct:end -->";

    string html = readText(dirEntry.name);
    string content;
    bool found = false;

    // Preferred source: everything inside <body ...> ... </body>. The search
    // is case-sensitive and skips past any attributes on the opening tag.
    immutable tagAt = html.indexOf(BODY_OPEN);
    if (tagAt >= 0)
    {
        immutable tagEnd = html.indexOf('>', tagAt);
        if (tagEnd >= 0)
        {
            immutable closeAt = html.indexOf(BODY_CLOSE, tagEnd + 1);
            if (closeAt >= 0)
            {
                content = html[tagEnd + 1 .. closeAt];
                found = true;
            }
        }
    }

    // Fallback: the explicit begin/end marker comments.
    if (!found)
    {
        immutable begAt = html.indexOf(BCT_BEG);
        if (begAt >= 0)
        {
            immutable from = begAt + BCT_BEG.length;
            immutable endAt = html.indexOf(BCT_END, from);
            if (endAt >= 0)
            {
                content = html[from .. endAt];
                found = true;
            }
        }
    }

    enforce(found,
        "reassess: no <body> or bct:beg/bct:end section found in "
        ~ dirEntry.name);

    Entry entry;
    // `&` must be escaped first, otherwise the `&` of the entities produced
    // by the later replacements would be escaped a second time.
    entry.description = content
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;");

    // TODO: derive title, link, pubDate and guid from the rssmeta file or
    // from the document itself.
    return entry;
}
import std.stdio;
import std.algorithm : canFind;
import std.file : dirEntries, DirEntry;
// Values accepted by the -i (--ignore) option.
immutable string FILE_WO_ENTRY = "file-wo-entry";
immutable string ENTRY_WO_FILE = "entry-wo-file";
// Every accepted -i value; main checks the user's argument against this list.
immutable string[] DASH_I_PARAMS = [ FILE_WO_ENTRY, ENTRY_WO_FILE ];
/**
 * Program entry point: parse the command line, collect the HTML files of
 * DIRECTORY and convert each one into an Entry.
 *
 * Params:
 *   args = the raw command line; args[0] is the program name and is skipped.
 *
 * Returns: 1 on any usage or processing error (a message is printed to
 *   stderr). Writing the RSS file is not implemented yet, so the function
 *   currently also reports that and returns 1 after a successful conversion.
 *   `-h`/`--help` terminates the process through usage().
 */
int main(string[] args)
{
    import std.algorithm : filter, sort;
    import std.array : array;
    import std.file : SpanMode, exists, isDir;

    // Values accepted by -o (--order), as listed in the help text.
    enum ORDER_REVERSE = "reverse";
    enum ORDER_FORWARD = "forward";
    enum ORDER_ALPHABETIC = "alphabetic-by-title";
    static immutable string[] ORDER_PARAMS =
        [ ORDER_REVERSE, ORDER_FORWARD, ORDER_ALPHABETIC ];

    /* Flags */
    string metafile;              // TODO: not consumed until rssmeta handling exists.
    string order = ORDER_REVERSE; // Documented default.
    string[] ignore;              // TODO: not consumed until rssmeta handling exists.
    string[] positionals;         // DIRECTORY and TARGET, in that order.
    bool recursive = false;
    bool follow_symlinks = false;

    // Stores the argument following the option at args[i] in `value` and
    // advances `i` past it. Reports an error and returns false when the
    // option is the last argument.
    bool take_value(ref size_t i, out string value)
    {
        if (i + 1 >= args.length)
        {
            stderr.writeln("reassess: missing value for ", args[i]);
            return false;
        }
        value = args[++i];
        return true;
    }

    // Start at 1: args[0] is the program name, not a user argument.
    for (size_t i = 1; i < args.length; ++i)
    {
        string arg = args[i];

        if (arg == "-h" || arg == "--help")
            usage();
        else if (arg == "-r" || arg == "--recursive")
            recursive = true;
        else if (arg == "-f" || arg == "--follow")
            follow_symlinks = true;
        else if (arg == "-m" || arg == "--metafile")
        {
            if (!take_value(i, metafile))
                return 1;
        }
        else if (arg == "-o" || arg == "--order")
        {
            if (!take_value(i, order))
                return 1;
            if (!ORDER_PARAMS.canFind(order))
            {
                stderr.writeln("Bad param for -o: ", order);
                return 1;
            }
        }
        else if (arg == "-i" || arg == "--ignore")
        {
            string param;
            if (!take_value(i, param))
                return 1;
            if (!DASH_I_PARAMS.canFind(param))
            {
                stderr.writeln("Bad param for -i: ", param);
                return 1;
            }
            ignore ~= param;
        }
        else if (arg.length > 1 && arg[0] == '-')
        {
            stderr.writeln("reassess: unknown option ", arg);
            return 1;
        }
        else
            positionals ~= arg;
    }

    if (positionals.length != 2)
    {
        stderr.writeln(
            "reassess: expected exactly DIRECTORY and TARGET; "
            ~ "run with --help for usage.");
        return 1;
    }
    string source_dir = positionals[0];
    string target = positionals[1];

    if (!source_dir.exists || !source_dir.isDir)
    {
        stderr.writeln("reassess: not a directory: ", source_dir);
        return 1;
    }

    Entry[] items;
    try
    {
        // Grab list of files in directory. The pattern is a glob matched
        // against each file name, hence "*.html" rather than ".html".
        DirEntry[] objects = dirEntries(source_dir, "*.html",
                recursive ? SpanMode.depth : SpanMode.shallow,
                follow_symlinks)
            .filter!(file => file.isFile)
            .array;

        // Convert the file, given all info for the metadata, into an Entry.
        items.reserve(objects.length);
        foreach (DirEntry dirEntry; objects)
            items ~= make_entry(dirEntry);
    }
    catch (Exception e)
    {
        stderr.writeln("reassess: ", e.msg);
        return 1;
    }

    // Put the entries in the order requested by -o.
    switch (order)
    {
        case ORDER_ALPHABETIC:
            items.sort!((a, b) => a.title < b.title);
            break;
        case ORDER_FORWARD:
            items.sort!((a, b) =>
                a.pubDateAsUnixTimestamp < b.pubDateAsUnixTimestamp);
            break;
        default: // ORDER_REVERSE: most recent entry first.
            items.sort!((a, b) =>
                a.pubDateAsUnixTimestamp > b.pubDateAsUnixTimestamp);
            break;
    }

    // TODO: Iterate over all entries in this order and create the RSS item
    // for each of them. This gets spat back out as a big string.
    // TODO: Now, write all the fluff around the entries, and finally write
    // the full RSS XML into the file specified by TARGET.
    stderr.writeln(
        "reassess: writing the RSS file is not implemented yet; "
        ~ "nothing was written to ", target);
    return 1;
}
|