summaryrefslogtreecommitdiff
path: root/source/app.d
blob: cb57d198b94b96a38ffba4fe6bc2bd2c8a9aac14 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/* reassess: generates an RSS feed given a directory.
   To run, simply pass in the directory containing all entries, and the
   intended target RSS location. For instance:
      reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml
   Then, you are set! Remember to re-run this program whenever an update is
   made to the website.

   The location of the metadata can be derived either from the file itself, or
   from an rssmeta.xml file, which should contain all the metadata for the
   relevant files.
   If not, please provide the metadata within the file. 

   reassess requires the following bits of metadata to be provided:
   	 TAG       derived from?
	 ---------|-------------
   - <title>       rssmeta:title OR <!-- title: --> OR <title> OR first <h1> tag
   - <link>        rssmeta:link OR assessrc:global_link OR <!-- link: ... -->
   - <pubDate>     rssmeta:pubDate OR <!-- pubDate: ... --> OR <!-- written: ... -->
   - <guid>        rssmeta:guid OR <!-- guid: ... --> OR the same as <link>.
   - <description> Everything within <body>.
	
   It is best to provide an rssmeta.xml or rssmeta.json file, but failing this
   all information can be derived from the body of the document if correctly
   written.

   reassess will fail if: 
   - There is both an rssmeta.xml and an rssmeta.json file in $1.
   - The rssmeta file is corrupted or incorrectly formatted.
   - There is insufficient information to populate a metadata field.
   etc.
   In all these cases, the program writes a message to stderr and exits with a
   non-zero status.

   Optional Flags:
   -m	Gives the location of the rssmeta.xml or rssmeta.json if it is not
   		present in the directory to be converted.
		`reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml -m $METAFILE`
   -i	Which errors to ignore: file-wo-entry or entry-wo-file (see below).
   		By default, nothing is ignored.
   -o	Order: forward; reverse; title-alphabetic; more TBA.
    	Default is reverse, which gives the most recent entry first.

	Format of rssmeta.xml:
	<entry>
		<filename>...</filename>
		... the metadata entries, e.g. <title>, <link>, etc.
	</entry>
	The <filename> element is used to find the corresponding file. An error
	is raised if:
	- There exists a file in the directory which is not present in the rssmeta
	  file. (You can pass in `-i file-wo-entry` to ignore this check, and skip
	  said file when outputting RSS.)
	- There exists an entry in the rssmeta file whose file is not in the
	  directory. (You can pass in `-i entry-wo-file` to ignore this.)


   */

import std.stdio;
import std.algorithm : canFind, filter, findSplitAfter;
import std.file : dirEntries, DirEntry, readText, SpanMode;
import core.stdc.stdlib : exit;

import html;
import rssmeta;
import entry;
import sort;

/* usage: print the command-line help to stdout and terminate the program
   with exit status 0. This function does not return to its caller.

   The option names listed here are the ones the argument parser in main()
   accepts; keep the two in step when adding or renaming a flag. */
void usage()
{
	writeln(
			"reassess: create an RSS feed out of HTML documents.\n" ~
			"Usage: \n" ~
			"\treassess DIRECTORY TARGET (FLAGS)\n" ~
			"DIRECTORY: The directory where the HTML files are sourced.\n" ~
			"TARGET:    The desired target location of the RSS file.\n" ~
			"Flags:\n" ~
			"\t-h, --help      Display this help message.\n" ~
			"\t-m, --metafile  The path of the metafile if not in DIRECTORY.\n" ~
			"\t-r, --recursive Whether to recurse DIRECTORY or not.\n" ~
			"\t-f, --follow    Whether to follow symlinks in DIRECTORY or not.\n" ~
			"\t-o, --order     The order to output the RSS entries.\n" ~
			// The parser accepts `title-alphabetic`; the previous text
			// advertised `alphabetic-by-title`, which was rejected.
			"\t                Options: reverse; forward; title-alphabetic.\n" ~
			"\t                Default to `reverse` if unspecified.\n" ~
			"\t-i, --ignore    A check to skip: file-wo-entry; entry-wo-file.\n" ~
			"\t                May be given more than once.\n" ~
			"\t-c, --config    Options to configure functionality. Details TODO\n"
		   );
	exit(0);
}

// TODO: RssInfo. We need the struct, with the relevant information, and where
// do we get it from?

/* create_rss: build the text of the RSS document for a feed.

   Params:
     entries = the entries to emit, already in the order they should appear.
     info    = channel-level metadata; its title, description, language and
               link fields are concatenated into the channel header.

   Returns: the complete document as a string, from <rss> through </rss>.

   NOTE(review): the info fields are inserted verbatim. If they can contain
   `&`, `<` or `>` they need XML-escaping first -- confirm what RssInfo
   guarantees.
   NOTE(review): RSS 2.0 defines the channel <link> as an element with text
   content; the href/rel/type form used below is the Atom link shape --
   confirm which one is intended. */
string create_rss(Entry[] entries, RssInfo info)
{
	// The inner quotes must be escaped, otherwise the literal ends after
	// `version=` and the function does not compile.
	string ret =
		"<rss version=\"2.0\">\n" ~
		"\t\t<channel>\n" ~
		"\t\t<title>" ~ info.title ~ "</title>\n" ~
		"\t\t<description>" ~ info.description ~ "</description>\n" ~
		"\t\t<language>" ~ info.language ~ "</language>\n" ~
		"\t\t<link href=\"" ~ info.link ~ "\" rel=\"self\" type=\"application/rss+xml\"/>";

	foreach (item; entries)
	{
		// TODO: add in the rest of the entry RSS info, i.e. append one
		// <item> element per entry to `ret`.
	}

	ret ~= "\n\t</channel>";
	ret ~= "\n</rss>";

	// Hand the document back so the caller can write it out.
	return ret;
}


// Values accepted by the -i (--ignore) option. Each one names a consistency
// check between the entries directory and the rssmeta file.
immutable string FILE_WO_ENTRY = "file-wo-entry";
immutable string ENTRY_WO_FILE = "entry-wo-file";

// Every value -i accepts; an argument is valid only if it appears here.
immutable string[] DASH_I_PARAMS = [
	FILE_WO_ENTRY,
	ENTRY_WO_FILE,
];

/* main: parse the command line, turn every HTML file in the entries
   directory into an Entry, sort the entries and write the RSS document to
   the target file.

   Params:
     args = the process arguments; args[0] is the program name and is
            skipped.

   Returns: 0 on success, 1 after printing a message to stderr when the
   arguments are invalid, the entries directory is missing, or the target
   cannot be written. */
int main(string[] args)
{
	import std.file : exists, isDir;

	/* Flags */
	string metafile;
	string entries_dir = null;
	string target = null;
	string[] ignore;	// Checks the user asked to skip; not consulted yet.
	bool recursive = false;
	bool follow_symlinks = false;
	// Most recent entry first unless -o says otherwise.
	SortMode sort_mode = SortMode.DateReverse;

	int positionals = 0;
	size_t i;

	// Fetch the value that must follow `flag`, advancing past it. Exits
	// with an error instead of indexing beyond the end of args.
	string value_of(string flag)
	{
		if (i + 1 >= args.length)
		{
			stderr.writeln("Missing value for ", flag, "; see --help");
			exit(1);
		}
		return args[++i];
	}

	// Start at 1: args[0] is the program name, not the entries directory.
	for (i = 1; i < args.length; ++i)
	{
		string arg = args[i];

		if (arg == "-m" || arg == "--metafile")
			metafile = value_of(arg);
		else if (arg == "-h" || arg == "--help")
			usage();
		else if (arg == "-r" || arg == "--recursive")
			recursive = true;
		else if (arg == "-f" || arg == "--follow")
			follow_symlinks = true;
		else if (arg == "-o" || arg == "--order")
		{
			auto param = value_of(arg);
			switch (param)
			{
				case "title-alphabetic":
				case "title-alphabetical":
				case "alphabetic-by-title":
					sort_mode = SortMode.TitleAlphabetic;
					break;
				case "date-reverse":
				case "date_reverse":
				case "reverse":
					sort_mode = SortMode.DateReverse;
					break;
				case "date-forward": case "date_forward":
				case "date-forwards": case "date_forwards":
				case "forward": case "forwards":
					sort_mode = SortMode.DateForward;
					break;
				default:
					stderr.writeln("Invalid sort mode inputted - valid are" ~
							" title-alphabetic, reverse, forward");
					return 1;
			}
		}
		else if (arg == "-i" || arg == "--ignore")
		{
			auto param = value_of(arg);
			if (!DASH_I_PARAMS.canFind(param))
			{
				stderr.writeln("Bad param for -i: ", param);
				return 1;
			}
			ignore ~= param;
		}
		else if (arg.length > 1 && arg[0] == '-')
		{
			// An unrecognised flag must not be mistaken for a path.
			stderr.writeln("Unknown option: ", arg, "; see --help");
			return 1;
		}
		else if (positionals == 0)
		{
			entries_dir = arg;
			++positionals;
		}
		else if (positionals == 1)
		{
			target = arg;
			++positionals;
		}
		else
		{
			// Previously a third path silently replaced the target.
			stderr.writeln("Unexpected argument: ", arg, "; see --help");
			return 1;
		}
	}

	if (entries_dir.length == 0)
	{
		stderr.writeln("Please provide a directory containing entries: see --help");
		return 1;
	}
	if (target.length == 0)
	{
		stderr.writeln("Please provide a target rss.xml file; see --help");
		return 1;
	}
	// exists() is checked first because isDir() throws on a missing path.
	if (!exists(entries_dir) || !isDir(entries_dir))
	{
		stderr.writeln("Not a directory: ", entries_dir);
		return 1;
	}

	// Get RssMeta.
	// TODO: also make it so it can get the metafile from the current dir
	// and rssmeta.xml xor rssmeta.json files.
	RssMeta rssmeta = RssMeta(metafile);

	// Convert each HTML file, given all info for the metadata, into an Entry.
	// The pattern is a glob matched against the file name, so it needs the
	// leading `*`; a bare ".html" only matches a file named exactly that.
	Entry[] entries;
	foreach (DirEntry file; dirEntries(entries_dir, "*.html",
				recursive ? SpanMode.depth : SpanMode.shallow,
				follow_symlinks)
			.filter!(f => f.isFile()))
	{
		entries ~= make_entry(file, rssmeta);
	}

	// Sort entries in the order requested by -o.
	auto ordered = sort(entries, sort_mode);

	// TODO: populate the channel metadata (title, description, language,
	// link); it is default-initialised until a source for it is decided.
	RssInfo info;

	// Build the RSS file contents from the sorted entries.
	const result = create_rss(ordered, info);

	// Finally, write back the full RSS XML into the file specified by $2.
	try
	{
		auto output = File(target, "w");
		output.write(result);
	}
	catch (Exception e)
	{
		stderr.writeln("Could not write ", target, ": ", e.msg);
		return 1;
	}

	return 0;
}