diff options
| -rw-r--r-- | source/app.d | 199 | ||||
| -rw-r--r-- | source/entry.d | 145 | ||||
| -rw-r--r-- | source/rssmeta.d | 36 | ||||
| -rw-r--r-- | source/sort.d | 50 | 
4 files changed, 291 insertions, 139 deletions
diff --git a/source/app.d b/source/app.d index 97630d2..cb57d19 100644 --- a/source/app.d +++ b/source/app.d @@ -63,6 +63,8 @@ import core.stdc.stdlib : exit;  import html;  import rssmeta; +import entry; +import sort;  void usage()  { @@ -85,141 +87,30 @@ void usage()  	exit(0);  } - - -// TODO: make ctor so that pubDateAsUnixTimestamp gets populated. -struct Entry -{ -	string title; -	string link; -	string pubDate; -	string guid; -	string description; // The actual entry itself. - -private: -	int pubDateAsUnixTimestamp; // So that we can sort easily. -} - - -Entry make_entry(DirEntry dirEntry, RssMeta rssmeta) +// TODO: RssInfo. We need the struct, with the relevant information, and where +// do we get it from? +void create_rss(Entry[] entries, RssInfo info)  { -	// From the DirEntry, we can get all file info, and also read the full file -	// since we know its path. We just need to remember, when we read the -	// entry, to only read between the <body> and </body> tags, whilst -	// searching the full file for content, and replace all bad characters with -	// escaped ones. -	// Or, if <body> does not work, we could try between some comments.  -	// I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria. -	// COMPLETE -	string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta) -	{ -		// Title is either: -		// 1) rssmeta:<title> -		// 2) <!-- rss-title: TITLE --> comment in text. -		// 3) <title>TITLE</title> from text. - -		string title = rssmeta.title_of(dirEntry.name); -		if (title !is null) -			return title; - -		title = find_rss_comment("title", text); -		if (title !is null) -			return title; - -		title = find_html_title(text); -		return title; // even if it's null.  -	} - -	// COMPLETE -	string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta) -	{ -		// Link is either: -		// 1) rssmeta:<link> -		// 2) <!-- rss-link: link --> comment in text. - -		string link = rssmeta.link_of(dirEntry.name); -		if (link !is null) -			return link; - -		link = find_rss_comment("link", text); -		return link; -	} - -	// Output is formatted as ISO-8601. -	string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta) -	{ -		// The date must be formatted YYYY-mm-dd HH:MM.  -		// pubDate is either: -		// 1) rssmeta:<pubDate> -		// 2) <!-- rss-pubDate: TITLE --> comment in text. -		// 3) the Unix timestamp of the file. - -		string pubDate = rssmeta.title_of(dirEntry.name); -		if (pubDate !is null) -			return pubDate; - -		pubDate = find_rss_comment("pubDate", text); -		if (pubDate !is null) -			return pubDate; - -		// TODO: add in getting date from the dirEntry.time. - -		return pubDate; // even if it's null.  -	} - -	// COMPLETE -	string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta) -	{ -		// guid is either: -		// 1) rssmeta:<guid> -		// 2) <!-- rss-guid: guid --> comment in text. -		// 3) delegated to get_link as a last resort. - -		string guid = rssmeta.guid_of(dirEntry.name); -		if (guid !is null) -			return guid; - -		guid = find_rss_comment("guid", text); -		if (guid !is null) -			return guid; - -		return get_link(dirEntry, text, rssmeta); -	} - -	void fail(string what) +	string ret; +	ret =  +		"<rss version="2.0">\n" ~ +		"\t\t<channel>\n" ~ +		"\t\t<title>" ~ info.title ~ "</title>\n" ~ +		"\t\t<description>" ~ info.description ~ "</description>\n" ~ +		"\t\t<language>" ~ info.language ~ "</language>\n" ~ +		"\t\t<link href=\"" ~ info.link ~ "\" rel=\"self\" type=\"application/rss+xml\"/>"; +	 +	for (int i = 0; i < entries.length; ++i)  	{ -		writeln("Error occurred when retrieving attribute ", what, " in make_entry"); -		exit(-1); +		// TODO: add in the rest of the entry RSS info. +		auto entry = entry[i];  	} - -	string text    = readText(dirEntry.name); -	string title   = get_title(dirEntry, text, rssmeta); -	string link    = get_link(dirEntry, text, rssmeta); -	string pubDate = get_pubdate(dirEntry, text, rssmeta); -	string guid    = get_guid(dirEntry, text, rssmeta); - -	if (text is null) -		fail("text"); - -	if (title is null) -		fail("title"); - -	if (link is null) -		fail("link"); - -	if (pubDate is null) -		fail("pubDate"); - -	if (guid is null) -		fail("guid"); - -	Entry ret = Entry(text, title, link, pubDate, guid); -	return ret; - - +	ret ~= "\n\t</channel>"; +	ret ~= "\n</rss>";  } +  // Flags which are passed to -i (--ignore).  immutable FILE_WO_ENTRY = "file-wo-entry";  immutable ENTRY_WO_FILE = "entry-wo-file"; @@ -230,11 +121,12 @@ int main(string[] args)  	/* Flags */  	string metafile;  	bool entries_set = false; -	string entries_dir; -	string target; +	string entries_dir = null; +	string target = null;  	string[] ignore;  	bool recursive = false;  	bool follow_symlinks = false; +	bool sort_mode = null;  	for (int i = 0; i < args.length; ++i)  	{ @@ -248,7 +140,30 @@ int main(string[] args)  		else if (args[i] == "-f" || args[i] == "--follow")  			follow_symlinks = true;  		else if (args[i] == "-o" || args[i] == "--order") -		{} +		{ +			auto param = args[++i]; +			switch (param) +			{ +				case "title-alphabetic": +					sort_mode = SortMode.TitleAlphabetic; +					break; +				case "date-reverse": +				case "date_reverse": +				case "reverse": +					sort_mode = SortMode.DateReverse; +					break; +				case "date-forward": case "date_forward": +				case "date-forwards": case "date_forwards": +				case "forward": case "forwards": +					sort_mode = SortMode.DateForward; +					break; +				default: +					writeln("Invalid sort mode inputted - valid are" ~ +							" title-alphabetic, reverse, forward"); +					exit(1); +				 +			} +		}  		else if (args[i] == "-i" || args[i] == "--ignore")  		{  			auto param = args[++i]; @@ -273,6 +188,17 @@ int main(string[] args)  		}  	} +	if (entries_dir == null) +	{ +		writeln("Please provide a directory containing entries: see --help"); +		exit(1); +	} +	if (target == null) +	{ +		writeln("Please provide a target rss.xml file; see --help"); +		exit(1); +	} +  	// Get RssMeta.  	// TODO: also make it so it can get the metafile from the current dir   	// and rssmeta.xml xor rssmeta.json files. @@ -298,6 +224,13 @@ int main(string[] args)  		entries ~= make_entry(dirEntry, rssmeta);  	} +	// Sort entries.  +	entries2 = sort(entries, sort_mode);  + +	// Iterate over the sorted entries and create the RSS file contents from +	// them. +	const result = create_rss(entries2); +  	// Iterate over all entries in the order as described by -o flag, and   	// create the entry for them. This gets spat back out as a big string. @@ -307,7 +240,5 @@ int main(string[] args)  	// Finally, write back the full RSS XML into the file specified by $2. - -  	return 0;  } diff --git a/source/entry.d b/source/entry.d new file mode 100644 index 0000000..01d4e3d --- /dev/null +++ b/source/entry.d @@ -0,0 +1,145 @@ +// TODO: make ctor so that pubDateAsUnixTimestamp gets populated. +struct Entry +{ +	string title; +	string link; +	string pubDate; +	string guid; +	string description; // The actual entry itself. + +	this(string title, string link, string pubDate, string guid, string text) +	{ +		this.title = title; +		this.link = link; +		this.pubdate = pubDate; +		this.guid = guid; +		this.description = text; +		pubDateAsUnixTimestamp = unix_timestamp_of_date(pubDate, "%Y-%m-%d %H:%M"); +	} + +private: +	int pubDateAsUnixTimestamp; // So that we can sort easily. +} + +int unix_timstamp_of_date(string date, string format) +{ +	// TODO: used in Entry ctor. +} + + +Entry make_entry(DirEntry dirEntry, RssMeta rssmeta) +{ +	// From the DirEntry, we can get all file info, and also read the full file +	// since we know its path. We just need to remember, when we read the +	// entry, to only read between the <body> and </body> tags, whilst +	// searching the full file for content, and replace all bad characters with +	// escaped ones. +	// Or, if <body> does not work, we could try between some comments.  +	// I say, <!-- bct:beg --> and <!-- bct:end --> would meet those criteria. +	// COMPLETE +	string get_title(DirEntry dirEntry, in string text, in RssMeta rssmeta) +	{ +		// Title is either: +		// 1) rssmeta:<title> +		// 2) <!-- rss-title: TITLE --> comment in text. +		// 3) <title>TITLE</title> from text. + +		string title = rssmeta.title_of(dirEntry.name); +		if (title !is null) +			return title; + +		title = find_rss_comment("title", text); +		if (title !is null) +			return title; + +		title = find_html_title(text); +		return title; // even if it's null.  +	} + +	// COMPLETE +	string get_link(DirEntry dirEntry, in string text, in RssMeta rssmeta) +	{ +		// Link is either: +		// 1) rssmeta:<link> +		// 2) <!-- rss-link: link --> comment in text. + +		string link = rssmeta.link_of(dirEntry.name); +		if (link !is null) +			return link; + +		link = find_rss_comment("link", text); +		return link; +	} + +	// Output is formatted as ISO-8601. +	string get_pubdate(DirEntry dirEntry, in string text, in RssMeta rssmeta) +	{ +		// The date must be formatted YYYY-mm-dd HH:MM.  +		// pubDate is either: +		// 1) rssmeta:<pubDate> +		// 2) <!-- rss-pubDate: TITLE --> comment in text. +		// 3) the Unix timestamp of the file. + +		string pubDate = rssmeta.title_of(dirEntry.name); +		if (pubDate !is null) +			return pubDate; + +		pubDate = find_rss_comment("pubDate", text); +		if (pubDate !is null) +			return pubDate; + +		// TODO: add in getting date from the dirEntry.time. + +		return pubDate; // even if it's null.  +	} + +	// COMPLETE +	string get_guid(DirEntry dirEntry, in string text, in RssMeta rssmeta) +	{ +		// guid is either: +		// 1) rssmeta:<guid> +		// 2) <!-- rss-guid: guid --> comment in text. +		// 3) delegated to get_link as a last resort. + +		string guid = rssmeta.guid_of(dirEntry.name); +		if (guid !is null) +			return guid; + +		guid = find_rss_comment("guid", text); +		if (guid !is null) +			return guid; + +		return get_link(dirEntry, text, rssmeta); +	} + +	void fail(string what) +	{ +		writeln("Error occurred when retrieving attribute ", what, " in make_entry"); +		exit(-1); +	} + + +	string text    = readText(dirEntry.name); +	string title   = get_title(dirEntry, text, rssmeta); +	string link    = get_link(dirEntry, text, rssmeta); +	string pubDate = get_pubdate(dirEntry, text, rssmeta); +	string guid    = get_guid(dirEntry, text, rssmeta); + +	if (text is null) +		fail("text"); + +	if (title is null) +		fail("title"); + +	if (link is null) +		fail("link"); + +	if (pubDate is null) +		fail("pubDate"); + +	if (guid is null) +		fail("guid"); + +	Entry ret = Entry(text, title, link, pubDate, guid); +	return ret; +} diff --git a/source/rssmeta.d b/source/rssmeta.d index 6fec5b4..864503c 100644 --- a/source/rssmeta.d +++ b/source/rssmeta.d @@ -1,13 +1,39 @@  // Represents the RssMeta struct, containing the metadata with some fns to   // access it easily. +import dxml; +import entry; +  struct RssMeta  { -	// TODO: add the magic. +	// TODO +	// The RssMeta should be a lookup table: we have a key (the filename given +	// in rssmeta.xml, etc.; and we have a set of values, e.g. the title, link, +	// etc. values which form the metadata. +	string metafile; +	string[Entry] lookup; + +	this(string metafile) +	{ + +		this.metafile = metafile; +		if (metafile[-4:] == ".json") {} +		else if (metafile[-4:] == ".xml") {} +		else  +		{ +			writeln("Cannot deduce whether metafile is json or xml. Please" ~ +					" ensure it is called either rssmeta.xml or rssmeta.json."); +			exit(1); +		} +	}  	// filename: either just name or fully qualified should work. -	string title_of(string filename) const; -	string link_of(string filename) const; -	string pubdate_of(string filename) const; -	string guid_of(string filename) const; +	string title_of(string filename) const  +	{ +		// TODO: we need to sort out the internal magic of RssMeta first, then +		// we can just do a lookup on the filename.  +	} +	string link_of(string filename) const { return ""; } +	string pubdate_of(string filename) const { return ""; } +	string guid_of(string filename) const { return ""; }  } diff --git a/source/sort.d b/source/sort.d new file mode 100644 index 0000000..9baa667 --- /dev/null +++ b/source/sort.d @@ -0,0 +1,50 @@ +/* For sorting the entries as per the -o flag. + * This allows a list of entries to be sorted,  + * e.g. title-alphabetic, date-reverse, date-forward. + */ + +enum SortMode +{ +	/* For instance: +	 *   TitleAA, TitleZZ, TitleFF +	 * becomes +	 *   TitleAA, TitleFF, TitleZZ +	 */  +	TitleAlphabetic; + +	/* For instance: +	 *   2023-10-01, 2023-10-05, 2023-10-03 +	 * becomes +	 *   2023-10-05, 2023-10-03, 2023-10-01 +	 */ +	DateReverse; + +	/* For instance: +	 *   2023-10-01, 2023-10-05, 2023-10-03 +	 * becomes +	 *   2023-10-01, 2023-10-03, 2023-10-05 +	 */ +	DateForwards; +} + +// TODO: write sorting functions. +Entry[] sort_title_alphabetic(Entry[] entries) { return entries; } +Entry[] sort_date_reverse(Entry[] entries) { return entries; } +Entry[] sort_date_forward(Entry[] entries) { return entries; } + + +Entry[] sort(Entry[] entries, SortMode sort_mode) +{ +	switch (sort_mode) +	{ +		case SortMode.TitleAlphabetic: +			return sort_title_alphabetic(entries); +		case SortMode.DateReverse: +			return sort_date_reverse(entries); +		case SortMode.DateForward: +			return sort_date_forward(entries); +	} +} + + +  | 
