summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorself <george@gabbott.dev>2023-10-02 20:35:53 +0100
committerGeorge Abbott <george@gabbott.dev>2023-10-02 20:36:48 +0100
commit8e4092347975db7fe90b0895f605bdd79de47410 (patch)
treee7aa55355f4f72b9f7972077539e9e05b6e50702
Initial commit
-rw-r--r--.gitignore16
-rw-r--r--dub.json14
-rw-r--r--source/app.d191
3 files changed, 221 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..086d612
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+.dub
+docs.json
+__dummy.html
+docs/
+/reassess
+reassess.so
+reassess.dylib
+reassess.dll
+reassess.a
+reassess.lib
+reassess-test-*
+*.exe
+*.pdb
+*.o
+*.obj
+*.lst
diff --git a/dub.json b/dub.json
new file mode 100644
index 0000000..e2848da
--- /dev/null
+++ b/dub.json
@@ -0,0 +1,14 @@
+{
+ "name": "reassess",
+ "copyright": "Copyright © 2024, George",
+ "license": "GPL-3.0",
+ "description": "Generates an RSS feed given a directory of entries.",
+
+ "authors": [
+ "George Abbott"
+ ],
+ "homepage": "https://gabbott.dev/project/reassess",
+ "dependencies": {
+ "dxml": "~>0.4.4"
+ }
+}
diff --git a/source/app.d b/source/app.d
new file mode 100644
index 0000000..3c913c3
--- /dev/null
+++ b/source/app.d
@@ -0,0 +1,191 @@
+/* reassess: generates an RSS feed given a directory.
+ To run, simply pass in the directory containing all entries, and the
+ intended target RSS location. For instance:
+ reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml
+ Then, you are set! Remember to re-run this program whenever an update is
+ made to the website.
+
+ The metadata can be derived either from the file itself, or from an
+ rssmeta.xml file, which should contain all the metadata for the relevant
+ files.
+ If there is no rssmeta file, please provide the metadata within each file.
+
+ reassess requires the following bits of metadata to be provided:
+ TAG derived from?
+ ---------|-------------
+ - <title> rssmeta:title OR <!-- title: --> OR <title> OR first <h1> tag
+ - <link> rssmeta:link OR assessrc:global_link OR <!-- link: ... -->
+ - <pubDate> rssmeta:pubDate OR <!-- pubDate: ... --> OR <!-- written: ... -->
+ - <guid> rssmeta:guid OR <!-- guid: ... --> OR the same as <link>.
+ - <description> Everything within <body>.
+
+ It is best to provide an rssmeta.xml or rssmeta.json file, but failing this
+ all information can be derived from the body of the document if correctly
+ written.
+
+ reassess will fail if:
+ - There is both an rssmeta.xml and an rssmeta.json file in $1.
+ - The rssmeta file is corrupted or incorrectly formatted.
+ - There is insufficient information to populate a metadata field.
+ etc.
+ In all these cases, the program will write an error to stderr and exit with
+ a non-zero status.
+
+ Optional Flags:
+ -m Gives the location of the rssmeta.xml or rssmeta.json if it is not
+ present in the directory to be converted.
+ `reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml -m $METAFILE`
+ -i Which errors to ignore.
+ By default, nothing is ignored.
+ -o Order: in-order; reverse; title-alphabetical; more TBA.
+ Default is reverse, which gives most recent entry first.
+
+ Format of rssmeta.xml:
+ <entry>
+ <filename>...</filename>
+ ... the metadata entries, e.g. <title>, <link>, etc.
+ </entry>
+ The <filename> element is used to find the corresponding file. An error
+ is thrown if:
+ - There exists a file in the directory which is not present in the rssmeta
+ file (You can pass in `-i file-wo-entry` to ignore this check, and skip
+ said file when outputting RSS.)
+ - There exists a file in the rssmeta file which is not in the directory.
+ (You can pass in `-i entry-wo-file` to ignore this.)
+
+
+ */
+
+/**
+ * Prints the command-line help text to stdout and terminates the process
+ * with exit status 0. This function never returns to its caller.
+ */
+void usage()
+{
+    // Scoped import: nothing else in the file brings `exit` into scope.
+    import core.stdc.stdlib : exit;
+
+    // D does not concatenate adjacent string literals implicitly, so the
+    // pieces of the help text are joined explicitly with `~`.
+    writeln(
+        "reassess: create an RSS feed out of HTML documents.\n"
+        ~ "Usage: \n"
+        ~ "\treassess DIRECTORY TARGET (FLAGS)\n"
+        ~ "DIRECTORY: The directory where the HTML files are sourced.\n"
+        ~ "TARGET: The desired target location of the RSS file.\n"
+        ~ "Flags:\n"
+        ~ "\t-h, --help Display this help message.\n"
+        ~ "\t-m, --metafile The path of the metafile if not in DIRECTORY.\n"
+        ~ "\t-r, --recursive Whether to recurse DIRECTORY or not.\n"
+        ~ "\t-f, --follow Whether to follow symlinks in DIRECTORY or not.\n"
+        ~ "\t-o, --order The order to output the RSS entries.\n"
+        ~ "\t Options: reverse; forward; alphabetic-by-title.\n"
+        ~ "\t Default to `reverse` if unspecified.\n"
+        ~ "\t-i, --ignore Errors to ignore.\n"
+        ~ "\t Options: file-wo-entry; entry-wo-file.\n"
+        ~ "\t-c, --config Options to configure functionality. Details TODO\n"
+    );
+    exit(0);
+}
+
+
+
+/**
+ * One item of the generated RSS feed. The string fields correspond to the
+ * RSS elements of the same name (<title>, <link>, <pubDate>, <guid> and
+ * <description>).
+ */
+struct Entry
+{
+    string title;
+    string link;
+    string pubDate;
+    string guid;
+    string description; // The actual entry itself.
+
+private:
+    // Publication date as a Unix timestamp, so that we can sort easily.
+    // Kept 64-bit: a 32-bit `int` overflows for dates after 2038-01-19.
+    long pubDateAsUnixTimestamp;
+}
+
+/**
+ * Builds the feed Entry for one HTML file.
+ *
+ * Only `description` is filled in so far: it is the text between <body> and
+ * </body> (or, if that pair is not found, between <!-- bct:beg --> and
+ * <!-- bct:end -->), with the XML-special characters escaped.
+ *
+ * Params:
+ *     dirEntry = The HTML file to convert.
+ *
+ * Returns: An Entry whose `description` holds the escaped content.
+ *
+ * Throws: Exception if neither pair of markers is present in the file, plus
+ *         whatever std.file.readText throws if the file cannot be read.
+ */
+Entry make_entry(DirEntry dirEntry)
+{
+    import std.array : replace;
+    import std.file : readText;
+    import std.string : indexOf;
+
+    // Stores in `inner` the text strictly between the first `open` and the
+    // next `close` after it; returns false if either marker is missing.
+    static bool slice_between(string text, string open, string close,
+                              out string inner)
+    {
+        immutable start = text.indexOf(open);
+        if (start < 0)
+            return false;
+        string rest = text[start + open.length .. $];
+        immutable end = rest.indexOf(close);
+        if (end < 0)
+            return false;
+        inner = rest[0 .. end];
+        return true;
+    }
+
+    string html = readText(dirEntry.name);
+
+    // Prefer the <body> element; if that does not work, fall back to the
+    // explicit comment markers.
+    string content;
+    if (!slice_between(html, "<body>", "</body>", content)
+        && !slice_between(html, "<!-- bct:beg -->", "<!-- bct:end -->",
+                          content))
+    {
+        throw new Exception(
+            "No <body> or bct markers found in " ~ dirEntry.name);
+    }
+
+    // Escape the characters that are special in XML. `&` must go first so
+    // the ampersands introduced by the other replacements are not escaped
+    // a second time.
+    Entry entry;
+    entry.description = content
+        .replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;");
+
+    // TODO: title, link, pubDate and guid still have to be derived from the
+    // file or from the rssmeta file; they are left empty here.
+    return entry;
+}
+
+
+
+
+import std.stdio;
+import std.algorithm : canFind;
+import std.file : dirEntries, DirEntry;
+
+// Flags which are passed to -i (--ignore).
+// Skip the check for files in the directory that have no rssmeta entry.
+immutable FILE_WO_ENTRY = "file-wo-entry";
+// Skip the check for rssmeta entries that have no file in the directory.
+immutable ENTRY_WO_FILE = "entry-wo-file";
+// Every value accepted by -i; main rejects any parameter not listed here.
+immutable DASH_I_PARAMS = [ FILE_WO_ENTRY, ENTRY_WO_FILE ];
+
+/**
+ * Program entry point: parses the command line, then converts every HTML
+ * file found in DIRECTORY into an Entry. Rendering the entries and writing
+ * the RSS file are still to be implemented (see the TODOs at the end).
+ *
+ * Params:
+ *     args = Raw command-line arguments; args[0] is the program name.
+ *
+ * Returns: 0 on success, 1 if the command line is invalid.
+ */
+int main(string[] args)
+{
+    import std.file : SpanMode;
+
+    /* Flags */
+    // metafile, order, ignore and target are parsed here but only consumed
+    // by the steps that are still TODO below.
+    string metafile;
+    string order = "reverse"; // Documented default for -o.
+    string[] ignore;
+    bool recursive = false;
+    bool follow_symlinks = false;
+    string[] positional; // DIRECTORY and TARGET, in that order.
+
+    // Start at 1: args[0] is the program name, not a user argument.
+    for (size_t i = 1; i < args.length; ++i)
+    {
+        string arg = args[i];
+
+        // These flags consume the next argument as their value, so make
+        // sure there is one before indexing past the end of `args`.
+        immutable takes_value =
+            ["-m", "--metafile", "-o", "--order", "-i", "--ignore"]
+            .canFind(arg);
+        if (takes_value && i + 1 >= args.length)
+        {
+            stderr.writeln("Missing value for ", arg);
+            return 1;
+        }
+
+        switch (arg)
+        {
+            case "-m", "--metafile":
+                metafile = args[++i];
+                break;
+            case "-h", "--help":
+                usage(); // Prints the help text and exits the process.
+                break;
+            case "-r", "--recursive":
+                recursive = true;
+                break;
+            case "-f", "--follow":
+                follow_symlinks = true;
+                break;
+            case "-o", "--order":
+                order = args[++i];
+                break;
+            case "-i", "--ignore":
+            {
+                string param = args[++i];
+                if (!DASH_I_PARAMS.canFind(param))
+                {
+                    stderr.writeln("Bad param for -i: ", param);
+                    return 1;
+                }
+                ignore ~= param;
+                break;
+            }
+            default:
+                positional ~= arg;
+                break;
+        }
+    }
+
+    if (positional.length != 2)
+    {
+        stderr.writeln(
+            "Expected exactly DIRECTORY and TARGET; see `reassess --help`.");
+        return 1;
+    }
+    string entries_dir = positional[0];
+    string target = positional[1];
+
+    // Convert every regular *.html file in the directory into an Entry.
+    // dirEntries matches a glob pattern, so "*.html" (not ".html") is what
+    // selects files by extension.
+    Entry[] feed_entries;
+    auto span = recursive ? SpanMode.depth : SpanMode.shallow;
+    foreach (DirEntry file;
+             dirEntries(entries_dir, "*.html", span, follow_symlinks))
+    {
+        if (file.isFile)
+            feed_entries ~= make_entry(file);
+    }
+
+    // TODO: Iterate over feed_entries in the order requested by -o and
+    // render them into one big string (a create_entries helper is yet to be
+    // written).
+
+    // TODO: Write all the fluff around the entries.
+
+    // TODO: Finally, write back the full RSS XML into the file given by
+    // TARGET.
+
+    return 0;
+}