diff options
author | self <george@gabbott.dev> | 2023-10-02 20:35:53 +0100 |
---|---|---|
committer | George Abbott <george@gabbott.dev> | 2023-10-02 20:36:48 +0100 |
commit | 8e4092347975db7fe90b0895f605bdd79de47410 (patch) | |
tree | e7aa55355f4f72b9f7972077539e9e05b6e50702 |
Initial commit
-rw-r--r-- | .gitignore | 16 | ||||
-rw-r--r-- | dub.json | 14 | ||||
-rw-r--r-- | source/app.d | 191 |
3 files changed, 221 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..086d612 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +.dub +docs.json +__dummy.html +docs/ +/reassess +reassess.so +reassess.dylib +reassess.dll +reassess.a +reassess.lib +reassess-test-* +*.exe +*.pdb +*.o +*.obj +*.lst diff --git a/dub.json b/dub.json new file mode 100644 index 0000000..e2848da --- /dev/null +++ b/dub.json @@ -0,0 +1,14 @@ +{ + "name": "reassess", + "copyright": "Copyright © 2024, George", + "license": "GPL-3.0", + "description": "Generates an RSS feed given a directory of entries.", + + "authors": [ + "George Abbott" + ], + "homepage": "https://gabbott.dev/project/reassess", + "dependencies": { + "dxml": "~>0.4.4" + } +} diff --git a/source/app.d b/source/app.d new file mode 100644 index 0000000..3c913c3 --- /dev/null +++ b/source/app.d @@ -0,0 +1,191 @@ +/* reassess: generates an RSS feed given a directory. + To run, simply pass in the directory containing all entries, and the + intended target RSS location. For instance: + reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml + Then, you are set! Remember to re-run this program whenever an update is + made to the website. + + The location of the metadata can be derived either from the file itself, or + from an rssmeta.xml file, which should contain all the metadata for the + relevant files. + If not, please provide the metadata within the file. + + reassess requires the following bits of metadata to be provided: + TAG derived from? + ---------|------------- + - <title> rssmeta:title OR <!-- title: --> OR <title> OR first <h1> tag + - <link> rssmeta:link OR assessrc:global_link OR <!-- link: ... --> + - <pubDate> rssmeta:pubDate OR <!-- pubDate: ... --> OR <!-- written: ... --> + - <guid> rssmeta:guid OR <!-- guid: ... --> OR the same as <link>. + - <description> Everything within <body>. 
+ + It is best to provide an rssmeta.xml or rssmeta.json file, but failing this + all information can be derived from the body of the document if correctly + written. + + reassess will fail if: + - There is both an rssmeta.xml and an rssmeta.json file in $1. + - The rssmeta file is corrupted or incorrectly formatted. + - There is insufficient information to populate a metadata field. + etc. + In all these cases, the program will output to stderr and exit with a + non-zero status. + + Optional Flags: + -m Gives the location of the rssmeta.xml or rssmeta.json if it is not + present in the directory to be converted. + `reassess $WEBSITE_URL/blog/entries $WEBSITE_URL/blog/rss.xml -m $METAFILE` + -i Which errors to ignore. + By default, nothing is ignored. + -o Order: in-order; reverse; title-alphabetical; more TBA. + Default is reverse, which gives most recent entry first. + + Format of rssmeta.xml: + <entry> + <filename>...</filename> + ... the metadata entries, e.g. <title>, <link>, etc. + </entry> + The <filename> element identifies the file that the entry describes. An + error is thrown if: + - There exists a file in the directory which is not present in the rssmeta + file (You can pass in `-i file-wo-entry` to ignore this check, and skip + said file when outputting RSS.) + - There exists a file in the rssmeta file which is not in the directory. + (You can pass in `-i entry-wo-file` to ignore this.) 
+
+
+ */
+
+/**
+ * Prints the command-line help text to stdout, then terminates the process
+ * with exit status 0.
+ */
+void usage()
+{
+    import core.stdc.stdlib : exit;
+    import std.stdio : writeln;
+
+    // D2 does not implicitly concatenate adjacent string literals, so the
+    // pieces are joined explicitly with `~`.
+    writeln(
+        "reassess: create an RSS feed out of HTML documents.\n"
+        ~ "Usage: \n"
+        ~ "\treassess DIRECTORY TARGET (FLAGS)\n"
+        ~ "DIRECTORY: The directory where the HTML files are sourced.\n"
+        ~ "TARGET: The desired target location of the RSS file.\n"
+        ~ "Flags:\n"
+        ~ "\t-h, --help Display this help message.\n"
+        ~ "\t-m, --metafile The path of the metafile if not in DIRECTORY.\n"
+        ~ "\t-r, --recursive Whether to recurse DIRECTORY or not.\n"
+        ~ "\t-f, --follow Whether to follow symlinks in DIRECTORY or not.\n"
+        ~ "\t-i, --ignore An error to ignore: file-wo-entry; entry-wo-file.\n"
+        ~ "\t-o, --order The order to output the RSS entries.\n"
+        ~ "\t Options: reverse; forward; alphabetic-by-title.\n"
+        ~ "\t Default to `reverse` if unspecified.\n"
+        ~ "\t-c, --config Options to configure functionality. Details TODO\n"
+    );
+    exit(0);
+}
+
+
+
+/// One item of the generated RSS feed.
+struct Entry
+{
+    string title;
+    string link;
+    string pubDate;
+    string guid;
+    string description; // The actual entry itself.
+
+private:
+    // So that we can sort easily. 64-bit so that timestamps after January
+    // 2038 do not overflow.
+    long pubDateAsUnixTimestamp;
+}
+
+/**
+ * Finds the text strictly between the first occurrence of `open` and the next
+ * occurrence of `close` after it.
+ *
+ * Params:
+ *   text   = the text to search.
+ *   open   = the opening marker.
+ *   close  = the closing marker, searched for only after `open`.
+ *   result = receives the text between the markers; null if not found.
+ *
+ * Returns: true if both markers were found, false otherwise.
+ */
+private bool slice_between(string text, string open, string close,
+                           out string result)
+{
+    import std.string : indexOf;
+
+    const ptrdiff_t start = text.indexOf(open);
+    if (start < 0)
+        return false;
+
+    const size_t from = start + open.length;
+    const ptrdiff_t stop = text.indexOf(close, from);
+    if (stop < 0)
+        return false;
+
+    result = text[from .. stop];
+    return true;
+}
+
+/**
+ * Builds an Entry from one HTML file.
+ *
+ * The description is the text between <body ...> and </body>, or failing
+ * that between <!-- bct:beg --> and <!-- bct:end -->, with `&`, `<` and `>`
+ * replaced by their XML entities.
+ *
+ * Params:
+ *   dirEntry = the file to read; its `name` is used as the path.
+ *
+ * Returns: an Entry with `description` populated.
+ *
+ * Throws: Exception if neither pair of markers is present; readText may also
+ *         throw if the file cannot be read or is not valid UTF-8.
+ */
+Entry make_entry(DirEntry dirEntry)
+{
+    import std.array : replace;
+    import std.file : readText;
+    import std.string : indexOf;
+
+    string text = readText(dirEntry.name);
+    string content;
+    bool found = false;
+
+    // Preferred source: the <body> element. The opening tag may carry
+    // attributes, so skip forward to the '>' that ends it.
+    string raw;
+    if (slice_between(text, "<body", "</body>", raw))
+    {
+        const ptrdiff_t tagEnd = raw.indexOf('>');
+        if (tagEnd >= 0)
+        {
+            content = raw[tagEnd + 1 .. $];
+            found = true;
+        }
+    }
+
+    // Fallback: explicit begin/end comment markers.
+    if (!found)
+        found = slice_between(text, "<!-- bct:beg -->", "<!-- bct:end -->",
+                              content);
+
+    if (!found)
+        throw new Exception(dirEntry.name
+            ~ ": no <body> element or bct:beg/bct:end markers found");
+
+    Entry entry;
+    // Escape '&' first so the entities produced below are not re-escaped.
+    entry.description = content
+        .replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;");
+
+    // TODO: populate title, link, pubDate and guid from the rssmeta file or
+    // from the <!-- ... --> metadata comments described at the top of this
+    // file.
+    return entry;
+}
+
+
+
+
+import std.stdio;
+import std.algorithm : canFind;
+import std.file : dirEntries, DirEntry;
+
+// Flags which are passed to -i (--ignore).
+immutable FILE_WO_ENTRY = "file-wo-entry";
+immutable ENTRY_WO_FILE = "entry-wo-file";
+immutable DASH_I_PARAMS = [ FILE_WO_ENTRY, ENTRY_WO_FILE ];
+
+/**
+ * Program entry point: parses the command line, collects the HTML files in
+ * DIRECTORY and converts each one into an Entry.
+ *
+ * Params:
+ *   args = the command line; args[0] is the program name and is skipped.
+ *
+ * Returns: 0 on success, 1 if DIRECTORY or TARGET is missing. A bad or
+ *          missing flag value terminates the process through exit(-1).
+ */
+int main(string[] args)
+{
+    import core.stdc.stdlib : exit;
+    import std.algorithm : canFind, filter;
+    import std.file : dirEntries, DirEntry, SpanMode;
+    import std.stdio : stderr;
+
+    /* Flags */
+    string metafile;
+    string order = "reverse";
+    string directory;
+    bool directory_set = false;
+    string target;
+    bool target_set = false;
+    string[] ignore;
+    bool recursive = false;
+    bool follow_symlinks = false;
+
+    // Returns the value that follows the flag at args[i] and advances i past
+    // it; reports an error and exits if the flag is the last argument.
+    string flag_value(ref size_t i)
+    {
+        if (i + 1 >= args.length)
+        {
+            stderr.writeln("Missing value for ", args[i]);
+            exit(-1);
+        }
+        return args[++i];
+    }
+
+    // Start at 1: args[0] is the program name, not DIRECTORY.
+    for (size_t i = 1; i < args.length; ++i)
+    {
+        if (args[i] == "-m" || args[i] == "--metafile")
+            metafile = flag_value(i);
+        else if (args[i] == "-h" || args[i] == "--help")
+            usage();
+        else if (args[i] == "-r" || args[i] == "--recursive")
+            recursive = true;
+        else if (args[i] == "-f" || args[i] == "--follow")
+            follow_symlinks = true;
+        else if (args[i] == "-o" || args[i] == "--order")
+            order = flag_value(i);
+        else if (args[i] == "-i" || args[i] == "--ignore")
+        {
+            auto param = flag_value(i);
+            if (DASH_I_PARAMS.canFind(param))
+                ignore ~= param;
+            else
+            {
+                stderr.writeln("Bad param for -i:", param);
+                exit(-1);
+            }
+        }
+        else
+        {
+            // Positional arguments: DIRECTORY first, then TARGET.
+            if (!directory_set)
+            {
+                directory_set = true;
+                directory = args[i];
+            }
+            else
+            {
+                target_set = true;
+                target = args[i];
+            }
+        }
+    }
+
+    if (!directory_set || !target_set)
+    {
+        stderr.writeln("Both DIRECTORY and TARGET are required; see --help.");
+        return 1;
+    }
+
+    // Grab list of files in directory. The pattern is a glob matched against
+    // each file's base name, so it needs the leading `*`.
+    DirEntry[] objects;
+    foreach (DirEntry file; dirEntries(directory, "*.html",
+                                       recursive ? SpanMode.depth
+                                                 : SpanMode.shallow,
+                                       follow_symlinks)
+                            .filter!(f => f.isFile))
+    {
+        objects ~= file;
+    }
+
+    // Convert the file, given all info for the metadata, into an Entry.
+    Entry[] items;
+    foreach (DirEntry dirEntry; objects)
+    {
+        items ~= make_entry(dirEntry);
+    }
+
+    // TODO: Iterate over all entries in the order described by the -o flag
+    // (`order`) and create the RSS item for each. This gets spat back out as
+    // a big string.
+
+    // TODO: Write all the fluff around the entries.
+
+    // TODO: Write back the full RSS XML into the file specified by `target`.
+    // `metafile` and `ignore` are parsed above but not consumed yet.
+
+    return 0;
+}