aboutsummaryrefslogtreecommitdiff
path: root/noobfarm2fortune.pl
diff options
context:
space:
mode:
Diffstat (limited to 'noobfarm2fortune.pl')
-rwxr-xr-xnoobfarm2fortune.pl184
1 files changed, 184 insertions, 0 deletions
diff --git a/noobfarm2fortune.pl b/noobfarm2fortune.pl
new file mode 100755
index 0000000..06e6dc4
--- /dev/null
+++ b/noobfarm2fortune.pl
@@ -0,0 +1,184 @@
+#!/usr/bin/perl -CIOESA -w
+
+# Download quotes from noobfarm.org, format into a fortune file.
+
+($SELF = $0) =~ s,.*/,,;
+$VERSION = "0.9.99";
+
+$fortunefile = 'noobfarm';
+$jsonfile = 'noobfarm.json';
+$indent = " " x 30;
+
+use JSON;
+use Getopt::Long;
+
+# Print the help text and exit; never returns.
+#
+# Argument: the exit status (optional, defaults to 0).
+#
+# With a zero status (the user asked for help) the text goes to stdout.
+# With a non-zero status (bad command line) it goes to stderr, so it
+# can't get mixed into redirected output or hidden by a pipe.
+sub usage {
+    my $status = $_[0] || 0;
+
+    # $SELF, $VERSION and $jsonfile are interpolated into the text.
+    my $text = <<EOF;
+$SELF [-h] [-d|-p] [-k] [-n] [-o dir]
+
+$SELF v$VERSION by B. Watson (yalhcru\@gmail.com), released
+under the WTFPL. Do WTF you like with this. For full license, see
+http://sam.zoy.org/wtfpl/
+
+Downloads quotes from noobfarm.org, formats them as a fortune
+database. Output is a text file called "noobfarm" and an index called
+"noobfarm.dat", created in the current directory (or wherever the -o
+option says).
+
+Options:
+ -h, --help
+ This usage message.
+ -o, --output-dir=DIR
+ Write the noobfarm and noobfarm.dat files here. This could be
+ /usr/share/games/fortunes to write directly to the system-wide
+ fortune database. Default is the current directory.
+ -d, --download-only
+ Download the quotes, but do not parse or create a fortune file.
+ The downloaded file will be saved as "$jsonfile" in the
+ current directory. This option automatically enables -k/--keep-json.
+ -n, --no-format
+ Do not format the output with the fmt(1) command. Default is to use
+ "fmt -s" to break up long lines in the fortune file.
+ -p, --process-only
+ Do not download the quotes. Instead, use the file "$jsonfile"
+ in the current directory (possibly created by a previous run with
+ the -d, --download-only option, and possibly edited since then).
+ This option automatically enables -k/--keep-json.
+ -k, --keep-json
+ Do not delete "$jsonfile" before exiting. Normally, without the
+ -d/--download-only or -p/--process-only options, this gets deleted.
+
+Notes:
+ - This script is NOT supported by the owners of noobfarm.org. Do NOT
+ contact them if you have problems.
+ - Single-quotes and/or backslashes are silently removed from the
+ directory name for --output-dir. WONTFIX. Spaces are actually OK :)
+ - Exit status is generally 0 (success) or non-zero (failure). But
+ every possible failure mode hasn't been tested, so YMMV.
+ - I wouldn't run this with elevated privileges, or from something
+ like a CGI script, if I were you. It hasn't been audited for
+ security and never will be (use or use not, there is no warranty).
+EOF
+
+    print { $status ? \*STDERR : \*STDOUT } $text;
+    exit $status;
+}
+
+# Download all the quotes in one request. As of this writing, it's a
+# 1.1MB json file.
+# wget warns about wildcards in HTTP (the * in the URL), but changing
+# it to an escaped form, %2a, breaks the search (0 results). So let
+# it warn.
+# We ask for the first billion quotes all at once. I think it'll be
+# several eons before there are more than a billion quotes...
+# Runs wget to save the search results as $jsonfile in the current
+# directory, removing any previous copy first. Takes no arguments.
+# Dies if wget can't be started or exits with a non-zero status.
+sub download {
+    unlink($jsonfile);
+
+    # One list element per argument: list-form system() runs wget
+    # directly, without a shell, so the '*' in the URL can never be
+    # glob-expanded and nothing in $jsonfile can be misread as shell
+    # syntax. That's also why the header value needs no quoting here.
+    my @cmd = (
+        "wget",
+        "-O$jsonfile",
+        "--header=Accept: application/json",
+        "https://noobfarm.org/search/Approved:T*/1/999999999",
+    );
+
+    # system() returns 0 only on success; anything else (including -1
+    # when wget could not be executed at all) is a failure.
+    system(@cmd) == 0 or die "wget failed";
+}
+
+# Read JSON from a filehandle. Return the entire fortune file as a string,
+# ready to use except for breaking up long lines (caller does that).
+sub format_quotes {
+    # Argument: an open filehandle to read the JSON from. It is read to
+    # EOF and closed here.
+    # Returns: one string holding every quote followed by its attribution
+    # line, sorted by numeric quote ID, with "%\n" between entries.
+    # Dies (inside from_json) if the input isn't valid JSON.
+    #
+    # All working data is lexical, so calling this more than once never
+    # mixes in quotes from an earlier call.
+    my $fh = shift;
+
+    # Slurp the whole file; $/ is only undefined inside the do block.
+    my $input = do { local $/; <$fh> };
+    close $fh;
+
+    # from_json will die() on invalid input. Let it.
+    my $j = from_json($input);
+
+    my (%quotes, %dates);
+
+    for my $entry (@{$j->{Quotes}}) {
+        my $id = $entry->{ID};
+        my $quote = $entry->{Quote};
+        my $date = $entry->{Submitted};
+
+        # An entry with no ID or no text can't be turned into a fortune.
+        next unless defined($id) && defined($quote);
+        $date = '' unless defined $date;
+
+        # \ are doubled
+        $quote =~ s{\\\\}{\\}g;
+
+        # some (but not all!) newlines are encoded as \\n instead of \n.
+        # this is a potentially lossy transform: if the user in the quote
+        # actually said a literal '\n', this will turn it into a newline.
+        # in practice, this doesn't seem to be a problem.
+        $quote =~ s{\\\\?n}{\n}g;
+
+        # a \uD8xx-\uDBxx escape followed by a \uDCxx-\uDFxx escape is a
+        # UTF-16 surrogate pair describing ONE character above U+FFFF.
+        # Combine the pair first; converting each half on its own would
+        # produce two lone surrogates, which are not valid in UTF-8.
+        $quote =~ s{\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})}
+                   {chr(0x10000 + ((hex($1) - 0xD800) << 10) + (hex($2) - 0xDC00))}ge;
+
+        # turn the remaining \uXXXX hex escapes back into characters.
+        $quote =~ s{\\u([0-9a-fA-F]{4})}{chr(hex $1)}ge;
+
+        # dates look like: 2020-10-09T22:38:39.404662072Z
+        # we only want 2020-10-09.
+        $date = substr($date, 0, 10);
+
+        $quotes{$id} = $quote;
+        $dates{$id} = $date;
+    }
+
+    # Oldest (lowest-numbered) quote first. $indent pushes the
+    # attribution line over to the right.
+    my @output;
+    for my $id (sort { $a <=> $b } keys %quotes) {
+        push @output, $quotes{$id} . "\n" .
+            "$indent-- noobfarm.org, quote #$id, $dates{$id}\n";
+    }
+
+    return join "%\n", @output;
+}
+
+# main()
+
+# Option flags, all off by default. An empty $outdir means "write to the
+# current directory".
+my ($help, $dlonly, $process_only, $no_format, $keep_json) = (0) x 5;
+my $outdir = '';
+
+### Parse options
+GetOptions(
+    'help'          => \$help,
+    'download-only' => \$dlonly,
+    'output-dir=s'  => \$outdir,
+    'no-format'     => \$no_format,
+    'process-only'  => \$process_only,
+    'keep-json'     => \$keep_json,
+) || usage(1);
+
+### A request for help wins over any other problem on the command line
+usage(0) if $help;
+
+### Check options
+die "--download-only and --process-only can't both be given\n"
+    if $dlonly && $process_only;
+die "Unknown argument(s): @ARGV\n" if @ARGV;
+
+# Either of these options implies --keep-json.
+$keep_json = 1 if $dlonly || $process_only;
+
+### Create output dir, if needed
+if(length $outdir) {
+    # The name ends up inside single quotes in a shell command (the fmt
+    # pipe below), so single quotes and backslashes have to go.
+    $outdir =~ s/['\\]//g;
+    system("mkdir", "-p", $outdir);
+    unless(-d $outdir && -w _) {
+        print STDERR "$SELF: output directory '$outdir' is missing or not writable\n";
+        exit 1;
+    }
+    $fortunefile = "$outdir/$fortunefile";
+}
+
+### Download the JSON file (and exit, if --download-only)
+download() unless $process_only;
+exit 0 if $dlonly;
+
+### Read and format the quotes
+# The -C switch on the #! line only covers stdin/stdout/stderr and @ARGV,
+# not files opened here, so the UTF-8 layer is requested explicitly.
+# This is done before the output file is opened: if the JSON turns out
+# to be unreadable or invalid, an existing fortune file is left alone.
+open(my $infh, "<:encoding(UTF-8)", $jsonfile) or die "$jsonfile: $!\n";
+my $fortunes = format_quotes($infh);
+close $infh; # format_quotes closes it too; a second close is harmless
+
+### Open output filehandle
+my $outfh;
+if($no_format) {
+    open($outfh, ">", $fortunefile) or die "$fortunefile: $!\n";
+} else {
+    # using a shell pipe like this is the reason for not allowing
+    # single quotes or backslashes in the output dir. I know it's not
+    # "modern" or "best practice" but it's *so* convenient.
+    open($outfh, "|-", "fmt -s > '$fortunefile'") or die "fmt: $!\n";
+}
+binmode($outfh, ":encoding(UTF-8)");
+
+### Write output, close filehandle
+print {$outfh} $fortunes;
+
+# Buffered write errors, and a failing fmt, only show up at close time.
+# For a pipe, $! is 0 when the command merely exited non-zero.
+unless(close $outfh) {
+    die "$SELF: error writing $fortunefile: ",
+        ($! || "fmt exited with status " . ($? >> 8)), "\n";
+}
+
+### Clean up, unless asked not to
+if($keep_json) {
+    print STDERR "Keeping JSON file $jsonfile\n";
+} else {
+    unlink($jsonfile);
+}
+
+### Create the index fortune needs. Let strfile's exit status be ours.
+# List form: strfile is run directly, no shell involved. exec only
+# returns if strfile could not be started.
+exec("strfile", $fortunefile) or die "$SELF: can't run strfile: $!\n";