diff options
Diffstat (limited to 'noobfarm2fortune.pl')
-rwxr-xr-x | noobfarm2fortune.pl | 184 |
1 files changed, 184 insertions, 0 deletions
#!/usr/bin/perl -CIOESA -w

# Download quotes from noobfarm.org, format into a fortune file.

use strict;
use warnings;

use JSON;
use Getopt::Long;

(my $SELF = $0) =~ s,.*/,,;
our $VERSION = "0.9.99";

# Base name of the fortune text file; the output dir (if any) is
# prepended to this in the main code below.
my $fortunefile = 'noobfarm';

# Scratch file the downloaded JSON is saved to, in the current directory.
my $jsonfile = 'noobfarm.json';

# Leading whitespace for the attribution line under each quote.
my $indent = " " x 30;

# Print the help text and exit.
# Argument: exit status to use (defaults to 0 if missing or false).
sub usage {
	print <<EOF;
$SELF [-d] [-n] [-p] [-o dir]

$SELF v$VERSION by B. Watson (yalhcru\@gmail.com), released
under the WTFPL. Do WTF you like with this. For full license, see
http://sam.zoy.org/wtfpl/

Downloads quotes from noobfarm.org, formats them as a fortune
database. Output is a text file called "noobfarm" and an index called
"noobfarm.dat", created in the current directory (or wherever the -o
option says).

Options:
  -h, --help
    This usage message.
  -o, --output-dir=DIR
    Write the noobfarm and noobfarm.dat files here. This could be
    /usr/share/games/fortunes to write directly to the system-wide
    fortune database. Default is the current directory.
  -d, --download-only
    Download the quotes, but do not parse or create a fortune file.
    The downloaded file will be saved as "$jsonfile" in the
    current directory. This option automatically enables -k/--keep-json.
  -n, --no-format
    Do not format the output with the fmt(1) command. Default is to use
    "fmt -s" to break up long lines in the fortune file.
  -p, --process-only
    Do not download the quotes. Instead, use the file "$jsonfile"
    in the current directory (possibly created by a previous run with
    the -d, --download-only option, and possibly edited since then).
    This option automatically enables -k/--keep-json.
  -k, --keep-json
    Do not delete "$jsonfile" before exiting. Normally, without the
    -d/--download-only or -p/--process-only options, this gets deleted.

Notes:
  - This script is NOT supported by the owners of noobfarm.org. Do NOT
    contact them if you have problems.
  - Single-quotes and/or backslashes are silently removed from the
    directory name for --output-dir. WONTFIX. Spaces are actually OK :)
  - Exit status is generally 0 (success) or non-zero (failure). But
    every possible failure mode hasn't been tested, so YMMV.
  - I wouldn't run this with elevated privileges, or from something
    like a CGI script, if I were you. It hasn't been audited for
    security and never will be (use or use not, there is no warranty).
EOF
	exit ($_[0] || 0);
}

# Download all the quotes in one request. As of this writing, it's a
# 1.1MB json file.
# wget warns about wildcards in HTTP (the * in the URL), but changing
# it to an escaped form, %2a, breaks the search (0 results). So let
# it warn.
# We ask for the first billion quotes all at once. I think it'll be
# several eons before there are more than a billion quotes...
#
# Takes no arguments. Removes any stale $jsonfile first, then writes the
# response to $jsonfile. Dies if wget can't be started or exits non-zero.
sub download {
	unlink($jsonfile);

	# List form: no shell is involved, so the '*' in the URL can never
	# be glob-expanded and the header needs no shell quoting.
	my $status = system(
		"wget",
		"-O$jsonfile",
		"--header=Accept: application/json",
		"https://noobfarm.org/search/Approved:T*/1/999999999"
	);

	return if $status == 0;
	die "wget failed: " .
		($status == -1 ? $! : "exit status " . ($? >> 8)) . "\n";
}

# Read JSON from a filehandle. Return the entire fortune file as a string,
# ready to use except for breaking up long lines (caller does that).
#
# Argument: an open, readable filehandle. It is read to EOF but NOT
# closed; that's the caller's job. All state is lexical, so calling this
# more than once never mixes data between calls.
sub format_quotes {
	my $fh = shift;

	# Slurp the whole file; $/ is restored when the do-block ends.
	my $input = do { local $/; <$fh> };

	# from_json will die() on invalid input. Let it.
	my $j = from_json($input);

	my (%quotes, %dates);

	for my $q (@{ $j->{Quotes} || [] }) {
		my $quote = $q->{Quote};
		my $date = $q->{Submitted};

		# \ are doubled
		$quote =~ s,\\\\,\\,g;

		# some (but not all!) newlines are encoded as \\n instead of \n.
		# this is a potentially lossy transform: if the user in the quote
		# actually said a literal '\n', this will turn it into a newline.
		# in practice, this doesn't seem to be a problem.
		$quote =~ s,\\\\?n,\n,g;

		# turn \uXXXX hex escapes back into the characters they stand for.
		$quote =~ s,\\u([0-9a-fA-F]{4}),chr(hex $1),ge;

		# dates look like: 2020-10-09T22:38:39.404662072Z
		# we only want 2020-10-09.
		$date = substr($date, 0, 10);

		$quotes{$q->{ID}} = $quote;
		$dates{$q->{ID}} = $date;
	}

	# One fortune per quote, in ascending numeric ID order, each followed
	# by an indented attribution line.
	my @output = map {
		$quotes{$_} . "\n" .
			"$indent-- noobfarm.org, quote #$_, $dates{$_}\n"
	} sort { $a <=> $b } keys %quotes;

	# fortune(6) entries are separated by a line containing only '%'.
	return join "%\n", @output;
}

# main()

my ($help, $dlonly, $process_only, $no_format, $keep_json) = (0) x 5;
my $outdir;

### Parse options
GetOptions(
	'help' => \$help,
	'download-only' => \$dlonly,
	'output-dir=s' => \$outdir,
	'no-format' => \$no_format,
	'process-only' => \$process_only,
	'keep-json' => \$keep_json,
) || usage(1);

# -d and -p both imply -k.
$keep_json = 1 if $dlonly || $process_only;

### Check options
die "--download-only and --process-only can't both be given\n"
	if $dlonly && $process_only;
die "Unknown argument(s): @ARGV\n" if @ARGV;
usage(0) if $help;

### Create output dir, if needed
if(defined $outdir && $outdir ne '') {
	# Strip the characters that would break the single-quoted shell
	# redirection used for the fmt pipe further down.
	$outdir =~ s/['\\]//g;
	system("mkdir", "-p", $outdir);
	die "$SELF: output dir '$outdir' is not a writable directory\n"
		unless -d $outdir && -w _;
	$fortunefile = "$outdir/$fortunefile";
}

### Download the JSON file (and exit, if --download-only)
download() unless $process_only;
exit 0 if $dlonly;

### Open input & output filehandles
# The -C switch on the #! line only covers the standard streams and
# @ARGV, so these handles need explicit UTF-8 layers. Without them, one
# \uXXXX escape above U+00FF would make print() emit the whole fortune
# file double-encoded.
open(my $infh, "<:encoding(UTF-8)", $jsonfile)
	or die "$SELF: can't read $jsonfile: $!\n";

my $out;
if($no_format) {
	open($out, ">:encoding(UTF-8)", $fortunefile)
		or die "$SELF: can't write $fortunefile: $!\n";
} else {
	# using a shell pipe like this is the reason for not allowing
	# single quotes or backslashes in the output dir. I know it's not
	# "modern" or "best practice" but it's *so* convenient.
	open($out, "|-", "fmt -s > '$fortunefile'")
		or die "$SELF: can't run fmt: $!\n";
	binmode($out, ":encoding(UTF-8)");
}

### Write output, close filehandles
print $out format_quotes($infh);
close $infh;

# Buffered write errors (and, for the pipe, fmt's exit status) only
# show up when the handle is closed, so this has to be checked.
close($out)
	or die "$SELF: error writing $fortunefile: " .
		($! ? $! : "fmt exit status " . ($? >> 8)) . "\n";

### Clean up, unless asked not to
if($keep_json) {
	print STDERR "Keeping JSON file $jsonfile\n";
} else {
	unlink($jsonfile);
}

### Create the index fortune needs. Let strfile's exit status be ours.
# List form bypasses the shell; exec only returns if it failed.
exec("strfile", $fortunefile)
	or die "$SELF: can't exec strfile: $!\n";