aboutsummaryrefslogtreecommitdiff
path: root/noobfarm2fortune.pl
blob: 583688bcc2990a45049426054beb2d732c5a90b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/perl -CIOESA -w

# Download quotes from noobfarm.org, format into a fortune file.

# Name this script was run as, minus any leading directory part. Shown
# in the usage message.
$SELF = $0;
$SELF =~ s{.*/}{};

$VERSION = '0.9.99';

# Scratch file the downloaded JSON is saved to, and the fortune text
# file that gets written (the latter gains a directory prefix later if
# --output-dir is given).
$jsonfile    = 'noobfarm.json';
$fortunefile = 'noobfarm';

# Left margin for the attribution line printed under each quote.
$indent = ' ' x 30;

use JSON;
use Getopt::Long;

# Print the help text and exit.
# Takes one optional argument: the exit status (default 0). A non-zero
# status means we were called because of a usage error, so the text goes
# to STDERR; otherwise (plain --help) it goes to STDOUT.
# Never returns.
sub usage {
	my $status = $_[0] || 0;
	my $out = $status ? \*STDERR : \*STDOUT;

	print {$out} <<EOF;
$SELF [-h] [-d | -p] [-k] [-n] [-o dir]

$SELF v$VERSION by B. Watson (urchlay\@slackware.uk), released
under the WTFPL. Do WTF you like with this. For full license, see
http://sam.zoy.org/wtfpl/

Downloads quotes from noobfarm.org, formats them as a fortune
database. Output is a text file called "noobfarm" and an index called
"noobfarm.dat", created in the current directory (or wherever the -o
option says).

Options:
 -h, --help
   This usage message.
 -o, --output-dir=DIR
   Write the noobfarm and noobfarm.dat files here. This could be
   /usr/share/games/fortunes to write directly to the system-wide
   fortune database. Default is the current directory.
 -d, --download-only
   Download the quotes, but do not parse or create a fortune file.
   The downloaded file will be saved as "$jsonfile" in the
   current directory. This option automatically enables -k/--keep-json.
 -n, --no-format
   Do not format the output with the fmt(1) command. Default is to use
   "fmt -s" to break up long lines in the fortune file.
 -p, --process-only
   Do not download the quotes. Instead, use the file "$jsonfile"
   in the current directory (possibly created by a previous run with
   the -d, --download-only option, and possibly edited since then).
   This option automatically enables -k/--keep-json.
 -k, --keep-json
   Do not delete "$jsonfile" before exiting. Normally, without the
   -d/--download-only or -p/--process-only options, this gets deleted.

Notes:
 - This script is NOT supported by the owners of noobfarm.org. Do NOT
   contact them if you have problems.
 - Single-quotes and/or backslashes are silently removed from the
   directory name for --output-dir. WONTFIX. Spaces are actually OK :)
 - Exit status is generally 0 (success) or non-zero (failure). But
   every possible failure mode hasn't been tested, so YMMV.
 - I wouldn't run this with elevated privileges, or from something
   like a CGI script, if I were you. It hasn't been audited for
   security and never will be (use or use not, there is no warranty).
EOF
	exit $status;
}

# Download all the quotes in one request, saving them as $jsonfile in
# the current directory (any stale copy is removed first). As of this
# writing, it's a 1.1MB json file.
# Takes no arguments. Dies if wget can't be run or exits non-zero.
# wget is run with the list form of system(), so no shell is involved:
# the * in the URL can't be glob-expanded and nothing needs quoting.
# wget itself warns about wildcards in HTTP (the * in the URL), but
# changing it to an escaped form, %2a, breaks the search (0 results).
# So let it warn.
# We ask for the first billion quotes all at once. I think it'll be
# several eons before there are more than a billion quotes...
sub download {
	my $url = "https://noobfarm.org/search/Approved:T*/1/999999999";

	unlink($jsonfile);

	my $rc = system(
		"wget",
		"-O", $jsonfile,
		"--header=Accept: application/json",
		$url,
	);

	# system() returns -1 if wget couldn't be started at all (reason in
	# $!), otherwise the wait status; the exit code is its high byte.
	if($rc != 0) {
		die "wget failed: " .
			($rc == -1 ? $! : "exit status " . ($rc >> 8)) . "\n";
	}
}

# Read JSON from a filehandle. Return the entire fortune file as a string,
# ready to use except for breaking up long lines (caller does that).
#
# Argument: an open, readable filehandle. It is slurped, then closed.
# Returns:  every quote in ascending numeric ID order, each followed by an
#           attribution line, joined with fortune's "%" separator lines.
# Dies if the input isn't valid JSON.
# All working storage is lexical, so calling this more than once doesn't
# carry quotes over from one call to the next.
sub format_quotes {
	my $fh = shift;

	# Slurp mode: 'local' sets $/ to undef and restores it on return.
	local $/;
	my $input = <$fh>;
	close $fh;

	# from_json will die() on invalid input. Let it.
	my $j = from_json($input);

	my (%quotes, %dates);

	for my $q (@{ $j->{Quotes} || [] }) {
		my $quote = $q->{Quote};
		my $date = $q->{Submitted};

		# \ are doubled
		$quote =~ s,\\\\,\\,g;

		# some (but not all!) newlines are encoded as \\n instead of \n.
		# this is a potentially lossy transform: if the user in the quote
		# actually said a literal '\n', this will turn it into a newline.
		# in practice, this doesn't seem to be a problem.
		$quote =~ s,\\\\?n,\n,g;

		# a \uD8xx-\uDBxx escape directly followed by a \uDCxx-\uDFxx one
		# is a UTF-16 surrogate pair: together they name one character
		# above U+FFFF. Combine them first, otherwise each half would
		# become an (illegal) lone surrogate below.
		$quote =~ s{\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})}
			{chr(0x10000 + ((hex($1) - 0xD800) << 10) + (hex($2) - 0xDC00))}ge;

		# turn the remaining \uXXXX hex escapes back into characters.
		$quote =~ s,\\u([0-9a-fA-F]{4}),chr(hex $1),ge;

		# dates look like: 2020-10-09T22:38:39.404662072Z
		# we only want 2020-10-09.
		$date = substr($date, 0, 10);

		$quotes{$q->{ID}} = $quote;
		$dates{$q->{ID}} = $date;
	}

	# IDs are compared as numbers, so #9 comes before #10.
	my @output;
	for my $id (sort { $a <=> $b } keys %quotes) {
		push @output, $quotes{$id} . "\n" .
			"$indent-- noobfarm.org, quote #$id, $dates{$id}\n";
	}

	return join "%\n", @output;
}

# main()
# Parse the command line, optionally download the JSON, turn it into
# a fortune text file (optionally piped through fmt), then hand over to
# strfile to build the index.

# Option flags. All start out false; GetOptions() fills them in.
$help = $dlonly = $process_only = $outdir = $no_format = $keep_json = 0;

### Parse options
GetOptions(
	'help'          => \$help,
	'download-only' => \$dlonly,
	'output-dir=s'  => \$outdir,
	'no-format'     => \$no_format,
	'process-only'  => \$process_only,
	'keep-json'     => \$keep_json,
) || usage(1);

# -d and -p each imply -k.
$keep_json++ if $dlonly || $process_only;

### Check options
die "--download-only and --process-only can't both be given\n"
	if $dlonly && $process_only;
die "Unknown argument(s): @ARGV" if @ARGV;
usage(0) if $help;

### Create output dir, if needed
if($outdir) {
	# These two characters would break the '...' shell quoting used
	# for the fmt pipe below, so they are dropped from the name.
	$outdir =~ s/['\\]//g;
	system("mkdir", "-p", $outdir);
	unless(-d $outdir && -w _) {
		warn "$SELF: can't create or write to output directory '$outdir'\n";
		exit 1;
	}
	$fortunefile = "$outdir/$fortunefile";
}

### Download the JSON file (and exit, if --download-only)
download() unless $process_only;
exit 0 if $dlonly;

### Open input & output filehandles
# The -C flags on the #! line only cover STDIN/STDOUT/STDERR and @ARGV,
# so these handles need their UTF-8 layers set explicitly. Without them,
# non-ASCII quotes produce "Wide character" warnings or Latin-1 output.
open(my $infh, "<:encoding(UTF-8)", $jsonfile)
	or die "Can't read $jsonfile: $!\n";

my $outfh;
if($no_format) {
	open($outfh, ">:encoding(UTF-8)", $fortunefile)
		or die "Can't write $fortunefile: $!\n";
} else {
	# using a shell pipe like this is the reason for not allowing
	# single quotes or backslashes in the output dir. I know it's not
	# "modern" or "best practice" but it's *so* convenient.
	open($outfh, "|fmt -s > '$fortunefile'") or die "Can't run fmt: $!\n";
	binmode($outfh, ":encoding(UTF-8)");
}

### Write output, close filehandles
print {$outfh} format_quotes($infh);
close $infh; # format_quotes already closed it; harmless if repeated.

# Buffered write errors, and a failing fmt, only show up at close time.
# For a pipe, $! is 0 when the only problem was fmt's exit status.
close($outfh)
	or die "Error writing $fortunefile: " .
		($! || "fmt exit status " . ($? >> 8)) . "\n";

### Clean up, unless asked not to
if($keep_json) {
	print STDERR "Keeping JSON file $jsonfile\n";
} else {
	unlink($jsonfile);
}

### Create the index fortune needs. Let strfile's exit status be ours.
# List form: no shell, so the file name needs no quoting. exec only
# returns if strfile couldn't be started.
exec("strfile", $fortunefile) or die "Can't exec strfile: $!\n";