about | summary | refs | log | tree | commit | diff
path: root/noobfarm2fortune.sh
diff options
context:
space:
mode:
author: B. Watson <yalhcru@gmail.com>, 2015-04-08 03:18:53 -0400
committer: B. Watson <yalhcru@gmail.com>, 2015-04-08 03:18:53 -0400
commit: 122f3c401f23f84799802c7b9667bda222646487 (patch)
tree: bc77cc44c516eac71b2d6490574fd32a5b5efd65 /noobfarm2fortune.sh
download: misc-scripts-122f3c401f23f84799802c7b9667bda222646487.tar.gz
initial commit
Diffstat (limited to 'noobfarm2fortune.sh')
-rwxr-xr-x noobfarm2fortune.sh 205
1 files changed, 205 insertions, 0 deletions
diff --git a/noobfarm2fortune.sh b/noobfarm2fortune.sh
new file mode 100755
index 0000000..479ace2
--- /dev/null
+++ b/noobfarm2fortune.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+
+VERSION=0.99.1
+
+usage() {
+ cat <<EOF
+noobfarm2fortune [-options]
+
+noobfarm2fortune v$VERSION by B. Watson (yalhcru@gmail.com), released
+under the WTFPL. Do WTF you like with this. For full license, see
+http://sam.zoy.org/wtfpl/
+
+Scrapes quotes from noobfarm.org, formats them as a fortune database.
+Output is a text file called "noobfarm" and an index called
+"noobfarm.dat", created in the current directory (or wherever the -o
+option says).
+
+Options:
+ -h --help This usage message.
+ -f --first First quote. Default is 1.
+ -l --last Last quote. Default is to use the highest numbered
+ on the site.
+ -n --no-cache Wipe the cache directory before starting.
+ -s --sleep N Sleep N seconds between fetches. Minimum is 2.
+ -o --output-dir Write the noobfarm and noobfarm.dat files here. This
+ could be /usr/share/games/fortunes to write directly
+ to the system-wide fortune database. Default is the
+ current directory.
+
+This script is NOT supported by the owners of noobfarm.org. Do NOT
+contact them if you have problems.
+EOF
+}
+
+sanity_check() {
+ local exe
+ local bad=0
+
+ for exe in strfile links sed wget touch grep id getopt; do
+ # use 'type' instead of 'which', it's a bash builtin.
+ # also if getopt became a bash builtin one day, this
+ # code wouldn't break.
+ if ! type $exe &>/dev/null; then
+ echo "Can't find $exe in path"
+ bad=1
+ fi
+ done
+
+ if [ "$bad" = "1" ]; then
+ echo "Install the required external programs or fix PATH so they're visible."
+ echo "PATH is set to: $PATH"
+ exit 1
+ fi
+}
+
+# Fatal-error helper: echo all arguments to stderr, then terminate
+# the script with exit status 1.
+die() {
+  >&2 echo "$@"
+  exit 1
+}
+
+extract_quote() {
+ local file="$1"
+ links -html-margin 0 -dump $file > $file.tmp
+ local added=$( sed -n 's/.*\(Added:\)/\1/p' $file.tmp )
+ sed -e '1,/^$/d' -e '/Home || Add Quote/,$d' < $file.tmp >> $QUOTEFILE
+ ( echo ; echo "$INDENT-- noobfarm.org, quote #$quote" ; echo "$INDENT $added " ) >> $QUOTEFILE
+ if [ "$quote" -ne "$LASTQUOTE" ]; then
+ echo '%' >> $QUOTEFILE
+ fi
+ rm -f $file.tmp
+}
+
+# This is completely stupid, but amusing.
+spinchars="|/-\\"
+spinner() {
+ local step
+ local pos
+ local char
+
+ let step=$1/10
+ let pos=$step%4
+ char=${spinchars:$pos:1}
+ echo -ne "[ $char ]\r"
+}
+
+# main() would start here in a real programming language
+
+sanity_check
+
+# Let getopt normalise the command line: it splits bundled short
+# options (-nf5), understands --option=value and abbreviated long
+# options, and always appends a terminating "--".
+if ! OPTS=$(getopt -n noobfarm2fortune -o hno:s:f:l: -l help,no-cache,output-dir:,sleep:,first:,last: -- "$@"); then
+  usage
+  exit 1
+fi
+
+# Replace the positional parameters with getopt's output. That output
+# is shell-quoted, so eval is required to unquote it. Without this step
+# the loop below would look at the raw, un-normalised arguments and
+# reject perfectly valid forms such as --first=5.
+eval set -- "$OPTS"
+
+while true; do
+  case "$1" in
+    -h|--help) usage; exit 0 ;;
+    -n|--no-cache) WIPECACHE=1 ; shift ;;
+    -o|--output-dir) OUTDIR="$2" ; shift 2 ;;
+    -s|--sleep) SLEEPTIME="$2" ; shift 2 ;;
+    -f|--first) FIRSTQUOTE="$2" ; shift 2 ;;
+    -l|--last) LASTQUOTE="$2" ; shift 2 ;;
+    --) shift ; break ;;
+    *) echo "Unknown argument: $1" ; usage ; exit 1 ;;
+  esac
+done
+
+# Anything left after "--" is a non-option argument; none are accepted.
+if [ "$#" -gt 0 ]; then
+  usage
+  exit 1
+fi
+
+if [ "$( id -u )" = "0" ]; then
+ CACHEDIR=/var/cache/noobfarm2fortune
+else
+ CACHEDIR=/tmp/noobfarm2fortune."$( id -nu )"
+fi
+
+echo "Using cache dir: $CACHEDIR"
+
+OUTDIR=${OUTDIR:-$( pwd )}
+QUOTEFILE=$CACHEDIR/noobfarm
+SLEEPTIME=${SLEEPTIME:-2}
+INDENT=" " # default is 37 spaces
+
+OUTDIR="$( readlink -m "$OUTDIR" )"
+echo "Using output dir: $OUTDIR"
+
+case "$SLEEPTIME" in
+ 1|1.*|.*|0*|"") echo "Ignoring sleep time $SLEEPTIME, using 2 sec"
+ SLEEPTIME=2 ;;
+ [0-9]*) ;;
+ *) echo "Invalid sleep time $SLEEPTIME" ; usage ; exit 1 ;;
+esac
+
+if [ "$WIPECACHE" != "" ]; then
+ rm -rf $CACHEDIR
+ echo "Wiped cache"
+fi
+
+mkdir -p $CACHEDIR
+cd $CACHEDIR || die "Couldn't create $CACHEDIR"
+rm -f $QUOTEFILE
+
+# Without -l, scrape the front page for the highest quote number.
+if [ "$LASTQUOTE" = "" ]; then
+  LASTQUOTE="$( wget -q -O- http://noobfarm.org/ | sed -n 's/.*div *id *= *"quote_\([0-9]*\)">.*/\1/p' | sort -n | tail -1 )"
+
+  if [ -z "$LASTQUOTE" ]; then
+    die "can't get last quote number from site (page layout changed?)"
+  fi
+
+  if echo "$LASTQUOTE" | grep -q '[^0-9]'; then
+    die "got weirdness \"$LASTQUOTE\" instead of last quote number (page layout changed?)"
+  fi
+fi
+
+FIRSTQUOTE=${FIRSTQUOTE:-1}
+
+# Values given with -f/-l come straight from the command line. Reject
+# anything that is not a plain non-negative integer here, instead of
+# letting the numeric tests in the loops further down fail.
+case "$FIRSTQUOTE" in
+  *[!0-9]*) die "Invalid first quote number \"$FIRSTQUOTE\"" ;;
+esac
+case "$LASTQUOTE" in
+  *[!0-9]*) die "Invalid last quote number \"$LASTQUOTE\"" ;;
+esac
+
+# Force base 10 so a leading zero (e.g. 08) is neither read as octal
+# nor carried into the cache file names.
+FIRSTQUOTE=$(( 10#$FIRSTQUOTE ))
+LASTQUOTE=$(( 10#$LASTQUOTE ))
+
+echo "Last quote is $LASTQUOTE"
+
+quote=$FIRSTQUOTE
+
+# Download every quote page of the range that is not in the cache yet.
+#
+# The .inprogress marker is there in case the user presses ^C
+# while wget's waiting for data (resulting in a 0-byte HTML file).
+# The marker is also left behind when wget fails: wget -O creates the
+# output file even then, and without the marker that useless file would
+# count as cached on every later run. With the marker in place the next
+# run deletes the file and fetches the quote again.
+while [ "$quote" -le "$LASTQUOTE" ]; do
+  spinner "$quote"
+  if [ -e "$quote".inprogress ]; then
+    rm -f "$quote".inprogress "$quote".html
+  fi
+
+  if [ ! -e "$quote".html ]; then
+    echo -ne "Getting quote $quote... \b\b\b\b\b\b\b\b"
+    touch "$quote".inprogress
+    if wget -q -O "$quote".html "http://noobfarm.org/index.php?id=$quote"; then
+      echo -ne "OK\r"
+      rm -f "$quote".inprogress
+    else
+      echo "FAIL"
+    fi
+    # Be polite to the server: pause after every fetch attempt.
+    sleep "$SLEEPTIME"
+  fi
+  quote=$(( quote + 1 ))
+done
+
+# Second pass: walk the same range again and turn each cached page
+# into a fortune entry.
+quote=$FIRSTQUOTE
+
+while [ "$quote" -le "$LASTQUOTE" ]; do
+  spinner "$quote"
+  if [ ! -s "$quote.html" ]; then
+    # A missing or 0-byte page means the download failed; say so
+    # instead of calling the page malformed.
+    echo "Quote #$quote was not downloaded, skipping"
+  elif grep -q '"quote_output"' "$quote.html"; then
+    # The page carries a quote.
+    extract_quote "$quote.html"
+  elif grep -q 'That quote does not exist' "$quote.html"; then
+    # The site has no quote with this number; note it on the status line only.
+    echo -ne " Quote #$quote does not exist\r"
+  else
+    echo "Quote #$quote seems malformed, check $CACHEDIR/$quote.html"
+  fi
+  quote=$(( quote + 1 ))
+done
+
+set -e
+echo
+mkdir -p "$OUTDIR"
+cd "$OUTDIR"
+cp $QUOTEFILE .
+strfile $( basename $QUOTEFILE )