diff options
Diffstat (limited to 'noobfarm2fortune.sh')
-rwxr-xr-x | noobfarm2fortune.sh | 205 |
1 files changed, 205 insertions, 0 deletions
#!/bin/bash
#
# noobfarm2fortune - scrape quotes from noobfarm.org and build a fortune
# database ("noobfarm" text file plus "noobfarm.dat" index) from them.
#
# Flow: check required tools -> parse options -> pick a cache directory ->
# download every quote page that is not cached yet -> convert each page to
# plain text with links(1) -> append to one quote file -> run strfile(1).
#
# Globals shared between the main flow and the helper functions:
#   QUOTEFILE  - path of the fortune text file being assembled (in the cache)
#   INDENT     - whitespace prefix used for the attribution lines
#   quote      - number of the quote currently being processed
#   LASTQUOTE  - number of the last quote to process

VERSION=0.99.1

# Print the help text to stdout.
usage() {
  cat <<EOF
noobfarm2fortune [-options]

noobfarm2fortune v$VERSION by B. Watson (yalhcru@gmail.com), released
under the WTFPL. Do WTF you like with this. For full license, see
http://sam.zoy.org/wtfpl/

Scrapes quotes from noobfarm.org, formats them as a fortune database.
Output is a text file called "noobfarm" and an index called
"noobfarm.dat", created in the current directory (or wherever the -o
option says).

Options:
  -h --help         This usage message.
  -f --first        First quote. Default is 1.
  -l --last         Last quote. Default is to use the highest numbered
                    on the site.
  -n --no-cache     Wipe the cache directory before starting.
  -s --sleep N      Sleep N seconds between fetches. Minimum is 2.
  -o --output-dir   Write the noobfarm and noobfarm.dat files here. This
                    could be /usr/share/games/fortunes to write directly
                    to the system-wide fortune database. Default is the
                    current directory.

This script is NOT supported by the owners of noobfarm.org. Do NOT
contact them if you have problems.
EOF
}

# Verify that every external program the script relies on is reachable.
# Reports all missing programs (not just the first) and exits 1 if any
# are missing.
sanity_check() {
  local exe
  local bad=0

  for exe in strfile links sed wget touch grep id getopt; do
    # use 'type' instead of 'which', it's a bash builtin.
    # also if getopt became a bash builtin one day, this
    # code wouldn't break.
    if ! type "$exe" &>/dev/null; then
      echo "Can't find $exe in path"
      bad=1
    fi
  done

  if [ "$bad" = "1" ]; then
    echo "Install the required external programs or fix PATH so they're visible."
    echo "PATH is set to: $PATH"
    exit 1
  fi
}

# Print the arguments to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Convert one downloaded quote page to text and append it to $QUOTEFILE.
# Arguments: $1 - path of the cached HTML file
# Globals:   QUOTEFILE, INDENT, quote, LASTQUOTE (all read)
extract_quote() {
  local file="$1"
  local added

  links -html-margin 0 -dump "$file" > "$file.tmp"

  # Keep the "Added: ..." text, dropping whatever precedes it on the line.
  added=$(sed -n 's/.*\(Added:\)/\1/p' "$file.tmp")

  # Quote body: drop everything up to and including the first blank line,
  # and everything from the "Home || Add Quote" footer to the end.
  sed -e '1,/^$/d' -e '/Home || Add Quote/,$d' < "$file.tmp" >> "$QUOTEFILE"

  {
    echo
    echo "$INDENT-- noobfarm.org, quote #$quote"
    echo "$INDENT $added "
  } >> "$QUOTEFILE"

  # '%' is the fortune record separator; none is written after the
  # highest-numbered quote.
  if [ "$quote" -ne "$LASTQUOTE" ]; then
    echo '%' >> "$QUOTEFILE"
  fi

  rm -f "$file.tmp"
}

# This is completely stupid, but amusing.
# Draws a one-character spinner that advances once every 10 quotes.
# Arguments: $1 - current quote number (decimal)
spinchars="|/-\\"
spinner() {
  local step=$(( $1 / 10 ))
  local pos=$(( step % 4 ))

  printf '[ %s ]\r' "${spinchars:pos:1}"
}

# main() would start here in a real programming language

sanity_check

if ! OPTS=$(getopt -n noobfarm2fortune -o hno:s:f:l: \
    -l help,no-cache,output-dir:,sleep:,first:,last: -- "$@"); then
  usage
  exit 1
fi

# Replace the positional parameters with getopt's normalised output so
# that "--first=5", "-f5" and bundled short options are all understood.
# This is the idiom documented for enhanced getopt.
eval set -- "$OPTS"

while true; do
  case "$1" in
    -h|--help) usage; exit 0 ;;
    -n|--no-cache) WIPECACHE=1; shift ;;
    -o|--output-dir) OUTDIR="$2"; shift 2 ;;
    -s|--sleep) SLEEPTIME="$2"; shift 2 ;;
    -f|--first) FIRSTQUOTE="$2"; shift 2 ;;
    -l|--last) LASTQUOTE="$2"; shift 2 ;;
    --) shift; break ;;
    *) echo "Unknown argument: $1"; usage; exit 1 ;;
  esac
done

# No non-option arguments are accepted.
if (( $# > 0 )); then
  usage
  exit 1
fi

# NOTE(review): the non-root cache lives at a predictable path in /tmp;
# on a multi-user machine another user could pre-create it.
if [ "$( id -u )" = "0" ]; then
  CACHEDIR=/var/cache/noobfarm2fortune
else
  CACHEDIR=/tmp/noobfarm2fortune."$( id -nu )"
fi

echo "Using cache dir: $CACHEDIR"

OUTDIR=${OUTDIR:-$( pwd )}
QUOTEFILE=$CACHEDIR/noobfarm
SLEEPTIME=${SLEEPTIME:-2}
printf -v INDENT '%37s' ''   # default is 37 spaces

# Resolve to an absolute path now: the script cd's into the cache
# directory below, which would break a relative output directory.
OUTDIR="$( readlink -m "$OUTDIR" )"
echo "Using output dir: $OUTDIR"

# The sleep time must be a plain non-negative decimal number; anything
# below 2 seconds is bumped up to 2.
sleep_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
if [[ ! "$SLEEPTIME" =~ $sleep_re ]]; then
  echo "Invalid sleep time $SLEEPTIME"
  usage
  exit 1
fi
sleep_whole=${SLEEPTIME%%.*}
if (( 10#${sleep_whole:-0} < 2 )); then
  echo "Ignoring sleep time $SLEEPTIME, using 2 sec"
  SLEEPTIME=2
fi

if [ "$WIPECACHE" != "" ]; then
  rm -rf -- "${CACHEDIR:?}"
  echo "Wiped cache"
fi

mkdir -p "$CACHEDIR"
cd "$CACHEDIR" || die "Couldn't create $CACHEDIR"
rm -f "$QUOTEFILE"

if [ -z "$LASTQUOTE" ]; then
  # Highest quote id that appears on the front page.
  LASTQUOTE="$( wget -q -O- http://noobfarm.org/ \
    | sed -n 's/.*div *id *= *"quote_\([0-9]*\)">.*/\1/p' \
    | sort -n \
    | tail -1 )"

  if [ -z "$LASTQUOTE" ]; then
    die "can't get last quote number from site (page layout changed?)"
  fi

  if [[ "$LASTQUOTE" =~ [^0-9] ]]; then
    die "got weirdness \"$LASTQUOTE\" instead of last quote number (page layout changed?)"
  fi
elif [[ ! "$LASTQUOTE" =~ ^[0-9]+$ ]]; then
  die "Invalid last quote number \"$LASTQUOTE\""
fi

FIRSTQUOTE=${FIRSTQUOTE:-1}
if [[ ! "$FIRSTQUOTE" =~ ^[0-9]+$ ]]; then
  die "Invalid first quote number \"$FIRSTQUOTE\""
fi

# Force base 10 so a leading zero ("08") is not taken as octal by the
# arithmetic below.
FIRSTQUOTE=$(( 10#$FIRSTQUOTE ))
LASTQUOTE=$(( 10#$LASTQUOTE ))

echo "Last quote is $LASTQUOTE"

quote=$FIRSTQUOTE

# Pass 1: download every quote page that is not already cached.
# The .inprogress marker is there in case the user presses ^C
# while wget's waiting for data (resulting in a 0-byte HTML file).
while [ "$quote" -le "$LASTQUOTE" ]; do
  spinner "$quote"
  if [ -e "$quote.inprogress" ]; then
    rm -f "$quote.inprogress" "$quote.html"
  fi

  if [ ! -e "$quote.html" ]; then
    # Pad with 8 spaces to wipe leftovers of a longer previous line, then
    # back up over the padding so the result is printed after the "...".
    printf 'Getting quote %s... %8s\b\b\b\b\b\b\b\b' "$quote" ''
    touch "$quote.inprogress"
    if wget -q -O "$quote.html" "http://noobfarm.org/index.php?id=$quote"; then
      printf 'OK\r'
    else
      echo "FAIL"
      # Don't leave an empty/partial page in the cache: it would never be
      # re-fetched and would be reported as malformed on every run.
      rm -f "$quote.html"
    fi
    rm -f "$quote.inprogress"
    sleep "$SLEEPTIME"
  fi
  quote=$(( quote + 1 ))
done

quote=$FIRSTQUOTE

# Pass 2: turn the cached pages into fortune records.
while [ "$quote" -le "$LASTQUOTE" ]; do
  spinner "$quote"
  if [ ! -e "$quote.html" ]; then
    echo "Quote #$quote was not downloaded, skipping"
  elif grep -q '"quote_output"' "$quote.html"; then
    extract_quote "$quote.html"
  elif grep -q 'That quote does not exist' "$quote.html"; then
    echo -ne " Quote #$quote does not exist\r"
  else
    echo "Quote #$quote seems malformed, check $CACHEDIR/$quote.html"
  fi
  quote=$(( quote + 1 ))
done

# Terminate the last \r-style progress line.
echo

if [ ! -s "$QUOTEFILE" ]; then
  die "No quotes were extracted, not writing a fortune database"
fi

mkdir -p "$OUTDIR" || die "Couldn't create $OUTDIR"
cd "$OUTDIR" || die "Couldn't change to $OUTDIR"
cp "$QUOTEFILE" . || die "Couldn't copy $QUOTEFILE to $OUTDIR"
strfile "$( basename "$QUOTEFILE" )" || die "strfile failed"