#!/bin/sh
# elvis: webao -- Search archive.org Wayback Machine (alternate interface)
# Author: B. Watson (yalhcru at gmail)
# Licensed under the WTFPL. See http://www.wtfpl.net/txt/copying/ for details.

# This uses the Wayback Machine Availability JSON API:
# https://archive.org/help/wayback_api.php

# I'm not going to implement a full JSON parser or require one as an
# external dep, this is just crude scraping.

# TODO: the -y stuff isn't quite ready for prime time.

. surfraw || exit 1

# no w3_config_hook as there are no config options (yet?)

# Print the elvis-specific usage text followed by the global usage
# (w3_global_usage).
# NOTE(review): the heredoc opener, the "Usage:"/"Description:" header
# lines and the <timestamp>/<years> placeholders were missing from the
# mangled source and have been reconstructed; the option descriptions
# themselves are the original wording. Confirm against upstream.
w3_usage_hook() {
  cat <<EOF
Usage: ${0##*/} [options] URL
Description:
  Search archive.org Wayback Machine (alternate interface)
Local options:
  -<timestamp>    Timestamp. Result will be the archived snapshot closest
                  to this timestamp. Must be at least the year and month,
                  can include full 14 digits, yyyyMMddhhmmss.
                  Default: today's date. Example: -20100304
  -y, -y=<years>  Search backwards from <timestamp>, one search per month,
                  for <years> years. Plain -y means search 1 year.
                  Be careful with this option as it does rapid repeated
                  requests to the archive.org server and may annoy the
                  operators and/or get your IP banned!
EOF
  w3_global_usage
}

# Validate the global $timestamp: it must consist of 6 to 14 digits.
# Calls err (which reports the message) when it does not.
check_timestap() {
  printf '%s\n' "$timestamp" | grep -Eq '^[0-9]{6,14}$' ||
    err "invalid timestamp (must be 6-14 digits)"
}

# Option hook for this elvis.
#   $1 - the option as given on the command line
#   $2 - the option's argument (used for -y=<years>)
# Sets the globals $timestamp and/or $years.
# Returns 0 when the option was recognised here, 1 otherwise.
w3_parse_option_hook() {
  opt="$1"
  optarg="$2"
  case "$opt" in
    -[0-9]*)
      # Strip the leading dash (POSIX expansion; ${opt/-/} is bash-only).
      timestamp="${opt#-}"
      check_timestap
      ;;
    -y)
      years=1
      ;;
    -y=*)
      # $years is later used in shell arithmetic, so insist on digits.
      case "$optarg" in
        '' | *[!0-9]*)
          err "invalid number of years (must be a whole number)"
          ;;
      esac
      years="$optarg"
      ;;
    *)
      return 1
      ;;
  esac
  return 0
}

# Build the availability-API request URL and print it on stdout.
#   $1 - optional timestamp; appended as the "timestamp" query parameter
#        when non-empty.
# Reads the global $w3_args. When it is empty, prints the usage text and
# exits with status 1.
make_url() {
  local url timestamp
  timestamp="$1"
  url="http://archive.org/wayback/available?url="
  if null "$w3_args"; then
    # Usage goes to stderr: this function is normally called inside a
    # command substitution, where stdout would be captured as the URL.
    w3_usage_hook >&2
    exit 1
  fi
  # $w3_args is deliberately unquoted so each word is passed separately.
  url="$url$( w3_url_of_arg $w3_args )"
  if [ -n "$timestamp" ]; then
    url="$url&timestamp=$timestamp"
  fi
  echo "$url"
}

w3_config
w3_parse_args "$@"

# Nothing to look up: show usage and stop here. The check inside make_url
# only terminates the command-substitution subshell it is called from,
# so it cannot end the script by itself.
if null "$w3_args"; then
  w3_usage_hook
  exit 1
fi

# do http request to site, return result (if any). depends on the fact
# that the json api double-quotes the values, and that the result url
# is always at their site.
# Query the availability API and print any snapshot URL(s) found.
#   $1 - optional timestamp, passed through to make_url
# Output: the lines of the response (split at double quotes) that contain
#         //web.archive.org/web/. Nothing is printed when there is no match.
# Returns 1 without doing a request when make_url fails.
get_result() {
  local url
  url="$( make_url "$1" )" || return 1
  # tr instead of sed 's,",\n,g': "\n" in a sed replacement is a GNU
  # extension, tr handles it portably.
  wget -qO- "$url" | tr '"' '\n' | grep '//web\.archive\.org/web/'
}

# jump directly to the result (when there's only one)
#   $1 - optional timestamp
goto_result() {
  local result
  result="$( get_result "$1" )"
  if [ -z "$result" ]; then
    err "search found no results, sorry"
  fi
  w3_browse_url "$result"
}

# Search backwards one month at a time, starting at the year/month of the
# given timestamp, collect the distinct snapshot URLs and open a generated
# HTML page that links to them.
#   $1 - timestamp; the first 4 characters are the year, the next 2 the month
# Reads the global $years. The loop runs years*12 + 1 times, so the
# starting month is searched as well.
iterate_years() {
  local year month stampmonth count limit stamp link dir

  # $years feeds shell arithmetic below; reject anything but digits.
  case "$years" in
    '' | *[!0-9]*)
      err "invalid number of years (must be a whole number)"
      ;;
  esac

  year="$( printf '%s\n' "$1" | cut -c1-4 )"
  month="$( printf '%s\n' "$1" | cut -c5-6 )"
  # Drop one leading zero so that 08/09 are not read as (invalid) octal.
  month="${month#0}"
  case "$month" in
    [1-9] | 1[0-2]) ;;
    *) err "invalid month in timestamp (must be 01-12)" ;;
  esac

  count=0
  limit=$(( years * 12 ))

  dir="$( mktemp -d "${TMPDIR:-/tmp}/sr.webao.XXXXXX" )" || exit 1
  [ -d "$dir" ] || exit 1

  while [ "$count" -le "$limit" ]; do
    stampmonth="$( printf '%02d' "$month" )"
    stamp="$year$stampmonth"
    get_result "$stamp" >> "$dir/tmp"
    month=$(( month - 1 ))
    if [ "$month" -eq 0 ]; then
      month=12
      year=$(( year - 1 ))
    fi
    # count++ is not available in every /bin/sh (e.g. dash).
    count=$(( count + 1 ))
  done

  if [ ! -s "$dir/tmp" ]; then
    rm -rf "$dir"
    err "search found no results, sorry"
  fi

  # NOTE(review): the HTML tags of this page were missing from the mangled
  # source (only the "Wayback Results" text survived); the markup below is
  # a reconstruction - confirm against upstream.
  {
    echo "<html><head><title>Wayback Results</title></head><body>"
    sort -ru "$dir/tmp" | while IFS= read -r link; do
      printf '<a href="%s">%s</a><br>\n' "$link" "$link"
    done
    echo "</body></html>"
  } > "$dir/r.html"

  w3_browse_url "file://$dir/r.html"
  # NOTE(review): the page is deleted as soon as w3_browse_url returns;
  # presumably a browser started in the background could miss it - verify.
  rm -rf "$dir"
}

# With -y, walk back month by month (defaulting to today's date as the
# starting point); otherwise open the single closest snapshot.
if [ -n "$years" ]; then
  [ -z "$timestamp" ] && timestamp="$( date +%Y%m%d )"
  iterate_years "$timestamp"
else
  goto_result "$timestamp"
fi