#!/bin/bash

# 20150827 bkw: attempt to find missing source tarballs

### configurable stuff

# where to look for slackbuilds. override with SBOROOT environment
# variable. current directory is always searched first.
#SBODEFAULT=$HOME/slackbuilds
SBODEFAULT=/home/urchlay/sbo-master

# to add a repo, list its name here, and write a <name>_download()
# function, which should return success if a file was downloaded and
# failure otherwise. order isn't important here, it's randomized on
# every run.
#sbosrcarch
repos="
filewatcher
wayback
macports
fedora
pldattic
tld
ponce
sfdirect
gentoo
netbsd
freebsd
debian
"

# mirror(s) to use for sbosrcarch, one or more, space or newline-separated.
# these are tried in the order listed.
# leave off the trailing / (shouldn't really matter, but...)
sbosrcarch_mirrors="
http://slackware.org.uk/sbosrcarch
"

### end of config, start of code

# Name this script was invoked as, used as a prefix for error messages.
SELF=${0##*/}

# usage <exit-status>
# Print the help text on stdout and exit with the given status (0 if none
# is given).
usage() {
  # NOTE(review): the beginning of this help text is missing from the
  # source this block was restored from. Only the paragraph describing -r
  # survives, and its first words (Use -r "<repo>") are a reconstruction.
  # Restore the full text from the upstream script.
  cat <<EOF
Use -r "<repo>" to search only one repo from the list. Setting REPO
in the environment has the same effect. This option is mostly just for
testing. Don't forget the space between -r and the repo name!
EOF
  exit "${1:-0}"
}

# die <message...>
# Print "<script name>: <message>" on stderr and exit with status 1.
die() {
  echo "$SELF:" "$@" 1>&2
  exit 1
}

# read_info_file [<file.info> | <directory>]
# Locate and source a .info file.
#   ""           use the first *.info in the current directory
#   *.info       use that file
#   anything else is a directory; use the first *.info inside it
# Each candidate is tried relative to the current directory first, then
# relative to ${SBOROOT:-$SBODEFAULT}. Dies if nothing is found.
# Globals written: dir, file (path of the file that was sourced), whatever
# the .info file defines, ARCH (if it was empty), and DOWNLOAD/MD5SUM
# (replaced by the _x86_64 variants when ARCH is x86_64 and a real
# x86_64 download is listed).
read_info_file() {
  local root prefix candidate found=""

  # Always start clean: a leftover or inherited $dir must not override an
  # explicitly named .info file.
  dir=""
  file=""
  case "$1" in
    "") dir=. ;;
    *.info) file="$1" ;;
    *) dir="$1" ;;
  esac

  root="${SBOROOT:-$SBODEFAULT}"
  for prefix in "" "$root/"; do
    if [ -n "$dir" ]; then
      # The glob is expanded here, with the directory quoted, so paths
      # containing whitespace work and no eval is needed.
      for candidate in "$prefix$dir"/*.info; do
        if [ -f "$candidate" ]; then
          found="$candidate"
          break
        fi
      done
    elif [ -f "$prefix$file" ]; then
      found="$prefix$file"
    fi
    [ -n "$found" ] && break
  done

  if [ -z "$found" ]; then
    die "Can't find .info file matching $1"
  fi

  file="$found"
  echo "Using info file: $file"
  source "$file"

  # snarfed straight from template.SlackBuild:
  if [ -z "$ARCH" ]; then
    case "$( uname -m )" in
      i?86) ARCH=i486 ;;
      arm*) ARCH=arm ;;
      *) ARCH=$( uname -m ) ;;
    esac
  fi

  if [ "$ARCH" = "x86_64" ]; then
    case "$DOWNLOAD_x86_64" in
      "" | UNSUPPORTED | UNTESTED) ;;
      *)
        DOWNLOAD="$DOWNLOAD_x86_64"
        MD5SUM="$MD5SUM_x86_64"
        ;;
    esac
  fi
}

# do_wget <url> [<extra wget args...>]
# Echo, then run, wget with $wgetopts, the extra arguments and the URL.
# Returns wget's exit status. Sets the global $url.
do_wget() {
  url="$1"
  shift
  # $wgetopts is a whitespace-separated option list and is split on
  # purpose; the URL and the extra arguments are passed through verbatim.
  # shellcheck disable=SC2086
  echo wget $wgetopts "$@" "$url"
  # shellcheck disable=SC2086
  wget $wgetopts "$@" "$url"
}

# Try each sbosrcarch mirror in the order listed; succeed as soon as
# check_file accepts a download, fail if no mirror delivers (or none is
# configured). Reads $dlmd5 and $dlfile.
sbosrcarch_download() {
  local mirror

  dir="by-md5/${dlmd5:0:1}/${dlmd5:1:1}/$dlmd5"
  for mirror in $sbosrcarch_mirrors; do
    do_wget "$mirror/$dir/$dlfile"
    check_file && return 0
  done
  return 1
}

# ponce's server returns 200 OK status for its 404 page, hence the ugly:
# the wget output is captured and a text/html response counts as failure.
ponce_download() {
  local log ret

  log=$( mktemp ) || return 1
  do_wget "http://ponce.cc/slackware/sources/repo/$dlfile" 2>&1 | tee "$log"
  # status of the download itself, not of tee
  ret=${PIPESTATUS[0]}
  grep -q '^Length.*text/html' "$log" && ret=1
  rm -f "$log"
  return "$ret"
}

# the user agent is set because sf does something different if it thinks
# you're using a browser, and some of us like to set the user agent to
# firefox in .wgetrc because it fixes downloading from most other sites
# that check it.
sfdirect_download() {
  do_wget "http://downloads.sourceforge.net/project/slackbuildsdirectlinks/$PRGNAM/$dlfile" --user-agent wget
}

# The distfiles subdirectory is the first two characters of the b2sum
# output for the file name. Fails when b2sum is not available.
gentoo_download() {
  # b2sum from SBo blake2 package.
  if command -v b2sum &>/dev/null; then
    dir="$( printf '%s' "$dlfile" | b2sum | head -c2 )"
    do_wget "http://ftp.osuosl.org/pub/gentoo/distfiles/$dir/$dlfile"
  else
    echo "*** can't try gentoo distfiles because b2sum is missing; install system/blake2"
    return 1
  fi
}

freebsd_download() {
  do_wget "http://distcache.FreeBSD.org/ports-distfiles/$dlfile"
}

netbsd_download() {
  do_wget "http://ftp.netbsd.org/pub/pkgsrc/distfiles/$dlfile"
}

# debian's tricky because they rename the files: all lowercase, an
# underscore between name and version, and .orig added before the
# filename extension.
# Also they're fanned out into subdirs, see
# http://http.debian.net/debian/pool/main/
#
# Reads $dlfile. On return the downloaded file (if any) has been renamed
# from Debian's name back to $dlfile. Returns do_wget's exit status.
debian_download() {
  local ext base prog ver subdir debfile ret

  # extension: "tar.<whatever>" for tarballs, else everything after the
  # last dot
  case "$dlfile" in
    *.tar.*) ext="tar.${dlfile##*.tar.}" ;;
    *) ext="${dlfile##*.}" ;;
  esac

  # Strip the extension before lowercasing, so an upper-case extension is
  # still removed from the name.
  base="${dlfile%."$ext"}"
  prog="${base,,}"
  prog="${prog//_/-}"

  # version is whatever follows the last dash, the name is the rest
  ver="${prog##*-}"
  prog="${prog%-*}"

  # lib* packages live under a 4-character subdir, everything else under
  # its first letter
  case "$prog" in
    lib*) subdir="${prog:0:4}" ;;
    *) subdir="${prog:0:1}" ;;
  esac

  #echo "prog='$prog' ver='$ver' ext='$ext' subdir='$subdir'"
  debfile="${prog}_$ver.orig.$ext"
  do_wget "http://http.debian.net/debian/pool/main/$subdir/$prog/$debfile"
  ret=$?

  if [ -f "$debfile" ]; then
    mv -- "$debfile" "$dlfile"
  fi
  return "$ret"
}

# my own archive. Not well populated yet.
naptime_download() {
  do_wget "https://slackware.uk/~urchlay/src/$dlfile"
}

# _md5_fanout <md5sum>
# Print the by-md5 directory for a checksum: <1st char>/<2nd char>/<md5sum>
_md5_fanout() {
  printf '%s/%s/%s' "${1:0:1}" "${1:1:1}" "$1"
}

# The by-md5 directory is fetched recursively (one level, no directory
# tree created locally), rejecting "desc" and the index pages.
tld_download() {
  local dir

  dir="$( _md5_fanout "$dlmd5" )"
  do_wget "http://df.tld-linux.org/distfiles/by-md5/$dir/" \
    -r -l1 -nH -np -nd -Rdesc '-Rindex.html*'
}

# TODO: try also http://distfiles.pld-linux.org/distfiles/by-md5/ ? (is it the same?)
pldattic_download() {
  local dir

  dir="$( _md5_fanout "$dlmd5" )"
  do_wget "http://attic-distfiles.pld-linux.org/distfiles/by-md5/$dir/" \
    -r -l1 -nH -np -nd -Rdesc '-Rindex.html*'
}

# https://archive.org/help/wayback_api.php
# JSON::PP is included in slackware's perl package.
# Asks the availability API for the closest snapshot of $srcurl and
# downloads it. Fails if the answer names no snapshot URL.
wayback_download() {
  # The reply is decoded as data; nothing derived from it is executed.
  url=$( wget -O- "http://archive.org/wayback/available?url=$srcurl" |
    perl -MJSON::PP -e '
      undef $/;
      my $doc = eval { decode_json( scalar <STDIN> ) };
      my $snaps = ref $doc eq "HASH" ? $doc->{archived_snapshots} : undef;
      my $closest = ref $snaps eq "HASH" ? $snaps->{closest} : undef;
      print $closest->{url}
        if ref $closest eq "HASH" && defined $closest->{url};
    ' )

  if [ "$url" = "" ]; then
    return 1
  fi

  do_wget "$url"
}

# lot of stuff here.
URLs of the form: # http://pkgs.fedoraproject.org/repo/pkgs/zziplib/zziplib-0.13.62.tar.bz2/5fe874946390f939ee8f4abe9624b96c/zziplib-0.13.62.tar.bz2 fedora_download() { pkgname="$( echo $dlfile | rev | cut -d- -f2- | rev )" do_wget "http://pkgs.fedoraproject.org/repo/pkgs/$pkgname/$dlfile/$dlmd5/$dlfile" } # URL form: # http://distfiles.macports.org/arj/arj-3.10.22.tar.gz macports_download() { pkgname="$( echo $dlfile | rev | cut -d- -f2- | rev )" do_wget "http://distfiles.macports.org/$pkgname/$dlfile" } # http://www.filewatcher.com/_/?q=Lirc-Client-2.00.tar.gz # for some reason, wget's getting the content gzipped. The # server appears to violate the HTTP/1.1 spec: it ignores # "Accept-Encoding: identity" or "Accept-Encoding:" with no arg, # and always sends gzipped content with "Content-encoding: gzip" # We have to do HTML scraping :( # TODO: CGI parameter escaping? filewatcher_download() { fwurl="$( wget -O- "http://www.filewatcher.com/_/?q=$dlfile" | \ zcat 2>/dev/null | grep '