From d1cf05f126174fd6a36f26faf17823b2baf6e86d Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Fri, 16 Oct 2015 02:01:43 -0400 Subject: sbosrcarch: make wget binary a config option --- sbosrcarch | 9 +++++---- sbosrcarch.conf | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/sbosrcarch b/sbosrcarch index a6383c1..437a7d6 100755 --- a/sbosrcarch +++ b/sbosrcarch @@ -295,7 +295,7 @@ use POSIX 'getcwd'; use File::Path qw/mkpath rmtree/; use File::Copy 'copy'; -our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs, +our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs, $wget, $wgetargs, $symlinks, $wgetrc_contents, $wgetrc, %user_agent_overrides, @trim_empty_dirs, $skipcount, $urlcount, $archivecount, $attemptcount, $failcount, $dlcount, $nowarchived, $coverage, @@ -352,6 +352,7 @@ sub read_config { } # quietly use defaults if missing: + $wget = "wget" unless defined $wget; $wgetargs = "" unless defined $wgetargs; $symlinks = "" unless defined $symlinks; @@ -515,7 +516,7 @@ sub toobig { sub wget_fake_head { my $url = shift; our $wget_config_arg; - my $cmd = "wget $wget_config_arg " . + my $cmd = "$wget $wget_config_arg " . "--tries 1 --quiet -O- --save-headers " . user_agent($url) . " " . " $wgetargs " . @@ -573,7 +574,7 @@ sub wget { if(not defined $wget_config_arg) { $wget_config_arg = ""; - open my $fh, "wget --help|" or die "can't run wget: $!\n"; + open my $fh, "$wget --help|" or die "can't run wget: $!\n"; while(<$fh>) { $wget_config_arg = "--config=$wgetrc" if /--config/; } @@ -591,7 +592,7 @@ sub wget { # TODO: open3? # the -O is there to force the filename, in case of a redirect. newer # versions of wget don't actually need this, but it doesn't hurt. - my $cmd = "wget $wget_config_arg " . + my $cmd = "$wget $wget_config_arg " . user_agent($url) . " " . ($head ? "--spider --tries 1" : "-O '" . url_to_filename($url) . "'") . " $wgetargs " . 
diff --git a/sbosrcarch.conf b/sbosrcarch.conf index 4d29c28..311f99b 100644 --- a/sbosrcarch.conf +++ b/sbosrcarch.conf @@ -62,6 +62,11 @@ $maxfilemegs = 1; $symlinks = 0; +## $wget (string, optional, default "wget") +# Path to wget binary. Absolute paths will be used as-is; otherwise $PATH +# will be searched. +$wget = "wget"; + ## $wgetargs (string, optional, default "") # Extra arguments to pass to wget. We're already creating a config file # and using it in place of .wgetrc and /etc/wgetrc, you don't need to @@ -69,6 +74,22 @@ $symlinks = 0; $wgetargs = ""; +# If your wget is older than version 1.14 or so, sbosrcarch will complain +# that it doesn't support the --config option. In that case, the +# $wgetrc_contents below won't be used. You can either copy $wgetrc_contents +# to ~/.wgetrc, or use $wgetargs to set the config options on the command +# line. Something like this: + +# $wgetargs = +# "--timeout=30 ". +# "--user-agent='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' ". +# "--no-check-certificate ". +# "--no-content-disposition"; + +# There's no --no-robots option, but "-e robots=off" does the same job. +# Upgrading wget is a better solution: you can compile it with e.g. +# --prefix=/home/you/wget.new, then set $wget = "/home/you/wget.new/bin/wget" above. + ## $wgetrc_contents (string, optional, see "man wget" and/or the comments in # /etc/wgetrc for more information). -- cgit v1.2.3