aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author B. Watson <yalhcru@gmail.com> 2015-10-16 02:01:43 -0400
committer B. Watson <yalhcru@gmail.com> 2015-10-16 02:01:43 -0400
commit d1cf05f126174fd6a36f26faf17823b2baf6e86d (patch)
tree b4f1ecf5022873145250e32df72f9ad3adbec590
parent 6ac0db861e3ee0cb71a14108dc2cefe7e1962e7e (diff)
download sbostuff-d1cf05f126174fd6a36f26faf17823b2baf6e86d.tar.gz
sbosrcarch: make wget binary a config option
-rwxr-xr-x sbosrcarch 9
-rw-r--r-- sbosrcarch.conf 21
2 files changed, 26 insertions, 4 deletions
diff --git a/sbosrcarch b/sbosrcarch
index a6383c1..437a7d6 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -295,7 +295,7 @@ use POSIX 'getcwd';
use File::Path qw/mkpath rmtree/;
use File::Copy 'copy';
-our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs,
+our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs, $wget,
$wgetargs, $symlinks, $wgetrc_contents, $wgetrc, %user_agent_overrides,
@trim_empty_dirs, $skipcount, $urlcount, $archivecount,
$attemptcount, $failcount, $dlcount, $nowarchived, $coverage,
@@ -352,6 +352,7 @@ sub read_config {
}
# quietly use defaults if missing:
+ $wget = "wget" unless defined $wget;
$wgetargs = "" unless defined $wgetargs;
$symlinks = "" unless defined $symlinks;
@@ -515,7 +516,7 @@ sub toobig {
sub wget_fake_head {
my $url = shift;
our $wget_config_arg;
- my $cmd = "wget $wget_config_arg " .
+ my $cmd = "$wget $wget_config_arg " .
"--tries 1 --quiet -O- --save-headers " .
user_agent($url) . " " .
" $wgetargs " .
@@ -573,7 +574,7 @@ sub wget {
if(not defined $wget_config_arg) {
$wget_config_arg = "";
- open my $fh, "wget --help|" or die "can't run wget: $!\n";
+ open my $fh, "$wget --help|" or die "can't run wget: $!\n";
while(<$fh>) {
$wget_config_arg = "--config=$wgetrc" if /--config/;
}
@@ -591,7 +592,7 @@ sub wget {
# TODO: open3?
# the -O is there to force the filename, in case of a redirect. newer
# versions of wget don't actually need this, but it doesn't hurt.
- my $cmd = "wget $wget_config_arg " .
+ my $cmd = "$wget $wget_config_arg " .
user_agent($url) . " " .
($head ? "--spider --tries 1" : "-O '" . url_to_filename($url) . "'") .
" $wgetargs " .
diff --git a/sbosrcarch.conf b/sbosrcarch.conf
index 4d29c28..311f99b 100644
--- a/sbosrcarch.conf
+++ b/sbosrcarch.conf
@@ -62,6 +62,11 @@ $maxfilemegs = 1;
$symlinks = 0;
+## $wget (string, optional, default "wget")
+# Path to the wget binary. Absolute paths will be used as-is; otherwise
+# $PATH will be searched.
+$wget = "wget";
+
## $wgetargs (string, optional, default "")
# Extra arguments to pass to wget. We're already creating a config file
# and using it in place of .wgetrc and /etc/wgetrc, you don't need to
@@ -69,6 +74,22 @@ $symlinks = 0;
$wgetargs = "";
+# If your wget is older than version 1.14 or so, sbosrcarch will complain
+# that it doesn't support the --config option. In that case, the
+# $wgetrc_contents below won't be used. You can either copy $wgetrc_contents
+# to ~/.wgetrc, or use $wgetargs to set the config options on the command
+# line. Something like this:
+
+# $wgetargs =
+# "--timeout=30 ".
+# "--user-agent='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' ".
+# "--no-check-certificate ".
+# "--no-content-disposition";
+
+# Unfortunately there's no --no-robots option. Upgrading wget is a
+# better solution, and you can compile it with e.g. --prefix=/home/you/wget.new,
+# and set $wget = "/home/you/wget.new/bin/wget" above.
+
## $wgetrc_contents (string, optional, see "man wget" and/or the comments in
# /etc/wgetrc for more information).