From 68d6d853df2072de525f87ccc123849ec28fc007 Mon Sep 17 00:00:00 2001
From: "B. Watson"
Date: Thu, 15 Oct 2015 04:28:16 -0400
Subject: sbosrcarch: fix infofilecount bug, support older perls

---
 sbosrcarch | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

(limited to 'sbosrcarch')

diff --git a/sbosrcarch b/sbosrcarch
index cc8cd0c..0ac8259 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -1,5 +1,26 @@
 #!/usr/bin/perl
 
+# 20151015 bkw: finally tested a full run on slack 13.0, results:
+# - create_mode stats are wrong
+# - the old openssl on slack 13.0 can't handle cloud.github.com. chokes
+#   with 'sslv3 alert handshake failure'... or maybe it's wget that
+#   can't handle it, as curl seems to be able to, using the same
+#   openssl.
+# - older versions of wget also have issues with the filename
+#   they save as (e.g. if redirected to a URL with a different
+#   filename part at the end). maybe just fix with wget -O$filename.
+# - as a result of the above, I've got files that got downloaded
+#   with wrong names, saved in the git tree. need add_or_rm_mode
+#   to be smart enough to figure out where they go, by md5sum alone.
+# - wget_fake_head doesn't show errors, not even 'file too large'.
+# - seriously considering switching to curl.
+# - another thought: do away with HEAD requests entirely. do something
+#   like open a pipeline reading from wget, read the headers (like
+#   wget_fake_head does now)... then decide whether to finish the
+#   download or close the fh. if we finish it, read from the pipeline
+#   and write to the target filename.
+# - if a download fails, turds shouldn't be left behind in the git tree.
+
 # TODO based on feedback from ttkp and pink_mist on IRC:
 # - IPC::Open3 instead of open my $fh, "wget ...|"? At least use
 #   open my $fh, "-|", "wget", @args or such, to avoid quoting issues.
@@ -265,7 +286,7 @@ use File::Find;
 use Digest::MD5;
 use Net::FTP;
 use POSIX 'getcwd';
-use File::Path qw/make_path remove_tree/;
+use File::Path qw/mkpath rmtree/;
 use File::Copy 'copy';
 
 our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs,
@@ -572,8 +593,7 @@ sub wget {
 	# Grr. Some sites refuse HEAD requests, and some allow them but
 	# don't return a Content-Length header. So we must resort to more
 	# drastic measures.
-	# FIXME: don't bother doing this if we got 404 (not found) from the HEAD,
-	# or stuff like DNS errors.
+	# FIXME: don't bother doing this if we got a DNS error from the HEAD.
 	if($head && not(defined($size))) {
 		return wget_fake_head($url);
 	}
@@ -682,7 +702,7 @@ sub store_file {
 
 	my $md5dir = md5_dir($md5);
 	my $namedir = name_dir($category, $prgnam);
-	make_path($md5dir, $namedir);
+	mkpath($md5dir, $namedir);
 	link($filename, $namedir . "/" . $filename);
 	if($symlinks) {
 		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
@@ -836,7 +856,7 @@ sub purge_mode {
 
 	# pass 3
 	if($rebuild) {
-		remove_tree("by-md5");
+		rmtree("by-md5");
 		print "Removed by-md5 tree, rebuilding\n";
 		find({wanted => \&rebuild_wanted, no_chdir => 1}, "by-name");
 	} else {
@@ -880,7 +900,7 @@ sub rebuild_wanted {
 	my $md5dir = md5_dir(md5sum_file($_));
 	my (undef, $category, $prgnam, $filename) = split /\//, $_;
 
-	make_path($md5dir);
+	mkpath($md5dir);
 	if($symlinks) {
 		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" .
			$filename,
@@ -1157,7 +1177,7 @@ sub check_info_wanted {
 	my ($category, $prgnam, undef) = split /\//;
 	my $dls = parse_info($_);
 	$totalfiles += keys %$dls;
-	$infofilecount{"$category/$prgnam"}++;
+	$infofilecount{"$category/$prgnam"} += keys %$dls;
 	$parsedinfo{"$category/$prgnam"} = $dls;
 }
 
-- 
cgit v1.2.3
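
The new header comment floats one way to get rid of HEAD requests altogether: start a single GET, read the response headers off the pipe, and only then decide whether to finish the download or close the filehandle. Below is a minimal standalone sketch of that idea. It uses curl -s -i (response headers printed ahead of the body on stdout) rather than wget, and the size limit, helper name and file handling are illustrative assumptions, not code from sbosrcarch; redirects and FTP URLs would still need extra handling.

#!/usr/bin/perl
# Sketch only: one GET per file, headers parsed from the same pipe as the body.
use warnings;
use strict;

my $maxfilemegs = 10;	# assumed limit; the real script reads this from its config

sub fetch_if_small_enough {	# hypothetical helper, not part of sbosrcarch
	my ($url, $outfile) = @_;

	# list form of open, so no shell quoting issues (as the TODO block suggests)
	open(my $fh, "-|", "curl", "-s", "-i", $url) or return 0;

	# header block ends at the first empty line
	my ($status, $size);
	while(my $line = <$fh>) {
		$line =~ s/\r?\n$//;
		last if $line eq "";
		$status = $1 if $line =~ m{^HTTP/\S+\s+(\d+)};
		$size   = $1 if $line =~ /^Content-Length:\s*(\d+)/i;
	}

	# no 2xx status (curl failed, 404, DNS error...): nothing gets written,
	# so no turds are left behind in the tree
	if(!defined($status) || $status !~ /^2/) {
		close($fh);
		return 0;
	}

	if(defined($size) && $size > $maxfilemegs * 1024 * 1024) {
		close($fh);	# too big: abandon the transfer before touching the disk
		return 0;
	}

	# small enough (or size unknown): finish the download
	open(my $out, ">", $outfile) or do { close($fh); return 0; };
	binmode($out);
	my $buf;
	while(read($fh, $buf, 65536)) {
		print $out $buf;
	}
	close($out);
	close($fh);
	return 1;
}

fetch_if_small_enough($ARGV[0], $ARGV[1]) if @ARGV == 2;

Closing the pipe early makes curl exit on SIGPIPE, so an oversized or failed file never reaches the target directory, which also covers the "turds shouldn't be left behind" note.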
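
On the infofilecount change: a single .info file can name several DOWNLOAD files, and the last hunk shows parse_info() handing back a hashref with one entry per download ($totalfiles is already bumped by keys %$dls), so the per-build counter has to grow by the number of entries rather than by one per file. A toy illustration with made-up data follows; the exact key/value layout of parse_info's hashref is an assumption here.

use warnings;
use strict;

my %infofilecount;

# hypothetical parse_info() result for one .info file with two DOWNLOAD entries
my $dls = {
	"http://example.com/src/foo-1.0.tar.gz"  => "0123456789abcdef0123456789abcdef",
	"http://example.com/src/foo-data.tar.gz" => "fedcba9876543210fedcba9876543210",
};

# old code:  $infofilecount{"development/foo"}++;      # would add only 1
$infofilecount{"development/foo"} += keys %$dls;       # adds 2
print "$infofilecount{'development/foo'}\n";           # prints 2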