about | summary | refs | log | tree | commit | diff
path: root/sbosrcarch
diff options
context:
space:
mode:
author: B. Watson <yalhcru@gmail.com>, 2015-10-15 04:28:16 -0400
committer: B. Watson <yalhcru@gmail.com>, 2015-10-15 04:28:16 -0400
commit: 68d6d853df2072de525f87ccc123849ec28fc007 (patch)
tree: b031dc19792a65d89d68a6e698aaa16a8c61f448 /sbosrcarch
parent: 4d8c837dd3392ebaeede51a66725efa8c74fa049 (diff)
download: sbostuff-68d6d853df2072de525f87ccc123849ec28fc007.tar.gz
sbosrcarch: fix infofilecount bug, support older perls
Diffstat (limited to 'sbosrcarch')
-rwxr-xr-x sbosrcarch 34
1 file changed, 27 insertions, 7 deletions
diff --git a/sbosrcarch b/sbosrcarch
index cc8cd0c..0ac8259 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -1,5 +1,26 @@
#!/usr/bin/perl
+# 20151015 bkw: finally tested a full run on slack 13.0, results:
+# - create_mode stats are wrong
+# - the old openssl on slack 13.0 can't handle cloud.github.com. chokes
+# with 'sslv3 alert handshake failure'... or maybe it's wget that
+# can't handle it, as curl seems to be able to, using the same
+# openssl.
+# - older versions of wget also have issues with the filename
+# they save as (e.g. if redirected to a URL with a different
+# filename part at the end). maybe just fix with wget -O$filename.
+# - as a result of the above, I've got files that got downloaded
+# with wrong names, saved in the git tree. need add_or_rm_mode
+# to be smart enough to figure out where they go, by md5sum alone.
+# - wget_fake_head doesn't show errors, not even 'file too large'.
+# - seriously considering switching to curl.
+# - another thought: do away with HEAD requests entirely. do something
+# like open a pipeline reading from wget, read the headers (like
+# wget_fake_head does now)... then decide whether to finish the
+# download or close the fh. if we finish it, read from the pipeline
+# and write to the target filename.
+# - if a download fails, partial or junk files shouldn't be left behind in the git tree.
+
# TODO based on feedback from ttkp and pink_mist on IRC:
# - IPC::Open3 instead of open my $fh, "wget ...|"? At least use
# open my $fh, "-|", "wget", @args or such, to avoid quoting issues.
@@ -265,7 +286,7 @@ use File::Find;
use Digest::MD5;
use Net::FTP;
use POSIX 'getcwd';
-use File::Path qw/make_path remove_tree/;
+use File::Path qw/mkpath rmtree/;
use File::Copy 'copy';
our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs,
@@ -572,8 +593,7 @@ sub wget {
# Grr. Some sites refuse HEAD requests, and some allow them but
# don't return a Content-Length header. So we must resort to more
# drastic measures.
- # FIXME: don't bother doing this if we got 404 (not found) from the HEAD,
- # or stuff like DNS errors.
+ # FIXME: don't bother doing this if we got a DNS error from the HEAD.
if($head && not(defined($size))) {
return wget_fake_head($url);
}
@@ -682,7 +702,7 @@ sub store_file {
my $md5dir = md5_dir($md5);
my $namedir = name_dir($category, $prgnam);
- make_path($md5dir, $namedir);
+ mkpath($md5dir, $namedir);
link($filename, $namedir . "/" . $filename);
if($symlinks) {
symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
@@ -836,7 +856,7 @@ sub purge_mode {
# pass 3
if($rebuild) {
- remove_tree("by-md5");
+ rmtree("by-md5");
print "Removed by-md5 tree, rebuilding\n";
find({wanted => \&rebuild_wanted, no_chdir => 1}, "by-name");
} else {
@@ -880,7 +900,7 @@ sub rebuild_wanted {
my $md5dir = md5_dir(md5sum_file($_));
my (undef, $category, $prgnam, $filename) = split /\//, $_;
- make_path($md5dir);
+ mkpath($md5dir);
if($symlinks) {
symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
@@ -1157,7 +1177,7 @@ sub check_info_wanted {
my ($category, $prgnam, undef) = split /\//;
my $dls = parse_info($_);
$totalfiles += keys %$dls;
- $infofilecount{"$category/$prgnam"}++;
+ $infofilecount{"$category/$prgnam"} += keys %$dls;
$parsedinfo{"$category/$prgnam"} = $dls;
}