sbosrcarch: fix infofilecount bug, support older perls

author: B. Watson <yalhcru@gmail.com> 2015-10-15 04:28:16 -0400
committer: B. Watson <yalhcru@gmail.com> 2015-10-15 04:28:16 -0400
commit: 68d6d853df2072de525f87ccc123849ec28fc007 (patch)
tree: b031dc19792a65d89d68a6e698aaa16a8c61f448 /sbosrcarch
parent: 4d8c837dd3392ebaeede51a66725efa8c74fa049 (diff)
download: sbostuff-68d6d853df2072de525f87ccc123849ec28fc007.tar.gz
1 files changed, 27 insertions, 7 deletions
diff --git a/sbosrcarch b/sbosrcarch
index cc8cd0c..0ac8259 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -1,5 +1,26 @@
 #!/usr/bin/perl
 
+# 20151015 bkw: finally tested a full run on slack 13.0, results:
+# - create_mode stats are wrong
+# - the old openssl on slack 13.0 can't handle cloud.github.com: it
+#   chokes with 'sslv3 alert handshake failure'. or maybe it's wget
+#   that can't handle it, since curl seems to work using the same
+#   openssl.
+# - older versions of wget also have issues with the filename
+#   they save as (e.g. if redirected to a URL with a different
+#   filename part at the end). maybe just fix with wget -O$filename.
+# - as a result of the above, I've got files that got downloaded
+#   with wrong names, saved in the git tree. need add_or_rm_mode
+#   to be smart enough to figure out where they go, by md5sum alone.
+# - wget_fake_head doesn't show errors, not even 'file too large'.
+# - seriously considering switching to curl.
+# - another thought: do away with HEAD requests entirely. do something
+#   like open a pipeline reading from wget, read the headers (like
+#   wget_fake_head does now)... then decide whether to finish the
+#   download or close the fh. if we finish it, read from the pipeline
+#   and write to the target filename.
+# - if a download fails, turds shouldn't be left behind in the git tree.
+
 # TODO based on feedback from ttkp and pink_mist on IRC:
 # - IPC::Open3 instead of open my $fh, "wget ...|"? At least use
 #   open my $fh, "-|", "wget", @args or such, to avoid quoting issues.
@@ -265,7 +286,7 @@ use File::Find;
 use Digest::MD5;
 use Net::FTP;
 use POSIX 'getcwd';
-use File::Path qw/make_path remove_tree/;
+use File::Path qw/mkpath rmtree/;
 use File::Copy 'copy';
 
 our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs,
@@ -572,8 +593,7 @@ sub wget {
 	# Grr. Some sites refuse HEAD requests, and some allow them but
 	# don't return a Content-Length header. So we must resort to more
 	# drastic measures.
-	# FIXME: don't bother doing this if we got 404 (not found) from the HEAD,
-	# or stuff like DNS errors.
+	# FIXME: don't bother doing this if we got a DNS error from the HEAD.
 	if($head && not(defined($size))) {
 		return wget_fake_head($url);
 	}
@@ -682,7 +702,7 @@ sub store_file {
 	my $md5dir = md5_dir($md5);
 	my $namedir = name_dir($category, $prgnam);
 
-	make_path($md5dir, $namedir);
+	mkpath($md5dir, $namedir);
 	link($filename, $namedir . "/" . $filename);
 	if($symlinks) {
 		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
@@ -836,7 +856,7 @@ sub purge_mode {
 
 	# pass 3
 	if($rebuild) {
-		remove_tree("by-md5");
+		rmtree("by-md5");
 		print "Removed by-md5 tree, rebuilding\n";
 		find({wanted => \&rebuild_wanted, no_chdir => 1}, "by-name");
 	} else {
@@ -880,7 +900,7 @@ sub rebuild_wanted {
 	my $md5dir = md5_dir(md5sum_file($_));
 	my (undef, $category, $prgnam, $filename) = split /\//, $_;
 
-	make_path($md5dir);
+	mkpath($md5dir);
 
 	if($symlinks) {
 		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
@@ -1157,7 +1177,7 @@ sub check_info_wanted {
 	my ($category, $prgnam, undef) = split /\//;
 	my $dls = parse_info($_);
 	$totalfiles += keys %$dls;
-	$infofilecount{"$category/$prgnam"}++;
+	$infofilecount{"$category/$prgnam"} += keys %$dls;
 	$parsedinfo{"$category/$prgnam"} = $dls;
 }
author	B. Watson <yalhcru@gmail.com>	2015-10-15 04:28:16 -0400
committer	B. Watson <yalhcru@gmail.com>	2015-10-15 04:28:16 -0400
commit	68d6d853df2072de525f87ccc123849ec28fc007 (patch)
tree	b031dc19792a65d89d68a6e698aaa16a8c61f448 /sbosrcarch
parent	4d8c837dd3392ebaeede51a66725efa8c74fa049 (diff)
download	sbostuff-68d6d853df2072de525f87ccc123849ec28fc007.tar.gz