From 89d77b90b53bbd60e4deac4cc73ad85e915a08b4 Mon Sep 17 00:00:00 2001
From: "B. Watson"
Date: Tue, 17 Jul 2018 19:13:16 -0400
Subject: sbosrcarch: hack around filename collision issue

---
 sbosrcarch | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'sbosrcarch')

diff --git a/sbosrcarch b/sbosrcarch
index 3bcdc7a..8e1956c 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -4,16 +4,18 @@
 our $DEBUG_HTTP = 0;
 #our $DEBUG_HTTP = 1;
 
-# TODO create_mode stats are wrong
+# hack to work around the fact that the download filenames for
+# a few builds are the same filename, but different files.
+# this list could be populated automatically, but it wouldn't have
+# changed in the past 3 years, so might as well hard-code it.
+our %url_filename_collisions = (
+	'http://hgwdev.cse.ucsc.edu/~kent/exe/opteron/blatSuite.34.zip' => 'blatSuite.34.zip.x86_64',
+	'https://www.perforce.com/downloads/perforce/r18.1/bin.linux26x86_64/p4' => 'p4.x86_64',
+	'https://www.perforce.com/downloads/perforce/r18.1/bin.linux26x86_64/p4d' => 'p4d.x86_64',
+	'https://ftp.mirrorservice.org/sites/download.salixos.org/x86_64/extra-14.2/source/libraries/p4api/p4api.tgz' => 'p4api.tgz.x86_64'
+);
 
-# FIXME 20151016 bkw: behold:
-# $ grep ^D libraries/p4api/*.info
-# DOWNLOAD="ftp://ftp.perforce.com/perforce/r10.1/bin.linux26x86/p4api.tgz"
-# DOWNLOAD_x86_64="ftp://ftp.perforce.com/perforce/r10.1/bin.linux26x86_64/p4api.tgz"
-# notice that both URLs have the same filename (p4api.tgz)? this is
-# exactly why we have a by-md5 tree. But right now, "add libraries/p4api"
-# only adds one of them. other builds with the same issue: development/p4
-# and academic/ucsc-blat
+# TODO create_mode stats are wrong
 
 # TODO based on feedback from ttkp and pink_mist on IRC:
 # - IPC::Open3 instead of open my $fh, "wget ...|"? At least use
@@ -22,7 +24,9 @@ our $DEBUG_HTTP = 0;
 # stderr & stdout to the same place. Hm.
 
 # Also, stuff added with "add" sometimes ends up as separate files
-# instead of hardlinks. Not sure how to replicate this.
+# instead of hardlinks. Not sure how to replicate this. It hasn't
+# actually happened in ages, so probably I fixed it while working
+# on something else...
 
 # Ideas for future features:
 # - autopurge option for update. It only needs to purge the dirs that
@@ -545,6 +549,10 @@ sub blacklisted {
 # called e.g. "c++-utils.tar.gz" that would get broken by it.
 sub url_to_filename {
 	my $u = shift;
+
+	my $v = $url_filename_collisions{$u};
+	return $v if $v;
+
 	$u =~ s,.*/,,;
 	$u =~ s,%([0-9A-F]{2}),chr(hex($1)),ge;
 	return $u;
@@ -572,6 +580,7 @@ sub parse_info {
 	my @md5s = split " ", join " ", @md5lines;
 
 	my %ret;
+
 	for(@urls) {
 		my $m = shift @md5s;
 		#next if /^un(test|support)ed$/i; # no longer need
-- 
cgit v1.2.3