| author | B. Watson <yalhcru@gmail.com> | 2015-10-15 17:13:21 -0400 | 
|---|---|---|
| committer | B. Watson <yalhcru@gmail.com> | 2015-10-15 17:13:21 -0400 | 
| commit | 275cb7d96b7bcad1c4e8bc5411477866f0c6a9c0 (patch) | |
| tree | dd5695cb163086b94e0f6403f3df4c411c327446 /sbosrcarch | |
| parent | 68d6d853df2072de525f87ccc123849ec28fc007 (diff) | |
| download | sbostuff-275cb7d96b7bcad1c4e8bc5411477866f0c6a9c0.tar.gz | |
sbosrcarch: add_by_md5 added, minor fixes.
Diffstat (limited to 'sbosrcarch')
| -rwxr-xr-x | sbosrcarch | 128 | 
1 files changed, 98 insertions, 30 deletions
@@ -6,13 +6,6 @@
 #   with 'sslv3 alert handshake failure'... or maybe it's wget that
 #   can't handle it, as curl seems to be able to, using the same
 #   openssl.
-# - older versions of wget also have issues with the filename
-#   they save as (e.g. if redirected to a URL with a different
-#   filename part at the end). maybe just fix with wget -O$filename.
-# - as a result of the above, I've got files that got downloaded
-#   with wrong names, saved in the git tree. need add_or_rm_mode
-#   to be smart enough to figure out where they go, by md5sum alone.
-# - wget_fake_head doesn't show errors, not even 'file too large'.
 # - seriously considering switching to curl.
 # - another thought: do away with HEAD requests entirely. do something
 #   like open a pipeline reading from wget, read the headers (like
@@ -129,8 +122,7 @@ Checks the integrity and coverage of the archive. Reports at least these conditi
  - invalid md5sums
  - files present in only one of by-name or by-md5 but not the other
  - extraneous files in the tree
- - generates a detailed status report, giving the total size,
-   coverage, and a list of slackbuilds not covered.
+ - generates a status report, giving the total size and coverage.
 
 Will not modify the archive in any way, but might recommend fixes.
 
@@ -138,7 +130,7 @@ With -v, lists all SlackBuilds not covered by the archive.
 
 =item add [-f] <category/prgnam> [<file> ...]
 
-Manually add a single (possibly already downloaded) file to the archive.
+Manually add (possibly already downloaded) files to the archive.
 
 Use -f to skip the size limit checking, so your archive can include a
 few large files (perhaps because they're for builds you maintain).
@@ -146,15 +138,27 @@ few large files (perhaps because they're for builds you maintain).
 Files added this way will still be deleted by 'sbosrcarch trim', if
 they're larger than the limit.
 
-This is intended to let the mirror operator keep a few large files, over
-the maxfilemegs limit, or save bandwidth by using already-downloaded
+This is intended to let the mirror operator keep a few large files (over
+the maxfilemegs limit), or save bandwidth by using already-downloaded
 copies (e.g. of stuff that was built recently).
 
 If files are given after the category/prgnam argument, they will be
-used instead of downloading the URLs in the .info files (provided their
+used instead of downloading the URLs in the .info file (provided their
 md5sums match the .info file). Size limits are not checked for files
 added this way.
 
+=item add <file> [...]
+
+Manually add local file(s) to the archive. As above, but the
+category/prgnam is discovered by parsing all the .info files and
+matching md5sums. This is a good bit slower, but it can handle files
+for many different category/prgnam at once. It's especially useful if
+you already have an archive of SBo sources that you want to convert to
+sbosrcarch format.
+
+The -f option is not supported (or needed) with this form of the add
+command.
+
 =item rm <category/prgnam>
 
 Manually remove files from the archive. All the files referenced by the
@@ -341,7 +345,7 @@ sub read_config {
 
 # not required, but warn if it's missing:
 	if((not defined $maxfilemegs) || ($maxfilemegs < 0)) {
-		warn "config file missing/invalid \$maxfilemegs, defaulting to 10\n";
+		print "config file missing/invalid \$maxfilemegs, defaulting to 10\n";
 		$maxfilemegs = 10;
 	}
 
@@ -400,8 +404,8 @@ sub parse_info {
 
 	my %ret;
 	for(@urls) {
-		next if /^un(test|support)ed$/;
-		die "bad URL in $file\n" if /`/; # backticks should never occur!
+		next if /^un(test|support)ed$/i;
+		print "bad URL in $file (backtick)\n", next if /`/; # backticks should never occur!
 		$ret{$_} = shift @md5s;
 	}
 
@@ -504,13 +508,14 @@ sub toobig {
 
 sub wget_fake_head {
 	my $url = shift;
-	my $cmd = "wget --config=$wgetrc " .
+	our $wget_config_arg;
+	my $cmd = "wget $wget_config_arg " .
 		"--tries 1 --quiet -O- --save-headers " .
 		user_agent($url) . " " .
 		" $wgetargs " .
 		"'$url'";
 
-	print "real HEAD failed, trying fake HEAD request: $cmd\n";
+	#print "real HEAD failed, trying fake HEAD request: $cmd\n";
 
 	# TODO: open3?
 	open my $fh, "$cmd|" or return undef;
@@ -527,6 +532,8 @@ sub wget_fake_head {
 		printf "file too large: %0.2fMB\n", $size / (1024 * 1024);
 		$skipcount++;
 		$size = 0;
+	} elsif(not defined $size) {
+		print "can't determine file size, skipping\n";
 	}
 
 	return $size;
@@ -537,6 +544,7 @@ sub wget_fake_head {
 # or 0 for "too big", or undef for any error.
 sub wget {
 	my $url = shift;
+	our $wget_config_arg;
 
 	if($url =~ /'/) {
 		print "Refusing to deal with URL \"$url\" due to embedded single-quote.\n" .
@@ -557,22 +565,38 @@ sub wget {
 		close $fh;
 	}
 
+	if(not defined $wget_config_arg) {
+		$wget_config_arg = "";
+		open my $fh, "wget --help|" or die "can't run wget: $!\n";
+		while(<$fh>) {
+			$wget_config_arg = "--config=$wgetrc" if /--config/;
+		}
+		close $fh;
+		if(not $wget_config_arg) {
+			print "\n| wget version is too old to support --config option.\n";
+			print "| continuing without it...\n";
+			sleep 1;
+		}
+	}
+
 	my $outfile;
 	($fh, $outfile) = tempfile("wget.out.XXXXXXXX", DIR => $tmpdir, UNLINK => 1);
 	close $fh;
 
 	# TODO: open3?
-	my $cmd = "wget --config=$wgetrc " .
+	# the -O is there to force the filename, in case of a redirect. newer
+	# versions of wget don't actually need this, but it doesn't hurt.
+	my $cmd = "wget $wget_config_arg " .
 		user_agent($url) . " " .
-		($head ? "--spider --tries 1" : "") .
+		($head ? "--spider --tries 1" : "-O " . url_to_filename($url)) .
 		" $wgetargs " .
 		"'$url' " .
 		">$outfile 2>&1";
 		#" --referer='$url' " . # don't use, it breaks sourceforge
 
-	print "$cmd\n";
 	my $retval = system($cmd);
+	print "$cmd\n" if $retval != 0;
 
 	open $fh, "<", "$outfile";
 	while(<$fh>) {
@@ -702,7 +726,9 @@ sub store_file {
 	my $md5dir = md5_dir($md5);
 	my $namedir = name_dir($category, $prgnam);
 
-	mkpath($md5dir, $namedir);
+	mkpath($md5dir);
+	mkpath($namedir);
+	unlink($namedir . "/" . $filename); # rm -f old copy, if any
 	link($filename, $namedir . "/" . $filename);
 	if($symlinks) {
 		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
@@ -723,34 +749,35 @@ sub handle_info_file {
 
 	s,^\./,,; # strip leading ./, if present
 	my ($category, $prgnam) = split /\//, $_;
-	print "=== $category/$prgnam: ";
+	print "=== $category/$prgnam\n";
 
 	for(keys %$dls) {
 		$urlcount++;
 		my $url = $_;
 		my $md5 = $dls->{$_};
 		my $filename = url_to_filename($url);
+		print ": $url\n";
 
 		if(already_exists($filename, $category, $prgnam, $md5)) {
-			print "already in archive, OK\n";
+			print "  already in archive, OK\n";
 			$archivecount++;
 		} else {
 			$attemptcount++;
 			download_file($url); # TODO: check result!
 			if(! -f $filename) {
 				$failcount++;
-				print "$filename not downloaded\n";
+				print "  not downloaded\n";
 				next;
 			}
 
 			if(md5sum_file($filename) ne $md5) {
 				$failcount++;
-				print "md5sum failed for $url";
+				print "  md5sum failed\n";
				unlink($filename);
 				next;
 			}
 
-			print "downloaded, OK\n";
+			print "  downloaded, OK\n";
 			$archivecount++;
 			$dlcount++;
 			store_file($filename, $category, $prgnam, $md5);
@@ -996,19 +1023,61 @@ sub local_add {
 		copy($localfile, $targetfile);
 		store_file($targetfile, $category, $prgnam, $md5);
 		unlink($targetfile);
+		print "added $targetfile for $category/$prgnam\n";
 	}
 
 	for(keys %localmd5s) {
 		print "$localmd5s{$_} ($_) ignored: doesn't match any md5sum in $info\n";
 	}
+}
 
-	exit 0;
+sub add_by_md5_wanted {
+	our %md5_to_dl;
+	return unless /\.info/;
+	s,\./,,;
+	my ($category, $prgnam, undef) = split /\//;
+	my $dls = parse_info($_);
+	$md5_to_dl{$_} = "$category/$prgnam" for values %$dls;
+}
+
+sub add_by_md5 {
+	print "no category/prgnam, adding file(s) by md5sum\n";
+	my $oldcwd = shift;
+	our %md5_to_dl;
+	find({wanted => \&add_by_md5_wanted, no_chdir => 1}, ".");
+
+	for my $filename (@_) {
+		my $infile = $filename;
+		$infile = "$oldcwd/$infile" unless -f $infile;
+
+		my $md5 = md5sum_file($infile);
+		next unless defined $md5;
+
+		my $catname = $md5_to_dl{$md5} or do {
+			print "$filename ($md5) doesn't match any .info file, skipping\n";
+			next;
+		};
+
+		my $info = find_info_file($catname) or do {
+			print "can't find info file for $catname";
+			next;
+		};
+
+		local_add($oldcwd, $catname, $info, $filename);
+		chdir($sbogitdir);
+	}
 }
 
 sub add_or_rm_mode {
 	my $oldcwd = POSIX::getcwd();
 	init_git();
 	my $mode = shift @ARGV;
+
+	if($mode eq 'add' && @ARGV && -f $ARGV[0] || -f "$oldcwd/$ARGV[0]") {
+		add_by_md5($oldcwd, @ARGV);
+		exit 0;
+	}
+
 	my $catname = shift @ARGV or usage();
 
 	if($catname eq '-f') {
@@ -1025,7 +1094,6 @@ sub add_or_rm_mode {
 		if(!@ARGV) { # no args, use URL(s) in .info file
 			$_ = $info;
 			handle_info_file();
-			exit 0;
 		} else {
 			local_add($oldcwd, $catname, $info, @ARGV);
 		}
@@ -1039,11 +1107,11 @@ sub add_or_rm_mode {
 			rmdir(name_dir($category, $prgname));
 			unlink(md5_dir($md5) . "/$filename");
 			rmdir(md5_dir($md5));
-			exit 0;
 		}
 	} else {
 		die "this never happens";
 	}
+	exit 0;
 }
 
 # check_mode() needs to do this:
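
Note: the headline change above is the new add-by-md5 path (add_by_md5_wanted/add_by_md5), which lets `add` take bare files with no category/prgnam, e.g. `sbosrcarch add foo-1.0.tar.gz bar-2.1.tgz`. For readers who want the idea in isolation, here is a minimal standalone Perl sketch of the same approach: walk a SlackBuilds tree, map every md5sum found in the .info files to its category/prgnam, then classify arbitrary local files by checksum. This is an illustration only, not code from the commit; it assumes the usual SBo .info layout (MD5SUM="..." and MD5SUM_x86_64="..." fields), and it skips the script's own helpers (parse_info, md5sum_file, find_info_file, local_add) and error handling. The script name and paths in the usage line are made up.

```perl
#!/usr/bin/perl
# Standalone sketch (not from the commit): index a SlackBuilds tree by
# md5sum, then classify local files by checksum -- roughly the lookup
# the new add_by_md5() performs before handing each file to local_add().
use warnings;
use strict;
use File::Find;
use Digest::MD5;

my $sbogitdir = shift @ARGV or die "usage: $0 <slackbuilds-tree> <file> ...\n";
my %md5_to_dl;   # md5sum => "category/prgnam"

find({no_chdir => 1, wanted => sub {
	return unless /\.info$/;
	(my $rel = $_) =~ s,^\Q$sbogitdir\E/?,,;
	my ($category, $prgnam) = (split m{/}, $rel)[0, 1];
	return unless defined $prgnam;
	open my $fh, "<", $_ or return;
	my $text = do { local $/; <$fh> };
	close $fh;
	# collect every 32-hex-digit token from MD5SUM / MD5SUM_x86_64 fields
	while($text =~ /^MD5SUM(?:_x86_64)?="([^"]*)"/mg) {
		for my $sum ($1 =~ /([0-9a-f]{32})/g) {
			$md5_to_dl{$sum} = "$category/$prgnam";
		}
	}
}}, $sbogitdir);

for my $file (@ARGV) {
	open my $fh, "<", $file or do { warn "$file: $!\n"; next };
	binmode $fh;
	my $md5 = Digest::MD5->new->addfile($fh)->hexdigest;
	close $fh;
	if(my $catname = $md5_to_dl{$md5}) {
		print "$file => $catname\n";
	} else {
		print "$file ($md5) doesn't match any .info file\n";
	}
}
```

Run against a checked-out SlackBuilds tree, e.g. `perl md5index.pl ~/slackbuilds foo-1.0.tar.gz` (names hypothetical), it prints which category/prgnam each file belongs to. Building the whole index up front is what makes the real `add <file> [...]` form slower than the category/prgnam form, but it is also what lets one invocation sort files for many different builds at once.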
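A smaller fix worth calling out: wget() and wget_fake_head() previously passed --config=$wgetrc unconditionally, but wget releases that predate the --config option abort on it. The commit instead scans `wget --help` once, caches the result in $wget_config_arg, and only adds the flag when it is advertised. Below is a standalone sketch of that probe; the $wgetrc path here is a made-up placeholder, not the script's real temp file.

```perl
#!/usr/bin/perl
# Sketch: decide whether this wget understands --config by scanning its
# --help output, mirroring the probe added to sbosrcarch's wget().
use warnings;
use strict;

my $wgetrc = "/tmp/example-wgetrc";   # placeholder path for the sketch
my $wget_config_arg = "";

open my $fh, "wget --help|" or die "can't run wget: $!\n";
while(<$fh>) {
	$wget_config_arg = "--config=$wgetrc" if /--config/;
}
close $fh;

if($wget_config_arg) {
	print "wget supports --config, will pass '$wget_config_arg'\n";
} else {
	print "wget too old for --config option, continuing without it\n";
}
```

In the script the probe runs at most once per invocation, since $wget_config_arg stays defined after the first check.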
