From 484d33e7b4471e5d9038aa0f929c1b9a422875b5 Mon Sep 17 00:00:00 2001
From: "B. Watson"
Date: Tue, 29 Sep 2015 05:50:06 -0400
Subject: Cleanups based on suggestions from ttkp and pink_mist, plus TODO comments for more of them

---
 sbosrcarch | 45 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/sbosrcarch b/sbosrcarch
index ce269f8..6acd856 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -1,5 +1,13 @@
 #!/usr/bin/perl -w
 
+# TODO based on feedback from IRC, ttkp and pink_mist
+# - use warnings; instead of -w (not sure that matters here TBH)
+# - use strict
+# - be more paranoid about input (e.g. invalid URLs in info files
+#   with embedded quotes or whatever)
+# - IPC::Open3 instead of open my $fh, "wget ...|"? At least use
+#   open my $fh, "-|", "wget", @args or such, to avoid quoting issues.
+
 =pod
 
 =head1 NAME
@@ -228,6 +236,8 @@ use File::Find;
 use Digest::MD5;
 use Net::FTP;
 use POSIX 'getcwd';
+use File::Path 'make_path';
+use File::Copy 'copy';
 
 sub read_config {
 	@configdirs = (
@@ -290,6 +300,9 @@ EOF
 
 # url_to_filename, gets the filename part of a URL (after the last slash)
 # and un-escapes any %XX sequences.
+# Note: we *don't* do plus-to-space conversion here, as that's only
+# for CGI params, not URLs in general. There are quite a few files
+# called e.g. "c++-utils.tar.gz" that would get broken by it.
 sub url_to_filename {
 	my $u = shift;
 	$u =~ s,.*/,,;
@@ -423,6 +436,9 @@ sub wget_fake_head {
 	return $size;
 }
 
+# wget() does a HEAD (or fake head, if HEAD fails), or GET (download),
+# using an external wget process. Return value is the file size in bytes,
+# or 0 for "too big", or undef for any error.
 sub wget {
 	my $url = shift;
 	my $head = shift; # boolean, 0 = download (GET), 1 = HEAD request only
@@ -452,7 +468,7 @@ sub wget {
 	print "$cmd\n";
 	my $retval = system($cmd);
 
-	open $fh, "<$outfile";
+	open $fh, "<", "$outfile";
 	while(<$fh>) {
 		print " ! $_" if $retval != 0;
 
@@ -550,7 +566,10 @@ sub name_dir {
 
 sub md5sum_file {
 	my $filename = shift;
-	open my $fh, "<", $filename; # XXX: error check (don't use die)
+	open my $fh, "<", $filename or do {
+		print "can't get md5sum of $filename: $!\n";
+		return undef;
+	};
 	binmode($fh);
 	my $ret = Digest::MD5->new->addfile($fh)->hexdigest;
 	close $fh;
@@ -570,18 +589,22 @@ sub already_exists {
 	($md5 eq md5sum_file($n));
 }
 
-# TODO: handle %20 => space (and other URL encodings)
-# ...needs to be done elsewhere too, not just here.
 sub store_file {
 	my ($filename, $category, $prgnam, $md5) = @_;
 
 	#warn "store_file($filename, $category, $prgnam, $md5);\n";
 
-	system("mkdir -p " . md5_dir($md5));
-	system("mkdir -p " . name_dir($category, $prgnam));
-	link($filename, name_dir($category, $prgnam) . "/" . $filename);
-	warn "symlinks not yet supported, using hardlink instead\n" if $symlinks;
-	link($filename, md5_dir($md5) . "/" . $filename); # TODO: symlink option
+	my $md5dir = md5_dir($md5);
+	my $namedir = name_dir($category, $prgnam);
+
+	make_path($md5dir, $namedir);
+	link($filename, $namedir . "/" . $filename);
+	if($symlinks) {
+		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
+			$md5dir . "/" . $filename);
+	} else {
+		link($filename, $md5dir . "/" . $filename);
+	}
 }
 
 # handle_info_file() is used as the 'wanted' sub for File::Find, but
@@ -591,7 +614,7 @@ sub store_file {
 sub handle_info_file {
 	return unless /\.info$/;
 
-	my $dls = parse_info("$_");
+	my $dls = parse_info($_);
 
 	s,^\./,,; # strip leading ./, if present
 	my ($category, $prgnam) = split /\//, $_;
@@ -828,7 +851,7 @@ sub local_add {
 
 	delete $localmd5s{$md5};
 
-	system("cp \"$localfile\" \"./$targetfile\"");
+	copy($localfile, $targetfile);
 	store_file($targetfile, $category, $prgnam, $md5);
 	unlink($targetfile);
 }
-- 
cgit v1.2.3