From 484d33e7b4471e5d9038aa0f929c1b9a422875b5 Mon Sep 17 00:00:00 2001
From: "B. Watson" <yalhcru@gmail.com>
Date: Tue, 29 Sep 2015 05:50:06 -0400
Subject: Cleanups based on suggestions from ttkp and pink_mist, plus TODO
 comments for more of them

---
 sbosrcarch | 45 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/sbosrcarch b/sbosrcarch
index ce269f8..6acd856 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -1,5 +1,13 @@
 #!/usr/bin/perl -w
 
+# TODO based on feedback from IRC, ttkp and pink_mist
+# - use warnings; instead of -w (not sure that matters here TBH)
+# - use strict
+# - be more paranoid about input (e.g. invalid URLs in info files
+#   with embedded quotes or whatever)
+# - IPC::Open3 instead of open my $fh, "wget ...|"? At least use
+#   open my $fh, "-|", "wget", @args or such, to avoid quoting issues.
+
 =pod
 
 =head1 NAME
@@ -228,6 +236,8 @@ use File::Find;
 use Digest::MD5;
 use Net::FTP;
 use POSIX 'getcwd';
+use File::Path 'make_path';
+use File::Copy 'copy';
 
 sub read_config {
 	@configdirs = (
@@ -290,6 +300,9 @@ EOF
 
 # url_to_filename, gets the filename part of a URL (after the last slash)
 # and un-escapes any %XX sequences.
+# Note: we *don't* do plus-to-space conversion here, as that's only
+# for CGI params, not URLs in general. There are quite a few files
+# called e.g. "c++-utils.tar.gz" that would get broken by it.
 sub url_to_filename {
 	my $u = shift;
 	$u =~ s,.*/,,;
@@ -423,6 +436,9 @@ sub wget_fake_head {
 	return $size;
 }
 
+# wget() does a HEAD (or fake head, if HEAD fails), or GET (download),
+# using an external wget process. Return value is the file size in bytes,
+# or 0 for "too big", or undef for any error.
 sub wget {
 	my $url = shift;
 	my $head = shift; # boolean, 0 = download (GET), 1 = HEAD request only
@@ -452,7 +468,7 @@ sub wget {
 	print "$cmd\n";
 	my $retval = system($cmd);
 
-	open $fh, "<$outfile";
+	open $fh, "<", "$outfile";
 	while(<$fh>) {
 		print " ! $_" if $retval != 0;
 
@@ -550,7 +566,10 @@ sub name_dir {
 
 sub md5sum_file {
 	my $filename = shift;
-	open my $fh, "<", $filename; # XXX: error check (don't use die)
+	open my $fh, "<", $filename or do {
+		print "can't get md5sum of $filename: $!\n";
+		return undef;
+	};
 	binmode($fh);
 	my $ret = Digest::MD5->new->addfile($fh)->hexdigest;
 	close $fh;
@@ -570,18 +589,22 @@ sub already_exists {
 		($md5 eq md5sum_file($n));
 }
 
-# TODO: handle %20 => space (and other URL encodings)
-# ...needs to be done elsewhere too, not just here.
 sub store_file {
 	my ($filename, $category, $prgnam, $md5) = @_;
 
 	#warn "store_file($filename, $category, $prgnam, $md5);\n";
 
-	system("mkdir -p " . md5_dir($md5));
-	system("mkdir -p " . name_dir($category, $prgnam));
-	link($filename, name_dir($category, $prgnam) . "/" . $filename);
-	warn "symlinks not yet supported, using hardlink instead\n" if $symlinks;
-	link($filename, md5_dir($md5) . "/" . $filename); # TODO: symlink option
+	my $md5dir = md5_dir($md5);
+	my $namedir = name_dir($category, $prgnam);
+
+	make_path($md5dir, $namedir);
+	link($filename, $namedir . "/" . $filename);
+	if($symlinks) {
+		symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
+				$md5dir . "/" . $filename);
+	} else {
+		link($filename, $md5dir . "/" . $filename);
+	}
 }
 
 # handle_info_file() is used as the 'wanted' sub for File::Find, but
@@ -591,7 +614,7 @@ sub store_file {
 sub handle_info_file {
 	return unless /\.info$/;
 
-	my $dls = parse_info("$_");
+	my $dls = parse_info($_);
 
 	s,^\./,,; # strip leading ./, if present
 	my ($category, $prgnam) = split /\//, $_;
@@ -828,7 +851,7 @@ sub local_add {
 
 		delete $localmd5s{$md5};
 
-		system("cp \"$localfile\" \"./$targetfile\"");
+		copy($localfile, $targetfile);
 		store_file($targetfile, $category, $prgnam, $md5);
 		unlink($targetfile);
 	}
-- 
cgit v1.2.3