From 33b9a9ea4cadcdf7fde87be65bcdf826755bcf31 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Sun, 3 Jun 2018 23:14:35 -0400 Subject: sbosrcarch purge_mode fix, wip --- sbosrcarch | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/sbosrcarch b/sbosrcarch index 944e5ba..bfa91b5 100755 --- a/sbosrcarch +++ b/sbosrcarch @@ -378,7 +378,8 @@ our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs, $wget, $wgetargs, $symlinks, $wgetrc_contents, $wgetrc, %user_agent_overrides, @trim_empty_dirs, $skipcount, $urlcount, $archivecount, $attemptcount, $failcount, $dlcount, $nowarchived, $coverage, - $purgebytes, $purgefiles, $trimcount, $trimbytes, %keep_filenames); + $purgebytes, $purgefiles, $trimcount, $trimbytes, + %keep_filenames, %keep_md5sums, $fake_purge); our ($curl, $curlopts); our (%whitehash, %blackhash, $use_bwlist); our @whitelist = (); @@ -561,6 +562,7 @@ sub parse_info { $ret{$_} = $m; } + close $fh; return \%ret; } @@ -1198,16 +1200,16 @@ sub update_mode { } # purge_mode() does 3 passes. -# 1. get all the filenames from all the info files, build a hash of filenames. -# 2. walk the archive tree with File::Find and rm any file that's in a -# category/name dir, but not mentioned in the filename hash (also, rm its -# md5_dir() counterpart). +# 1. get all the filenames from all the info files, build hashes of filenames +# and md5sums that we want to keep. +# 2. walk the archive tree with File::Find and rm any file that's (a) in a +# category/name dir, but not mentioned in the filename hash, or (b) in a +# by-md5 dir, but whose md5sum is not mentioned in the md5sum hash. # 3. do a trim_post() pass to delete any empty dirs and/or dangling symlinks # If --rebuild is given, pass 3 instead deletes the by-md5 tree and # recreates it. +# If --fake is given, the 3 passes are all done, but nothing is deleted. -# FIXME: files from different URLs but with the same filename will not be -# purged when they should, because the comparison is solely filename-based! sub purge_mode { my $rebuild = 0; @@ -1215,6 +1217,8 @@ sub purge_mode { if($ARGV[0]) { if($ARGV[0] =~ /^--?r(?:ebuild)?/) { $rebuild = 1; + } elsif($ARGV[0] =~ /^--?f(?:ake)?/) { + $fake_purge = 1; } else { die "Unknown option: $ARGV[0]\n"; } @@ -1225,7 +1229,7 @@ sub purge_mode { $purgebytes = $purgefiles = 0; # pass 1 - %keep_filenames = (); # populated by the find(): + %keep_filenames = %keep_md5sums = (); # populated by the find(): find({wanted => \&purge_pass_1_wanted, no_chdir => 1}, "."); # for(keys %keep_filenames) { @@ -1249,13 +1253,15 @@ sub purge_mode { exit 0; } -# helper for purge_mode, populates %keep_filenames +# helper for purge_mode, populates %keep_filenames and %keep_md5sums sub purge_pass_1_wanted { return unless /\.info$/; my $dls = parse_info($_); + my ($cat, $name, undef) = split /\//, $_; for(keys %$dls) { - $_ = url_to_filename($_); + my $path = "by-name/$cat/$name/" . url_to_filename($_); $keep_filenames{$_}++; + # TODO: populate %keep_md5sums } } @@ -1271,10 +1277,13 @@ sub purge_pass_2_wanted { $purgefiles++; my $namepath = name_dir($cat, $name) . "$file"; - my $md5path = md5_dir(md5sum_file($namepath)) . "$file"; - print "purge $namepath $md5path\n"; - unlink $namepath; - unlink $md5path; + #my $md5path = md5_dir(md5sum_file($namepath)) . "$file"; + + #print "purge $namepath $md5path\n"; + print "purge $namepath\n"; + + unlink $namepath unless $fake_purge; + #unlink $md5path; } sub rebuild_wanted { @@ -1307,6 +1316,7 @@ sub trim_wanted { # helper for trim_post sub trim_post_wanted { + return if $fake_purge; unlink $_ if -l $_ && ! -e _; return unless -d _; push @trim_empty_dirs, $_ if !<*>; -- cgit v1.2.3