aboutsummaryrefslogtreecommitdiff
path: root/sbosrcarch
diff options
context:
space:
mode:
author: B. Watson <yalhcru@gmail.com>, 2018-06-03 23:14:35 -0400
committer: B. Watson <yalhcru@gmail.com>, 2018-06-03 23:14:35 -0400
commit: 33b9a9ea4cadcdf7fde87be65bcdf826755bcf31 (patch)
tree: 914a8c6ac91e49a5d5ceea9f88f389764530321b /sbosrcarch
parent: e59a94d14d132d7d88d5a7dfb465b1a218f5840e (diff)
download: sbostuff-33b9a9ea4cadcdf7fde87be65bcdf826755bcf31.tar.gz
sbosrcarch purge_mode fix, wip
Diffstat (limited to 'sbosrcarch')
-rwxr-xr-x sbosrcarch 38
1 files changed, 24 insertions, 14 deletions
diff --git a/sbosrcarch b/sbosrcarch
index 944e5ba..bfa91b5 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -378,7 +378,8 @@ our($sbogiturl, $sbogitdir, $archivedir, $maxfilemegs, $wget,
$wgetargs, $symlinks, $wgetrc_contents, $wgetrc, %user_agent_overrides,
@trim_empty_dirs, $skipcount, $urlcount, $archivecount,
$attemptcount, $failcount, $dlcount, $nowarchived, $coverage,
- $purgebytes, $purgefiles, $trimcount, $trimbytes, %keep_filenames);
+ $purgebytes, $purgefiles, $trimcount, $trimbytes,
+ %keep_filenames, %keep_md5sums, $fake_purge);
our ($curl, $curlopts);
our (%whitehash, %blackhash, $use_bwlist);
our @whitelist = ();
@@ -561,6 +562,7 @@ sub parse_info {
$ret{$_} = $m;
}
+ close $fh;
return \%ret;
}
@@ -1198,16 +1200,16 @@ sub update_mode {
}
# purge_mode() does 3 passes.
-# 1. get all the filenames from all the info files, build a hash of filenames.
-# 2. walk the archive tree with File::Find and rm any file that's in a
-# category/name dir, but not mentioned in the filename hash (also, rm its
-# md5_dir() counterpart).
+# 1. get all the filenames from all the info files, build hashes of filenames
+# and md5sums that we want to keep.
+# 2. walk the archive tree with File::Find and rm any file that's (a) in a
+# category/name dir, but not mentioned in the filename hash, or (b) in a
+# by-md5 dir, but whose md5sum is not mentioned in the md5sum hash.
# 3. do a trim_post() pass to delete any empty dirs and/or dangling symlinks
# If --rebuild is given, pass 3 instead deletes the by-md5 tree and
# recreates it.
+# If --fake is given, the 3 passes are all done, but nothing is deleted.
-# FIXME: files from different URLs but with the same filename will not be
-# purged when they should, because the comparison is solely filename-based!
sub purge_mode {
my $rebuild = 0;
@@ -1215,6 +1217,8 @@ sub purge_mode {
if($ARGV[0]) {
if($ARGV[0] =~ /^--?r(?:ebuild)?/) {
$rebuild = 1;
+ } elsif($ARGV[0] =~ /^--?f(?:ake)?/) {
+ $fake_purge = 1;
} else {
die "Unknown option: $ARGV[0]\n";
}
@@ -1225,7 +1229,7 @@ sub purge_mode {
$purgebytes = $purgefiles = 0;
# pass 1
- %keep_filenames = (); # populated by the find():
+ %keep_filenames = %keep_md5sums = (); # populated by the find():
find({wanted => \&purge_pass_1_wanted, no_chdir => 1}, ".");
# for(keys %keep_filenames) {
@@ -1249,13 +1253,15 @@ sub purge_mode {
exit 0;
}
-# helper for purge_mode, populates %keep_filenames
+# helper for purge_mode, populates %keep_filenames and %keep_md5sums
sub purge_pass_1_wanted {
return unless /\.info$/;
my $dls = parse_info($_);
+ # find() is called with no_chdir => 1 on ".", so $_ is the full relative
+ # path "./category/name/name.info"; discard the leading "." component.
+ my (undef, $cat, $name) = split /\//, $_;
for(keys %$dls) {
- $_ = url_to_filename($_);
- $keep_filenames{$_}++;
+ my $path = "by-name/$cat/$name/" . url_to_filename($_);
+ # key on the archive path just built; $_ still holds the download URL
+ $keep_filenames{$path}++;
+ # TODO: populate %keep_md5sums
}
}
@@ -1271,10 +1277,13 @@ sub purge_pass_2_wanted {
$purgefiles++;
my $namepath = name_dir($cat, $name) . "$file";
- my $md5path = md5_dir(md5sum_file($namepath)) . "$file";
- print "purge $namepath $md5path\n";
- unlink $namepath;
- unlink $md5path;
+ #my $md5path = md5_dir(md5sum_file($namepath)) . "$file";
+
+ #print "purge $namepath $md5path\n";
+ print "purge $namepath\n";
+
+ unlink $namepath unless $fake_purge;
+ #unlink $md5path;
}
sub rebuild_wanted {
@@ -1307,6 +1316,7 @@ sub trim_wanted {
# helper for trim_post
sub trim_post_wanted {
+ return if $fake_purge;
unlink $_ if -l $_ && ! -e _;
return unless -d _;
push @trim_empty_dirs, $_ if !<*>;