about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author B. Watson <yalhcru@gmail.com> 2015-09-30 00:21:56 -0400
committer B. Watson <yalhcru@gmail.com> 2015-09-30 00:21:56 -0400
commit c9b34791d6e725a77d0ce23e62ad57905f832f73 (patch)
tree 366a0c4c142452d2f3b1556e70e9bc423248de15
parent 484d33e7b4471e5d9038aa0f929c1b9a422875b5 (diff)
download sbostuff-c9b34791d6e725a77d0ce23e62ad57905f832f73.tar.gz
Add purge --rebuild
-rwxr-xr-x sbosrcarch 96
1 files changed, 78 insertions, 18 deletions
diff --git a/sbosrcarch b/sbosrcarch
index 6acd856..8479cd1 100755
--- a/sbosrcarch
+++ b/sbosrcarch
@@ -78,13 +78,16 @@ quickly and not eat many resources. For each new URL, the file is
downloaded and added to the archive, but the old file is *not* deleted
(use 'sbosrcarch purge' to do that).
-=item purge
+=item purge [-r|--rebuild]
Purge files from the archive that are no longer referenced by any
-.info file. Should be run monthly or quarterly as a cron job. This is
+.info file. Should be run monthly or quarterly as a cron job. This is
more resource-intensive than an update, as it must read and parse every
.info file in the SBo repository.
+If -r or --rebuild is given, the entire by-md5 tree is deleted and recreated.
+This shouldn't be needed unless $symlinks is changed.
+
=item trim
Gets rid of files that are in the archive, but are larger than the size
@@ -93,9 +96,9 @@ reason to run it any other time.
=item check
-TODO: this is not yet implemented.
+TODO: check is not yet implemented!
-Checks the integrity and coverage of the archive: Will report at least these conditions:
+Checks the integrity and coverage of the archive. Reports at least these conditions:
- dangling symlinks
- invalid md5sums
@@ -145,7 +148,7 @@ B<CONFIG FILE> for details.
The archive created by sbosrcarch consists of two top-level directories
called B<by-name> and B<by-md5>. All files are present in both hierarchies
-(as hardlinked copies, to save space).
+(as hard or symbolic links, to save space).
B<by-name> is organized by the familiar category and PRGNAM, like SBo
itself. Example:
@@ -162,7 +165,8 @@ file needed. The same file as the example above would be found at:
by-md5/f/4/f4d413f880754fd6677290160f8bc5d7/ifstatus-v1.1.0.tar.gz
Notice there are two layers of subdirectory, named after the first two
-hex digits in the md5sum.
+hex digits in the md5sum. Also, notice that the actual SlackBuilds and
+.info files are not present in the archive.
There is one other directory of files used/maintained by sbosrcarch:
a git clone of SBo's master git branch. This is cloned and updated
@@ -185,7 +189,7 @@ The git clone directory doesn't need to be served to the public.
TODO: example Apache, proftpd, etc configs for serving up the archive.
-=head1 EXAMPLE
+=head1 CLIENT-SIDE EXAMPLE
TODO: shell script that parses an .info file and tries to d/l the source
from the archive.
@@ -213,16 +217,32 @@ that I'm not planning to address any time soon:
No threading. Not likely to change. It would be possible to spawn wget
processes in the background, but I'm not going to complicate it that way.
+It would mainly be useful for create mode, and hopefully each archive
+site only needs to do that once.
-Anything that checks referer header or otherwise tries to stop automated
-downloads, will stop us. This isn't really a bug (sbopkg can't handle them
-either).
-
-Length: unspecified isn't handled (we just don't download these). Specifically,
-dropbox URLs do this.
+There maybe should be a whitelist and a blacklist. The whitelist would be
+a list of builds (or entire categories) that you want to mirror all of,
+regardless of file size limits. The blacklist would be a list of builds
+or categories you don't want to mirror, ever. Probably I won't add this
+unless multiple people ask for it.
-$sbogitdir and $archivedir must be located on the same filesystem, as files
-are moved around by linking them.
+Anything that checks referer header or otherwise tries to stop automated
+downloads, will stop us. This isn't really a bug (sbopkg can't handle
+them either). Usually the README will say "you must download the file
+with a browser" or such. You can still download the file manually
+and use "sbosrcarch add category/prgnam filename.tar.gz" to add it
+to the archive... but please pay attention to licensing! Some files
+(e.g. Oracle's Java) don't allow redistribution, so please don't include
+them in your archive.
+
+Length: unspecified isn't handled (we just don't download
+these). Specifically, dropbox URLs do this. Might add an option that
+controls what to do about these, e.g. download & keep them all instead
+of ignoring them all. Can still add them manually.
+
+$sbogitdir and $archivedir must be located on the same filesystem,
+as files are moved around by linking them. Not a major problem, just
+thought I'd mention it.
=head1 AUTHOR
@@ -236,7 +256,7 @@ use File::Find;
use Digest::MD5;
use Net::FTP;
use POSIX 'getcwd';
-use File::Path 'make_path';
+use File::Path qw/make_path remove_tree/;
use File::Copy 'copy';
sub read_config {
@@ -715,10 +735,23 @@ sub update_mode {
# category/name dir, but not mentioned in the filename hash (also, rm its
# md5_dir() counterpart).
# 3. do a trim_post() pass to delete any empty dirs and/or dangling symlinks
+# If --rebuild is given, pass 3 instead deletes the by-md5 tree and
+# recreates it.
# FIXME: files from different URLs but with the same filename will not be
# purged when they should, because the comparison is solely filename-based!
sub purge_mode {
+ my $rebuild = 0;
+
+ shift @ARGV;
+ if($ARGV[0]) {
+ if($ARGV[0] =~ /^--?r(?:ebuild)?/) {
+ $rebuild = 1;
+ } else {
+ die "Unknown option: $ARGV[0]\n";
+ }
+ }
+
init_git();
$purgebytes = $purgefiles = 0;
@@ -736,7 +769,13 @@ sub purge_mode {
find({wanted => \&purge_pass_2_wanted, no_chdir => 1}, "by-name");
# pass 3
- trim_post();
+ if($rebuild) {
+ remove_tree("by-md5");
+ print "Removed by-md5 tree, rebuilding\n";
+ find({wanted => \&rebuild_wanted, no_chdir => 1}, "by-name");
+ } else {
+ trim_post();
+ }
printf("Purged $purgefiles files, %.1fMB\n", ($purgebytes / (1024 * 1024)));
exit 0;
@@ -764,10 +803,27 @@ sub purge_pass_2_wanted {
$purgebytes += -s $_;
$purgefiles++;
- unlink md5_dir(md5sum_file($_)). "$file";
+ unlink md5_dir(md5sum_file($_)). "/$file";
unlink $_;
}
+sub rebuild_wanted {
+ return unless -f;
+
+ s,^\./,,; # remove leading ./
+ my $md5dir = md5_dir(md5sum_file($_));
+ my (undef, $category, $prgnam, $filename) = split /\//, $_;
+
+ make_path($md5dir);
+
+ if($symlinks) {
+ symlink("../../../../by-name/" . $category . "/" . $prgnam . "/" . $filename,
+ $md5dir . "/" . $filename);
+ } else {
+ link($_, $md5dir . "/" . $filename);
+ }
+}
+
# helper for trim_mode
sub trim_wanted {
return unless -f $_;
@@ -904,6 +960,9 @@ sub add_or_rm_mode {
}
}
+sub check_mode {
+}
+
sub usage {
my $self = $0;
$self =~ s,.*/,,;
@@ -941,6 +1000,7 @@ for ($ARGV[0]) {
/add/ && do { add_or_rm_mode(); };
/rm/ && do { add_or_rm_mode(); };
/trim/ && do { trim_mode(); };
+ /check/ && do { check_mode(); };
usage();
}