#!/usr/bin/perl -w ##### # update_sbtext.pl ##### ##### # Written by B. Watson (urchlay@slackware.uk) on 20240806. If you # modify this file, please include the date, a description of the # changes, and your name, below. ##### ##### # Revision history: # 20240806, initial version, B. Watson. ##### ##### # Description: # Replacement for update_sbtext.sh. Does the same job but runs a good # bit faster. # Differences between update_sbtext.sh and update_sbtext.pl: # 1. update_sbtext.pl is written in Perl (yeah, call me Captain Obvious). # 2. update_sbtext.pl is approximaly 60 times as fast. With ~9400 builds # in the repo, the shell version ran in 15 minutes, and this one # runs in 15 *seconds*. # 3. The output isn't *quite* identical to the .sh version: I made # the short description more consistent (always one space after # the colon; remove the duplicate package names caused by a bug in # the old script that manifested whenever the package name has a + # in it). # 4. This script writes to SLACKBUILDS.TXT.new, and doesn't disturb # the existing SLACKBUILDS.TXT or SLACKBUILDS.TXT.gz until it's # done (then it overwrites the old files in one swell foop, so no # users ever see partial results). # 5. If there are symlinks in the repo, they will show up in the # SLACKBUILD FILE LIST for the build they belong to. With the shell # version, they are ignored. This actually only affects one build # (desktop/mint-y-icons), and it's not clear to me that symlinks # should be allowed in the git repo anyway. # I know most of my fellow SBo admins aren't real big on Perl, so I # commented the hell out of this. If it still doesn't make sense, find # me on IRC or by email and I'll try to make it make sense. ##### ##### # These 2 variables are configurable settings, if you can think of a # reason to change them. ##### $slackbuilds_top_dir = "/slackbuilds/www/slackbuilds"; $output = "SLACKBUILDS.TXT"; ##### # Use only modules that ship with Slackware's perl package here! use File::Find; ##### #### # Rest of the file is hopefully bug-free code (hey, I can hope, right?) #### # File::Find "wanted" predicate. When find() calls this, it sets # $_ to the filename (the basename only) and $File::Find::dir # to the directory (relative to find()'s 2nd argument). See # "perldoc File::Find" if you need more information. sub found_file { # don't process directories at all. return if -d $_; # $dot is always "." # $cat is the category (audio, development, etc). # $prgnam is (guess what?) the build name. # @path is the rest. for e.g. slack-desc, it'll be just the name. # if there's a subdirectory, it'll be dir/filename. my ($dot, $cat, $prgnam, @path) = split /\//, $File::Find::dir; # if there aren't at least 2 dir components, ignore. return unless defined $cat; return unless defined $prgnam; # %files hash key. $dir = "$dot/$cat/$prgnam"; # %files values are array references, holding the subdirs under the # SlackBuild dir, if any, and the basename. push @{$files{$dir}}, join("/", @path, $_); return unless /(.*)\.info$/; # if we found an .info file, add it to the list. push @infos, "$dir/$_"; } # Join together lines in .info files that are split by # backslashes. This is actually more forgiving of spacing errors # than it needs to be (better not to be so picky). sub fix_backslashes { for($_[0]) { s, \s* # optional spaces \\ # required: single backslash \s* # optional spaces \n # required: newline \s* # optional spaces , ,gx; # all the above gets replaced with a single space. return $_; } } # Read an entire file, join any lines back together if they're split # with a backslash, return entire file contents as a scalar. sub slurp_file { my $file = shift; local $/ = undef; open my $fh, '<', $file || die $!; my $content = <$fh>; close $fh; return fix_backslashes($content); } # Return just the value from the .info file contents, given the key. # Assumes the content's already had fix_backslashes() called on it. sub get_info_value { my $file = shift; # only needed for error message my $info = shift; my $key = shift; $$info =~ m,^$key="(.*?)",m or die "no match for $key in $file\n"; return $1; } # get the first non-comment line of the slack-desc in $dir, remove # the 'buildname: ' prefix, return the rest. sub get_short_desc { my $buildname = quotemeta shift; my $dir = shift; my $file = $dir . "/slack-desc"; open my $fh, '<', $file || die "$file: $!\n"; my $line; while(<$fh>) { chomp; last if /^$buildname: /; } close $fh; die "$file: can't figure out short description\n" unless defined $_; s,^\S+:\s*,,; return $_; } # parse_info() actually prints the 'entry' in SLACKBUILDS.TXT for a # given info file. Argument must be the path to the file, like # ./category/progname/progname.info sub parse_info { my $file = shift; my $loc; my $name; ($loc, $name) = ($file =~ m,(^.*)/([^/]+)\.info$,); print "SLACKBUILD NAME: $name\n"; print "SLACKBUILD LOCATION: $loc\n"; print "SLACKBUILD FILES: " . join(" ", sort @{$files{$loc}}) . "\n"; my $content = slurp_file($file); for (qw/VERSION DOWNLOAD DOWNLOAD_x86_64 MD5SUM MD5SUM_x86_64 REQUIRES/) { print "SLACKBUILD $_: " . get_info_value($file, \$content, $_) . "\n"; } # N.B. update_sbtext.sh always puts 2 spaces after the : here, unless # the script name has a + in it (e.g. wmweather+, atari++), in which # case it puts one space, and duplicates the package name. I consider # this a bug in the original script; here, there's always one space # and no dup package names. print "SLACKBUILD SHORT DESCRIPTION: " . get_short_desc($name, $loc); print "\n\n"; } # main() $subdir = shift || die "Usage: $0 \n"; chdir $slackbuilds_top_dir . "/" . $subdir || die $!; open $outfh, '>', "$output.new" || die $!; *STDOUT = $outfh; # slight difference: .sh version printed this string to stdout, not # stderr. I don't think anyone cares. warn "$0: Updating SLACKBUILDS.TXT...\n"; # builds the %files and @infos tables. find(\&found_file, '.'); # generate the entries, in sorted order. parse_info($_) for sort @infos; close $outfh; # don't overwrite SLACKBUILDS.TXT (and .TXT.gz) until the end. rename "$output.new", $output; system("gzip -9c $output > $output.gz");