From a7e8c90ff2616edade63638f041b814e1f6031f1 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Wed, 7 Aug 2024 00:34:40 -0400 Subject: update_sbtext.pl: added. not installed by default since it's pretty niche-market. --- update_sbtext.pl | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100755 update_sbtext.pl diff --git a/update_sbtext.pl b/update_sbtext.pl new file mode 100755 index 0000000..952d0c3 --- /dev/null +++ b/update_sbtext.pl @@ -0,0 +1,215 @@ +#!/usr/bin/perl -w + +##### +# update_sbtext.pl +##### + +##### +# Written by B. Watson (urchlay@slackware.uk) on 20240806. If you +# modify this file, please include the date, a description of the +# changes, and your name, below. +##### + +##### +# Revision history: +# 20240806, initial version, B. Watson. +##### + +##### +# Description: + +# Replacement for update_sbtext.sh. Does the same job but runs a good +# bit faster. + +# Differences between update_sbtext.sh and update_sbtext.pl: + +# 1. update_sbtext.pl is written in Perl (yeah, call me Captain Obvious). + +# 2. update_sbtext.pl is approximaly 60 times as fast. With ~9400 builds +# in the repo, the shell version ran in 15 minutes, and this one +# runs in 15 *seconds*. + +# 3. The output isn't *quite* identical to the .sh version: I made +# the short description more consistent (always one space after +# the colon; remove the duplicate package names caused by a bug in +# the old script that manifested whenever the package name has a + +# in it). + +# 4. This script writes to SLACKBUILDS.TXT.new, and doesn't disturb +# the existing SLACKBUILDS.TXT or SLACKBUILDS.TXT.gz until it's +# done (then it overwrites the old files in one swell foop, so no +# users ever see partial results). + +# 5. If there are symlinks in the repo, they will show up in the +# SLACKBUILD FILE LIST for the build they belong to. With the shell +# version, they are ignored. This actually only affects one build +# (desktop/mint-y-icons), and it's not clear to me that symlinks +# should be allowed in the git repo anyway. + +# I know most of my fellow SBo admins aren't real big on Perl, so I +# commented the hell out of this. If it still doesn't make sense, find +# me on IRC or by email and I'll try to make it make sense. +##### + +##### +# These 2 variables are configurable settings, if you can think of a +# reason to change them. +##### +$slackbuilds_top_dir = "/slackbuilds/www/slackbuilds"; +$output = "SLACKBUILDS.TXT"; + +##### +# Use only modules that ship with Slackware's perl package here! +use File::Find; +##### + +#### +# Rest of the file is hopefully bug-free code (hey, I can hope, right?) +#### + +# File::Find "wanted" predicate. When find() calls this, it sets +# $_ to the filename (the basename only) and $File::Find::dir +# to the directory (relative to find()'s 2nd argument). See +# "perldoc File::Find" if you need more information. +sub found_file { + # don't process directories at all. + return if -d $_; + + # $dot is always "." + # $cat is the category (audio, development, etc). + # $prgnam is (guess what?) the build name. + # @path is the rest. for e.g. slack-desc, it'll be just the name. + # if there's a subdirectory, it'll be dir/filename. + my ($dot, $cat, $prgnam, @path) = split /\//, $File::Find::dir; + + # if there aren't at least 2 dir components, ignore. + return unless defined $cat; + return unless defined $prgnam; + + # %files hash key. + $dir = "$dot/$cat/$prgnam"; + + # %files values are array references, holding the subdirs under the + # SlackBuild dir, if any, and the basename. + push @{$files{$dir}}, join("/", @path, $_); + + return unless /(.*)\.info$/; + + # if we found an .info file, add it to the list. + push @infos, "$dir/$_"; +} + +# Join together lines in .info files that are split by +# backslashes. This is actually more forgiving of spacing errors +# than it needs to be (better not to be so picky). +sub fix_backslashes { + for($_[0]) { + s, + \s* # optional spaces + \\ # required: single backslash + \s* # optional spaces + \n # required: newline + \s* # optional spaces + , ,gx; # all the above gets replaced with a single space. + return $_; + } +} + +# Read an entire file, join any lines back together if they're split +# with a backslash, return entire file contents as a scalar. +sub slurp_file { + my $file = shift; + + local $/ = undef; + open my $fh, '<', $file || die $!; + my $content = <$fh>; + close $fh; + + return fix_backslashes($content); +} + +# Return just the value from the .info file contents, given the key. +# Assumes the content's already had fix_backslashes() called on it. +sub get_info_value { + my $file = shift; # only needed for error message + my $info = shift; + my $key = shift; + + $$info =~ m,^$key="(.*?)",m or die "no match for $key in $file\n"; + return $1; +} + +# get the first non-comment line of the slack-desc in $dir, remove +# the 'buildname: ' prefix, return the rest. +sub get_short_desc { + my $buildname = quotemeta shift; + my $dir = shift; + my $file = $dir . "/slack-desc"; + + open my $fh, '<', $file || die "$file: $!\n"; + my $line; + while(<$fh>) { + chomp; + last if /^$buildname: /; + } + close $fh; + + die "$file: can't figure out short description\n" unless defined $_; + + s,^\S+:\s*,,; + return $_; +} + +# parse_info() actually prints the 'entry' in SLACKBUILDS.TXT for a +# given info file. Argument must be the path to the file, like +# ./category/progname/progname.info +sub parse_info { + my $file = shift; + my $loc; + my $name; + + ($loc, $name) = ($file =~ m,(^.*)/([^/]+)\.info$,); + + print "SLACKBUILD NAME: $name\n"; + print "SLACKBUILD LOCATION: $loc\n"; + print "SLACKBUILD FILES: " . join(" ", sort @{$files{$loc}}) . "\n"; + + my $content = slurp_file($file); + + for (qw/VERSION DOWNLOAD DOWNLOAD_x86_64 MD5SUM MD5SUM_x86_64 REQUIRES/) { + print "SLACKBUILD $_: " . get_info_value($file, \$content, $_) . "\n"; + } + + # N.B. update_sbtext.sh always puts 2 spaces after the : here, unless + # the script name has a + in it (e.g. wmweather+, atari++), in which + # case it puts one space, and duplicates the package name. I consider + # this a bug in the original script; here, there's always one space + # and no dup package names. + print "SLACKBUILD SHORT DESCRIPTION: " . get_short_desc($name, $loc); + + print "\n\n"; +} + +# main() +$subdir = shift || die "Usage: $0 \n"; + +chdir $slackbuilds_top_dir . "/" . $subdir || die $!; + +open $outfh, '>', "$output.new" || die $!; +*STDOUT = $outfh; + +# slight difference: .sh version printed this string to stdout, not +# stderr. I don't think anyone cares. +warn "$0: Updating SLACKBUILDS.TXT...\n"; + +# builds the %files and @infos tables. +find(\&found_file, '.'); + +# generate the entries, in sorted order. +parse_info($_) for sort @infos; + +close $outfh; + +# don't overwrite SLACKBUILDS.TXT (and .TXT.gz) until the end. +rename "$output.new", $output; +system("gzip -9c $output > $output.gz"); -- cgit v1.2.3