From d06db00a212ee503ff4fbf4fd7df66a3f71a2f6b Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Wed, 7 Aug 2024 00:42:38 -0400 Subject: update_sbtxt.pl: renamed from update_sbtext.pl (derp). --- update_sbtext.pl | 215 ------------------------------------------------------- update_sbtxt.pl | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+), 215 deletions(-) delete mode 100755 update_sbtext.pl create mode 100755 update_sbtxt.pl diff --git a/update_sbtext.pl b/update_sbtext.pl deleted file mode 100755 index 952d0c3..0000000 --- a/update_sbtext.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/perl -w - -##### -# update_sbtext.pl -##### - -##### -# Written by B. Watson (urchlay@slackware.uk) on 20240806. If you -# modify this file, please include the date, a description of the -# changes, and your name, below. -##### - -##### -# Revision history: -# 20240806, initial version, B. Watson. -##### - -##### -# Description: - -# Replacement for update_sbtext.sh. Does the same job but runs a good -# bit faster. - -# Differences between update_sbtext.sh and update_sbtext.pl: - -# 1. update_sbtext.pl is written in Perl (yeah, call me Captain Obvious). - -# 2. update_sbtext.pl is approximaly 60 times as fast. With ~9400 builds -# in the repo, the shell version ran in 15 minutes, and this one -# runs in 15 *seconds*. - -# 3. The output isn't *quite* identical to the .sh version: I made -# the short description more consistent (always one space after -# the colon; remove the duplicate package names caused by a bug in -# the old script that manifested whenever the package name has a + -# in it). - -# 4. This script writes to SLACKBUILDS.TXT.new, and doesn't disturb -# the existing SLACKBUILDS.TXT or SLACKBUILDS.TXT.gz until it's -# done (then it overwrites the old files in one swell foop, so no -# users ever see partial results). - -# 5. If there are symlinks in the repo, they will show up in the -# SLACKBUILD FILE LIST for the build they belong to. With the shell -# version, they are ignored. This actually only affects one build -# (desktop/mint-y-icons), and it's not clear to me that symlinks -# should be allowed in the git repo anyway. - -# I know most of my fellow SBo admins aren't real big on Perl, so I -# commented the hell out of this. If it still doesn't make sense, find -# me on IRC or by email and I'll try to make it make sense. -##### - -##### -# These 2 variables are configurable settings, if you can think of a -# reason to change them. -##### -$slackbuilds_top_dir = "/slackbuilds/www/slackbuilds"; -$output = "SLACKBUILDS.TXT"; - -##### -# Use only modules that ship with Slackware's perl package here! -use File::Find; -##### - -#### -# Rest of the file is hopefully bug-free code (hey, I can hope, right?) -#### - -# File::Find "wanted" predicate. When find() calls this, it sets -# $_ to the filename (the basename only) and $File::Find::dir -# to the directory (relative to find()'s 2nd argument). See -# "perldoc File::Find" if you need more information. -sub found_file { - # don't process directories at all. - return if -d $_; - - # $dot is always "." - # $cat is the category (audio, development, etc). - # $prgnam is (guess what?) the build name. - # @path is the rest. for e.g. slack-desc, it'll be just the name. - # if there's a subdirectory, it'll be dir/filename. - my ($dot, $cat, $prgnam, @path) = split /\//, $File::Find::dir; - - # if there aren't at least 2 dir components, ignore. - return unless defined $cat; - return unless defined $prgnam; - - # %files hash key. - $dir = "$dot/$cat/$prgnam"; - - # %files values are array references, holding the subdirs under the - # SlackBuild dir, if any, and the basename. - push @{$files{$dir}}, join("/", @path, $_); - - return unless /(.*)\.info$/; - - # if we found an .info file, add it to the list. - push @infos, "$dir/$_"; -} - -# Join together lines in .info files that are split by -# backslashes. This is actually more forgiving of spacing errors -# than it needs to be (better not to be so picky). -sub fix_backslashes { - for($_[0]) { - s, - \s* # optional spaces - \\ # required: single backslash - \s* # optional spaces - \n # required: newline - \s* # optional spaces - , ,gx; # all the above gets replaced with a single space. - return $_; - } -} - -# Read an entire file, join any lines back together if they're split -# with a backslash, return entire file contents as a scalar. -sub slurp_file { - my $file = shift; - - local $/ = undef; - open my $fh, '<', $file || die $!; - my $content = <$fh>; - close $fh; - - return fix_backslashes($content); -} - -# Return just the value from the .info file contents, given the key. -# Assumes the content's already had fix_backslashes() called on it. -sub get_info_value { - my $file = shift; # only needed for error message - my $info = shift; - my $key = shift; - - $$info =~ m,^$key="(.*?)",m or die "no match for $key in $file\n"; - return $1; -} - -# get the first non-comment line of the slack-desc in $dir, remove -# the 'buildname: ' prefix, return the rest. -sub get_short_desc { - my $buildname = quotemeta shift; - my $dir = shift; - my $file = $dir . "/slack-desc"; - - open my $fh, '<', $file || die "$file: $!\n"; - my $line; - while(<$fh>) { - chomp; - last if /^$buildname: /; - } - close $fh; - - die "$file: can't figure out short description\n" unless defined $_; - - s,^\S+:\s*,,; - return $_; -} - -# parse_info() actually prints the 'entry' in SLACKBUILDS.TXT for a -# given info file. Argument must be the path to the file, like -# ./category/progname/progname.info -sub parse_info { - my $file = shift; - my $loc; - my $name; - - ($loc, $name) = ($file =~ m,(^.*)/([^/]+)\.info$,); - - print "SLACKBUILD NAME: $name\n"; - print "SLACKBUILD LOCATION: $loc\n"; - print "SLACKBUILD FILES: " . join(" ", sort @{$files{$loc}}) . "\n"; - - my $content = slurp_file($file); - - for (qw/VERSION DOWNLOAD DOWNLOAD_x86_64 MD5SUM MD5SUM_x86_64 REQUIRES/) { - print "SLACKBUILD $_: " . get_info_value($file, \$content, $_) . "\n"; - } - - # N.B. update_sbtext.sh always puts 2 spaces after the : here, unless - # the script name has a + in it (e.g. wmweather+, atari++), in which - # case it puts one space, and duplicates the package name. I consider - # this a bug in the original script; here, there's always one space - # and no dup package names. - print "SLACKBUILD SHORT DESCRIPTION: " . get_short_desc($name, $loc); - - print "\n\n"; -} - -# main() -$subdir = shift || die "Usage: $0 \n"; - -chdir $slackbuilds_top_dir . "/" . $subdir || die $!; - -open $outfh, '>', "$output.new" || die $!; -*STDOUT = $outfh; - -# slight difference: .sh version printed this string to stdout, not -# stderr. I don't think anyone cares. -warn "$0: Updating SLACKBUILDS.TXT...\n"; - -# builds the %files and @infos tables. -find(\&found_file, '.'); - -# generate the entries, in sorted order. -parse_info($_) for sort @infos; - -close $outfh; - -# don't overwrite SLACKBUILDS.TXT (and .TXT.gz) until the end. -rename "$output.new", $output; -system("gzip -9c $output > $output.gz"); diff --git a/update_sbtxt.pl b/update_sbtxt.pl new file mode 100755 index 0000000..952d0c3 --- /dev/null +++ b/update_sbtxt.pl @@ -0,0 +1,215 @@ +#!/usr/bin/perl -w + +##### +# update_sbtext.pl +##### + +##### +# Written by B. Watson (urchlay@slackware.uk) on 20240806. If you +# modify this file, please include the date, a description of the +# changes, and your name, below. +##### + +##### +# Revision history: +# 20240806, initial version, B. Watson. +##### + +##### +# Description: + +# Replacement for update_sbtext.sh. Does the same job but runs a good +# bit faster. + +# Differences between update_sbtext.sh and update_sbtext.pl: + +# 1. update_sbtext.pl is written in Perl (yeah, call me Captain Obvious). + +# 2. update_sbtext.pl is approximaly 60 times as fast. With ~9400 builds +# in the repo, the shell version ran in 15 minutes, and this one +# runs in 15 *seconds*. + +# 3. The output isn't *quite* identical to the .sh version: I made +# the short description more consistent (always one space after +# the colon; remove the duplicate package names caused by a bug in +# the old script that manifested whenever the package name has a + +# in it). + +# 4. This script writes to SLACKBUILDS.TXT.new, and doesn't disturb +# the existing SLACKBUILDS.TXT or SLACKBUILDS.TXT.gz until it's +# done (then it overwrites the old files in one swell foop, so no +# users ever see partial results). + +# 5. If there are symlinks in the repo, they will show up in the +# SLACKBUILD FILE LIST for the build they belong to. With the shell +# version, they are ignored. This actually only affects one build +# (desktop/mint-y-icons), and it's not clear to me that symlinks +# should be allowed in the git repo anyway. + +# I know most of my fellow SBo admins aren't real big on Perl, so I +# commented the hell out of this. If it still doesn't make sense, find +# me on IRC or by email and I'll try to make it make sense. +##### + +##### +# These 2 variables are configurable settings, if you can think of a +# reason to change them. +##### +$slackbuilds_top_dir = "/slackbuilds/www/slackbuilds"; +$output = "SLACKBUILDS.TXT"; + +##### +# Use only modules that ship with Slackware's perl package here! +use File::Find; +##### + +#### +# Rest of the file is hopefully bug-free code (hey, I can hope, right?) +#### + +# File::Find "wanted" predicate. When find() calls this, it sets +# $_ to the filename (the basename only) and $File::Find::dir +# to the directory (relative to find()'s 2nd argument). See +# "perldoc File::Find" if you need more information. +sub found_file { + # don't process directories at all. + return if -d $_; + + # $dot is always "." + # $cat is the category (audio, development, etc). + # $prgnam is (guess what?) the build name. + # @path is the rest. for e.g. slack-desc, it'll be just the name. + # if there's a subdirectory, it'll be dir/filename. + my ($dot, $cat, $prgnam, @path) = split /\//, $File::Find::dir; + + # if there aren't at least 2 dir components, ignore. + return unless defined $cat; + return unless defined $prgnam; + + # %files hash key. + $dir = "$dot/$cat/$prgnam"; + + # %files values are array references, holding the subdirs under the + # SlackBuild dir, if any, and the basename. + push @{$files{$dir}}, join("/", @path, $_); + + return unless /(.*)\.info$/; + + # if we found an .info file, add it to the list. + push @infos, "$dir/$_"; +} + +# Join together lines in .info files that are split by +# backslashes. This is actually more forgiving of spacing errors +# than it needs to be (better not to be so picky). +sub fix_backslashes { + for($_[0]) { + s, + \s* # optional spaces + \\ # required: single backslash + \s* # optional spaces + \n # required: newline + \s* # optional spaces + , ,gx; # all the above gets replaced with a single space. + return $_; + } +} + +# Read an entire file, join any lines back together if they're split +# with a backslash, return entire file contents as a scalar. +sub slurp_file { + my $file = shift; + + local $/ = undef; + open my $fh, '<', $file || die $!; + my $content = <$fh>; + close $fh; + + return fix_backslashes($content); +} + +# Return just the value from the .info file contents, given the key. +# Assumes the content's already had fix_backslashes() called on it. +sub get_info_value { + my $file = shift; # only needed for error message + my $info = shift; + my $key = shift; + + $$info =~ m,^$key="(.*?)",m or die "no match for $key in $file\n"; + return $1; +} + +# get the first non-comment line of the slack-desc in $dir, remove +# the 'buildname: ' prefix, return the rest. +sub get_short_desc { + my $buildname = quotemeta shift; + my $dir = shift; + my $file = $dir . "/slack-desc"; + + open my $fh, '<', $file || die "$file: $!\n"; + my $line; + while(<$fh>) { + chomp; + last if /^$buildname: /; + } + close $fh; + + die "$file: can't figure out short description\n" unless defined $_; + + s,^\S+:\s*,,; + return $_; +} + +# parse_info() actually prints the 'entry' in SLACKBUILDS.TXT for a +# given info file. Argument must be the path to the file, like +# ./category/progname/progname.info +sub parse_info { + my $file = shift; + my $loc; + my $name; + + ($loc, $name) = ($file =~ m,(^.*)/([^/]+)\.info$,); + + print "SLACKBUILD NAME: $name\n"; + print "SLACKBUILD LOCATION: $loc\n"; + print "SLACKBUILD FILES: " . join(" ", sort @{$files{$loc}}) . "\n"; + + my $content = slurp_file($file); + + for (qw/VERSION DOWNLOAD DOWNLOAD_x86_64 MD5SUM MD5SUM_x86_64 REQUIRES/) { + print "SLACKBUILD $_: " . get_info_value($file, \$content, $_) . "\n"; + } + + # N.B. update_sbtext.sh always puts 2 spaces after the : here, unless + # the script name has a + in it (e.g. wmweather+, atari++), in which + # case it puts one space, and duplicates the package name. I consider + # this a bug in the original script; here, there's always one space + # and no dup package names. + print "SLACKBUILD SHORT DESCRIPTION: " . get_short_desc($name, $loc); + + print "\n\n"; +} + +# main() +$subdir = shift || die "Usage: $0 \n"; + +chdir $slackbuilds_top_dir . "/" . $subdir || die $!; + +open $outfh, '>', "$output.new" || die $!; +*STDOUT = $outfh; + +# slight difference: .sh version printed this string to stdout, not +# stderr. I don't think anyone cares. +warn "$0: Updating SLACKBUILDS.TXT...\n"; + +# builds the %files and @infos tables. +find(\&found_file, '.'); + +# generate the entries, in sorted order. +parse_info($_) for sort @infos; + +close $outfh; + +# don't overwrite SLACKBUILDS.TXT (and .TXT.gz) until the end. +rename "$output.new", $output; +system("gzip -9c $output > $output.gz"); -- cgit v1.2.3