#!/usr/bin/perl -w

# sbofixinfo - fix common errors in SBo .info files.
# companion piece to sbolint.

# Don't edit the next line; use "make version" instead.
$VERSION="0.9.3";

=pod

=head1 NAME

sbofixinfo - fix common errors in SlackBuilds.org .info files

=head1 SYNOPSIS

B<sbofixinfo> I<file-or-dir> [I<file-or-dir> I<...>]

=head1 DESCRIPTION

B<sbofixinfo> attempts to fix common errors in SlackBuilds.org .info
files. Each argument must be an .info file or a directory containing
an .info file. With no arguments, the .info file in the current
directory is fixed.

B<sbofixinfo> attempts to fix the following errors in SBo .info files:

=over 4

=item -

Out-of-order keys will be reordered to match the order in the template.

=item -

Extraneous keys will be removed.

=item -

Blank lines will be removed.

=item -

Extra whitespace will be removed. This doesn't include indentation for
the 2nd and further lines of a multi-line value.

=item -

Missing \ (backslash) continuation characters will be added.

=item -

Missing " (double-quote) characters around key values will be added.

=item -

Values quoted with single-quotes will be quoted with double-quotes.

=item -

Multi-valued keys (e.g. DOWNLOAD with two URLs) will be split up into
multiple lines, if they're not already. Any missing line-continuation
backslashes will be added. Continuation lines will be correctly indented.

=item -

Any capitalized hex digits in MD5SUM or MD5SUM_x86_64 will be lowercased.

=item -

Lowercase key names (e.g. md5sum or download) will be uppercased.

=item -

Misspelled (typo'ed) key names will be corrected, if they're not too
mangled. When this happens, a warning is given on standard error. The
correction algorithm isn't perfect, so make sure you compare the
original and 'corrected' key names using your own eyeballs and brain.

=item -

If there is no PRGNAM, it will be generated from the .info file's name
(minus the I<.info> extension).

=item -

Github archive/ download URLs will be "canonized".
This means URLs of these forms:

  https://github.com/user/project/archive/v1.2.3.tar.gz
  https://github.com/user/project/archive/refs/tags/v1.2.3.tar.gz
  https://github.com/user/project/archive/1.2.3.tar.gz
  https://github.com/user/project/archive/refs/tags/1.2.3.tar.gz

...will be rewritten as:

  https://github.com/user/project/archive/<tag>/project-1.2.3.tar.gz

...where <tag> will be v1.2.3 or 1.2.3, depending on whether the "v"
was in the original URL. The purpose of this is to give a stable
download filename, which won't vary depending on whether a browser or
wget/curl is used to download the file.

See https://slackbuilds.org/GITHUB_URLs.txt for more information.

=back

B<sbofixinfo> doesn't attempt to detect any other errors or dubious
constructs. Use B<sbolint> before and after running B<sbofixinfo> for
comprehensive checking.

The file is modified 'in-place', in the same way as the B<-i> option
to B(1). B<sbofixinfo> keeps a backup of the original file with the
extension I<.bak> appended to the filename. If the backup file already
exists, it will be silently overwritten.

After the new file is generated, B<diff>(1) (with its B<-u> and
B<--color> options) is run on the backup and modified files, and the
backup is deleted if the new file is identical. The diff output also
goes to stdout, so the user can see what changes were made.

=head1 EXIT STATUS

Will be 0 for success. If any errors reading or writing any of the
.info files occur, the exit status will be the error count.

=head1 BUGS

B<sbofixinfo> can't automatically fix every issue B<sbolint> reports.
In particular, missing or extra values (for valid keys) can't
automatically be fixed. This isn't really a bug, as B<sbofixinfo> can't
know what to do in these situations. In other words, B<sbofixinfo>
operates only at the syntactic level, and knows nothing of semantics.

=head1 AUTHOR

B. Watson , aka Urchlay on Libera IRC.

=head1 SEE ALSO

B(1), B(1), B(1), B(8), B(1)

=cut

# NOTE(review): the command names inside the B<...> codes of the SEE ALSO
# list above appear to have been lost; restore them from the upstream copy.

use strict;
use warnings;
use Cwd qw(abs_path);

# Assigned near the top of the file (maintained by "make version").
our $VERSION;

# Script name with leading directories removed; prefixes every diagnostic.
our $SELF;
($SELF = $0) =~ s,.*/,,;

# The keys of an .info file, in the order they are written back out.
our @keyorder = qw{
    PRGNAM VERSION HOMEPAGE
    DOWNLOAD MD5SUM
    DOWNLOAD_x86_64 MD5SUM_x86_64
    REQUIRES MAINTAINER EMAIL
};

# Number of files that could not be read or written; used as exit status.
our $errcnt = 0;

# fix_github_url($url)
# Rewrite a github "archive" download URL so that it ends in
# archive/<tag>/<project>-<version>.tar.gz. Returns the URL unchanged
# (apart from a removed "refs/tags/" component) when it has no /archive/
# part, already carries a filename after the tag, or is not a .tar.gz.
# The caller is expected to pass only https://github.com/ URLs.
sub fix_github_url {
    my $url = shift;

    return $url unless $url =~ m,/archive/,;

    $url =~ s,refs/tags/,,;

    # "https:", "", "github.com", user, project, "archive", tag, [filename]
    my (undef, undef, undef, $user, $proj, undef, $tag, $filename)
        = split m{/}, $url;

    # a component after the tag means the URL is already in canonical form.
    return $url if defined $filename;

    # too few components, or not a .tar.gz archive: leave it alone.
    return $url unless defined $tag && $tag =~ s,\.tar\.gz$,,;

    # the version is the tag minus a leading "v" (only if a digit follows).
    (my $ver = $tag) =~ s,^v(\d),$1,;

    return "https://github.com/$user/$proj/archive/$tag/$proj-$ver.tar.gz";
}

# fix_key($key)
# Return the corrected form of a key name, warning on standard error
# whenever the name had to be changed.
sub fix_key {
    my $key = shift;
    my $newkey = fix_key_guts($key);

    if($key ne $newkey) {
        warn "$SELF: typo correction: $key => $newkey\n";
    }

    return $newkey;
}

# fix_key_guts($key)
# Do the actual key correction for fix_key(): fix the case, then try to
# map an unrecognized name onto the closest entry of @keyorder. A key
# that matches nothing is returned as-is (after the case fix).
sub fix_key_guts {
    my $key = shift;

    # fix case if needed
    $key =~ tr/-a-wXyz/_A-WxYZ/; # leave "x" lowercase!

    # if it's a recognized key, use it
    return $key if grep { $_ eq $key } @keyorder;

    # if it's not a recognized key, try to detect typos.
    # most of the keys look different enough that the matching algo
    # below does OK with them, except if someone types MAIL in place
    # of EMAIL (the algo will match MAINTAINER, not EMAIL). so special
    # case it here:
    if((length($key) < 8) && ($key =~ /^[ME]+[AI]+.?L.?/)) {
        return "EMAIL";
    }

    # does the key look like it was meant to have the _x86_64 suffix?
    # This must be tested against the key itself: $_ may hold a whole
    # input line, whose value can easily contain "64" or "86".
    my $x64 = ($key =~ /_x|_\d|x\d|86|68|64|46/) ? 1 : 0;

    my $bestmatch;
    my $bmcount = 0;

    # typo-correction matching algorithm:
    for my $candidate (@keyorder) {
        # the _x86_64 variants are handled via $x64 afterwards.
        next if $candidate =~ /x86/;

        # if the first 2 characters match, assume the candidate is OK.
        if(substr($key, 0, 2) eq substr($candidate, 0, 2)) {
            $bestmatch = $candidate;
            last;
        }

        # get rid of extraneous characters (not in the candidate key).
        (my $trykey = $key) =~ s/[^\Q$candidate\E]//g;

        # if nothing's left, this is obviously the wrong candidate.
        next unless length $trykey;

        # the candidate sharing the most characters wins.
        if(length($trykey) > $bmcount) {
            $bestmatch = $candidate;
            $bmcount = length($trykey);
        }
    }

    if(defined $bestmatch) {
        $key = $bestmatch;
        if(($key eq 'DOWNLOAD') || ($key eq 'MD5SUM')) {
            $key .= "_x86_64" if $x64;
        }
    }

    # if nothing matched, the key gets returned as-is.
    return $key;
}

# _normalize_value($key, $text)
# Apply the per-key cleanups to one line's worth of value text: MD5SUM*
# values are lowercased, and DOWNLOAD*/MD5SUM* values (which can't
# contain spaces) get one "\r"-separated entry per whitespace-separated
# word.
sub _normalize_value {
    my ($key, $text) = @_;

    $text =~ tr/A-Z/a-z/ if $key =~ /^MD5SUM/;
    $text =~ s/\s+/\r/g  if $key =~ /^(?:DOWNLOAD|MD5SUM)/;

    return $text;
}

# _read_info($fh)
# Read a whole .info file from $fh and return a hash of key => value.
# Multi-valued keys are stored with carriage returns as a delimiter
# (they're otherwise not allowed). Quotes, backslashes, etc aren't
# stored (only the actual values).
sub _read_info {
    my $fh = shift;

    # anything found before the first KEY= line lands under this
    # placeholder, which is never written back out.
    my $key = 'INVALID_STUFF';
    my %info;

    while(my $line = <$fh>) {
        chomp $line;
        $line =~ s/\r//g;                 # no DOS line endings
        next if $line =~ /^\s*$/;         # ignore blank lines entirely
        $line =~ s/(?:^\s+|\s+$)//g;      # remove leading/trailing spaces
        $line =~ s/^(\w+)\s*=\s*/$1=/;    # remove spaces around =

        if($line =~ /^(\w+)=(.*)$/) {
            my ($rawkey, $val) = ($1, $2);
            $val =~ s,(?:^['"]|['"]$),,g; # remove quotes around value
            $val =~ s,[\s\\]*$,,;         # remove any line-continuation backslash
            $key = fix_key($rawkey);
            $info{$key} = _normalize_value($key, $val);
        } else {
            # continuation line: belongs to the most recently seen key.
            $line =~ s,[\s\\"']*$,,;
            $info{$key} .= "\r" . _normalize_value($key, $line);
        }
    }

    return %info;
}

# _format_info(\%info)
# Reconstitute the text of an .info file from the values, with correct
# key order, indentation, quoting and backslashes. Missing keys are
# written with an empty value.
sub _format_info {
    my $info = shift;
    my $text = "";

    for my $key (@keyorder) {
        # "//" rather than "||", so that a value of "0" survives.
        my @values = grep { length } split /\r/, ($info->{$key} // "");

        if(@values < 2) {
            my $only = @values ? $values[0] : "";
            $text .= "$key=\"$only\"\n";
            next;
        }

        # continuation lines line up with the first value, after KEY="
        my $indent = " " x (length($key) + 2);
        my $first  = shift @values;
        my $last   = pop @values;

        $text .= "$key=\"$first \\\n";
        $text .= "$indent$_ \\\n" for @values;
        $text .= "$indent$last\"\n";
    }

    return $text;
}

# fix_info($file)
# Fix one .info file in place. $file is either the .info file or a
# directory, in which case <dir>/<dirname>.info is used. The original is
# kept as $file.bak unless diff reports the rewritten file as identical.
# Read/write failures are reported on standard error and counted in
# $errcnt. Returns nothing.
sub fix_info {
    my $file = shift;

    if(-d $file) {
        my $dir = abs_path($file) // $file;
        (my $name = $dir) =~ s,.*/,,;
        $file = "$dir/$name.info";
    }

    my $in;
    unless(open $in, '<', $file) {
        warn "$SELF: can't read $file: $!\n";
        $errcnt++;
        return;
    }

    # this only gets used if the .info file is missing the PRGNAM key.
    my $prgnam_guess = $file;
    $prgnam_guess =~ s,.*/,,;
    $prgnam_guess =~ s,\.[^.]*$,,;

    my %info = _read_info($in);
    close $in;

    # 20240331 bkw: fix github URLs, if needed.
    for my $key (qw/DOWNLOAD DOWNLOAD_x86_64/) {
        next unless $info{$key};

        # rejoin with "\r", the multi-value delimiter _format_info expects.
        $info{$key} = join "\r",
            map { m{^https://github\.com/} ? fix_github_url($_) : $_ }
            split " ", $info{$key};
    }

    # keep the original as a backup; refuse to go on without one.
    unless(rename $file, "$file.bak") {
        warn "$SELF: can't back up $file to $file.bak: $!\n";
        $errcnt++;
        return;
    }

    my $out;
    unless(open $out, '>', $file) {
        warn "$SELF: can't write $file: $!\n";
        $errcnt++;
        return;
    }

    if(not defined $info{'PRGNAM'}) {
        warn "$SELF: $file missing PRGNAM, guessing \"$prgnam_guess\" from filename.\n";
        $info{'PRGNAM'} = $prgnam_guess;
    }

    print {$out} _format_info(\%info);

    # buffered write errors only show up at close time, and the file has
    # to be complete on disk before diff looks at it.
    unless(close $out) {
        warn "$SELF: can't write $file: $!\n";
        $errcnt++;
        return;
    }

    # list form: no shell, so odd characters in $file are harmless.
    my $result = system('diff', '-u', '--color', '--', "$file.bak", $file);
    if($result == 0) {
        unlink("$file.bak");
        warn "$SELF: no changes to $file\n";
    }

    return;
}

# main(@args)
# Handle the option-like first argument, if any; otherwise fix every
# file/directory named in @args (or "." when there are none). Returns
# the error count, which the caller uses as the exit status.
sub main {
    my @args = @_;

    if(@args) {
        my $opt = $args[0];

        if($opt =~ /^--?(?:v|ver.*)$/) {
            print "$VERSION\n";
            exit 0;
        }

        if($opt =~ /^--?(?:h|help|doc)$/) {
            exec("perldoc", $0) or die "$SELF: can't exec perldoc\n";
        }

        if($opt =~ /^--?(?:m|man)$/) {
            exec("pod2man", "--stderr", "-s1", "-csbo-maintainer-tools",
                 "-r$VERSION", $0)
                or die "$SELF: can't exec pod2man\n";
        }

        # undocumented option: run the typo corrector on each line of
        # standard input (corrections are reported on standard error).
        if($opt =~ /^--keytest/) {
            while(my $line = <STDIN>) {
                chomp $line;
                fix_key($line);
            }
            exit 0;
        }
    }

    @args = (".") unless @args;

    fix_info($_) for @args;

    return $errcnt;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without side effects.
exit main(@ARGV) unless caller;

1;