From 586ad2ce86475daeed7a09d00dc3e042dc9dde8b Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Thu, 7 Jul 2022 00:31:07 -0400 Subject: sbofixinfo: typo correction --- sbofixinfo | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 2 deletions(-) (limited to 'sbofixinfo') diff --git a/sbofixinfo b/sbofixinfo index eeba16e..7c8028e 100755 --- a/sbofixinfo +++ b/sbofixinfo @@ -70,6 +70,18 @@ Any capitalized hex digits in MD5SUM or MD5SUM_x86_64 will be lowercased. Lowercase key names (e.g. md5sum or download) will be uppercased. +=item - + +Misspelled (typo'ed) key names will be corrected, if they're not too +mangled. When this happens, a warning is given on standard error. The +correction algorithm isn't perfect, so make sure you compare the +original and 'corrected' key names using your own eyeballs and brain. + +=item - + +If there is no PRGNAM, it will be generated from the .info file's name +(minus the I<.info> extension). + =back B<sbofixinfo> doesn't attempt to detect any other errors or dubious @@ -142,6 +154,13 @@ for($arg0) { exec("pod2man --stderr -s1 -csbo-maintainer-tools -r$VERSION $0") || die "$SELF: can't exec pod2man\n"; }; + /^--keytest/ && do { + while(<STDIN>) { + chomp; + fix_key($_); + } + exit 0; + }; } push @ARGV, "." unless @ARGV; @@ -152,7 +171,75 @@ exit $errcnt; sub fix_key { my $key = shift; - $key =~ tr/-a-wyz/_A-WYZ/; # leave "x" lowercase! + my $newkey = fix_key_guts($key); + if($key ne $newkey) { + warn "$SELF: typo correction: $key => $newkey\n"; + } + return $newkey; +} + +sub fix_key_guts { + my $key = shift; + + # fix case if needed + $key =~ tr/-a-wXyz/_A-WxYZ/; # leave "x" lowercase! + + # if it's a recognized key, use it + if(grep { $_ eq $key } @keyorder) { + return $key; + } + + # if it's not a recognized key, try to detect typos. 
+ # most of the keys look different enough that the matching algo + # below does OK with them, except if someone types MAIL in place + # of EMAIL (the algo will match MAINTAINER, not EMAIL). so special + # case it here: + if((length $key < 8) && ($key =~ /^[ME]+[AI]+.?L.?/)) { + return "EMAIL"; + } + + my $x64 = 0; + if(/_x|_\d|x\d|86|68|64|46/) { + $x64 = 1; + } + + my $bestmatch; + my $bmcount = 0; + + # typo-correction matching algorithm: + for my $candidate (@keyorder) { + next if $candidate =~ /x86/; + + #warn "checking $key against $candidate\n"; + + # if the first 2 characters match, assume the candidate is OK. + if(substr($key, 0, 2) eq substr($candidate, 0, 2)) { + $bestmatch = $candidate; + last; + } + + # get rid of extraneous characters (not in the candidate key). + my $trykey = eval "\$key =~ tr/$candidate//cdr"; + + # if nothing's left, this is obviously the wrong candidate. + next unless length $trykey; + + #warn " trykey is " . $trykey; + + if(length($trykey) > $bmcount) { + $bestmatch = $candidate; + $bmcount = length($trykey); + } + } + + if(defined $bestmatch) { + $key = $bestmatch; + if(($key eq 'DOWNLOAD') || ($key eq 'MD5SUM')) { + $key .= "_x86_64" if $x64; + } + } + + # if nothing matched, the key gets returned as-is. return $key; } @@ -170,6 +257,11 @@ sub fix_info { return; }; + # this only gets used if the .info file is missing the PRGNAM key. + my $prgnam_guess = $file; + $prgnam_guess =~ s,.*/,,; + $prgnam_guess =~ s,\.[^.]*$,,; + my $key = 'INVALID_STUFF'; my %info; @@ -220,6 +312,11 @@ sub fix_info { return; }; + if(not defined $info{'PRGNAM'}) { + warn "$SELF: $file missing PRGNAM, guessing \"$prgnam_guess\" from filename.\n"; + $info{'PRGNAM'} = $prgnam_guess; + } + # Reconstitute info file from the values, with correct indentation and # quoting, backslashes, etc. 
for $key (@keyorder) { @@ -241,7 +338,7 @@ sub fix_info { my $result = system("diff -u --color $file.bak $file"); if($result == 0) { - system("rm $file.bak"); + unlink("$file.bak"); warn "$SELF: no changes to $file\n"; } } -- cgit v1.2.3