2 files changed, 388 insertions, 0 deletions
diff --git a/README b/README
index bb2c7d5..f9a964c 100644
--- a/README
+++ b/README
@@ -5,6 +5,7 @@ mkslackdesc - make a valid slack-desc from a README
 mkslackinfo - generate .info and template SlackBuild
 sbodeps - generate a queue file based on .info file contents
 sbodl - download the sources (from the .info file)
+sbofixdesc - try to fix malformed slack-desc files
 sbofixinfo - try to fix malformed .info files
 sbolint - examine a SBo tarball or dir, look for common errors
 sbosearch - search local SBo repo
diff --git a/sbofixdesc b/sbofixdesc
new file mode 100755
index 0000000..20d9c02
--- /dev/null
+++ b/sbofixdesc
@@ -0,0 +1,387 @@
+#!/usr/bin/perl
+
+# usage: $0 [file [file...]]
+#        $0 -a
+# no args = fix slack-desc in current dir.
+# -a = find all slack-desc files under current dir, fix them all
+
+=pod
+
+=for reference
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct.  It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+       |-----handy-ruler------------------------------------------------------|
+appname: appname (short description of app)
+appname:
+appname: Long description of appname, wrapped at 71 characters *after* the
+appname: colon following "appname" (and the 'handy ruler' should start at
+appname: that colon and be exactly 71 characters long).
+appname: If there is room, there might be a link to the homepage of the
+appname: application on one of these lines, but it's not necessary.
+appname:
+appname: The maximum number of lines prefixed by "appname:" is 11.
+appname: Lines without any other text should *not* have a space after the :
+appname:
+=cut
+
+# note to self: don't use die(), if something fails miserably,
+# we want to just skip it, not stop there.
+
+use File::Find;
+use strict;
+use warnings;
+
+# main() {
+our $checked = 0;
+our $fails = 0;
+our $ok = 0;
+our $fixed = 0;
+our $git = 0;
+our @need_attention = ();
+
+if(@ARGV == 1 && $ARGV[0] =~ /^-/) {
+	if($ARGV[0] eq '-a') {
+		@ARGV = ();
+		find({
+				no_chdir => 1,
+				wanted => sub { push @ARGV, $_ if /\/slack-desc$/ }
+				},
+				'.');
+	} else {
+		print <<EOF;
+Usage: $0
+       $0 [file [files ...]]
+       $0 -a &> fixlog
+
+With no arguments, check & fix the slack-desc in the current directory.
+
+With file argument(s), check & fix the named slack-desc file(s).
+
+With -a, check and fix all slack-desc files found under the current
+directory. Best used at the top level of an SBo git checkout. It
+is *highly* recommended to redirect the script's output to a file
+as shown in the usage above.
+
+For all 3 forms, if the .git/ directory exists in the current directory,
+each fixed slack-desc will be checked in, one commit per slack-desc.
+EOF
+		exit 1;
+	}
+}
+
+push @ARGV, "slack-desc" unless @ARGV;
+
+# $git = true, if we're in the top level of the git repo
+if(-d ".git") {
+	my $rev = `git rev-parse HEAD`;
+	chomp $rev;
+	warn <<EOF;
+... running in top level of git repo, git committing each fixed file.
+... to undo all changes:
+    git reset $rev
+EOF
+	$git = 1;
+}
+
+for my $file (@ARGV) {
+	$checked++;
+
+	my $old = read_desc($file);
+	if(not defined $old) {
+		$fails++;
+		push @need_attention, $file;
+		warn "!!! $file: can't read, can't fix, needs human attention\n";
+		next;
+	}
+
+	my $new = fix_desc($file, $old);
+	if(not defined $new) {
+		$fails++;
+		push @need_attention, $file;
+		warn "!!! $file: couldn't fix automatically, needs human attention\n";
+		next;
+	}
+
+	# if $old and $new are identical, the file was correct, no need to rewrite
+	if($old eq $new) {
+		warn "___ $file is OK\n";
+		$ok++;
+		next;
+	} else {
+		warn "::: $file: writing fixed replacement\n";
+		if(!write_desc($file, $new)) {
+			warn "!!! $file: couldn't write replacement, needs human attention\n";
+			$fails++;
+			push @need_attention, $file;
+			next;
+		}
+		system("diff -u \"$file.bak\" \"$file\" | cat -A 1>&2");
+		$fixed++;
+		run_git_commands($file) if $git;
+	}
+}
+
+warn <<EOF;
+
+Checked $checked files, $ok OK, $fixed fixed, $fails need manual fixing
+EOF
+
+if(@need_attention) {
+	warn "\nThese files need human attention:\n\n";
+	warn "$_\n" for @need_attention;
+}
+
+if($fails) {
+	exit 2;
+} elsif($fixed) {
+	exit 1;
+} else {
+	exit 0;
+}
+# }
+
+### read_desc($file);
+
+# Read slack-desc file, return entire content as a scalar, or undef if
+# read failed. Use 'slurp' mode to read the whole file, not line-based,
+# because if there's no \n at the end of the last line, perl would add
+# one for us (and, missing \n is a thing we want to catch and fix).
+
+# The :bytes stops UTF-8 from being treated as Unicode, since only
+# ASCII is allowed... but it also precludes any attempt to actually do
+# anything with Unicode chars (e.g. replace copyright symbol with (C) or
+# accented vowels with unaccented). Right now this is fine as fix_desc()
+# doesn't try to do such things anyway. Note that we couldn't hardcode
+# an encoding (such as UTF-8) here anyway, since we have no idea what
+# encoding the input might be in. We'd have to try to detect it, which
+# is potentially error-prone and not a can of worms I care to open.
+
+sub read_desc {
+	my $file = shift;
+	my $content;
+	undef local $/; # slurp entire file
+
+	if(open my $fh, "<:bytes", $file) {
+		$content = <$fh>;
+		close $fh;
+	} else {
+		warn "!!! $file: $!\n"; # e.g. permission denied, file not found...
+	}
+
+	return $content;
+}
+
+### fix_desc($file, $old_content)
+
+# Recreate slack-desc from old contents, return as a scalar.
+# If the old content was correct, fix_desc's result should be
+# identical to its input.
+
+sub fix_desc {
+	my $file = shift;
+	my $old = shift;
+	my $prgnam = get_prgnam($file);
+
+	if(!$prgnam) {
+		warn "!!! $file: couldn't determine PRGNAM from filesystem\n";
+		return undef;
+	}
+
+	# split into lines, check for non-ASCII chars. have to do this
+	# first so we can give correct line/column errors from original file.
+	# if we did it after the grep/split below, we'd lose the line number.
+	# if the original encoding was UTF-8 (or anything else that can have
+	# multibyte characters), only the first column number can be accurate,
+	# so the 2nd & further ones get a ~ in front.
+	my @lines = split /\n/, $old;
+	my $ok = 1;
+	my $l = 1;
+
+	for(@lines) {
+		my $found = 0;
+
+		while(/[\x80-\xff]+/gc) {
+			$ok = 0;
+			warn "!!! $file: non-ASCII character(s) at line $l, column " .
+				($found ? "~" : "") .
+				pos . "\n";
+			$found = 1;
+		}
+
+		$l++;
+	}
+
+	return undef unless $ok;
+
+	# get description lines only, minus the prgnam: prefix.
+	# they aren't required to match $prgnam, since it might be wrong.
+	@lines = grep { s/^\S+:\s*// } @lines;
+
+	if(!@lines) {
+		warn "!!! $file: no description lines found!\n";
+		return undef;
+	}
+
+	# sanitize the description lines.
+	# TODO: something about non-ASCII characters. For now just
+	# warn if any are found.
+	for(@lines) {
+		s/\t/ /g;              # replace tabs with spaces
+		s/[\x00-\x1f\x7f]//g;  # delete all other control chars
+		s/^\s+//;              # remove leading whitespace
+		s/\s+$//;              # remove trailing whitespace
+
+		# the spec allows each line to be 71 characters, but that
+		# includes the leading space after the colon (which we have
+		# removed already). so:
+		if(length($_) > 70) {
+			warn "!!! $file: description line too long (>70 chars): $_\n";
+			return undef;
+		}
+	}
+
+	if(@lines < 11) { # not enough, add empties at bottom
+		push @lines, "" for @lines+1..11;
+	} elsif(@lines > 11) { # too many
+		# remove empty lines from the bottom, if any.
+		pop @lines while @lines > 11 && $lines[-1] eq "";
+
+		if(@lines > 11) {
+			# still too many, look for blanks we can remove,
+			# starting at the bottom.
+
+			my $cnt = @lines - 11; # remove this many blanks
+			my @newlines = ();
+
+			for(my $i = $#lines; $i >= 0; $i--) {
+				if($lines[$i] ne "") {
+					unshift @newlines, $lines[$i];
+					next;
+				}
+
+				# found a blank
+				if($cnt) {
+					$cnt--;
+				} else {
+					unshift @newlines, "";
+				}
+			}
+			@lines = @newlines;
+		}
+
+		# if we still got too many, give up.
+		if(@lines > 11) {
+			warn "!!! $file: >11 description lines\n";
+			return undef;
+		}
+	}
+
+	# the first description line is supposed to look like:
+	# $prgnam: $prgnam (short desc)
+	# sometimes the 2nd occurrance of $prgnam doesn't match
+	# due to typos, or its case doesn't match. also sometimes
+	# there's a stray space after the ( or before the ).
+	# if the first line doesn't come close to matching this,
+	# it's an error. otherwise we try to fix any formatting issues.
+	for($lines[0]) {
+		if(!m,\(\s*(.+?)\s*\),) {
+			warn "!!! $file: first line isn't: PRGNAM (short desc)\n";
+			return undef;
+		}
+		my $shortdesc = $1;
+		$lines[0] = "$prgnam ($shortdesc)";
+		if(length($lines[0]) > 70) {
+			warn "!!! $file: short description too long\n";
+			return undef;
+		}
+	}
+
+	# compose fixed slack-desc. first, the preamble comments.
+	my $new = <<EOF;
+# HOW TO EDIT THIS FILE:
+# The "handy ruler" below makes it easier to edit a package description.
+# Line up the first '|' above the ':' following the base package name, and
+# the '|' on the right side marks the last column you can put a character in.
+# You must make exactly 11 lines for the formatting to be correct.  It's also
+# customary to leave one space after the ':' except on otherwise blank lines.
+
+EOF
+
+	# correct number of spaces before the handy-ruler
+	$new .= " " x length($prgnam);
+
+	$new .= "|-----handy-ruler------------------------------------------------------|";
+	$new .= "\n";
+
+	# description lines with prgnam: prefix. if the line isn't blank,
+	# include a space after the colon.
+	for(@lines) {
+		$new .= "$prgnam:";
+		if(length $_) {
+			$new .= " $_";
+		}
+		$new .= "\n";
+	}
+
+	return $new;
+}
+
+# write_desc($file, $content)
+
+# Write fixed slack-desc. Returns false if write failed for
+# any reason.
+
+sub write_desc {
+	my $file = shift;
+	my $content = shift;
+
+	# rename() isn't portable, but we're only trying to run
+	# on Linux (in fact only on Slackware).
+	rename($file, $file . ".bak");
+
+	my $fh;
+	if(!open $fh, ">", $file) {
+		warn "$file: $!\n";
+		return undef;
+	}
+	print $fh $content;
+	close $fh;
+}
+
+### get_prgnam($file)
+
+# Get PRGNAM from directory path. Don't look in .info file
+# because it might not exist (we should be able to check a
+# slack-desc in isolation). Works with absolute or relative
+# paths, or just a naked filename.
+
+# TODO: something less danger-prone than shelling out to readlink.
+sub get_prgnam {
+	my $file = shift;
+	my $dir = `readlink -nf "$file"`;
+	my @dirs = split /\//, $dir;
+	pop @dirs; # throw out filename
+	return pop @dirs; # return last dir name
+}
+
+### run_git_commands($file)
+
+# Run appropriate 'git add' and 'git commit' commands for
+# the file we just modified. Assumes we're in the top level of
+# the repo.
+
+sub run_git_commands {
+	my $file = shift;
+	$file =~ s,^\./+,,; # ./foo/bar/slack-desc => foo/bar/slack-desc
+
+	my $prgcat = $file;
+	$prgcat =~ s,/[^/]*$,,; # foo/bar/slack-desc => foo/bar
+
+	system "git add $file";
+	system "git commit -m'$prgcat: Fix slack-desc.'";
+}