#!/usr/bin/perl

# usage: $0 [file [file...]]
#        $0 -a
# no args = fix slack-desc in current dir.
# -a = find all slack-desc files under current dir, fix them all

=pod

=for reference
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct.  It's also
# customary to leave one space after the ':' except on otherwise blank lines.

       |-----handy-ruler------------------------------------------------------|
appname: appname (short description of app)
appname:
appname: Long description of appname, wrapped at 71 characters *after* the
appname: colon following "appname" (and the 'handy ruler' should start at
appname: that colon and be exactly 71 characters long).
appname: If there is room, there might be a link to the homepage of the
appname: application on one of these lines, but it's not necessary.
appname:
appname: The maximum number of lines prefixed by "appname:" is 11.
appname: Lines without any other text should *not* have a space after the :
appname:
=cut

# note to self: don't use die(), if something fails miserably,
# we want to just skip it, not stop there.

use File::Find;
use strict;
use warnings;

# main() {
our $checked = 0;
our $fails = 0;
our $ok = 0;
our $fixed = 0;
our $git = 0;
our @need_attention = ();

if(@ARGV == 1 && $ARGV[0] =~ /^-/) {
	if($ARGV[0] eq '-a') {
		@ARGV = ();
		find({
				no_chdir => 1,
				wanted => sub { push @ARGV, $_ if /\/slack-desc$/ }
				},
				'.');
	} else {
		print <<EOF;
Usage: $0
       $0 [file [files ...]]
       $0 -a &> fixlog

With no arguments, check & fix the slack-desc in the current directory.

With file argument(s), check & fix the named slack-desc file(s).

With -a, check and fix all slack-desc files found under the current
directory. Best used at the top level of an SBo git checkout. It
is *highly* recommended to redirect the script's output to a file
as shown in the usage above.

For all 3 forms, if the .git/ directory exists in the current directory,
each fixed slack-desc will be checked in, one commit per slack-desc.
EOF
		exit 1;
	}
}

push @ARGV, "slack-desc" unless @ARGV;

# $git = true, if we're in the top level of the git repo
if(-d ".git") {
	my $rev = `git rev-parse HEAD`;
	chomp $rev;
	warn <<EOF;
... running in top level of git repo, git committing each fixed file.
... to undo all changes:
    git reset $rev
EOF
	$git = 1;
}

for my $file (@ARGV) {
	$checked++;

	my $old = read_desc($file);
	if(not defined $old) {
		$fails++;
		push @need_attention, $file;
		warn "!!! $file: can't read, can't fix, needs human attention\n";
		next;
	}

	my $new = fix_desc($file, $old);
	if(not defined $new) {
		$fails++;
		push @need_attention, $file;
		warn "!!! $file: couldn't fix automatically, needs human attention\n";
		next;
	}

	# if $old and $new are identical, the file was correct, no need to rewrite
	if($old eq $new) {
		warn "___ $file is OK\n";
		$ok++;
		next;
	} else {
		warn "::: $file: writing fixed replacement\n";
		if(!write_desc($file, $new)) {
			warn "!!! $file: couldn't write replacement, needs human attention\n";
			$fails++;
			push @need_attention, $file;
			next;
		}
		system("diff -u \"$file.bak\" \"$file\" | cat -A 1>&2");
		$fixed++;
		run_git_commands($file) if $git;
	}
}

warn <<EOF;

Checked $checked files, $ok OK, $fixed fixed, $fails need manual fixing
EOF

if(@need_attention) {
	warn "\nThese files need human attention:\n\n";
	warn "$_\n" for @need_attention;
}

if($fails) {
	exit 2;
} elsif($fixed) {
	exit 1;
} else {
	exit 0;
}
# }

### read_desc($file);

# Read slack-desc file, return entire content as a scalar, or undef if
# read failed. Use 'slurp' mode to read the whole file, not line-based,
# because if there's no \n at the end of the last line, perl would add
# one for us (and, missing \n is a thing we want to catch and fix).

# The :bytes stops UTF-8 from being treated as Unicode, since only
# ASCII is allowed... but it also precludes any attempt to actually do
# anything with Unicode chars (e.g. replace copyright symbol with (C) or
# accented vowels with unaccented). Right now this is fine as fix_desc()
# doesn't try to do such things anyway. Note that we couldn't hardcode
# an encoding (such as UTF-8) here anyway, since we have no idea what
# encoding the input might be in. We'd have to try to detect it, which
# is potentially error-prone and not a can of worms I care to open.

sub read_desc {
	my $file = shift;
	my $content;
	undef local $/; # slurp entire file

	if(open my $fh, "<:bytes", $file) {
		$content = <$fh>;
		close $fh;
	} else {
		warn "!!! $file: $!\n"; # e.g. permission denied, file not found...
	}

	return $content;
}

### fix_desc($file, $old_content)

# Recreate slack-desc from old contents, return as a scalar.
# If the old content was correct, fix_desc's result should be
# identical to its input.

sub fix_desc {
	my $file = shift;
	my $old = shift;
	my $prgnam = get_prgnam($file);

	if(!$prgnam) {
		warn "!!! $file: couldn't determine PRGNAM from filesystem\n";
		return undef;
	}

	# split into lines, check for non-ASCII chars. have to do this
	# first so we can give correct line/column errors from original file.
	# if we did it after the grep/split below, we'd lose the line number.
	# if the original encoding was UTF-8 (or anything else that can have
	# multibyte characters), only the first column number can be accurate,
	# so the 2nd & further ones get a ~ in front.
	my @lines = split /\n/, $old;
	my $ok = 1;
	my $l = 1;

	for(@lines) {
		my $found = 0;

		while(/[\x80-\xff]+/gc) {
			$ok = 0;
			warn "!!! $file: non-ASCII character(s) at line $l, column " .
				($found ? "~" : "") .
				pos . "\n";
			$found = 1;
		}

		$l++;
	}

	return undef unless $ok;

	# get description lines only, minus the prgnam: prefix.
	# they aren't required to match $prgnam, since it might be wrong.
	@lines = grep { s/^\S+:\s*// } @lines;

	if(!@lines) {
		warn "!!! $file: no description lines found!\n";
		return undef;
	}

	# sanitize the description lines.
	# TODO: something about non-ASCII characters. For now just
	# warn if any are found.
	for(@lines) {
		s/\t/ /g;              # replace tabs with spaces
		s/[\x00-\x1f\x7f]//g;  # delete all other control chars
		s/^\s+//;              # remove leading whitespace
		s/\s+$//;              # remove trailing whitespace

		# the spec allows each line to be 71 characters, but that
		# includes the leading space after the colon (which we have
		# removed already). so:
		if(length($_) > 70) {
			warn "!!! $file: description line too long (>70 chars): $_\n";
			return undef;
		}
	}

	if(@lines < 11) { # not enough, add empties at bottom
		push @lines, "" for @lines+1..11;
	} elsif(@lines > 11) { # too many
		# remove empty lines from the bottom, if any.
		pop @lines while @lines > 11 && $lines[-1] eq "";

		if(@lines > 11) {
			# still too many, look for blanks we can remove,
			# starting at the bottom.

			my $cnt = @lines - 11; # remove this many blanks
			my @newlines = ();

			for(my $i = $#lines; $i >= 0; $i--) {
				if($lines[$i] ne "") {
					unshift @newlines, $lines[$i];
					next;
				}

				# found a blank
				if($cnt) {
					$cnt--;
				} else {
					unshift @newlines, "";
				}
			}
			@lines = @newlines;
		}

		# if we still got too many, give up.
		if(@lines > 11) {
			warn "!!! $file: >11 description lines\n";
			return undef;
		}
	}

	# the first description line is supposed to look like:
	# $prgnam: $prgnam (short desc)
	# sometimes the 2nd occurrance of $prgnam doesn't match
	# due to typos, or its case doesn't match. also sometimes
	# there's a stray space after the ( or before the ).
	# if the first line doesn't come close to matching this,
	# it's an error. otherwise we try to fix any formatting issues.
	for($lines[0]) {
		if(!m,\(\s*(.+?)\s*\),) {
			warn "!!! $file: first line isn't: PRGNAM (short desc)\n";
			return undef;
		}
		my $shortdesc = $1;
		$lines[0] = "$prgnam ($shortdesc)";
		if(length($lines[0]) > 70) {
			warn "!!! $file: short description too long\n";
			return undef;
		}
	}

	# compose fixed slack-desc. first, the preamble comments.
	my $new = <<EOF;
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct.  It's also
# customary to leave one space after the ':' except on otherwise blank lines.

EOF

	# correct number of spaces before the handy-ruler
	$new .= " " x length($prgnam);

	$new .= "|-----handy-ruler------------------------------------------------------|";
	$new .= "\n";

	# description lines with prgnam: prefix. if the line isn't blank,
	# include a space after the colon.
	for(@lines) {
		$new .= "$prgnam:";
		if(length $_) {
			$new .= " $_";
		}
		$new .= "\n";
	}

	return $new;
}

# write_desc($file, $content)

# Write fixed slack-desc. Returns false if write failed for
# any reason.

sub write_desc {
	my $file = shift;
	my $content = shift;

	# rename() isn't portable, but we're only trying to run
	# on Linux (in fact only on Slackware).
	rename($file, $file . ".bak");

	my $fh;
	if(!open $fh, ">", $file) {
		warn "$file: $!\n";
		return undef;
	}
	print $fh $content;
	close $fh;
}

### get_prgnam($file)

# Get PRGNAM from directory path. Don't look in .info file
# because it might not exist (we should be able to check a
# slack-desc in isolation). Works with absolute or relative
# paths, or just a naked filename.

# TODO: something less danger-prone than shelling out to readlink.
sub get_prgnam {
	my $file = shift;
	my $dir = `readlink -nf "$file"`;
	my @dirs = split /\//, $dir;
	pop @dirs; # throw out filename
	return pop @dirs; # return last dir name
}

### run_git_commands($file)

# Run appropriate 'git add' and 'git commit' commands for
# the file we just modified. Assumes we're in the top level of
# the repo.

sub run_git_commands {
	my $file = shift;
	$file =~ s,^\./+,,; # ./foo/bar/slack-desc => foo/bar/slack-desc

	my $prgcat = $file;
	$prgcat =~ s,/[^/]*$,,; # foo/bar/slack-desc => foo/bar

	system "git add $file";
	system "git commit -m'$prgcat: Fix slack-desc.'";
}