aboutsummaryrefslogtreecommitdiff
path: root/sbofixdesc
blob: 20d9c022df00f45713d4eef024bf990a9f940dc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
#!/usr/bin/perl

# usage: $0 [file [file...]]
#        $0 -a
# no args = fix slack-desc in current dir.
# -a = find all slack-desc files under current dir, fix them all

=pod

=for reference
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct.  It's also
# customary to leave one space after the ':' except on otherwise blank lines.

       |-----handy-ruler------------------------------------------------------|
appname: appname (short description of app)
appname:
appname: Long description of appname, wrapped at 71 characters *after* the
appname: colon following "appname" (and the 'handy ruler' should start at
appname: that colon and be exactly 71 characters long).
appname: If there is room, there might be a link to the homepage of the
appname: application on one of these lines, but it's not necessary.
appname:
appname: The maximum number of lines prefixed by "appname:" is 11.
appname: Lines without any other text should *not* have a space after the :
appname:
=cut

# note to self: don't use die(), if something fails miserably,
# we want to just skip it, not stop there.

use File::Find;
use strict;
use warnings;

# main() {
our $checked = 0;
our $fails = 0;
our $ok = 0;
our $fixed = 0;
our $git = 0;
our @need_attention = ();

if(@ARGV == 1 && $ARGV[0] =~ /^-/) {
	if($ARGV[0] eq '-a') {
		@ARGV = ();
		find({
				no_chdir => 1,
				wanted => sub { push @ARGV, $_ if /\/slack-desc$/ }
				},
				'.');
	} else {
		print <<EOF;
Usage: $0
       $0 [file [files ...]]
       $0 -a &> fixlog

With no arguments, check & fix the slack-desc in the current directory.

With file argument(s), check & fix the named slack-desc file(s).

With -a, check and fix all slack-desc files found under the current
directory. Best used at the top level of an SBo git checkout. It
is *highly* recommended to redirect the script's output to a file
as shown in the usage above.

For all 3 forms, if the .git/ directory exists in the current directory,
each fixed slack-desc will be checked in, one commit per slack-desc.
EOF
		exit 1;
	}
}

push @ARGV, "slack-desc" unless @ARGV;

# $git = true, if we're in the top level of the git repo
if(-d ".git") {
	my $rev = `git rev-parse HEAD`;
	chomp $rev;
	warn <<EOF;
... running in top level of git repo, git committing each fixed file.
... to undo all changes:
    git reset $rev
EOF
	$git = 1;
}

for my $file (@ARGV) {
	$checked++;

	my $old = read_desc($file);
	if(not defined $old) {
		$fails++;
		push @need_attention, $file;
		warn "!!! $file: can't read, can't fix, needs human attention\n";
		next;
	}

	my $new = fix_desc($file, $old);
	if(not defined $new) {
		$fails++;
		push @need_attention, $file;
		warn "!!! $file: couldn't fix automatically, needs human attention\n";
		next;
	}

	# if $old and $new are identical, the file was correct, no need to rewrite
	if($old eq $new) {
		warn "___ $file is OK\n";
		$ok++;
		next;
	} else {
		warn "::: $file: writing fixed replacement\n";
		if(!write_desc($file, $new)) {
			warn "!!! $file: couldn't write replacement, needs human attention\n";
			$fails++;
			push @need_attention, $file;
			next;
		}
		system("diff -u \"$file.bak\" \"$file\" | cat -A 1>&2");
		$fixed++;
		run_git_commands($file) if $git;
	}
}

warn <<EOF;

Checked $checked files, $ok OK, $fixed fixed, $fails need manual fixing
EOF

if(@need_attention) {
	warn "\nThese files need human attention:\n\n";
	warn "$_\n" for @need_attention;
}

if($fails) {
	exit 2;
} elsif($fixed) {
	exit 1;
} else {
	exit 0;
}
# }

### read_desc($file);

# Read slack-desc file, return entire content as a scalar, or undef if
# read failed. Use 'slurp' mode to read the whole file, not line-based,
# because if there's no \n at the end of the last line, perl would add
# one for us (and, missing \n is a thing we want to catch and fix).

# The :bytes stops UTF-8 from being treated as Unicode, since only
# ASCII is allowed... but it also precludes any attempt to actually do
# anything with Unicode chars (e.g. replace copyright symbol with (C) or
# accented vowels with unaccented). Right now this is fine as fix_desc()
# doesn't try to do such things anyway. Note that we couldn't hardcode
# an encoding (such as UTF-8) here anyway, since we have no idea what
# encoding the input might be in. We'd have to try to detect it, which
# is potentially error-prone and not a can of worms I care to open.

sub read_desc {
	my $file = shift;
	my $content;
	undef local $/; # slurp entire file

	if(open my $fh, "<:bytes", $file) {
		$content = <$fh>;
		close $fh;
	} else {
		warn "!!! $file: $!\n"; # e.g. permission denied, file not found...
	}

	return $content;
}

### fix_desc($file, $old_content)

# Recreate slack-desc from old contents, return as a scalar.
# If the old content was correct, fix_desc's result should be
# identical to its input.

sub fix_desc {
	my $file = shift;
	my $old = shift;
	my $prgnam = get_prgnam($file);

	if(!$prgnam) {
		warn "!!! $file: couldn't determine PRGNAM from filesystem\n";
		return undef;
	}

	# split into lines, check for non-ASCII chars. have to do this
	# first so we can give correct line/column errors from original file.
	# if we did it after the grep/split below, we'd lose the line number.
	# if the original encoding was UTF-8 (or anything else that can have
	# multibyte characters), only the first column number can be accurate,
	# so the 2nd & further ones get a ~ in front.
	my @lines = split /\n/, $old;
	my $ok = 1;
	my $l = 1;

	for(@lines) {
		my $found = 0;

		while(/[\x80-\xff]+/gc) {
			$ok = 0;
			warn "!!! $file: non-ASCII character(s) at line $l, column " .
				($found ? "~" : "") .
				pos . "\n";
			$found = 1;
		}

		$l++;
	}

	return undef unless $ok;

	# get description lines only, minus the prgnam: prefix.
	# they aren't required to match $prgnam, since it might be wrong.
	@lines = grep { s/^\S+:\s*// } @lines;

	if(!@lines) {
		warn "!!! $file: no description lines found!\n";
		return undef;
	}

	# sanitize the description lines.
	# TODO: something about non-ASCII characters. For now just
	# warn if any are found.
	for(@lines) {
		s/\t/ /g;              # replace tabs with spaces
		s/[\x00-\x1f\x7f]//g;  # delete all other control chars
		s/^\s+//;              # remove leading whitespace
		s/\s+$//;              # remove trailing whitespace

		# the spec allows each line to be 71 characters, but that
		# includes the leading space after the colon (which we have
		# removed already). so:
		if(length($_) > 70) {
			warn "!!! $file: description line too long (>70 chars): $_\n";
			return undef;
		}
	}

	if(@lines < 11) { # not enough, add empties at bottom
		push @lines, "" for @lines+1..11;
	} elsif(@lines > 11) { # too many
		# remove empty lines from the bottom, if any.
		pop @lines while @lines > 11 && $lines[-1] eq "";

		if(@lines > 11) {
			# still too many, look for blanks we can remove,
			# starting at the bottom.

			my $cnt = @lines - 11; # remove this many blanks
			my @newlines = ();

			for(my $i = $#lines; $i >= 0; $i--) {
				if($lines[$i] ne "") {
					unshift @newlines, $lines[$i];
					next;
				}

				# found a blank
				if($cnt) {
					$cnt--;
				} else {
					unshift @newlines, "";
				}
			}
			@lines = @newlines;
		}

		# if we still got too many, give up.
		if(@lines > 11) {
			warn "!!! $file: >11 description lines\n";
			return undef;
		}
	}

	# the first description line is supposed to look like:
	# $prgnam: $prgnam (short desc)
	# sometimes the 2nd occurrance of $prgnam doesn't match
	# due to typos, or its case doesn't match. also sometimes
	# there's a stray space after the ( or before the ).
	# if the first line doesn't come close to matching this,
	# it's an error. otherwise we try to fix any formatting issues.
	for($lines[0]) {
		if(!m,\(\s*(.+?)\s*\),) {
			warn "!!! $file: first line isn't: PRGNAM (short desc)\n";
			return undef;
		}
		my $shortdesc = $1;
		$lines[0] = "$prgnam ($shortdesc)";
		if(length($lines[0]) > 70) {
			warn "!!! $file: short description too long\n";
			return undef;
		}
	}

	# compose fixed slack-desc. first, the preamble comments.
	my $new = <<EOF;
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct.  It's also
# customary to leave one space after the ':' except on otherwise blank lines.

EOF

	# correct number of spaces before the handy-ruler
	$new .= " " x length($prgnam);

	$new .= "|-----handy-ruler------------------------------------------------------|";
	$new .= "\n";

	# description lines with prgnam: prefix. if the line isn't blank,
	# include a space after the colon.
	for(@lines) {
		$new .= "$prgnam:";
		if(length $_) {
			$new .= " $_";
		}
		$new .= "\n";
	}

	return $new;
}

# write_desc($file, $content)

# Write fixed slack-desc. Returns false if write failed for
# any reason.

sub write_desc {
	my $file = shift;
	my $content = shift;

	# rename() isn't portable, but we're only trying to run
	# on Linux (in fact only on Slackware).
	rename($file, $file . ".bak");

	my $fh;
	if(!open $fh, ">", $file) {
		warn "$file: $!\n";
		return undef;
	}
	print $fh $content;
	close $fh;
}

### get_prgnam($file)

# Get PRGNAM from directory path. Don't look in .info file
# because it might not exist (we should be able to check a
# slack-desc in isolation). Works with absolute or relative
# paths, or just a naked filename.

# TODO: something less danger-prone than shelling out to readlink.
sub get_prgnam {
	my $file = shift;
	my $dir = `readlink -nf "$file"`;
	my @dirs = split /\//, $dir;
	pop @dirs; # throw out filename
	return pop @dirs; # return last dir name
}

### run_git_commands($file)

# Run appropriate 'git add' and 'git commit' commands for
# the file we just modified. Assumes we're in the top level of
# the repo.

sub run_git_commands {
	my $file = shift;
	$file =~ s,^\./+,,; # ./foo/bar/slack-desc => foo/bar/slack-desc

	my $prgcat = $file;
	$prgcat =~ s,/[^/]*$,,; # foo/bar/slack-desc => foo/bar

	system "git add $file";
	system "git commit -m'$prgcat: Fix slack-desc.'";
}