aboutsummaryrefslogtreecommitdiff
path: root/update_sbtxt.pl
blob: 952d0c30466fed152bd8ce9684ca1da08ead5da2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/perl -w

#####
# update_sbtext.pl
#####

#####
# Written by B. Watson (urchlay@slackware.uk) on 20240806. If you
# modify this file, please include the date, a description of the
# changes, and your name, below.
#####

#####
# Revision history:
# 20240806, initial version, B. Watson.
#####

#####
# Description:

# Replacement for update_sbtext.sh. Does the same job but runs a good
# bit faster.

# Differences between update_sbtext.sh and update_sbtext.pl:

# 1. update_sbtext.pl is written in Perl (yeah, call me Captain Obvious).

# 2. update_sbtext.pl is approximaly 60 times as fast. With ~9400 builds
#    in the repo, the shell version ran in 15 minutes, and this one
#    runs in 15 *seconds*.

# 3. The output isn't *quite* identical to the .sh version: I made
#    the short description more consistent (always one space after
#    the colon; remove the duplicate package names caused by a bug in
#    the old script that manifested whenever the package name has a +
#    in it).

# 4. This script writes to SLACKBUILDS.TXT.new, and doesn't disturb
#    the existing SLACKBUILDS.TXT or SLACKBUILDS.TXT.gz until it's
#    done (then it overwrites the old files in one swell foop, so no
#    users ever see partial results).

# 5. If there are symlinks in the repo, they will show up in the
#    SLACKBUILD FILE LIST for the build they belong to. With the shell
#    version, they are ignored. This actually only affects one build
#    (desktop/mint-y-icons), and it's not clear to me that symlinks
#    should be allowed in the git repo anyway.

# I know most of my fellow SBo admins aren't real big on Perl, so I
# commented the hell out of this. If it still doesn't make sense, find
# me on IRC or by email and I'll try to make it make sense.
#####

#####
# These 2 variables are configurable settings, if you can think of a
# reason to change them.
#####
$slackbuilds_top_dir = "/slackbuilds/www/slackbuilds";
$output = "SLACKBUILDS.TXT";

#####
# Use only modules that ship with Slackware's perl package here!
use File::Find;
#####

####
# Rest of the file is hopefully bug-free code (hey, I can hope, right?)
####

# File::Find "wanted" predicate. When find() calls this, it sets
# $_ to the filename (the basename only) and $File::Find::dir
# to the directory (relative to find()'s 2nd argument). See
# "perldoc File::Find" if you need more information.
sub found_file {
	# don't process directories at all.
	return if -d $_;

	# $dot is always "."
	# $cat is the category (audio, development, etc).
	# $prgnam is (guess what?) the build name.
	# @path is the rest. for e.g. slack-desc, it'll be just the name.
	# if there's a subdirectory, it'll be dir/filename.
	my ($dot, $cat, $prgnam, @path) = split /\//, $File::Find::dir;

	# if there aren't at least 2 dir components, ignore.
	return unless defined $cat;
	return unless defined $prgnam;

	# %files hash key.
	$dir = "$dot/$cat/$prgnam";

	# %files values are array references, holding the subdirs under the
	# SlackBuild dir, if any, and the basename.
	push @{$files{$dir}}, join("/", @path, $_);

	return unless /(.*)\.info$/;

	# if we found an .info file, add it to the list.
	push @infos, "$dir/$_";
}

# Join together lines in .info files that are split by
# backslashes. This is actually more forgiving of spacing errors
# than it needs to be (better not to be so picky).
sub fix_backslashes {
	for($_[0]) {
		s,
			\s*   # optional spaces
			\\    # required: single backslash
			\s*   # optional spaces
			\n    # required: newline
			\s*   # optional spaces
		, ,gx;   # all the above gets replaced with a single space.
		return $_;
	}
}

# Read an entire file, join any lines back together if they're split
# with a backslash, return entire file contents as a scalar.
sub slurp_file {
	my $file = shift;

	local $/ = undef;
	open my $fh, '<', $file || die $!;
	my $content = <$fh>;
	close $fh;

	return fix_backslashes($content);
}

# Return just the value from the .info file contents, given the key.
# Assumes the content's already had fix_backslashes() called on it.
sub get_info_value {
	my $file = shift; # only needed for error message
	my $info = shift;
	my $key = shift;

	$$info =~ m,^$key="(.*?)",m or die "no match for $key in $file\n";
	return $1;
}

# get the first non-comment line of the slack-desc in $dir, remove
# the 'buildname: ' prefix, return the rest.
sub get_short_desc {
	my $buildname = quotemeta shift;
	my $dir = shift;
	my $file = $dir . "/slack-desc";

	open my $fh, '<', $file || die "$file: $!\n";
	my $line;
	while(<$fh>) {
		chomp;
		last if /^$buildname: /;
	}
	close $fh;

	die "$file: can't figure out short description\n" unless defined $_;

	s,^\S+:\s*,,;
	return $_;
}

# parse_info() actually prints the 'entry' in SLACKBUILDS.TXT for a
# given info file. Argument must be the path to the file, like
# ./category/progname/progname.info
sub parse_info {
	my $file = shift;
	my $loc;
	my $name;

	($loc, $name) = ($file =~ m,(^.*)/([^/]+)\.info$,);

	print "SLACKBUILD NAME: $name\n";
	print "SLACKBUILD LOCATION: $loc\n";
	print "SLACKBUILD FILES: " . join(" ", sort @{$files{$loc}}) . "\n";

	my $content = slurp_file($file);

	for (qw/VERSION DOWNLOAD DOWNLOAD_x86_64 MD5SUM MD5SUM_x86_64 REQUIRES/) {
		print "SLACKBUILD $_: " . get_info_value($file, \$content, $_) . "\n";
	}

	# N.B. update_sbtext.sh always puts 2 spaces after the : here, unless
	# the script name has a + in it (e.g. wmweather+, atari++), in which
	# case it puts one space, and duplicates the package name. I consider
	# this a bug in the original script; here, there's always one space
	# and no dup package names.
	print "SLACKBUILD SHORT DESCRIPTION: " . get_short_desc($name, $loc);

	print "\n\n";
}

# main()
$subdir = shift || die "Usage: $0 <version>\n";

chdir $slackbuilds_top_dir . "/" . $subdir || die $!;

open $outfh, '>', "$output.new" || die $!;
*STDOUT = $outfh;

# slight difference: .sh version printed this string to stdout, not
# stderr. I don't think anyone cares.
warn "$0: Updating SLACKBUILDS.TXT...\n";

# builds the %files and @infos tables.
find(\&found_file, '.');

# generate the entries, in sorted order.
parse_info($_) for sort @infos;

close $outfh;

# don't overwrite SLACKBUILDS.TXT (and .TXT.gz) until the end.
rename "$output.new", $output;
system("gzip -9c $output > $output.gz");