diff options
Diffstat (limited to 'bin')
-rw-r--r-- | bin/README.txt | 6 | ||||
-rwxr-xr-x | bin/makepkgdb.pl | 235 | ||||
-rwxr-xr-x | bin/whatis2sqlite.pl | 83 |
3 files changed, 324 insertions, 0 deletions
diff --git a/bin/README.txt b/bin/README.txt new file mode 100644 index 0000000..e0baa08 --- /dev/null +++ b/bin/README.txt @@ -0,0 +1,6 @@ +Scripts that generate the databases used by the bot plugins. + +These are in Perl, because they're pretty complex and I just started +learning Python last week (and I've used perl for over 20 years). + +See the comments at the top of each script for instructions. diff --git a/bin/makepkgdb.pl b/bin/makepkgdb.pl new file mode 100755 index 0000000..31838d3 --- /dev/null +++ b/bin/makepkgdb.pl @@ -0,0 +1,235 @@ +#!/usr/bin/perl -w + +# create database for limnoria bot Slackpkg plugin. +# reads PACKAGES.txt from the main (slackware64) and extra +# trees. Also has to have a copy of /var/log/scripts, since +# PACKAGES.txt doesn't have symlinks, and a copy of +# /var/log/packages since PACKAGES.txt doesn't have file contents. + +# the output is a huge pile of SQL insert statements, like half +# a million of them. best way to run this IMO: +# $ makepkgdb.pl > db.sql +# $ rm -f /path/to/dbfile +# $ sqlite3 -bail /path/to/dbfile < db.sql + +# this script was only tested for slackware 14.2. When 15.0 gets +# released, we'll rebuild the database using this script, which +# might or might not need to be modified. + +# special cases: /lib64/incoming gets the /incoming removed, and +# we manually add /bin/bash. + +### configurables: + +@pkglists = ( + '/data/mirrors/slackware/slackware64-14.2/PACKAGES.TXT', + '/data/mirrors/slackware/slackware64-14.2/extra/PACKAGES.TXT', +); + +# ...the above list could have included pasture and testing, but +# for that to work, you'd also have to have all the packages from +# pasture and testing installed (we don't right now). + +# these 2 are copies of /var/log/packages and /var/log/scripts from +# a full install of 14.2, with no 3rd-party packages added: +$pkgdir = "/home/urchlay/var.log/packages"; +$scriptsdir = "/home/urchlay/var.log/scripts"; + +### end configurables. + +sub init_db { + #unlink("testdb.sqlite3"); + #open P, "|sqlite3 testdb.sqlite3"; + #select P; + + print <<EOF; +pragma journal_mode = memory; +begin transaction; + +-- should include extra (and maybe pasture?) +create table categories ( + id integer primary key not null, + name text(10) not null +); + +-- compressed and uncompressed size? +create table packages ( + id integer primary key not null, + name varchar not null, + descrip varchar not null, + category integer not null, + foreign key(category) references categories(id) +); + +-- path should be something bigger than a varchar? (does it matter, for sqlite?) +create table files ( + id integer primary key not null, + path varchar not null, + symlink boolean not null default 0, + package integer not null, + foreign key(package) references packages(id), + check(symlink in (0,1)) +); + +EOF +} + + +# turn e.g. bash-completion-2.2-noarch-3 into bash-completion +# (supports arbitrary number of - in the package name). +sub getpkgname { + my $p = reverse shift; + (undef, undef, undef, $p) = split("-", $p, 4); + return reverse $p; +} + +# return ref-to-array of symlinks extracted from a scripts/ file. +sub getsymlinks { + my @links = (); + open my $f, "<$_[0]" or return \@links; + while(<$f>) { + next unless /^\( cd (\S+) ; ln -sf \S+ (\S+) \)/; + my $dir = $1; + my $file = $2; + #warn "link $dir/$file\n" if $_[0] =~ /bash-4/; + push @links, "$dir/$file"; + } + close $f; + #warn "links: " . join(" ", @links) . "\n" if $_[0] =~ /bash-4/; + return \@links; +} + +# return ref-to-array of files extracted from a packages/ file. +sub getfiles { + my @files = (); + open my $f, "<$_[0]" or return \@files; + while(<$f>) { + last if /^FILE LIST:/; + } + while(<$f>) { + chomp; + next if m,^\./$,; + next if m,^install/,; + s,^lib64/incoming,lib64,; + push @files, $_; + } + close $f; + push @files, "bin/bash" if $_[0] =~ /\/bash-[0-9]/; + return \@files; +} + +# given a package db entry as a multi-line string, extract the +# short description from it. +sub getdescrip { + my $pkgname = shift; + my $blob = shift; + if($blob =~ /^$pkgname:\s+(\S.*)$/m) { + my $line = $1; + if($line =~ /^$pkgname (?:- )?\((.+)\)/) { + $line = $1; + } + return $line; + } + return '(no description)'; +} + +# given a package db entry as a multi-line string, extract the +# category from it. +sub getcategory { + my $pkgname = shift; + my $blob = shift; + if($blob =~ /^PACKAGE LOCATION:\s+\.\/(?:(extra)|(?:slackware64\/(\w+)))/m) { + return $1 ? $1 : $2; + } + return "(unknown category)"; +} + +sub getfilelist { + my $pkgname = shift; + return @{$files{$pkgname}}; +} + +sub getsymlinklist { + my $pkgname = shift; + if(defined $symlinks{$pkgname}) { + return @{$symlinks{$pkgname}}; + } else { + return (); + } +} + +$catcount = 0; +sub getcategory_id { + my $catname = shift; + my $cat_id = $catname2id{$catname}; + if(defined $cat_id) { + return $cat_id; + } + $catcount++; + print "insert into categories values($catcount, '$catname');\n"; + $catname2id{$catname} = $catcount; + return $catcount; +} + +### main() + +# build hash of all symlinks, indexed by package name. +warn "reading symlinks from $scriptsdir/*\n"; +while(<$scriptsdir/*>) { + s,.*/,,; + my $p = getpkgname($_); + $symlinks{$p} = getsymlinks($scriptsdir . "/" . $_); +} + +warn "reading filelists from $pkgdir/*\n"; +while(<$pkgdir/*>) { + s,.*/,,; + my $p = getpkgname($_); + $files{$p} = getfiles($pkgdir . "/" . $_); +} + +#print for getfilelist('bash'); +#die; + +init_db(); + +$pkg_id = 0; +for(@pkglists) { + local $/ = ''; + open my $l, "<$_" or die $!; + warn "reading package list from $_\n"; + while(<$l>) { + next unless /^PACKAGE NAME:\s+(\S+)/; + my $pkgname = getpkgname($1); + my $descrip = getdescrip($pkgname, $_); + my $category = getcategory($pkgname, $_); + my $cat_id = getcategory_id($category); + $pkg_id++; + + $descrip =~ s,','',g; + print "insert into packages values ($pkg_id, '$pkgname', '$descrip', $cat_id);\n"; + + my @filelist = getfilelist($pkgname); + my @symlinklist = getsymlinklist($pkgname); + + for my $f (@filelist) { + print "insert into files values (null, '/$f', 0, $pkg_id);\n" + } + + for my $f (@symlinklist) { + print "insert into files values (null, '/$f', 1, $pkg_id);\n" + } + + #print "=== $cat_id: $category/$pkgname - $descrip\n"; + #print "files: " . join("\n ", @filelist) . "\n"; + #print "links: " . join("\n ", @symlinklist) . "\n\n"; + + } + close $l; +} + +print "commit;\n"; + +exit 0; + + diff --git a/bin/whatis2sqlite.pl b/bin/whatis2sqlite.pl new file mode 100755 index 0000000..297a82a --- /dev/null +++ b/bin/whatis2sqlite.pl @@ -0,0 +1,83 @@ +#!/usr/bin/perl -w + +# whatis2sqlite.pl - create sqlite3 whatis database for use with +# limnoria Manpages plugin. + +# Usage: + +# $ rm -f /path/to/Manpages.sqlite3 +# $ perl whatis2sqlite.pl [whatis-file] | sqlite /path/to/Manpages.sqlite3 + +# then reload the Manpages plugin in the bot. + +# For Slackware 14.2, the whatis-file comes from /usr/man/whatis. +# For 15.0, we'll have to generate it according to the directions +# in "man whatis": +# $ whatis -w '*' | sort > whatis +# Note that man-db's databases will have to already exist, for that to work. + +push @ARGV, 'whatis' unless @ARGV; + +print <<EOF; +pragma journal_mode = memory; + +begin transaction; + +create table whatis ( + id integer primary key not null, + page varchar not null, + section char(5) not null, + desc varchar not null +); +EOF + +while(<>) { + my($name, $desc, $sect, $alias); + chomp; + + # 14.2's whatis has some garbage entries, skip them. + next if /^struct/; + next if /^and (put|with)/; + next if /bernd\.warken/; + + s/\s+$//g; + ($name, $desc) = split /\s+-\s+/, $_, 2; + $name =~ s/\s+/ /g; + ($sect) = $name =~ /\(([^)]+)\)/; + next if $sect eq '3p'; # symlink + + $alias = 0; + if($name =~ /\[([^]]+)\]/) { + $alias = $1; + } + + $name =~ s,\s.*,,; + $sect =~ s/^(.).*$/$1/; # no "3x", etc. + + # 14.2's whatis has some wrong sections, fix them. + $sect = '8' if $sect eq 'v'; # ebtables + $sect = '1' if $sect eq 'P'; # cdparanoia + $sect = '1' if $sect eq 'o'; # rclock, rxvt + + #print "$sect, $name, '$alias' => $desc\n"; + + make_sql($name, $sect, $desc); + make_sql($alias, $sect, $desc) if $alias; +} + +print "COMMIT;\n"; + +sub make_sql { + my $page = shift; + my $sect = shift; + my $desc = shift; + + return if $seen{"$page^^$sect"}++; + + # N.B. we don't escape quotes here because 14.2's whatis + # doesn't contain any double-quotes. When 15 is released, + # better check again! + print <<EOF +insert into whatis values (null, "$page", "$sect", "$desc"); +EOF +} |