#!/usr/bin/perl -w

# bsgrep - grep for files whose lines use backslash continuation.
#
# Continued lines are joined into one logical line before the regex is
# applied.  When the program is invoked under a name containing "join"
# (e.g. through the bsjoin symlink) it only joins the lines and prints
# every one of them.
#
# Provenance: commit dd9d723a60449ecc6148ffe89fe100e2d5b74035 by
# "B. Watson", Sat, 1 Feb 2025 23:41:37 -0500, subject "bsgrep: added
# (grep for files with backslash continuation)."  The commit creates
# bsgrep (mode 100755, 319 lines) and bsjoin (mode 120000, a symlink
# to bsgrep).

use strict;
use warnings;

use Getopt::Std;

our $VERSION = "0.0.1";

# Name we were invoked as: selects grep vs. join mode, prefixes messages.
(my $self = $0) =~ s,.*/,,;

my %opt;               # command-line options, filled in by getopts()
my %printed;           # file names already listed by -l
my $ret = 1;           # exit status: 0 = matched, 1 = no match, 2 = error
my $regex;             # compiled search pattern
my $filecount;         # number of file arguments (controls "file:" prefixes)
my $start_line;        # input line number where the current joined line began
my $out;               # joined line under construction; undef when none pending
my $cr_warning = 0;    # true once the CR warning was issued for the current file

# Route every warning through one place: strip Perl's " at FILE line N."
# suffix, and treat warnings that do not already start with "$self:"
# (e.g. file access errors from the 'while(<>)') as real errors that set
# the exit status to 2.  -s silences the output and the status change.
$SIG{__WARN__} = sub {
    my $m = shift;
    $m =~ s/ at \S+ line \d+\.$//;
    if ($m !~ /^\Q$self\E:/) {
        $m = "$self: $m";
        $ret = 2 unless $opt{s};
    }
    print STDERR $m unless $opt{s};
};

# Print the one-line usage message for grep mode on standard output.
sub grep_usage {
    print "usage: $self [--help | -[iklnsvwq] [-d char] ...] <regex> [<file> ...]\n";
    return;
}

# Parse the options for grep mode into %opt.  Exits 1 on an unknown
# option, dies on the not-yet-supported -r, prints usage and exits 0
# on -h.
sub grep_options {
    getopts('hd:vlkinrwsq', \%opt) or exit 1;
    die("$self: -r option not supported yet (TODO)\n") if $opt{r};
    if ($opt{h}) {
        grep_usage();
        exit(0);
    }
    return;
}

# Print one joined line, preceded by "file:" when more than one file was
# given and by "line:" when -n is in effect.
sub print_line {
    my ($line) = @_;
    print "$ARGV:"       if $filecount > 1;
    print "$start_line:" if $opt{n};
    print "$line\n";
    return;
}

# Print the one-line usage message for join mode and exit 0.
sub join_help {
    print "usage: $self [-knw] [<file> ...]\n";
    exit 0;
}

# Parse the options for join mode into %opt.  Exits 1 on an unknown
# option, prints usage and exits 0 on -h.
sub join_options {
    getopts('hd:knw', \%opt) or exit 1;
    join_help() if $opt{h};
    return;
}

# Process one complete (joined) line.
#
# The line is selected when it matches $regex, or when it does not match
# and -v is given.  Only a selected line clears the "nothing matched"
# exit status and produces output: nothing with -q, the file name (once
# per file) with -l, otherwise the line itself.
sub handle_line {
    my ($line) = @_;

    my $selected = ($line =~ $regex) ? 1 : 0;
    $selected = !$selected if $opt{v};
    return unless $selected;

    $ret = 0 if $ret == 1;    # never downgrade an error status of 2
    return if $opt{q};

    if ($opt{l}) {
        print "$ARGV\n" unless $printed{$ARGV}++;
        return;
    }

    print_line($line);
    return;
}

### main()

# --help and --man are handled before getopts() sees the arguments.  The
# list form of exec keeps $0 away from the shell.
if (defined $ARGV[0]) {
    if ($ARGV[0] =~ /^--?help$/) {
        exec 'perldoc', $0;
        exit(1);
    }
    elsif ($ARGV[0] eq '--man') {
        exec 'pod2man', '--stderr', '-s1', '-cUrchlaysStuff', "-r$VERSION", '-u', $0;
        exit(1);
    }
}

if ($self =~ /join/) {
    join_options();
    $regex = qr/^/;    # every string has a beginning...
}
else {
    grep_options();
    my $pattern = shift;
    # Test definedness/emptiness, not truth, so that the regex "0" works.
    if (!defined $pattern || $pattern eq '') {
        grep_usage();
        die("$self: missing required regex argument\n");
    }
    $regex = $opt{i} ? qr/$pattern/i : qr/$pattern/;
}

$filecount = @ARGV;    # used to decide whether to print filename prefixes.

# Continuation string, regex-escaped; a single backslash unless -d is given.
my $cont = quotemeta($opt{d} // '\\');

while (<>) {
    chomp;
    if (s/\r//) {
        if (!$cr_warning) {
            warn "$self: $ARGV: stripping carriage returns\n" unless $opt{s};
            $cr_warning = 1;
        }
    }
    if (/$cont\s+$/) {
        warn "$self: $ARGV, line $.: whitespace after continuation, malformed input?\n" unless $opt{s};
    }

    # A defined $out means this line continues a previous one.  Use
    # definedness, not truth: the text joined so far may be "" or "0".
    s/^\s+// if defined($out) && $opt{w};
    $start_line = $. unless defined $out;
    $out .= $_;

    if (/$cont$/) {
        if ($opt{k}) {
            $out .= "\n";
        }
        else {
            $out =~ s/$cont$//;
        }
    }
    else {
        handle_line($out);
        undef $out;
    }
}
continue {
    # reset $. on each new file (perldoc -f eof)
    if (eof) {
        if (defined $out) {
            warn "$self: $ARGV: last line ends with continuation\n" unless $opt{s};
            handle_line($out);
            undef $out;
        }
        close ARGV;
        $cr_warning = 0;
    }
}

exit $ret;

### rest of file is the docs

=pod

=head1 NAME

bsgrep - search for strings in files with backslash continuation

bsjoin - join lines with backslash continuation

=head1 SYNOPSIS

bsgrep B<[-hiklnqsvw]> [B<-d> I<char>] I<regex> [I<file> I<...>]

bsjoin B<[-hknw]> [B<-d> I<char>] [I<file> I<...>]

=head1 DESCRIPTION

B<bsgrep> uses a regular expression to search for strings
in a file, much like B<grep>(1). The difference is, B<bsgrep> joins
together lines that use the backslash for continuation (e.g. as
B<sh>(1) does).

Other differences: B<bsgrep> doesn't support the full set of B<grep>
options, and it uses Perl regular expressions rather than POSIX.

Input is read from one or more files, or standard input if no files
are given. Output goes to standard output.

If B<bsgrep> is run as B<bsjoin> (via symbolic or hard link, or just
copying the executable), it will simply join together continued lines
without searching for anything.
In this mode, only the B<-d>, B<-k>, B<-n>, +B<-w>, B<-h>, B<--help>, and B<--man> options are supported. + +=head1 OPTIONS + +These options work with both B<bsgrep> and B<bsjoin>: + +=over 4 + +=item -d I<char> + +Use I<char> as the continuation character, rather than a backslash. + +=item -k + +Keep the backslashes and newlines when joining continued lines together. +This option does not exist in B<grep>. + +=item -n + +Prefix output lines with line numbers (same as B<grep>). + +=item -w + +For continuation lines, remove any leading whitespace. This option is +specific to B<bsgrep>. The B<grep> B<-w> option can be simulated with +the Perl B<\b> syntax in the regex. + +=item -h + +Prints a short help message and exits. Not compatible with B<grep>, which +uses B<-h> for something else. + +=item --help + +Prints this help text, via B<perldoc>(1). + +=item --man + +Prints this help text as a man page, via B<pod2man>(1). Suggested use: + + bsgrep --man > bsgrep.1 + +=back + +These options are only supported by B<bsgrep>: + +=over 4 + +=item -i + +Case-insensitive search (same as B<grep>). + +=item -l + +Instead of printing lines that match, print only the names of files +that contain matches (same as B<grep>). + +=item -v + +Print only lines that do I<not> match (same as B<grep>). + +=item -s + +Silence warnings (same as B<grep>). This includes error messages +about unreadable files as well as warnings about the input (see +B<DIAGNOSTICS>, below).
+ +=back + +=head1 EXAMPLE + +Given the file B<trs80-roms> (which comes from SlackBuilds.org), containing: + + PRGNAM="trs80-roms" + VERSION="20230516" + HOMEPAGE="https://sdltrs.sourceforge.net/docs/index.html" + DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip \ + http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip \ + https://www.tim-mann.org/trs80/ld4-631.zip \ + https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip \ + http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz \ + https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK" + MD5SUM="ecd2c47c0624885fbcfb17889241f0ed \ + 9b342f4401801bbc947e303cbeb9902f \ + f2678aa45b76d935a34a0cd2b108925d \ + 8a0f1567df8f166f4056a6a71ef7dce5 \ + 8bb7cf88a3bc1da890f1f29398120bf3 \ + 6f624bdbf4b410cfbe8603fa3bef44fa" + DOWNLOAD_x86_64="" + MD5SUM_x86_64="" + REQUIRES="" + MAINTAINER="B. Watson" + EMAIL="urchlay@slackware.uk" + +We can extract all the download URLs from the file with: + + $ bsgrep '^DOWNLOAD' trs80-roms + + DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip https://www.tim-mann.org/trs80/ld4-631.zip https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK" + DOWNLOAD_x86_64="" + +All the URLs are listed as one long line (apologies for the ugly formatting). +Note that the whitespace that indents the continuation lines is +preserved. In this case, the whitespace is all spaces, but tabs would +be treated the same way. To compress the whitespace into a single space, +use the B<-w> option.
+ +=head1 DIAGNOSTICS + +Unless disabled with the B<-s> option, B<bsgrep> may print these messages +on standard error: + + bsgrep: <file>: stripping carriage returns + +The input file has MS-DOS/Windows CRLF line endings. B<bsgrep>'s +output will have these removed. Note that Unix-flavored tools that +understand continuation lines will generally fail when fed CRLF files. + + bsgrep: <file>, line <number>: whitespace after continuation, malformed input? + +In shell scripts (and most other uses of backslash continuation), a +line that ends with whitespace after the backslash is not treated as a +continuation line. This is a very easy error to create, when manually +editing files. The above warning will help you avoid this. As usual, +it can be ignored if you know exactly what you're doing. + + bsgrep: <file>: last line ends with continuation + +This warning is self-explanatory. There's nothing for the last line +to continue onto, so this is almost certainly an error. + +The above warnings don't affect the exit status. + +=head1 EXIT STATUS + +0 if there were any matches, 1 if there were none, or 2 if there +were errors (e.g. nonexistent file). However, with B<-s>, the exit +status will be 0 or 1 even if there were errors. Apart from this B<-s> +behavior, the exit status is the same as B<grep>'s. + +=head1 BUGS + +The main bug is that B<grep>'s B<-r> (recursive) option is not +supported. Other options that aren't implemented but might be +someday include B<--color>, B<-a>, B<-A>, B<-B>, B<-C>. + +=head1 AUTHOR + +B<bsgrep> was written by B. Watson <urchlay@slackware.uk> and released +under the WTFPL: Do WTF you want with this. + +=cut diff --git a/bsjoin b/bsjoin new file mode 120000 index 0000000..ef6d065 --- /dev/null +++ b/bsjoin @@ -0,0 +1 @@ +bsgrep \ No newline at end of file -- cgit v1.2.3