aboutsummaryrefslogtreecommitdiff
path: root/bsgrep
diff options
context:
space:
mode:
authorB. Watson <urchlay@slackware.uk>2025-02-01 23:41:37 -0500
committerB. Watson <urchlay@slackware.uk>2025-02-01 23:41:37 -0500
commitdd9d723a60449ecc6148ffe89fe100e2d5b74035 (patch)
tree7353a9bc6f844ccc2c1d430584089fdae3297b28 /bsgrep
parent09da294e23b6e443e5b21bf1576781fb5b5fbde7 (diff)
downloadmisc-scripts-dd9d723a60449ecc6148ffe89fe100e2d5b74035.tar.gz
bsgrep: added (grep for files with backslash continuation).
Diffstat (limited to 'bsgrep')
-rwxr-xr-xbsgrep319
1 files changed, 319 insertions, 0 deletions
diff --git a/bsgrep b/bsgrep
new file mode 100755
index 0000000..31492a3
--- /dev/null
+++ b/bsgrep
@@ -0,0 +1,319 @@
+#!/usr/bin/perl -w
+
+$VERSION = "0.0.1"; # handed to pod2man as the release string by --man
+
+use Getopt::Std;
+
+($self = $0) =~ s,.*/,,; # basename of $0: prefixes messages, and selects bsjoin mode when it contains "join"
+
+%printed = (); # filenames already listed by -l, so each is printed only once
+
+$SIG{__WARN__} = sub {
+    my $m = shift;
+    $m =~ s/ at \S+ line \d+\.$//; # drop perl's " at FILE line N." suffix
+    # warnings that don't start with "$self:" are e.g. file access errors
+    # from the 'while(<>)'. index() keeps the prefix test literal, so regex metacharacters in the script name are harmless.
+    if(index($m, "$self:") != 0) {
+        $m = "$self: $m";
+        $ret = 2 unless $opt{s}; # a real error: exit status 2 (but not with -s)
+    }
+    print STDERR $m unless $opt{s}; # -s silences everything routed through warn
+};
+
+sub grep_usage {
+    print STDOUT "usage: $self [--help | -[iklnsvwq] [-d char] ...] <regex> [<file> ...]\n"; # one-line synopsis for bsgrep mode
+}
+
+sub grep_options {
+    # Parse bsgrep's switches into %opt. Usage errors exit 2, as grep does; status 1 is reserved for "no match".
+    getopts('hd:vlkinrwsq', \%opt) || exit 2;
+    if($opt{r}) { print STDERR "$self: -r option not supported yet (TODO)\n"; exit 2; }
+    return unless $opt{h};
+    grep_usage(); # -h: short help, then a successful exit
+    exit(0);
+}
+
+sub print_line {
+    my $prefix = $filecount > 1 ? "$ARGV:" : ''; # filename prefix only when several files were named
+    $prefix .= "$start_line:" if $opt{n}; # -n: number of the physical line where this logical line began
+    print $prefix, "$_[0]\n";
+}
+
+sub join_help { # short bsjoin synopsis on stdout, then exit 0; -d is listed because join_options accepts it
+    print "usage: $self [--help | -[knw] [-d char] ...] [<file> ...]\n";
+    exit 0;
+}
+
+sub join_options {
+    exit 1 unless getopts('hd:knw', \%opt); # bsjoin's switches, stored in %opt; give up on a bad one
+    if($opt{h}) { join_help(); } # -h: print the synopsis (join_help exits)
+}
+
+sub handle_line {
+    my $line = $_[0]; # one complete logical line (continuations already joined)
+    # Selected means: matches the regex, or with -v does not match it.
+    my $selected = $opt{v} ? ($line !~ /$regex/) : ($line =~ /$regex/);
+    return unless $selected; # unselected lines affect neither output nor exit status
+    $ret = 0 if $ret == 1; # success, unless an error already forced status 2
+    return if $opt{q}; # -q: exit status only, no output
+    if($opt{l}) {
+        # -l: name each file once, and only when it has a selected line
+        print "$ARGV\n" unless $printed{$ARGV}++;
+    } else {
+        print_line($line);
+    }
+}
+
+### main()
+
+if(defined($ARGV[0])) {
+    if($ARGV[0] =~ /^--?help$/) { # whole-argument match: a regex that merely contains "-help" is not hijacked
+        exec 'perldoc', $0; # list form bypasses the shell, so odd characters in $0 are harmless
+        exit(1); # reached only if exec failed
+    } elsif($ARGV[0] eq '--man') {
+        exec 'pod2man', '--stderr', '-s1', '-cUrchlaysStuff', "-r$VERSION", '-u', $0;
+        exit(1); # reached only if exec failed
+    }
+}
+
+if($self =~ /join/) {
+    join_options();
+    $regex = '^'; # every string has a beginning...
+} else {
+    grep_options();
+    if(!defined($regex = shift)) { # defined(), not truth: "0" is a perfectly valid regex
+        grep_usage();
+        die("$self: missing required regex argument\n");
+    }
+    $regex = $opt{i} ? "(?i:$regex)" : "(?:$regex)"; # grouped so it is never empty: an empty pattern reuses the last successful match
+}
+
+$ret = 1; # return value from main(), set to 0 if anything matched.
+
+$filecount = @ARGV; # used to decide whether to print filename prefixes.
+
+$cont = quotemeta($opt{d} // '\\'); # continuation string (-d or backslash), escaped for the regexes below
+
+while(<>) {
+    chomp;
+    if(s/\r$//) { # strip only the line-ending CR of CRLF input; a CR inside the line is data
+        if(!$cr_warning) {
+            warn "$self: $ARGV: stripping carriage returns\n" unless $opt{s};
+            $cr_warning = 1; # warn once per file
+        }
+    }
+    if(/$cont\s+$/) {
+        warn "$self: $ARGV, line $.: whitespace after continuation, malformed input?\n" unless $opt{s};
+    }
+    s/^\s+// if defined($out) && $opt{w}; # defined(), not truth: a pending "" or "0" is still a continuation
+    $start_line = $. unless defined $out; # remember where this logical line began, for -n
+    $out .= $_;
+    if(/$cont$/) {
+        if($opt{k}) {
+            $out .= "\n"; # -k: keep the continuation string and the line break
+        } else {
+            $out =~ s/$cont$//;
+        }
+    } else {
+        handle_line($out);
+        undef $out; # undef marks "no logical line in progress"
+    }
+} continue {
+    # reset $. on each new file (perldoc -f eof)
+    if(eof) {
+        if(defined $out) { # flush even an empty or "0" pending line, so nothing leaks into the next file
+            warn "$self: $ARGV: last line ends with continuation\n" unless $opt{s};
+            handle_line($out);
+            undef $out;
+        }
+        close ARGV;
+        $cr_warning = 0;
+    }
+}
+
+exit $ret;
+
+### rest of file is the docs
+
+=pod
+
+=head1 NAME
+
+bsgrep - search for strings in files with backslash continuation
+
+bsjoin - join lines with backslash continuation
+
+=head1 SYNOPSIS
+
+bsgrep B<[-hvlkinwsq]> [B<-d> I<char>] I<regex> [I<file> I<...>]
+
+bsjoin B<[-hknw]> [B<-d> I<char>] [I<file> I<...>]
+
+=head1 DESCRIPTION
+
+B<bsgrep> uses a regular expression to search for strings
+in a file, much like B<grep>(1). The difference is, B<bsgrep> joins
+together lines that use the backslash for continuation (e.g. as
+B<sh>(1) does).
+
+Other differences: B<bsgrep> doesn't support the full set of B<grep>
+options, and it uses Perl regular expressions rather than POSIX.
+
+Input is read from one or more files, or standard input if no files
+are given. Output goes to standard output.
+
+If B<bsgrep> is run as B<bsjoin> (via symbolic or hard link, or just
+copying the executable), it will simply join together continued lines
+without searching for anything. In this mode, only the B<-d>, B<-k>,
+B<-n>, B<-w>, B<-h>, B<--help>, and B<--man> options are supported.
+
+=head1 OPTIONS
+
+These options work with both B<bsgrep> and B<bsjoin>:
+
+=over 4
+
+=item -d I<char>
+
+Use I<char> as the continuation character, rather than a backslash.
+
+=item -k
+
+Keep the backslashes and newlines when joining continued lines together.
+This option does not exist in B<grep>.
+
+=item -n
+
+Prefix output lines with line numbers (same as B<grep>).
+
+=item -w
+
+For continuation lines, remove any leading whitespace. This option is
+specific to B<bsgrep>. The B<grep> B<-w> option can be simulated with
+the Perl B<\b> syntax in the regex.
+
+=item -h
+
+Prints a short help message and exits. Not compatible with B<grep>, which
+uses B<-h> for something else.
+
+=item --help
+
+Prints this help text, via B<perldoc>(1).
+
+=item --man
+
+Prints this help text as a man page, via B<pod2man>(1). Suggested use:
+
+ bsgrep --man > bsgrep.1
+
+=back
+
+These options are only supported by B<bsgrep>:
+
+=over 4
+
+=item -i
+
+Case-insensitive search (same as B<grep>).
+
+=item -l
+
+Instead of printing lines that match, print only the names of files
+that contain matches (same as B<grep>).
+
+=item -v
+
+Print only lines that do I<not> match (same as B<grep>).
+
+=item -s
+
+Silence warnings (same as B<grep>). This includes error messages
+about unreadable files as well as warnings about the input (see
+B<DIAGNOSTICS>, below).
+
+=back
+
+=head1 EXAMPLE
+
+Given the file B<trs80-roms.info> (which comes from SlackBuilds.org), containing:
+
+ PRGNAM="trs80-roms"
+ VERSION="20230516"
+ HOMEPAGE="https://sdltrs.sourceforge.net/docs/index.html"
+ DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip \
+ http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip \
+ https://www.tim-mann.org/trs80/ld4-631.zip \
+ https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip \
+ http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz \
+ https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
+ MD5SUM="ecd2c47c0624885fbcfb17889241f0ed \
+ 9b342f4401801bbc947e303cbeb9902f \
+ f2678aa45b76d935a34a0cd2b108925d \
+ 8a0f1567df8f166f4056a6a71ef7dce5 \
+ 8bb7cf88a3bc1da890f1f29398120bf3 \
+ 6f624bdbf4b410cfbe8603fa3bef44fa"
+ DOWNLOAD_x86_64=""
+ MD5SUM_x86_64=""
+ REQUIRES=""
+ MAINTAINER="B. Watson"
+ EMAIL="urchlay@slackware.uk"
+
+We can extract all the download URLs from the file with:
+
+ $ bsgrep '^DOWNLOAD' trs80-roms.info
+
+ DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip https://www.tim-mann.org/trs80/ld4-631.zip https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
+ DOWNLOAD_x86_64=""
+
+All the URLs are listed as one long line (apologies for the ugly formatting).
+Note that the whitespace that indents the continuation lines is
+preserved. In this case, the whitespace is all spaces, but tabs would
+be treated the same way. To compress the whitespace into a single space,
+use the B<-w> option.
+
+=head1 DIAGNOSTICS
+
+Unless disabled with the B<-s> option, B<bsgrep> may print these messages
+on standard error:
+
+ bsgrep: <file>: stripping carriage returns
+
+The input file has MS-DOS/Windows CRLF line endings. B<bsgrep>'s
+output will have these removed. Note that Unix-flavored tools that
+understand continuation lines will generally fail when fed CRLF files.
+
+ bsgrep: <file>, line <line>: whitespace after continuation, malformed input?
+
+In shell scripts (and most other uses of backslash continuation), a
+line that ends with whitespace after the backslash is not treated as a
+continuation line. This is a very easy error to create, when manually
+editing files. The above warning will help you avoid this. As usual,
+it can be ignored if you know exactly what you're doing.
+
+ bsgrep: <file>: last line ends with continuation
+
+This warning is self-explanatory. There's nothing for the last line
+to continue onto, so this is almost certainly an error.
+
+The above warnings don't affect the exit status.
+
+=head1 EXIT STATUS
+
+0 if there were any matches, 1 if there were none, or 2 if there
+were errors (e.g. nonexistent file). However, with B<-s>, the exit
+status will be 0 or 1 even if there were errors. This is the same as
+B<grep>'s exit status.
+
+=head1 BUGS
+
+The main bug is that B<grep>'s B<-r> (recursive) option is not
+supported. Other options that aren't implemented but might be
+someday include B<--color>, B<-a>, B<-A>, B<-B>, B<-C>.
+
+=head1 AUTHOR
+
+B<bsgrep> was written by B. Watson <urchlay@slackware.uk> and released
+under the WTFPL: Do WTF you want with this.
+
+=cut