#!/usr/bin/perl -w

# bsgrep / bsjoin: grep-like search of files whose logical lines may be
# continued with a trailing backslash (or another character, see -d).
# When invoked under a name containing "join", the script only joins
# continued lines and prints every resulting logical line.

use strict;
use warnings;

use Getopt::Std;
use File::Find;

our $VERSION = "0.0.1";

# Basename of the running script; used in messages and to pick the mode.
(my $self = $0) =~ s,.*/,,;

my %opt;               # command-line switches, filled in by getopts()
my %printed;           # files already reported by -l
my $regex;             # compiled search pattern
my $out;               # logical line being assembled (undef between lines)
my $start_line;        # physical line number on which $out began
my $cr_warning = 0;    # true once the CR warning was issued for this file
my $filecount  = 0;    # number of input files; >1 enables filename prefixes

# Exit status: 1 = nothing matched (yet), 0 = something matched, 2 = error.
# Initialised before any warning can fire, so that an error reported early
# (e.g. while walking directories for -r) is not overwritten later.
my $ret = 1;

# Route every warning through one place: strip perl's " at FILE line N."
# suffix, prefix foreign messages with the program name, and honour -s.
$SIG{__WARN__} = sub {
    my ($msg) = @_;
    $msg =~ s/ at \S+ line \d+\.$//;

    # Warnings that don't start with "$self:" are e.g. file access errors
    # from the 'while(<>)'; those count as errors for the exit status.
    if ($msg !~ /^\Q$self\E:/) {
        $msg = "$self: $msg";
        $ret = 2 unless $opt{s};
    }
    print STDERR $msg unless $opt{s};
};

# Print the one-line usage summary for grep mode on standard output.
sub grep_usage {
    print "usage: $self [--help | --version | -[iklnrsvwq] [-d char] ...] <regex> [<file> ...]\n";
}

# Parse grep-mode switches into %opt; exits 1 on a bad switch, and prints
# the usage summary and exits 0 for -h.
sub grep_options {
    getopts('hd:vlkinrwsq', \%opt) || exit 1;
    if ($opt{h}) {
        grep_usage();
        exit(0);
    }
}

# Print one logical line (first argument), prefixed with the file name when
# several files are being read and with the starting line number under -n.
sub print_line {
    my ($line) = @_;
    print "$ARGV:" if $filecount > 1;
    print "$start_line:" if $opt{n};
    print "$line\n";
}

# Print the one-line usage summary for join mode and exit 0.
sub join_help {
    print "usage: $self [--help | --version | -[knw] [-d char] ...] [<file> ...]\n";
    exit 0;
}

# Parse join-mode switches into %opt; exits 1 on a bad switch.
sub join_options {
    getopts('hd:knw', \%opt) || exit 1;
    join_help() if $opt{h};
}

# Handle one complete logical line (first argument): decide whether it is
# selected (regex match, inverted by -v), record the match in the exit
# status, and produce the output required by -q / -l / default mode.
sub handle_line {
    my ($line) = @_;

    my $selected = ($line =~ $regex) ? 1 : 0;
    $selected = !$selected if $opt{v};
    return unless $selected;

    # Only a selected line may turn "no match" into "match"; an error
    # status of 2 is never downgraded.
    $ret = 0 if $ret == 1;
    return if $opt{q};

    if ($opt{l}) {
        print "$ARGV\n" unless $printed{$ARGV}++;
        return;
    }

    print_line($line);
    return;
}

### main()

if (defined($ARGV[0])) {
    # Exact comparison: a regex argument that merely contains "-help"
    # must not trigger the manual.
    if ($ARGV[0] =~ /\A--?help\z/) {
        # List form of exec: no shell, so odd characters in $0 are safe.
        exec 'perldoc', $0;
        exit(1);
    }
    elsif ($ARGV[0] eq '--man') {
        exec 'pod2man', '--stderr', '-s1', '-cUrchlaysStuff',
            "-r$VERSION", '-u', $0;
        exit(1);
    }
    elsif ($ARGV[0] eq '--version') {
        print "bsgrep $VERSION\n";
        exit(0);
    }
}

if ($self =~ /join/) {
    join_options();
    $regex = qr/^/;    # every string has a beginning...
}
else {
    grep_options();

    # Test definedness, not truth: "0" is a perfectly good regex.
    my $pattern = shift @ARGV;
    if (!defined $pattern) {
        grep_usage();
        print STDERR "$self: missing required regex argument\n";
        exit 2;
    }

    # Compile once. Besides being cheaper, a compiled pattern avoids
    # perl's rule that an empty interpolated pattern silently reuses the
    # last successful match.
    $regex = eval { $opt{i} ? qr/$pattern/i : qr/$pattern/ };
    if (!defined $regex) {
        (my $err = $@) =~ s/ at \S+ line \d+\.\n?\z//;
        print STDERR "$self: invalid regex: $err\n";
        exit 2;
    }
}

# An empty continuation string would match at the end of every line and
# glue the whole input together.
if (defined $opt{d} && $opt{d} eq '') {
    print STDERR "$self: -d requires a non-empty continuation character\n";
    exit 2;
}

if ($opt{r}) {
    my $had_paths = @ARGV;
    my @expanded;
    for my $path (@ARGV) {
        if (-d $path) {
            find(
                {
                    # With no_chdir, $_ is the full path. Test it
                    # explicitly: without 'follow', File::Find does not
                    # promise that the '_' stat buffer is valid. Symlinks
                    # found during the walk are skipped, as documented.
                    wanted   => sub { push @expanded, $_ if !-l $_ && -f $_ },
                    follow   => 0,
                    no_chdir => 1,
                },
                $path
            );
        }
        else {
            push @expanded, $path;
        }
    }
    @ARGV = @expanded;

    # Directories that contain no files must not make <> fall back to
    # reading standard input.
    exit $ret if $had_paths && !@ARGV;
}

$filecount = @ARGV;    # used to decide whether to print filename prefixes.

my $cont         = quotemeta($opt{d} // '\\');
my $cont_at_end  = qr/$cont$/;
my $cont_then_ws = qr/$cont\s+$/;

while (<>) {
    chomp;

    # Remove every carriage return, warning once per file.
    if (tr/\r//d) {
        if (!$cr_warning) {
            warn "$self: $ARGV: stripping carriage returns\n" unless $opt{s};
            $cr_warning = 1;
        }
    }

    if ($_ =~ $cont_then_ws) {
        warn "$self: $ARGV, line $.: whitespace after continuation, malformed input?\n"
            unless $opt{s};
    }

    # "Are we inside a continued line?" is a question of definedness:
    # the text collected so far may legitimately be "" or "0".
    s/^\s+// if defined $out && $opt{w};
    $start_line = $. unless defined $out;
    $out .= $_;

    if ($_ =~ $cont_at_end) {
        if ($opt{k}) {
            $out .= "\n";
        }
        else {
            $out =~ s/$cont_at_end//;
        }
    }
    else {
        handle_line($out);
        undef $out;
    }
}
continue {
    # reset $. on each new file (perldoc -f eof)
    if (eof) {
        if (defined $out) {
            warn "$self: $ARGV: last line ends with continuation\n"
                unless $opt{s};
            handle_line($out);
            undef $out;
        }
        close ARGV;
        $cr_warning = 0;
    }
}

exit $ret;

### rest of file is the docs

=pod

=head1 NAME

bsgrep - search for strings in files with backslash continuation

bsjoin - join lines with backslash continuation

=head1 SYNOPSIS

bsgrep B<[-hvlkinrws]> I<regex> [I<file> I<...>]

bsjoin B<[-hknw]> [I<file> I<...>]

=head1 DESCRIPTION

B<bsgrep> uses a regular expression to search for strings in a file,
much like B<grep>(1). The difference is, B<bsgrep> joins together lines
that use the backslash for continuation (e.g. as B<sh>(1) does).

Other differences: B<bsgrep> doesn't support the full set of B<grep>
options, and it uses Perl regular expressions rather than POSIX.

Input is read from one or more files, or standard input if no files
are given. Output goes to standard output.

If B<bsgrep> is run as B<bsjoin> (via symbolic or hard link, or just
copying the executable), it will simply join together continued lines
without searching for anything. In this mode, only the B<-d>, B<-k>,
B<-n>, B<-w>, B<-h>, B<--version>, and B<--help> options are supported.

=head1 OPTIONS

These options work with both B<bsgrep> and B<bsjoin>:

=over 4

=item -d I<char>

Use I<char> as the continuation character, rather than a backslash.

=item -k

Keep the backslashes and newlines when joining continued lines
together. This option does not exist in B<grep>.

=item -n

Prefix output lines with line numbers (same as B<grep>).

=item -w

For continuation lines, remove any leading whitespace. This option is
specific to B<bsgrep>. The B<grep> B<-w> option can be simulated with
the Perl B<\b> syntax in the regex.

=item -h

Prints a short help message and exits. Not compatible with B<grep>,
which uses B<-h> for something else.

=item --version

Print the version of B<bsgrep> and exit.

=item --help

Prints this help text, via B<perldoc>(1).

=item --man

Prints this help text as a man page, via B<pod2man>(1). Suggested use:

    bsgrep --man > bsgrep.1

=back

These options are only supported by B<bsgrep>:

=over 4

=item -i

Case-insensitive search (same as B<grep>).

=item -l

Instead of printing lines that match, print only the names of files
that contain matches (same as B<grep>).

=item -r

Recursively read all files under each directory, following symlinks
only if they're on the command line (same as B<grep>).

=item -v

Print only lines that do I<not> match (same as B<grep>).

=item -s

Silence warnings (same as B<grep>). This includes error messages about
unreadable files as well as warnings about the input (see
B<DIAGNOSTICS>, below).

=back

=head1 EXAMPLE

Given the file B<trs80-roms> (which comes from SlackBuilds.org),
containing:

    PRGNAM="trs80-roms"
    VERSION="20230516"
    HOMEPAGE="https://sdltrs.sourceforge.net/docs/index.html"
    DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip \
              http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip \
              https://www.tim-mann.org/trs80/ld4-631.zip \
              https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip \
              http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz \
              https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
    MD5SUM="ecd2c47c0624885fbcfb17889241f0ed \
            9b342f4401801bbc947e303cbeb9902f \
            f2678aa45b76d935a34a0cd2b108925d \
            8a0f1567df8f166f4056a6a71ef7dce5 \
            8bb7cf88a3bc1da890f1f29398120bf3 \
            6f624bdbf4b410cfbe8603fa3bef44fa"
    DOWNLOAD_x86_64=""
    MD5SUM_x86_64=""
    REQUIRES=""
    MAINTAINER="B. Watson"
    EMAIL="urchlay@slackware.uk"

We can extract all the download URLs from the file with:

    $ bsgrep '^DOWNLOAD' trs80-roms
    DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip           http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip           https://www.tim-mann.org/trs80/ld4-631.zip           https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip           http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz           https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
    DOWNLOAD_x86_64=""

All the URLs are listed as one long line (apologies for the ugly
formatting). Note that the whitespace that indents the continuation
lines is preserved. In this case, the whitespace is all spaces, but
tabs would be treated the same way. To compress the whitespace into a
single space, use the B<-w> option.

=head1 DIAGNOSTICS

Unless disabled with the B<-s> option, B<bsgrep> may print these
messages on standard error:

    bsgrep: <file>: stripping carriage returns

The input file has MS-DOS/Windows CRLF line endings. B<bsgrep>'s output
will have these removed. Note that Unix-flavored tools that understand
continuation lines will generally fail when fed CRLF files.

    bsgrep: <file>, line <line>: whitespace after continuation, malformed input?

In shell scripts (and most other uses of backslash continuation), a
line that ends with whitespace after the backslash is not treated as a
continuation line. This is a very easy error to create when manually
editing files. The above warning will help you avoid this. As usual, it
can be ignored if you know exactly what you're doing.

    bsgrep: <file>: last line ends with continuation

This warning is self-explanatory. There's nothing for the last line to
continue onto, so this is almost certainly an error.

The above warnings don't affect the exit status.

=head1 EXIT STATUS

0 if there were any matches, 1 if there were none, or 2 if there
were errors (e.g. nonexistent file).
However, with B<-s>, the exit status will be 0 or 1 even if there were
errors.

This is the same as B<grep>'s exit status.

=head1 LIMITATIONS

Not all B<grep> options are supported. Options that aren't implemented
but might be someday include B<--color>, B<-a>, B<-A>, B<-B>, B<-C>.

I don't intend to implement every single option B<grep> has; there are
too many of them.

There are no long options other than B<--help>, B<--man>, and
B<--version>.

=head1 AUTHOR

B<bsgrep> was written by B. Watson and released under the WTFPL: Do WTF
you want with this.

=cut