#!/usr/bin/perl -w

# bsgrep / bsjoin: grep-like search over logical lines, where physical
# lines ending in a continuation character (backslash by default) are
# joined before matching. When invoked under a name containing "join",
# the script only joins lines and prints them all.

use strict;
use warnings;

use Getopt::Std;
use File::Find;

our $VERSION = "0.0.1";

# Name we were invoked as: selects grep vs. join mode, prefixes messages.
(my $self = $0) =~ s,.*/,,;

my %opt;            # command-line switches, filled in by getopts()
my %printed;        # files already listed by -l
my $ret = 1;        # exit status: 1 = no match, 0 = matched, 2 = error.
                    # Initialised here so errors reported while walking
                    # directories for -r are not overwritten later.
my $regex;          # compiled search pattern
my $filecount = 0;  # number of input files; > 1 enables filename prefixes
my $start_line;     # line number of the first physical line of $out
my $out;            # logical line being assembled; undef between lines
my $cr_warning;     # true once the CR warning was issued for current file

# Route every warning (ours, perl's, File::Find's) through one place so
# that -s can silence them and so that unexpected ones set exit status 2.
$SIG{__WARN__} = sub {
    my $m = shift;

    # don't include the line number in warnings.
    $m =~ s/ at \S+ line \d+\.$//;

    # File::Find seems to use double newlines for its warnings..
    $m =~ s/\n\n+/\n/;

    # warnings that don't start with $self: are e.g. file access errors
    # from the 'while(<>)' or File::Find. $self is quoted because a
    # program name may contain regex metacharacters.
    if ($m !~ /^\Q$self\E:/) {
        $m = "$self: $m";
        $ret = 2 unless $opt{s};
    }

    print STDERR $m unless $opt{s};
};

# Print the one-line usage summary for grep mode.
sub grep_usage {
    print "usage: $self [--help | --version | -[Fhiklnrsvwqz] [-d char] ...] [ ...]\n";
}

# Parse grep-mode switches into %opt; -h prints usage and exits 0.
sub grep_options {
    getopts('d:Fhiklnqrsvwz', \%opt) || exit 1;
    if ($opt{h}) {
        grep_usage();
        exit(0);
    }
}

# Print one logical line ($_[0]), prefixed with the file name when more
# than one file is being read and with the starting line number under -n.
# The terminator is NUL under -z, newline otherwise.
sub print_line {
    print "$ARGV:" if $filecount > 1;
    print "$start_line:" if $opt{n};
    print $_[0];
    print $opt{z} ? "\0" : "\n";
}

# Print the one-line usage summary for join mode and exit 0.
sub join_help {
    print "usage: $self [--help | --version | -[knwz] [-d char] ...] [ ...]\n";
    exit 0;
}

# Parse join-mode switches into %opt; -h prints usage and exits.
sub join_options {
    getopts('hd:knwz', \%opt) || exit 1;
    join_help() if $opt{h};
}

# Handle one fully joined logical line ($_[0]).
# The line is tested against $regex (sense inverted by -v) before
# anything else happens, so that the exit status, -q and -l all reflect
# actual matches rather than the mere presence of input.
sub handle_line {
    my ($line) = @_;

    my $matched = ($line =~ $regex) ? 1 : 0;
    $matched = !$matched if $opt{v};
    return unless $matched;

    $ret = 0 if $ret == 1;    # never downgrade an error status of 2
    return if $opt{q};

    if ($opt{l}) {
        print "$ARGV\n" unless $printed{$ARGV}++;
    }
    else {
        print_line($line);
    }
}

### main()
if (defined $ARGV[0]) {
    # Anchored: an unanchored /-help/ would also fire on a regex or file
    # name that merely contains "-help". List-form exec avoids the shell,
    # so a script path with spaces or metacharacters is passed intact.
    if ($ARGV[0] =~ /^--?help$/) {
        exec 'perldoc', $0;
        exit(1);
    }
    elsif ($ARGV[0] eq '--man') {
        exec 'pod2man', '--stderr', '-s1', '-cUrchlaysStuff',
            "-r$VERSION", '-u', $0;
        exit(1);
    }
    elsif ($ARGV[0] eq '--version') {
        print "bsgrep $VERSION\n";
        exit(0);
    }
}

if ($self =~ /join/) {
    join_options();
    $regex = '^';    # every string has a beginning...
}
else {
    grep_options();

    # defined(), not truth: "0" is a perfectly good pattern.
    if (!defined($regex = shift)) {
        grep_usage();
        die("$self: missing required regex argument\n");
    }
    $regex = quotemeta($regex) if $opt{F};
    $regex = "(?i)$regex" if $opt{i};
}

# Compile once. This also keeps an empty pattern from triggering perl's
# "reuse the last successful pattern" rule for m//.
$regex = qr/$regex/;

if ($opt{r}) {
    @ARGV = (".") unless @ARGV;

    my @found;
    for my $path (@ARGV) {
        if (-d $path) {
            find({
                # -l performs the lstat itself; File::Find only promises
                # a valid "_" when follow/follow_fast is in effect.
                wanted   => sub { push @found, $_ if !-l $_ && -f _; },
                follow   => 0,
                no_chdir => 1,
            }, $path);
        }
        else {
            push @found, $path;
        }
    }

    # With nothing to read, <> would silently fall back to STDIN.
    exit $ret unless @found;
    @ARGV = @found;
}

$filecount = @ARGV;    # used to decide whether to print filename prefixes.
my $cont = quotemeta($opt{d} // '\\');
$/ = "\0" if $opt{z};

while (<>) {
    chomp;

    if (s/\r//) {
        if (!$cr_warning) {
            warn "$self: $ARGV: stripping carriage returns\n" unless $opt{s};
            $cr_warning = 1;
        }
    }

    if (/$cont\s+$/) {
        warn "$self: $ARGV:$.: whitespace after continuation, malformed input?\n" unless $opt{s};
    }

    # $out is tested with defined() throughout: a pending logical line
    # may legitimately be "" or "0".
    s/^\s+// if defined $out && $opt{w};
    $start_line = $. unless defined $out;
    $out .= $_;

    if (/$cont$/) {
        if (!$opt{k}) {
            $out =~ s/$cont$//;
        }
    }
    else {
        handle_line($out);
        undef $out;
    }
}
continue {
    # reset $. on each new file (perldoc -f eof)
    if (eof) {
        if (defined $out) {
            warn "$self: $ARGV:$.: last line ends with continuation\n" unless $opt{s};
            handle_line($out);
            undef $out;
        }
        close ARGV;
        $cr_warning = 0;
    }
}

exit $ret;

### rest of file is the docs

=pod

=head1 NAME

bsgrep - search for strings in files with backslash continuation

bsjoin - join lines with backslash continuation

=head1 SYNOPSIS

bsgrep [B<-Fhiklnqrsvwz>] [B<-d> I<char>] I<regex> [I<file> I<...>]

bsjoin [B<-hknwz>] [B<-d> I<char>] [I<file> I<...>]

=head1 DESCRIPTION

B<bsgrep> (backslash grep) uses a regular expression to search for
strings in a file, much like B<grep>(1). The main difference is,
B<bsgrep> joins together lines that use the backslash for continuation
(e.g. as B<sh>(1) does). Other differences: B<bsgrep> doesn't support
the full set of B<grep> options, and it uses Perl regular expressions
rather than POSIX.

Input is read from one or more files, or standard input if no files
are given. Output goes to standard output.
The search is done after lines are joined together, so the regex can
match text split across continuation lines.

If B<bsgrep> is run as B<bsjoin> (via symbolic or hard link, or just
copying the executable), it will simply join together continued lines
without searching for anything. In this mode, only the B<-d>, B<-k>,
B<-n>, B<-w>, B<-z>, B<-h>, B<--version>, B<--help>, and B<--man>
options are supported.

=head1 OPTIONS

These options work with both B<bsgrep> and B<bsjoin>:

=over 4

=item -d I<char>

Use I<char> as the continuation character, rather than a backslash.

=item -k

Keep the continuation characters when joining continued lines
together. This option does not exist in B<grep>.

=item -n

Prefix output lines with line numbers (same as B<grep>). For lines
that are split with continuation characters, the line number will be
that of the first line in the set.

=item -w

For continuation lines, remove any leading whitespace. This option is
specific to B<bsgrep>. The B<grep> B<-w> option can be simulated with
the Perl B<\b> syntax in the regex.

=item -h

Prints a short help message and exits. Not compatible with B<grep>,
which uses B<-h> for something else.

=item -z

Use zero bytes (ASCII NUL) rather than newlines for line terminators,
for both input and output. Same as B<grep>.

=item --version

Print the version of B<bsgrep> and exit.

=item --help

Prints this help text, via B<perldoc>(1).

=item --man

Prints this help text as a man page, via B<pod2man>(1). Suggested use:

  bsgrep --man > bsgrep.1

=back

These options are only supported by B<bsgrep>:

=over 4

=item -F

Treat pattern(s) as fixed strings, not regular expression(s). Same as
B<grep>.

=item -i

Case-insensitive search (same as B<grep>).

=item -l

Instead of printing lines that match, print only the names of files
that contain matches (same as B<grep>).

=item -r

Recursively read all files under each directory, following symlinks
only if they're on the command line. If no files or directories are
given, reads the current directory. Same as B<grep>.

=item -v

Print only lines that do I<not> match (same as B<grep>).

=item -s

Silence warnings (same as B<grep>).
This includes error messages about unreadable files as well as
warnings about the input (see B<DIAGNOSTICS>, below).

=back

=head1 EXAMPLE

Given the file B<trs80-roms> (which comes from SlackBuilds.org),
containing:

  PRGNAM="trs80-roms"
  VERSION="20230516"
  HOMEPAGE="https://sdltrs.sourceforge.net/docs/index.html"
  DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip \
            http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip \
            https://www.tim-mann.org/trs80/ld4-631.zip \
            https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip \
            http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz \
            https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
  MD5SUM="ecd2c47c0624885fbcfb17889241f0ed \
          9b342f4401801bbc947e303cbeb9902f \
          f2678aa45b76d935a34a0cd2b108925d \
          8a0f1567df8f166f4056a6a71ef7dce5 \
          8bb7cf88a3bc1da890f1f29398120bf3 \
          6f624bdbf4b410cfbe8603fa3bef44fa"
  DOWNLOAD_x86_64=""
  MD5SUM_x86_64=""
  REQUIRES=""
  MAINTAINER="B. Watson"
  EMAIL="urchlay@slackware.uk"

We can extract all the download URLs from the file with:

  $ bsgrep '^DOWNLOAD' trs80-roms
  DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip            http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip            https://www.tim-mann.org/trs80/ld4-631.zip            https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip            http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz            https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
  DOWNLOAD_x86_64=""

All the URLs are listed as one long line (apologies for the ugly
formatting). Note that the whitespace that indents the continuation
lines is preserved. In this case, the whitespace is all spaces, but
tabs would be treated the same way. To compress the whitespace into a
single space, use the B<-w> option.

=head1 DIAGNOSTICS

Unless disabled with the B<-s> option, B<bsgrep> may print these
messages on standard error:

  bsgrep: <file>: stripping carriage returns

The input file has MS-DOS/Windows CRLF line endings. B<bsgrep>'s
output will have these removed. Note that Unix-flavored tools that
understand continuation lines will generally fail when fed CRLF files.

  bsgrep: <file>:<line>: whitespace after continuation, malformed input?

In shell scripts (and most other uses of backslash continuation), a
line that ends with whitespace after the backslash is not treated as a
continuation line. This is a very easy error to create when manually
editing files. The above warning will help you avoid this. As usual,
it can be ignored if you know exactly what you're doing.

  bsgrep: <file>:<line>: last line ends with continuation

This warning is self-explanatory. There's nothing for the last line
to continue onto, so this is almost certainly an error.

The above warnings don't affect the exit status.

=head1 EXIT STATUS

0 if there were any matches, 1 if there were none, or 2 if there were
errors (e.g. nonexistent file). However, with B<-s>, the exit status
will be 0 or 1 even if there were errors. This is the same as
B<grep>'s exit status.

=head1 LIMITATIONS

B<bsgrep> doesn't detect binary files like B<grep> does. It can and
will print them to your terminal instead of "binary file matches".

Not all B<grep> options are supported. Options that aren't implemented
but might be someday include B<--color>, B<-a>, B<-A>, B<-B>, B<-C>,
B<-o>. I don't intend to implement every single option B<grep> has;
there are too many of them.

There are no long options other than B<--help>, B<--man>, and
B<--version>.

B<bsgrep> does not comply with the POSIX (or any other) standard for
B<grep>, and does not intend to.

=head1 AUTHOR

B<bsgrep> was written by B. Watson and released under the WTFPL: Do
WTF you want with this.

=head1 SEE ALSO

B<grep>(1), B<perlre>(1)

=cut