1 files changed, 512 insertions, 0 deletions
diff --git a/bsgrep b/bsgrep
new file mode 100755
index 0000000..19ca3aa
--- /dev/null
+++ b/bsgrep
@@ -0,0 +1,512 @@
+#!/usr/bin/perl -w
+
+$VERSION = "0.0.1";
+
+use Getopt::Std;
+use File::Find;
+
+($self = $0) =~ s,.*/,,;	# basename of the script; also selects bsgrep vs. bsjoin mode
+
+%printed = ();	# file names already printed by -l
+
+$SIG{__WARN__} = sub {
+	# every warn() and perl-generated warning is routed through here.
+	my $m = shift;
+	# don't include the line number in warnings.
+	$m =~ s/ at \S+ line \d+\.$//;
+
+	# File::Find seems to use double newlines for its warnings..
+	$m =~ s/\n\n+/\n/;
+
+	# warnings that don't start with $self: are e.g. file access errors
+	# from the 'while(<>)' or File::Find. \Q..\E: $self is a name, not a regex.
+	if($m !~ /^\Q$self\E:/) {
+		$m = "$self: $m";
+		$ret = 2 unless $opt{q};	# error exit status; with -q it stays 0 or 1
+	}
+
+	print STDERR $m unless $opt{s};
+};
+
+sub grep_options {	# parse bsgrep's command line: fills @patterns and %opt, rewrites @ARGV
+	my @nargv;
+	my $was_e;
+
+	# first, grab all the -e options and remove them from @ARGV,
+	# because Getopt::Std doesn't support multiple occurrences of
+	# a flag with different args. probably it would be better to
+	# use Getopt::Long, but for now this works.
+	for(@ARGV) {
+		if($was_e) {
+			push @patterns, $_;
+			$was_e = 0;
+		} elsif($_ eq '-e') {
+			$was_e = 1;
+		} elsif($_ =~ /^-e(.*)/) {	# anchored: "-eFOO" only, not any argument containing "-e"
+			push @patterns, $1;
+		} else {
+			push @nargv, $_;
+		}
+	}
+	@ARGV = @nargv;
+
+	getopts('d:FiklnNqrsvwz', \%opt) || exit 1;	# getopts reports bad options itself
+}
+
+sub print_line {	# write one joined line, with optional "file:" and "line:" prefixes
+	my ($text) = @_;
+	my $prefix = $filecount > 1 ? "$ARGV:" : "";
+	$prefix .= "$start_line:" if $opt{n};
+	print $prefix, $text, ($opt{z} ? "\0" : "\n");
+}
+
+sub join_options {	# bsjoin mode: accepts only -d <str>, -k, -n, -w and -z
+	exit 1 unless getopts('d:knwz', \%opt);
+}
+
+sub handle_line {	# decide whether the joined line in $out is selected, then report it
+	my $hits = 0;
+
+	# count the patterns that match. -v is applied afterwards to the
+	# combined result, so it negates the whole selection, as in grep.
+	for my $pat (@patterns) {
+		$hits++ if $out =~ /$pat/;
+	}
+
+	# default: any pattern may match (OR). -N: every pattern must match (AND).
+	my $selected = $opt{N} ? ($hits == @patterns) : ($hits > 0);
+	$selected = !$selected if $opt{v};
+	return unless $selected;
+
+	# a match turns "no match" (1) into success (0), but never
+	# overwrites the error status (2) set by the warning handler.
+	$ret = 0 if $ret == 1;
+	return if $opt{q};
+
+	if($opt{l}) {
+		# -l: print each file name once, however many lines match.
+		if(!$printed{$ARGV}++) {
+			print "$ARGV\n";
+		}
+	} else {
+		print_line($out);
+	}
+}
+
+### main()
+# TODO: do we need 'use locale'?
+# also, why does reading iso-8859-1 text auto-convert to utf-8?
+# put stdin/stdout into utf-8 mode when the locale environment names utf-8.
+for my $locale_var (qw/LANG LC_CTYPE LC_ALL/) {
+	next unless ($ENV{$locale_var} // "") =~ /utf-?8/i;
+	binmode(\*STDIN, ':utf8');
+	binmode(\*STDOUT, ':utf8');
+	last;	# one utf-8 variable is enough
+}
+
+if(defined($ARGV[0])) {	# --help/--man/--version are only recognized as the first argument
+	if($ARGV[0] =~ /^--?help$/) {	# anchored, so a pattern like 'self-help' is not mistaken for it
+		exec 'perldoc', $0;	# list form: no shell, so odd characters in $0 are harmless
+		exit(1);	# only reached if exec failed
+	} elsif($ARGV[0] eq '--man') {
+		exec 'pod2man', '--stderr', '-s1', '-cUrchlaysStuff', "-r$VERSION", '-u', $0;
+		exit(1);	# only reached if exec failed
+	} elsif($ARGV[0] eq '--version') {
+		print "bsgrep $VERSION\n";
+		exit(0);
+	}
+}
+
+if($self =~ /join/) {	# invoked as bsjoin: join lines only, no searching
+	join_options();
+	push @patterns, '^'; # every string has a beginning...
+} else {
+	grep_options();
+	if(!@patterns) {
+		if(!defined($patterns[0] = shift)) {	# no -e: first remaining arg is the pattern ("0" is valid)
+			print STDERR "usage: $self [-FiklnNqrsvwz] [-d char] [-e pattern ... | pattern] [file ...]\n";
+			die("$self: missing required pattern argument\n");
+		}
+	}
+	for(@patterns) {	# $_ aliases each element, so assigning to it edits @patterns
+		$_ = quotemeta($_) if $opt{F};
+		$_ = $opt{i} ? "(?i:$_)" : "(?:$_)";	# group is never empty: perl's empty // reuses the last pattern
+	}
+}
+
+$ret = 1; # exit status: 1 = no match, 0 = match, 2 = error. must precede find(), whose warnings set it to 2.
+
+if($opt{r}) {
+	@ARGV = (".") unless @ARGV;
+	for(@ARGV) {
+		if(-d $_) {
+			# expand a directory into the plain files below it.
+			# NOTE(review): '-f _' reuses File::Find's own lstat of the entry; confirm that is guaranteed with follow => 0.
+			find({
+				wanted => sub { push @nargv, $_ if -f _; },
+				follow => 0,
+				no_chdir => 1 },
+				$_);
+		} else {
+			push @nargv, $_;	# non-directories are kept as given; open errors surface in the read loop
+		}
+	}
+	@ARGV = @nargv;
+}
+
+$filecount = @ARGV; # used to decide whether to print filename prefixes.
+
+$cont = quotemeta($opt{d} // '\\');	# continuation marker as a literal-matching regex fragment
+$/ = "\0" if $opt{z};	# -z: input records end with NUL instead of newline
+
+while(<>) {	# read every input line, gluing continued lines together in $out
+	chomp;
+	if(s/\r//) {
+		if(!$cr_warning) {	# warn once per file only
+			warn "$self: $ARGV: stripping carriage returns\n" unless $opt{s};
+			$cr_warning = 1;
+		}
+	}
+	if(/$cont\s+$/) {
+		warn "$self: $ARGV:$.: whitespace after continuation, malformed input?\n" unless $opt{s};
+	}
+	s/^\s+// if defined($out) && $opt{w};	# defined, not true: the text so far may be "" or "0"
+	$start_line = $. unless defined $out;	# -n reports the first line of a joined set
+	$out .= $_;
+	if(/$cont$/) {
+		if(!$opt{k}) {
+			$out =~ s/$cont$//;
+		}
+	} else {
+		handle_line();
+		undef $out;
+	}
+} continue {
+	# reset $. on each new file (perldoc -f eof)
+	if(eof) {
+		if(defined($out)) {	# pending continuation; must not leak into the next file even if "" or "0"
+			warn "$self: $ARGV:$.: last line ends with continuation\n" unless $opt{s};
+			handle_line();
+			undef $out;
+		}
+		close ARGV;
+		$cr_warning = 0;
+	}
+}
+
+exit $ret;
+
+### rest of file is the docs
+
+=pod
+
+=head1 NAME
+
+bsgrep - search for strings in files with backslash continuation
+
+bsjoin - join lines with backslash continuation
+
+=head1 SYNOPSIS
+
+bsgrep [B<[-FiklnNqrsvwz]> B<-d> I<char> I<...>] [B<-e> I<pattern> ... | I<pattern>] [I<file> I<...>]
+
+bsjoin [B<[-knwz]> B<-d> I<char> I<...>] [I<file> I<...>]
+
+=head1 DESCRIPTION
+
+B<bsgrep> (backslash grep) uses a regular expression to search for
+strings in a file, much like B<grep>(1). The main difference is,
+B<bsgrep> joins together lines that use the backslash for continuation
+(e.g. as B<sh>(1) does).
+
+Other differences: B<bsgrep> doesn't support the full set of B<grep>
+options, and it uses Perl regular expressions rather than POSIX.
+
+Input is read from one or more files, or standard input if no files
+are given. Output goes to standard output.
+
+The search is done after lines are joined together, so the regex can
+match text split across continuation lines.
+
+If B<bsgrep> is run as B<bsjoin> (via symbolic or hard link, or just
+copying the executable), it will simply join together continued lines
+without searching for anything. In this mode, only the B<-d>, B<-k>, B<-n>,
+B<-w>, B<-z>, B<--version>, B<--help>, and B<--man> options are supported.
+
+=head1 OPTIONS
+
+These options work with both B<bsgrep> and B<bsjoin>:
+
+=over 4
+
+=item -d I<char>
+
+Use I<char> as the continuation character, rather than a backslash.
+Actually, there's no law that says it has to be a single character,
+if you can think of a use for a string here... though it's treated as
+a fixed string, not a regular expression. This option does not exist
+in B<grep>.
+
+=item -k
+
+Keep the continuation characters when joining continued lines together.
+This option does not exist in B<grep>.
+
+=item -n
+
+Prefix output lines with line numbers (same as B<grep>). For lines
+that are split with continuation characters, the line number will be
+that of the first line in the set.
+
+=item -w
+
+For continuation lines, remove any leading whitespace. This option is
+specific to B<bsgrep>. The B<grep> B<-w> option can be simulated with
+the Perl B<\b> syntax in the regex.
+
+=item -z
+
+Use zero bytes (ASCII NUL) rather than newlines for line terminators,
+for both input and output. Same as B<grep>.
+
+=item --version
+
+Print the version of B<bsgrep> and exit.
+
+=item --help
+
+Prints this help text, via B<perldoc>(1).
+
+=item --man
+
+Prints this help text as a man page, via B<pod2man>(1). Suggested use:
+
+  bsgrep --man > bsgrep.1
+
+=back
+
+These options are only supported by B<bsgrep>:
+
+=over 4
+
+=item -e I<pattern>
+
+Use I<pattern> as the pattern. May be used multiple times, in which case
+they are ORed together (a line that matches any I<pattern> is a match)... unless
+the B<-N> option is used, q.v. Same as B<grep>.
+
+=item -F
+
+Treat pattern(s) as fixed strings, not regular expression(s). Same as B<grep>.
+
+=item -i
+
+Case-insensitive search (same as B<grep>).
+
+=item -l
+
+Instead of printing lines that match, print only the names of files
+that contain matches (same as B<grep>).
+
+=item -N
+
+When multiple patterns are given with multiple B<-e> options, only
+select lines that match all of the patterns; the default is to select
+lines that match any of the patterns. This option doesn't exist
+in B<grep>.
+
+=item -q
+
+Quiet: don't write to standard output. Exit status will be zero if
+a match was found, even if there were errors. This doesn't prevent
+warnings/errors being printed to standard error; use B<-s> to silence
+those. Same as B<grep>.
+
+=item -r
+
+Recursively read all files under each directory, following symlinks
+only if they're on the command line. If no files or directories are
+given, reads the current directory. Same as B<grep>.
+
+=item -s
+
+Silence warnings (same as B<grep>). This includes error messages
+about unreadable files as well as warnings about the input (see
+B<DIAGNOSTICS>, below).
+
+=item -v
+
+Print only lines that do I<not> match (same as B<grep>).
+
+=back
+
+=head1 EXAMPLE
+
+Given the file B<trs80-roms.info> (which comes from SlackBuilds.org), containing:
+
+  PRGNAM="trs80-roms"
+  VERSION="20230516"
+  HOMEPAGE="https://sdltrs.sourceforge.net/docs/index.html"
+  DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip \
+            http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip \
+            https://www.tim-mann.org/trs80/ld4-631.zip \
+            https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip \
+            http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz \
+            https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
+  MD5SUM="ecd2c47c0624885fbcfb17889241f0ed \
+          9b342f4401801bbc947e303cbeb9902f \
+          f2678aa45b76d935a34a0cd2b108925d \
+          8a0f1567df8f166f4056a6a71ef7dce5 \
+          8bb7cf88a3bc1da890f1f29398120bf3 \
+          6f624bdbf4b410cfbe8603fa3bef44fa"
+  DOWNLOAD_x86_64=""
+  MD5SUM_x86_64=""
+  REQUIRES=""
+  MAINTAINER="B. Watson"
+  EMAIL="urchlay@slackware.uk"
+
+We can extract all the download URLs from the file with:
+
+  $ bsgrep '^DOWNLOAD' trs80-roms.info
+
+  DOWNLOAD="https://www.filfre.net/misc/trs_roms.zip           http://cpmarchives.classiccmp.org/trs80/mirrors/www.discover-net.net/~dmkeil/trs80/files/trs80-62.zip           https://www.tim-mann.org/trs80/ld4-631.zip           https://archive.org/download/mame-0.250-roms-split_202212/MAME%200.250%20ROMs%20%28split%29/trs80m4p.zip           http://www.tim-mann.org/trs80/xtrs-4.9d.tar.gz           https://www.classic-computers.org.nz/system-80/disks/NEWDOS_80sssd_jv1.DSK"
+  DOWNLOAD_x86_64=""
+
+All the URLs are listed as one long line (apologies for the ugly formatting).
+Note that the whitespace that indents the continuation lines is
+preserved. In this case, the whitespace is all spaces, but tabs would
+be treated the same way. To compress the whitespace into a single space,
+use the B<-w> option.
+
+=head1 DIAGNOSTICS
+
+Unless disabled with the B<-s> option, B<bsgrep> may print these messages
+on standard error:
+
+  bsgrep: <file>: stripping carriage returns
+
+The input file has MS-DOS/Windows CRLF line endings. B<bsgrep>'s
+output will have these removed. Note that other Unix-flavored tools
+that understand continuation lines will generally fail when fed CRLF
+files.
+
+  bsgrep: <file>:<line>: whitespace after continuation, malformed input?
+
+In shell scripts (and most other uses of backslash continuation), a
+line that ends with whitespace after the backslash is not treated as a
+continuation line. This is a very easy error to create, when manually
+editing files. The above warning will help you avoid this. As usual,
+it can be ignored if you know exactly what you're doing.
+
+  bsgrep: <file>:<line>: last line ends with continuation
+
+This warning is self-explanatory. There's nothing for the last line
+to continue onto, so this is almost certainly an error.
+
+The above warnings don't affect the exit status.
+
+=head1 ENVIRONMENT
+
+B<bsgrep> doesn't define any environment variables of its own, but
+it does pay attention to B<LANG>, B<LC_ALL>, and B<LC_CTYPE>. If any
+of these contain the string I<UTF-8>, the input and output will be
+read/written as Unicode, encoded as UTF-8. If the input turns out not
+to be Unicode, it will be assumed ISO-8859-1, and converted to Unicode.
+
+=head1 EXIT STATUS
+
+0 if there were any matches, 1 if there were none, or 2 if there
+were errors (e.g. nonexistent file). However, with B<-q>, the exit
+status will be 0 or 1 even if there were errors. This is the same as
+B<grep>'s exit status.
+
+=head1 LIMITATIONS
+
+B<bsgrep> doesn't detect binary files like B<grep> does. It can and
+will print them to your terminal instead of "binary file matches".
+
+Not all B<grep> options are supported. Options that aren't implemented
+but might be someday include B<--color>, B<-a>, B<-A>, B<-B>, B<-C>, B<-o>.
+I don't intend to implement every single option B<grep> has, there are
+too many of them.
+
+There are no long options other than B<--help>, B<--man>, and B<--version>.
+
+B<bsgrep> does not comply with the POSIX (or any other) standard for
+B<grep>, and is not intended to.
+
+Locale support isn't quite the same as B<grep>: in a UTF-8 locale,
+if the input isn't plain ASCII or valid UTF-8, it will be treated
+as ISO-8859-1, internally converted to Unicode, and output will be
+UTF-8. This isn't intended; it's a side-effect of how Perl UTF-8
+filehandles work. In non-UTF-8 locales, things should work as
+expected. I hope.
+
+=head1 AUTHOR
+
+B<bsgrep> was written by B. Watson <urchlay@slackware.uk> and released
+under the WTFPL: Do WTF you want with this.
+
+=head1 SEE ALSO
+
+B<grep>(1), B<perl>(1)
+
+=cut
+
+__END__
+
+implemented:
+       --help
+       -V, --version
+       -F, --fixed-strings
+       -e PATTERNS, --regexp=PATTERNS
+       -i, --ignore-case
+       -v, --invert-match
+       -q, --quiet, --silent
+       -s, --no-messages
+       -n, --line-number
+       -z, --null-data
+       -r, --recursive
+       -l, --files-with-matches
+
+todo:
+       -f FILE, --file=FILE
+       -y     Obsolete synonym for -i.
+       -c, --count
+       -R, --dereference-recursive
+       -L, --files-without-match
+       -Z, --null
+       -A NUM, --after-context=NUM
+       -B NUM, --before-context=NUM
+       -C NUM, -NUM, --context=NUM
+       -H, --with-filename
+       -h, --no-filename
+       -w, --word-regexp
+       -x, --line-regexp
+
+do not implement:
+       -E, --extended-regexp
+       -G, --basic-regexp
+       -P, --perl-regexp
+       --no-ignore-case
+
+undecided:
+       --color[=WHEN], --colour[=WHEN]
+       -m NUM, --max-count=NUM
+       -o, --only-matching
+       -b, --byte-offset
+       --label=LABEL
+       -T, --initial-tab
+       --group-separator=SEP
+       --no-group-separator
+       -a, --text
+       --binary-files=TYPE
+       -D ACTION, --devices=ACTION
+       -d ACTION, --directories=ACTION
+       --exclude=GLOB
+       --exclude-from=FILE
+       --exclude-dir=GLOB
+       -I
+       --include=GLOB
+       --line-buffered
+       -U, --binary