diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/urlmanager.pl | 1350 |
1 file changed, 1350 insertions, 0 deletions
diff --git a/scripts/urlmanager.pl b/scripts/urlmanager.pl new file mode 100644 index 0000000..787e775 --- /dev/null +++ b/scripts/urlmanager.pl @@ -0,0 +1,1350 @@ +#!/usr/bin/perl + +# urlmanager script for irssi + +use warnings; +use strict; + +use Fcntl qw/:flock/; +use POSIX qw/strftime/; + +use Irssi qw/ + settings_add_str settings_add_bool settings_add_int + settings_get_str settings_get_bool settings_get_int + settings_set_str settings_set_bool settings_set_int + command command_bind command_unbind + signal_emit signal_add_last signal_stop + timeout_add timeout_add_once timeout_remove + window_find_item/; + +our $VERSION = "0.1"; +our %IRSSI = ( + authors => 'Urchlay', + contact => 'Urchlay on NewNet', + name => 'urlmanager', + description => + 'Captures URLs said in channel and private messages ' . + 'and saves them to a file, also adds several commands for ' . + 'listing and opening captured URLs ' . + '(based on urlgrab.pl 0.2 by David Leadbetter)', + license => 'GNU GPLv2 or later', + url => 'none', +); + +# 20110609 bkw: if irssi was started in a screen session from the console, +# then detached, then reattached in an X session, DISPLAY will not be set. +# This will confuse the user, as e.g. firefox will silently fail to run. +# It won't do any harm to set DISPLAY=:0 if it's not set, and might help... +{ + my $disp = $ENV{DISPLAY}; + $ENV{DISPLAY} = ":0" unless $disp; +} + +# Workaround for a heisenbug, see: +# http://bugs.irssi.org/index.php?do=details&task_id=242 +{ package Irssi::Nick } + +# Color constants. +# Irssi.pm doesn't include symbolic mIRC-style color names... +# NOTE: if you print e.g. $green . "12345", the "1" will be interpreted +# as the 2nd digit of the color! Only good fix is to always put a space: +# print $green . " 12345" works OK. +# Declarations only; defined in init_colors() +our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off); + +# @urls is a list of anonymous hashes, each representing one URL. 
# See read_url_file() for the hash elements stored per URL.
our @urls;

# Most-recently-posted URL (the URL only, not a hash). Only used
# for avoiding dups (see url_log).
our $lasturl = "";

# Have any URLs been captured since the last /ul -delete? This is to
# (hopefully) protect the user from losing freshly-captured URLs when
# undoing a delete (see urlm_undo_delete).
our $captured_since_delete = 0;

# echo: print text without irssi treating % chars as format codes.
# print()/Irssi::print() mangle URLs containing HTML %-escapes; using
# irssi's /echo is apparently the right way to avoid this... though we
# get a colored -!- in front of every line :(
sub echo {
    command("/echo $_") for @_;
}

# trim: remove leading/trailing whitespace.
# NOTE: this intentionally edits $_[0] in place (through the @_ alias)
# AND returns the trimmed string; at least one caller
# (urlm_add_browser's "$arg =~ trim($arg)" typo) depends on the
# in-place effect, so both behaviors are preserved.
# Fix: the old pattern used \s$ for the tail, which stripped only a
# single trailing whitespace character; \s+$ strips them all.
sub trim {
    $_[0] =~ s/^\s+|\s+$//g;
    return $_[0];
}

# read_url_file: called on script load (and after undo-delete) to load
# the log file. Returns an array of URL hashes
# (stamp/nick/channel/url), which will be empty if the file wasn't
# present or was empty.
sub read_url_file {
    my $file = get_url_log_file();
    my @got;

    # 3-arg open with a lexical handle; the old 2-arg/bareword form is
    # unsafe if the filename ever contains open()-special characters.
    open my $fh, '<', $file or return;

    # A shared lock is all a reader needs; the old code took LOCK_EX,
    # which needlessly blocked concurrent readers.
    flock($fh, LOCK_SH);

    while(my $line = <$fh>) {
        chomp $line;
        my ($stamp, $nick, $channel, $url) = split " ", $line;
        next unless defined $url;   # skip malformed/truncated lines
        push @got, {
            stamp   => $stamp,
            nick    => $nick,
            channel => $channel,
            url     => $url,
        };
    }
    close $fh;

    return @got;
}

# write_url_file: rewrite the URL log from the given list of URL
# hashes, by re-logging each one through url_log() (which also
# repopulates @urls). Callers are expected to clear the log first.
sub write_url_file {
    for my $u (@_) {
        $lasturl = "";   # defeat url_log()'s dup suppression
        url_log(1, $u->{nick}, $u->{channel}, $u->{url}, $u->{stamp});
    }
}

# trim_url_log: trim the in-memory list and on-disk log according to
# the urlm_max_log_lines / urlm_max_log_age settings (0 disables
# either limit).
sub trim_url_log {
    my $quiet = shift || 0;
    my $max_lines = settings_get_int("urlm_max_log_lines") || 0;
    my $max_age   = settings_get_int("urlm_max_log_age")   || 0;

    return unless $max_lines || $max_age; # nothing to do!

    my @keep_urls;

    if($max_age) {
        my $cutoff = time() - $max_age;
        @keep_urls = grep { $_->{stamp} >= $cutoff } @urls;
    } else {
        @keep_urls = @urls;
    }

    # keep only the most recent $max_lines entries
    if($max_lines && (@keep_urls > $max_lines)) {
        @keep_urls = @keep_urls[-$max_lines .. -1];
    }

    my $trimmed = @urls - @keep_urls;

    if($trimmed) {
        clear_url_log();
        write_url_file(@keep_urls);
        # NOTE(review): $quiet is accepted but ignored; the original
        # had "unless $quiet" commented out, so that is left as-is.
        print "Trimmed $trimmed URLs from log";
    }
}

# clear_url_log: clear the URL log, both the in-memory @urls and the
# on-disk file.
sub clear_url_log {
    my $file = get_url_log_file();
    unlink $file; # or print "Can't delete $file: $!";
    @urls = ();
}

# get_url_log_file: value of the urlm_log_file setting, with tilde
# expansion for the user's homedir.
sub get_url_log_file {
    my $file = settings_get_str('urlm_log_file');
    $file =~ s/^~/$ENV{HOME}/;
    return $file;
}

# signal handler for "message public":
# extract and log any URLs in the input text.
sub url_public {
    my ($server, $text, $nick, $hostmask, $channel) = @_;
    my @got = find_urls($text);
    url_log(0, $nick, $channel, $_) for @got;
}

# signal handler for "message own_public" and "message own_private":
# extract and log any URLs in our own messages (if urlm_log_own).
sub url_own {
    my ($server, $text, $channel) = @_;
    return unless settings_get_bool('urlm_log_own');
    my @got = find_urls($text);
    url_log(0, $server->{nick}, $channel, $_) for @got;
}

# signal handler for "message private", "message irc notice",
# "message irc op_public", "message irc action":
# extract and log any URLs in the input text; logged with our own nick
# as the "channel", since these are targeted at us.
sub url_private {
    my ($server, $text, $nick, $hostmask) = @_;
    my @got = find_urls($text);
    url_log(0, $nick, $server->{nick}, $_) for @got;
}

# signal handler for "message topic"
# extract and log any URLs in the input text.
# signal handler for "message topic":
# log any URLs found in topic changes made by other people.
sub url_topic {
    my ($server, $channel, $text, $nick, $hostmask) = @_;
    return if $nick eq $server->{nick}; # don't log own topic changes
    url_log(0, $nick, $channel, $_) for find_urls($text);
}

# signal handler for "channel joined":
# log any URLs found in the channel's existing topic.
sub url_join_topic {
    my ($chan) = @_;
    return unless $chan->{topic};
    # don't log own topic changes
    return if $chan->{topic_by} eq $chan->{server}->{nick};
    url_log(0, $chan->{topic_by}, $chan->{name}, $_)
        for find_urls($chan->{topic});
}

# signal handler for "message part":
# log any URLs found in other people's part messages (if
# urlm_log_partquit is enabled).
sub url_part {
    return unless settings_get_bool('urlm_log_partquit');
    my ($server, $channel, $nick, $hostmask, $text) = @_;
    return if $nick eq $server->{nick}; # don't log own parts (redundant?)
    url_log(0, $nick, $channel, $_) for find_urls($text);
}

# signal handler for "message quit":
# log any URLs found in other people's quit messages (if
# urlm_log_partquit is enabled); logged with our own nick as the
# "channel", like private messages, since quits have no channel.
sub url_quit {
    return unless settings_get_bool('urlm_log_partquit');
    my ($server, $nick, $hostmask, $text) = @_;
    return if $nick eq $server->{nick}; # don't log own quits (redundant?)
    url_log(0, $nick, $server->{nick}, $_) for find_urls($text);
}
# signal handler for "dcc chat message":
# extract and log any URLs in the input text.
# TODO: test this!
sub url_dccmsg {
    my ($dcc, $text) = @_;
    my @got = find_urls($text);
    url_log(0, $dcc->{nick}, $dcc->{server}->{nick}, $_) for @got;
}

# print_url_line:
# print one formatted (colorful) line of /ul output. $maxnick and
# $maxchan are column widths precomputed by the caller.
sub print_url_line {
    my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_;

    echo sprintf("%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s %s%s",
        $bold_on, $num, $bold_off,
        $green, $stamp,
        $red, $nick,
        $yellow, $channel,
        $purple, $url,
        $color_off);
}

# url_list_cmd: bound to /ul and /urlm_list.
# Lists (and with -delete, removes) captured URLs. The argument can be
# empty (last 10), "all", a number, a range "a-b", "#channel",
# "/regex/", or a nick prefix; a leading "!" inverts the match.
sub url_list_cmd {
    my $do_delete = 0;
    my $listed = 0;
    my @keep_urls;

    my $arg = shift || "";
    $arg = lc $arg;
    $arg = trim($arg);

    if($arg =~ /^-delete/) {
        $arg =~ s/^-delete\s*//;
        $do_delete++;

        if($arg eq '') {
            print "/ul -delete requires a parameter! (/ul help for details)";
            return;
        }
    }

    if(not @urls) {
        print "No URLs in list!";
        return;
    }

    my ($start, $end, $nick, $regex, $channel);
    $arg = "-" if $arg eq 'all';    # "all" == the full range

    my $invert = 0;
    if($arg =~ /^!(.*)/) {
        $arg = $1;
        $invert = 1;
    }

    if($arg eq "") {
        # default: the 10 most recent URLs
        $start = @urls - 10;
        $start = 0 if $start < 0;
        $end = $#urls;
    } elsif($arg =~ /^[&#](.*)/) {
        $channel = $1;
    } elsif($arg =~ m{^/(.*?)/?$}) {
        # Fix: the old pattern (/^\/(.*)\/?/) was greedy, so a trailing
        # slash was never stripped and silently became part of the regex.
        $regex = $1;
    } elsif($arg =~ /^\d+$/) {
        $start = $end = $arg;
    } elsif($arg =~ /^[-\d]+$/) {
        ($start, $end) = split /-/, $arg, 2;
        $start = 0 if $start eq "";
        $end = $#urls if $end eq "";
    } else {
        $nick = $arg;
    }

    my $count = 0;
    my @to_list;
    for(@urls) {
        my $list = 0;
        if($nick && (lc($_->{nick}) =~ /^\Q$nick\E/)) { # nick prefix match
            $list++;
        } elsif($regex) {
            $list++ if $_->{url} =~ /$regex/i;
        } elsif($channel) {
            $list++ if $_->{channel} =~ /^[#&]?$channel$/i;
        } elsif(defined($start) && defined($end)) {
            $list++ if $count >= $start && $count <= $end;
        }

        $list = !$list if $invert;

        if($list) {
            $listed++;
            push @to_list, [ $count, $_ ];
        } elsif($do_delete) {
            push @keep_urls, $_;
        }

        $count++;
    }

    if(@to_list) { # print the list if anything's supposed to be in it
        # column widths; minimums fit the header labels "Nick"/"Channel"
        my $maxnick = 4;
        my $maxchan = 7;

        for(@to_list) {
            my ($num, $u) = @$_;
            my $len = length($u->{nick});
            $maxnick = $len if $len > $maxnick;
            $len = length($u->{channel});
            $maxchan = $len if $len > $maxchan;
        }

        print_url_line($maxnick, $maxchan,
                "#", "When", "Nick", "Channel", "URL");

        for(@to_list) {
            my ($num, $u) = @$_;

            my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp}));
            print_url_line($maxnick, $maxchan,
                    $num, $stamp, $u->{nick}, $u->{channel}, $u->{url});
        }
    }

    if($do_delete) { # process -delete flag
        my $deleted = scalar @urls - scalar @keep_urls;
        if(not $deleted) {
            print "No URLs deleted";
            return;
        }

        # keep a backup so /urlm_undo_delete can restore it
        my $file = get_url_log_file();
        rename($file, "$file~") or print "Warning: can't backup log file: $!";

        clear_url_log();
        write_url_file(@keep_urls);

        print $red . "These " . $deleted . " URLs have been deleted!" .
            $color_off . " (" . @urls . " remain)";

        $captured_since_delete = 0;
    } else { # no -delete flag, show summary
        print "Listed $listed of $count URLs";
    }
}

# urlm_say: placeholder, intentionally empty (not bound anywhere in
# the visible code).
sub urlm_say {
}

# urlm_undo_delete: restore the log from the backup made by
# /ul -delete. Refuses (without -yes) if any URLs have been captured
# since the delete, because those would be thrown away.
sub urlm_undo_delete {
    my $arg = shift;
    $arg = "" unless defined $arg;   # avoid warning when called bare
    my $yes = ($arg eq '-yes');

    if($captured_since_delete && (not $yes)) {
        print "urlm_undo_delete: doing this will throw away some URLs that " .
            "were captured since the last delete. Re-run with '-yes' to do it " .
            "anyway.";
        return;
    }

    my $file = get_url_log_file();
    my @oldurls = @urls;
    clear_url_log();

    if(not(rename("$file~", $file))) {
        print "Can't restore log file: $!";
        # Fix: the original called write_url_file() with NO arguments
        # here, so the old URLs were never written back and were lost.
        # write_url_file() re-logs through url_log(), which also
        # repopulates @urls, so @urls must not be pre-assigned here
        # (that would double every entry).
        write_url_file(@oldurls);
        return;
    }

    @urls = read_url_file();
    print "Restored " . @urls . " URLs from backup";
    $captured_since_delete = 0;
}

# get_url_from_number:
# Returns a URL hash from @urls, given the index into the array.
# Returns undef if index is invalid or non-existent.
# Accepts negative numbers as meaning "nth from the end of the list".
# If index is the empty string, returns the most recent URL.
# If index is non-empty and non-numeric, treat it as a nick and return
# that nick's most recent URL.
sub get_url_from_number {
    my $arg = shift;
    if($arg eq '') {
        $arg = $#urls;
    } elsif($arg =~ /-\d+$/) {
        $arg = $#urls + $arg;
    }

    if($arg !~ /^\d+$/) {
        # non-numeric: search newest-to-oldest for a matching nick
        $arg = trim($arg);
        for(my $i = $#urls; $i >= 0; $i--) {
            my $url = $urls[$i];
            return $url if lc($arg) eq lc($url->{nick});
        }
        print("Can't find any URLs from nick '$arg'");
        return;
    }

    if($arg > $#urls) {
        print("No such URL number '$arg'");
        return;
    }

    return $urls[$arg];
}

# url_open_cmd: open a URL with the given browser tag. If $browser is
# '' (or a Server object from a command binding), auto-guess: wget
# extensions first, then the urlm_browser_overrides patterns, then the
# default browser.
sub url_open_cmd {
    my ($urlnum, $browser) = @_;
    $browser = '' if ref $browser; # means it's a Server object

    my $url = get_url_from_number($urlnum);
    return if not $url;
    my $link = $url->{url};

    if(not $browser) { # guess browser...
        # check for wget first: hand off if the URL's filename ends in
        # one of the configured download extensions
        if($link =~ m{/[^/]+\.(\w+)$}) {
            my $ext = lc $1;
            for(split " ", settings_get_str('urlm_wget_extensions')) {
                if($ext eq lc($_)) {
                    url_open_wget_cmd($urlnum);
                    return;
                }
            }
        }

        # not a wget extension, check browser override patterns
        $browser = settings_get_str('urlm_default_browser');
        my @overrides = read_browser_overrides();

OVERRIDE:
        for(@overrides) {
            my ($tag, $pats) = @$_;
            for(@$pats) {
                if($link =~ /$_/i) {
                    $browser = $tag;
                    last OVERRIDE;
                }
            }
        }
    }

    my @browsers = read_browser_list();
    my ($tag, $name, $format);
    for(@browsers) {
        if($_->{tag} eq $browser) {
            ($tag, $name, $format) = ($_->{tag}, $_->{name}, $_->{command});
            last;
        }
    }

    # Fix: the original fell through with undef $name/$format when the
    # tag wasn't in the browser list, then crashed in sprintf() below.
    if(not defined $format) {
        print "No browser defined for tag '$browser' (see /urlm_list_browsers)";
        return;
    }

    echo("$name - " . $link . " (" . $url->{nick} . ")");

    $link =~ s/'/%27/g; # be nice to the shell, escape single quotes
    $link =~ s/\(/%28/g; # be nice to firefox, escape parens
    $link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails*
                         # if the url contains any () chars!

    # the browser command formats single-quote the %s (see the
    # urlm_browsers setting), and single quotes were escaped above,
    # so this is shell-safe
    my $cmd = sprintf($format, $link) . " &>/dev/null &";
    system($cmd);
}

# Open with wget in an irssi window. Bound to /urlm_wget and /wget.
# Argument: optional extra wget args, optionally followed by a URL
# number (defaults to the most recent URL).
# TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?)
our $wgetcount = 1;   # counter for generating unique window names
sub url_open_wget_cmd {
    my $arg = shift;
    $arg = "" unless defined $arg;  # avoid split() warning when called bare

    my @args = split " ", $arg;

    # a trailing number is the URL number; everything else is wget args
    my $urlnum = "";
    if(@args && ($args[-1] =~ /^\d+$/)) {
        $urlnum = pop @args;
    }

    my $url = get_url_from_number($urlnum);
    return if not $url;

    my $more_args = join(" ", @args);
    $more_args .= " " if $more_args;

    my $dir = settings_get_str('urlm_wget_dl_dir');
    $dir =~ s/^~/$ENV{HOME}/;
    $dir = "." if not $dir;

    my $size = settings_get_int('urlm_wget_split_size');

    if($size > 0) {
        command("/window new split");
        command("/window size $size");
    } else { # size == 0, means "do not split"
        command("/window new hidden");
    }

    # find an unused window name...
    my $name = "urlm_wget_" . ($wgetcount++);
    while(window_find_item($name)) {
        $name = "urlm_wget_" . ($wgetcount++);
    }

    command("/window name $name");

    if(settings_get_bool('urlm_wget_autoclose')) {
        my $delay = settings_get_int('urlm_wget_autoclose_delay');
        if($delay) {
            print "This window will close $delay seconds after download is done";
        } else {
            print "This window will close when download is done";
        }
    } else {
        print "Use " . $yellow . "/window close $name" . $color_off .
            " to close this window";
    }

    my $args = trim(settings_get_str('urlm_wget_extra_args'));
    $args .= " " if $args;

    my $wget_bin = settings_get_str('urlm_wget_path');
    $wget_bin =~ s/^~/$ENV{HOME}/;
    $wget_bin = "wget" if not $wget_bin;

    command(
        "/exec -nosh " .
        "-name $name " .
        "$wget_bin " .
        "-P $dir " .
        $args .
        $more_args .
        $url->{url});

    if($size) { # if using a hidden window, leave it focused
        command("/window last");
    }
}

# Close a window by name. This seems like a kludge...
sub close_window {
    return unless window_find_item($_[0]); # don't close if already closed!
    command("/window goto " . $_[0]);
    command("/window close");
}

# signal handler for "exec remove": autocloses our wget windows when
# the wget processes exit, if requested.
sub sig_exec_remove {
    my ($proc, $status) = @_;
    return unless settings_get_bool('urlm_wget_autoclose');

    # target_win->name will be "" if window already closed!
    my $name = $proc->{target_win}->{name} || "";
    return unless $name =~ /^urlm_wget_\d+$/;

    return unless window_find_item($name); # don't close if already closed!

    my $delay = settings_get_int('urlm_wget_autoclose_delay');
    if($delay > 0) {
        timeout_add_once($delay * 1000, "close_window", $name);
    } else {
        close_window($name);
    }
}

# kill a process by name (send SIGTERM)
sub kill_proc {
    command("/exec -15 " . $_[0]);
}

# kill wget processes on manual window close!
# catch signal "window destroyed" and figure out a way to avoid
# adding a timeout to close the window (since it's in the middle
# of closing now...)
sub sig_window_destroyed {
    my $name = $_[0]->{name} || "";
    return unless $name =~ /^urlm_wget_\d+$/;

    # use a timeout to kill the process, instead of killing it directly.
    # why? to avoid possible race condition where sig_exec_remove()
    # tries to close the window that triggered this call to
    # sig_window_destroyed().
    timeout_add_once(1000, "kill_proc", $name);
}

# find_urls: extract all URLs from the input text, returns a list
# (which may be empty).
# Be VERY permissive about what we consider a URL.
# 20100614 bkw: be a little less permissive
# 20140530 bkw: stop catching dupe http://whatever and https://whatever
# Fixes relative to the previous version:
#   - work on a copy of the argument: s/// on $_[0] edited the
#     *caller's* string through the @_ alias (e.g. url_join_topic
#     passes $chan->{topic} directly, whose URLs would be deleted);
#   - capture the match instead of using $&, which imposed a global
#     regex performance penalty on older perls.
sub find_urls {
    my ($text) = @_;            # copy; do NOT modify the caller's string
    $text = "" unless defined $text;

    my @got;
    # pull out explicit-scheme URLs first, removing them from the copy
    # so the bare www./ftp. passes below can't match them again
    while($text =~ s{((?:https?|ftp)://\S+)}{}) {
        push @got, $1;
    }
    push @got, "http://$_" for $text =~ /(?:www\d*\.[^.]+\.\S+)/g;
    push @got, "ftp://$_" for $text =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
    s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
    return @got;
}

# Annoying bots have a tendency to do this:
# <actual_person> check this out: http://www.blahblah.blah/path/to/stuff.html
# <annoying_bot> Title: Stuff (at www.blahblah.blah)
# To me, this is about the most useless function a bot can serve, plus it
# breaks the /uo command.
+# just_domain() returns true if $1 is just the domain part of $2 +sub just_domain { + my ($new, $old) = @_; + s/^(ht|f)tps?:\/\/// for ($new, $old); + $old =~ s/\/.*//; + return 0 if $new =~ /\/./; + return $new eq $old; +} + +# url_log: appends URL to the URL log file and to the @urls array. +# Locks the file before writing, so should be safe even with multiple +# instances of irssi. +# $relog should be false if capturing a new URL from channel/msg text, +# or true if re-logging an old URL (e.g. /ul -delete does this) +sub url_log { + my($relog, $nick, $channel, $url, $stamp) = @_; + $nick =~ s/!.*//; + + $stamp = time() unless $stamp; + + return if lc $url eq lc $lasturl; # a tiny bit of protection from spam/flood + return if just_domain($url, $lasturl); + + $lasturl = $url; + + my $file = get_url_log_file(); + open(URLLOG, ">>$file") or return; + + flock(URLLOG, Fcntl::LOCK_EX); + seek(URLLOG, 0, 2); + + print URLLOG time . " $nick $channel $url\n"; + close(URLLOG); + + push @urls, { + stamp => $stamp, + nick => $nick, + channel => $channel, + url => $url, + }; + + if(not $relog) { + if(not settings_get_bool('urlm_quiet_capture')) { + my $on = ""; + if($channel =~ /^#/) { + $on = " on " . $green . $channel . $color_off; + } + echo "Captured URL #" . $#urls . " " . + $purple . $url . $color_off . + " from " . $yellow . $nick . $color_off . $on; + } +# trim_url_log(); + $captured_since_delete++; + } +} + +# urlm_help generates /help topics for the browser commands. +# The help for all the other commands is stored in text files in +# ~/.irssi/help +sub urlm_help { + my $arg = shift; + $arg = lc trim($arg); + my %bhelp; + our %urlm_help; + + for(read_browser_list()) { + my $text = uc($_->{tag}) . " [<url#>]\n\n" . + "Open a URL with the external browser '" . + $_->{name} . "', using the command:\n" . + $_->{command} . "\n\n" . + "If [<url#>] is omitted, the most recent URL will be opened.\n"; + $bhelp{$_->{tag}} = $text; + $bhelp{"urlm_open_" . 
$_->{tag}} = $text; + } + + my $help = $bhelp{$arg} || return; + signal_stop(); + print $help; +} + +sub write_browser_overrides { + my @list = @{$_[0]}; + my @strings; + + for(@list) { + my ($tag, $pats) = @$_; + push @strings, join(":", $tag, @$pats); + } + + my $setting = join("::", @strings); + settings_set_str('urlm_browser_overrides', $setting); +} + +sub read_browser_overrides { + my @result; + + my $list = settings_get_str('urlm_browser_overrides'); + my @entries = split /::/, $list; + + for(@entries) { + my @items = split /:/; + my $tag = shift @items; + push @result, [ $tag, \@items ]; + } + + return @result; +} + +sub write_browser_list { + my @list = @{$_[0]}; + my @strings; + + for(@list) { + push @strings, join(":", $_->{tag}, $_->{name}, $_->{command}); + } + + my $setting = join("::", @strings); +#print "/set urlm_browsers $setting"; + settings_set_str('urlm_browsers', $setting); + signal_emit("setup changed"); +} + +sub read_browser_list { + my @result; + + my $list = settings_get_str('urlm_browsers'); + my @entries = split /::/, $list; + + for(@entries) { + my @items = split /:/; + my $hash = { + tag => $items[0], + name => $items[1], + command => $items[2], + }; + + push @result, $hash; + } + + return @result; +} + +# commands: +# urlm_add_browser <browser>:<fullname>:<cmd> +sub urlm_add_browser { + my $arg = shift || ""; + $arg =~ trim($arg); + + if($arg !~ /^[^:]+:[^:]+:[^:]+$/) { + print "Usage: /urlm_add_browser tag:name:command"; + return; + } + + my ($tag, $name, $cmd) = split /:/, $arg; + + $tag = trim($tag); + $tag = lc $tag; + $name = trim($name); + + if($tag =~ /\W/) { + print "/urlm_add_browser: tag must consist of only " . 
+ "letters, numbers, or underscores (_), not '$tag'"; + return; + } + + if($tag eq 'wget') { + print "/urlm_add_browser: 'wget' is reserved; use a different tag"; + return; + } + + if($cmd !~ /'[^']*\%s[^']*'/) { + print "/urlm_add_browser: command must contain '\%s' (single-quoted)"; + return; + } + + my @browsers = read_browser_list(); + my $found = 0; + for(@browsers) { + if(lc($_->{tag}) eq $tag) { + print "Replaced old definition of $tag"; + $_->{name} = $name; + $_->{command} = $cmd; + $found++; + last; + } + } + + if(not $found) { + push @browsers, { tag => $tag, name => $name, command => $cmd }; + print "push \@browsers, { tag => $tag, name => $name, command => $cmd }"; + print "Added browser definition $tag"; + } + + write_browser_list(\@browsers); +} + +# urlm_del_browser <browser> +sub urlm_del_browser { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + return unless $arg; + + # urlm_del_override() already prints "Browser not defined" if it + # wasn't defined, so no need to have urlm_del_browser() print it again. + urlm_del_override("$arg all"); + + my @browsers = read_browser_list(); + my @keep_browsers; + my $found = 0; + + for(@browsers) { + if(lc($_->{tag}) eq $arg) { + print "Deleted definition of $arg"; + $found++; + } else { + push @keep_browsers, $_; + } + } + + write_browser_list(\@keep_browsers) if $found; +} + +# urlm_list_browsers +sub urlm_list_browsers { + my @browsers = read_browser_list(); + for(@browsers) { + print( + (settings_get_str('urlm_default_browser') eq $_->{tag} ? + "[*]" : + " ") . + "Tag: $bold_on" . $_->{tag} . "$bold_off, " . + "Name: $bold_on" . $_->{name} . "$bold_off, " . + "Command: $bold_on" . $_->{command} . 
"$bold_off"); + } +} + +# urlm_add_wget_ext <ext> +sub urlm_add_wget_ext { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + + if(not $arg) { + print "Usage: /urlm_add_wget_ext <extension>"; + return; + } + + my @list = split " ", settings_get_str('urlm_wget_extensions'); + if(grep { $_ eq $arg } @list) { + print "$arg is already in the wget extensions list"; + return; + } + + push @list, $arg; + + settings_set_str('urlm_wget_extensions', join(" ", @list)); + command("/set urlm_wget_extensions"); +} + +# urlm_del_wget_ext <ext> +sub urlm_del_wget_ext { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + + if(not $arg) { + print "Usage: /urlm_del_wget_ext <extension>"; + return; + } + + my @list = split " ", settings_get_str('urlm_wget_extensions'); + if(!grep { $_ eq $arg } @list) { + print "$arg is not in the wget extensions list"; + return; + } + + @list = grep { $_ ne $arg } @list; + + settings_set_str('urlm_wget_extensions', join(" ", @list)); + command("/set urlm_wget_extensions"); +} + +# urlm_list_overrides [<browser>] +sub urlm_list_overrides { + my $arg = shift || ""; + $arg = trim($arg); + + my $found = 0; + my @overrides = read_browser_overrides(); + for(@overrides) { + my ($browser, $pats) = @$_; + if($arg eq $browser || not $arg) { + $found++; + my $count = 1; + for(@$pats) { + print $browser . "[$count]: " . $_; + $count++; + } + } + } + + if(not $found) { + if($arg) { + print "No overrides for browser '$arg'"; + } else { + print "No browser overrides"; + } + } +} + +# urlm_add_override <browser> <pattern> +sub urlm_add_override { + my $arg = shift || ""; + $arg = trim($arg); # do not lc($arg), the command may need caps! 
+ + my ($browser, $pattern) = split " ", $arg; + $browser = lc $browser; + if(not ($browser and $pattern)) { + print "Usage: /urlm_add_override <browser> <pattern>"; + return; + } + + if(!grep { $_->{tag} eq $browser } read_browser_list()) { + print "Browser $browser not defined in browser list"; + return; + } + + eval "qr{$pattern}"; + if($@) { + print "Pattern $pattern is not a valid Perl regex: $@"; + return; + } + + my @overrides = read_browser_overrides(); + my $found = 0; + for(@overrides) { + my ($tag, $pats) = @$_; + next unless $tag eq $browser; + + push @$pats, $pattern; + $found++; + } + + if(not $found) { + push @overrides, [ $browser, [ $pattern ] ]; + } + + print "Added override for $browser: $pattern"; + write_browser_overrides(\@overrides); +} + +# urlm_del_override <browser> <number>|<all> +sub urlm_del_override { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + + my ($browser, $number) = split " ", $arg; + + if(not($browser and $number)) { + print "Usage: /urlm_del_override <browser> <number>|all" + } + + if(!grep { $_->{tag} eq $browser } read_browser_list()) { + print "Browser $browser not defined in browser list"; + return; + } + + if($number ne 'all' && $number !~ /^[1-9]\d*$/) { + print "Bad override '$number': must be a number >= 1, or 'all'"; + return; + } + + my @overrides = read_browser_overrides(); + my @keep_overrides = (); + my $found = 0; + for(@overrides) { + my ($tag, $pats) = @$_; + + if($tag ne $browser) { + push @keep_overrides, $_; + next; + } + + $found += @$pats, next if $number eq 'all'; + + if($number > @$pats) { + print "Value $number out of range"; + next; + } + + $found++; + undef $pats->[$number - 1]; + @$pats = grep { defined $_ } @$pats; + + push @keep_overrides, $_ if @$pats; + } + + if($found) { + print "Deleted $found overrides"; + write_browser_overrides(\@keep_overrides); + } else { + print "No matching overrides"; + } +} + +sub init_colors { + if(settings_get_bool('urlm_use_bold')) { + $bold_on = 
"\002"; + $bold_off = "\002"; + } else { + $bold_on = ""; + $bold_off = ""; + } + + if(settings_get_bool('urlm_use_color')) { + $green = "\0033"; + $red = "\0034"; + $yellow = "\0037"; + $purple = "\0036"; + $color_off = "\003"; + } else { + $green = ""; + $red = ""; + $yellow = ""; + $purple = ""; + $color_off = ""; + } +} + +# init_browsers(): dynamic bindings. Each browser tag gets bound to +# /urlm_open_$tag, and (if short commands enabled) to /$tag. +our @bound_refs; +sub init_browsers { + # for this to work, the code ref can *NOT* be stored in a "my" var + # I think this is a bug in irssi, or possibly perl, but maybe I'm + # just being dumb... + for(@bound_refs) { + command_unbind($_->[0], $_->[1]); + } + @bound_refs = (); + + my @browsers = read_browser_list(); + for(@browsers) { + my $code = 'sub { url_open_cmd($_[0], "'. ($_->{tag}) . '"); };'; + my $cmd = 'urlm_open_' . $_->{tag}; + push @bound_refs, [ $cmd, eval $code ]; + + # again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge + command_bind($cmd, $bound_refs[$#bound_refs]->[1]); + + if(settings_get_bool('urlm_short_cmds')) { + my $shortcmd = $_->{tag}; + push @bound_refs, [ $shortcmd, $bound_refs[$#bound_refs]->[1] ]; + command_bind($shortcmd, $bound_refs[$#bound_refs]->[1]); + } + } + + command_unbind("ul", "url_list_cmd"); + command_unbind("uo", "url_open_cmd"); + command_unbind("wget", "url_open_wget_cmd"); + + if(settings_get_bool('urlm_short_cmds')) { + command_bind("ul", "url_list_cmd"); + command_bind("uo", "url_open_cmd"); + command_bind("wget", "url_open_wget_cmd"); + } +} + +sub init_settings { # call only once, at script load! +# Where shall we save the URL log? + settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog"); + +# Where is the wget binary? Absolute path, or "wget" (searches PATH) + settings_add_str('urlmanager', 'urlm_wget_path', "wget"); + +# Where should wget save files? 
+ settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~"); + +# Extra arguments to pass to wget... + settings_add_str('urlmanager', 'urlm_wget_extra_args', ""); + +# Do we log URLs from /part and /quit messages? Disabled by default +# because so many people always /quit with the same spammish URL +# e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]" + settings_add_bool('urlmanager', 'urlm_log_partquit', 0); + +# Do we log URLs from our own public/private messages? + settings_add_bool('urlmanager', 'urlm_log_own', 1); + +# Cosmetics: + settings_add_bool('urlmanager', 'urlm_short_cmds', 1); + settings_add_bool('urlmanager', 'urlm_use_bold', 1); + settings_add_bool('urlmanager', 'urlm_use_color', 1); + +# Browser definitions. A double-colon-separated list. Each list item +# is a single-colon separated list of (tag, name, command_format). +# You may add browsers, and they will work as commands +# without changing any other code. +# The browser commands need to be non-blocking, and any stdout/err from +# them will be ignored. +# The %s gets replaced with the actual URL. *ALWAYS* use single-quotes +# (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s! +# Failure to comply is a security hole! + settings_add_str('urlmanager', 'urlm_browsers', + 'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' . + '::' . + 'ie:Internet Explorer:ie6 \'%s\'' . + '::' . + 'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' . + '::' . + 'ut:links+xterm:xterm -e "links \'%s\'"' . + '::' . + 'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i'); + +# Default browser for /uo and /urlm_open commands + settings_add_str('urlmanager', 'urlm_default_browser', 'ff'); + +# /uo and /urlm_open check this list. +# Double-colon-separated list, each item is a single-colon-separated +# list consisting of a browser tag and one or more patterns. 
+# If a URL matches one of these +# patterns, the browser tag will be used as the browser to open the URL +# with, instead of the default. + settings_add_str('urlmanager', 'urlm_browser_overrides', + 'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com'); + +# If /uo or /urlm_open get a URL ending in one of these file extensions, +# it will be downloaded with wget instead of being opened in a browser. + settings_add_str('urlmanager', 'urlm_wget_extensions', + 'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z'); + +# trim log to this many lines. Use with urlm_log_trim_interval and/or +# urlm_log_trim_startup. Set to 0 to disable. + settings_add_int('urlmanager', 'urlm_max_log_lines', 100); + +# trim log to this many seconds. Use with urlm_log_trim_interval and/or +# urlm_log_trim_startup. Set to 0 to disable. + settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7); + +# trim the log on script load. + settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0); + +# auto-trim log this often (seconds). Set to 0 to disable. + settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60); + +# these control the behavior of windows created with /urlm_wget or /wget + settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1); + settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60); + settings_add_int('urlmanager', 'urlm_wget_split_size', 0); + +# say "Captured URL #xxx http://whatever from whoever" every time a URL +# is captured? 
+ settings_add_bool('urlmanager', 'urlm_quiet_capture', 0); + +# TODO: support these: + +# channels/nicks/sites we don't want to log +#settings_add_str('urlmanager', 'urlm_ignore_channels'); +#settings_add_str('urlmanager', 'urlm_ignore_urls'); + +# If true, go through the entire list every time a URL is logged, +# checking for duplicates +#settings_add_bool('urlmanager', 'urlm_ignore_dups'); +} + +our $trim_timeout_tag; +sub init_trim_timeout { + timeout_remove($trim_timeout_tag) if($trim_timeout_tag); + + my $millis = settings_get_int('urlm_log_trim_interval') * 1000; + if($millis > 0) { + $trim_timeout_tag = timeout_add($millis, "trim_url_log", 1); + } +} + +# apply_settings: called on signal "setup changed" (when any /set urlm_* +# changes value). +# Anything that depends on the values of any of the settings should be +# (re)initialized here. +sub apply_settings { + init_colors(); + init_browsers(); + init_trim_timeout(); + @urls = read_url_file(); +} + +sub init_signals { # call only once, at script load! + signal_add_last("message public", "url_public"); + signal_add_last("message private", "url_private"); + signal_add_last("message irc notice", "url_private"); + signal_add_last("message irc op_public", "url_private"); + signal_add_last("message irc action", "url_private"); + signal_add_last("dcc chat message", "url_dccmsg"); + signal_add_last("message topic", "url_topic"); + signal_add_last("channel joined", "url_join_topic"); + signal_add_last("setup changed", "apply_settings"); + signal_add_last("message part", "url_part"); + signal_add_last("message quit", "url_quit"); + signal_add_last("message own_public", "url_own"); + signal_add_last("message own_private", "url_own"); + signal_add_last("exec remove", "sig_exec_remove"); + signal_add_last("window destroyed", "sig_window_destroyed"); +} + +sub init_static_binds { # call only once, at script load! 
+ # These binds are always on: + command_bind("urlm_list", "url_list_cmd"); + command_bind("urlm_open", "url_open_cmd"); + command_bind("urlm_wget", "url_open_wget_cmd"); + command_bind("urlm_add_browser", "urlm_add_browser"); + command_bind("urlm_del_browser", "urlm_del_browser"); + command_bind("urlm_list_browsers", "urlm_list_browsers"); + command_bind("urlm_add_wget_ext", "urlm_add_wget_ext"); + command_bind("urlm_del_wget_ext", "urlm_del_wget_ext"); + command_bind("urlm_list_overrides", "urlm_list_overrides"); + command_bind("urlm_add_override", "urlm_add_override"); + command_bind("urlm_del_override", "urlm_del_override"); + command_bind("urlm_trim_log", "trim_url_log"); + command_bind("urlm_undo_delete", "urlm_undo_delete"); + command_bind("help", "urlm_help"); +} + +# Add per-user help dir to help_path, if not already present. +sub init_help_path { + my $dir = "$ENV{HOME}/.irssi/help"; + my $help_path = settings_get_str('help_path'); + + return if grep { $_ eq $dir } split /:/, $help_path; + + $help_path .= ":$dir"; + settings_set_str('help_path', $help_path); + + signal_emit('setup_changed'); # 20100614 bkw: hmmm... +} + +# bind signals and commands, now that everything's defined. +init_settings(); +init_signals(); +init_static_binds(); +init_colors(); +init_browsers(); +init_help_path(); +@urls = read_url_file(); +trim_url_log() if settings_get_bool('urlm_log_trim_startup'); +init_trim_timeout(); + +# make sure no leftover backup is lurking from a long time ago... +unlink(get_url_log_file() . "~"); + +# Print a helpful message for the user on script load... +print $bold_on . "urlmanager.pl" . $bold_off . " loaded (" . @urls . + " URLs), type '" . $yellow . "/help urlmanager" . + $color_off . "' for help."; + +# rest of file is POD docs +=pod + +=head1 NAME + +urlmanager + +=head1 SYNOPSIS + +Yet another URL logger for irssi. + +=head1 DESCRIPTION + +Captures URLs in channel, privmsg, and DCC chat messages, logs them to a +file. 
Provides an irssi command to list captured URLs and several commands
to do various things with them (open in browser, download, copy to X11
selection buffer).

This documentation only includes installation instructions. For usage
instructions, install the script and run B</help urlmanager> within irssi.

=head1 INSTALLATION

Copy B<urlmanager.pl> to your B<~/.irssi/scripts> directory (create the
directory if it doesn't exist). For auto-loading when irssi starts,
create a symlink in B<~/.irssi/scripts/autorun>:

=over 4

mkdir -p ~/.irssi/scripts/autorun

cp urlmanager.pl ~/.irssi/scripts

cd ~/.irssi/scripts/autorun

ln -s ../urlmanager.pl .

=back

=head1 CONFIGURATION

All configuration is done from within irssi; read B</help urlmanager>.

=cut