aboutsummaryrefslogtreecommitdiff
path: root/scripts/urlmanager.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/urlmanager.pl')
-rw-r--r--scripts/urlmanager.pl1350
1 files changed, 1350 insertions, 0 deletions
diff --git a/scripts/urlmanager.pl b/scripts/urlmanager.pl
new file mode 100644
index 0000000..787e775
--- /dev/null
+++ b/scripts/urlmanager.pl
@@ -0,0 +1,1350 @@
+#!/usr/bin/perl
+
+# urlmanager script for irssi
+
+use warnings;
+use strict;
+
+use Fcntl qw/:flock/;
+use POSIX qw/strftime/;
+
+use Irssi qw/
+ settings_add_str settings_add_bool settings_add_int
+ settings_get_str settings_get_bool settings_get_int
+ settings_set_str settings_set_bool settings_set_int
+ command command_bind command_unbind
+ signal_emit signal_add_last signal_stop
+ timeout_add timeout_add_once timeout_remove
+ window_find_item/;
+
+our $VERSION = "0.1";
+our %IRSSI = (
+ authors => 'Urchlay',
+ contact => 'Urchlay on NewNet',
+ name => 'urlmanager',
+ description =>
+ 'Captures URLs said in channel and private messages ' .
+ 'and saves them to a file, also adds several commands for ' .
+ 'listing and opening captured URLs ' .
+ '(based on urlgrab.pl 0.2 by David Leadbetter)',
+ license => 'GNU GPLv2 or later',
+ url => 'none',
+);
+
+# 20110609 bkw: if irssi was started in a screen session from the console,
+# then detached, then reattached in an X session, DISPLAY will not be set.
+# This will confuse the user, as e.g. firefox will silently fail to run.
+# It won't do any harm to set DISPLAY=:0 if it's not set, and might help...
+{
+ my $disp = $ENV{DISPLAY};
+ $ENV{DISPLAY} = ":0" unless $disp;
+}
+
+# Workaround for a heisenbug, see:
+# http://bugs.irssi.org/index.php?do=details&task_id=242
+{ package Irssi::Nick }
+
+# Color constants.
+# Irssi.pm doesn't include symbolic mIRC-style color names...
+# NOTE: if you print e.g. $green . "12345", the "1" will be interpreted
+# as the 2nd digit of the color! Only good fix is to always put a space:
+# print $green . " 12345" works OK.
+# Declarations only; defined in init_colors()
+our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off);
+
+# @urls is a list of anonymous hashes, each representing one URL.
+# See read_url_file for hash elements.
+our @urls;
+
+# Most-recently-posted URL (the URL only, not a hash). Only used
+# for avoiding dups (see url_log).
+our $lasturl = "";
+
+# Have any URLs been captured since the last /ul -delete? This is to
+# (hopefully) protect the user
+our $captured_since_delete = 0;
+
+# Grr. Printing with print() or Irssi::print(), % chars are interpreted
+# as irssi formats. This causes URLs containing HTML %-escapes to come
+# out in weird colors. Using irssi's /echo is apparently the right way
+# to avoid this... though we get colored -!- in front of every line :(
+sub echo {
+ command("/echo $_") for @_;
+}
+
+# trim leading/trailing spaces
+sub trim {
+ $_[0] =~ s/(?:^\s*|\s$)//g;
+ return $_[0];
+}
+
+# read_url_file: called on script load with the log filename.
+# returns array of URL hashes, which will be empty if the file
+# wasn't present or was empty.
+sub read_url_file {
+ my $file = get_url_log_file();
+ my @got;
+
+ open URLLOG, "<$file" or return;
+ flock(URLLOG, Fcntl::LOCK_EX);
+ seek(URLLOG, 0, 1);
+ while(<URLLOG>) {
+ chomp;
+ my @fields = split " ";
+ push @got, {
+ stamp => $fields[0],
+ nick => $fields[1],
+ channel => $fields[2],
+ url => $fields[3],
+ };
+ }
+ close URLLOG;
+
+ return @got;
+}
+
+# rewrite the URL log file from arguments.
+sub write_url_file {
+ for(@_) {
+ $lasturl = "";
+ url_log(1, $_->{nick}, $_->{channel}, $_->{url}, $_->{stamp});
+ }
+}
+
+# Trim the log according to the appropriate settings.
+sub trim_url_log {
+ my $quiet = shift || 0;
+ my $max_lines = settings_get_int("urlm_max_log_lines") || 0;
+ my $max_age = settings_get_int("urlm_max_log_age") || 0;
+
+ return unless $max_lines || $max_age; # nothing to do!
+
+ my @keep_urls;
+ my $trimmed = 0;
+
+ if($max_age) {
+ for(@urls) {
+ if($_->{stamp} >= (time() - $max_age)) {
+ push @keep_urls, $_;
+ }
+ }
+ } else {
+ @keep_urls = @urls;
+ }
+
+ if($max_lines && (@keep_urls > $max_lines)) {
+ my $last = $#keep_urls;
+ my $first = $last - $max_lines + 1;
+ @keep_urls = @keep_urls[$first..$last];
+ }
+
+ $trimmed = (@urls - @keep_urls);
+
+ if($trimmed) {
+ clear_url_log();
+ write_url_file(@keep_urls);
+ print "Trimmed $trimmed URLs from log"; # unless $quiet;
+ }
+}
+
+# Clear the URL log, both the in-memory @urls and the on-disk file.
+sub clear_url_log {
+ my $file = get_url_log_file();
+ unlink $file; # or print "Can't delete $file: $!";
+ @urls = ();
+}
+
+# get_url_log_file: get value of our logfile setting, with
+# tilde expansion for user's homedir.
+sub get_url_log_file {
+ my $file = settings_get_str('urlm_log_file');
+ $file =~ s/^~/$ENV{HOME}/;
+ return $file;
+}
+
+# signal handler for "message public"
+# extract and log any URLs in the input text.
+sub url_public {
+ my ($server, $text, $nick, $hostmask, $channel) = @_;
+ my @got = find_urls($text);
+ url_log(0, $nick, $channel, $_) for @got;
+}
+
+# signal handler for "message own_public" and "message own_private"
+# extract and log any URLs in the input text.
+sub url_own {
+ my ($server, $text, $channel) = @_;
+ return unless settings_get_bool('urlm_log_own');
+ my @got = find_urls($text);
+ url_log(0, $server->{nick}, $channel, $_) for @got;
+}
+
+# signal handler for "message private", "message irc notice",
+# "message irc op_public", "message irc action"
+# extract and log any URLs in the input text.
+sub url_private {
+ my ($server, $text, $nick, $hostmask) = @_;
+ my @got = find_urls($text);
+ url_log(0, $nick, $server->{nick}, $_) for @got;
+}
+
+# signal handler for "message topic"
+# extract and log any URLs in the input text.
+sub url_topic {
+ my ($server, $channel, $text, $nick, $hostmask) = @_;
+ return if $nick eq $server->{nick}; # don't log own topic changes
+ my @got = find_urls($text);
+ url_log(0, $nick, $channel, $_) for @got;
+}
+
+# signal handler for "channel joined"
+# extract and log any URLs in the channel topic.
+sub url_join_topic {
+ my ($chan) = @_;
+ return unless $chan->{topic};
+ # don't log own topic changes
+ return if $chan->{topic_by} eq $chan->{server}->{nick};
+ my @got = find_urls($chan->{topic});
+ url_log(0, $chan->{topic_by}, $chan->{name}, $_) for @got;
+}
+
+# signal handler for "message part"
+# extract and log any URLs in the input text.
+sub url_part {
+ return unless settings_get_bool('urlm_log_partquit');
+ my ($server, $channel, $nick, $hostmask, $text) = @_;
+ return if $nick eq $server->{nick}; # don't log own parts (redundant?)
+ my @got = find_urls($text);
+ url_log(0, $nick, $channel, $_) for @got;
+}
+
+# signal handler for "message quit"
+# extract and log any URLs in the input text.
+sub url_quit {
+ return unless settings_get_bool('urlm_log_partquit');
+ my ($server, $nick, $hostmask, $text) = @_;
+ return if $nick eq $server->{nick}; # don't log own quits (redundant?)
+ my @got = find_urls($text);
+ url_log(0, $nick, $server->{nick}, $_) for @got;
+}
+
+# signal handler for "dcc chat message"
+# extract and log any URLs in the input text.
+# TODO: test this!
+sub url_dccmsg {
+ my ($dcc, $text) = @_;
+ my @got = find_urls($text);
+ url_log(0, $dcc->{nick}, $dcc->{server}->{nick}, $_) for @got;
+}
+
+# print_url_line:
+# print one formatted (colorful) line of /ul output
+sub print_url_line {
+ my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_;
+
+ echo sprintf("%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s %s%s",
+ $bold_on, $num, $bold_off,
+ $green, $stamp,
+ $red, $nick,
+ $yellow, $channel,
+ $purple, $url,
+ $color_off);
+}
+
+sub url_list_cmd { # bound to /ul (sorry, this sub is a mess)
+ my $do_delete = 0;
+ my $listed = 0;
+ my @keep_urls;
+
+ my $arg = shift || "";
+ $arg = lc $arg;
+
+ $arg = trim($arg);
+
+ if($arg =~ /^-delete/) {
+ $arg =~ s/^-delete\s*//;
+ $do_delete++;
+
+ if($arg eq '') {
+ print "/ul -delete requires a parameter! (/ul help for details)";
+ return;
+ }
+ }
+
+ if(not @urls) {
+ print "No URLs in list!";
+ return;
+ }
+
+ my ($start, $end, $nick, $regex, $channel);
+ $arg = "-" if $arg eq 'all';
+
+ my $invert = 0;
+ if($arg =~ /^!(.*)/) {
+ $arg = $1;
+ $invert = 1;
+ }
+
+ if($arg eq "") {
+ $start = @urls-10;
+ $start = 0 if $start < 0;
+ $end = $#urls;
+ } elsif($arg =~ /^[&#](.*)/) {
+ $channel = $1;
+ } elsif($arg =~ /^\/(.*)\/?/) {
+ $regex = $1;
+ } elsif($arg =~ /^\d+$/) {
+ $start = $end = $arg;
+ } elsif($arg =~ /^[-\d]+$/) {
+ ($start, $end) = split /-/, $arg, 2;
+ $start = 0 if $start eq "";
+ $end = $#urls if $end eq "";
+ } else {
+ $nick = $arg;
+ }
+
+ my $count = 0;
+ my @to_list;
+ for(@urls) {
+ my $list = 0;
+ if($nick && (lc($_->{nick}) =~ ("^" . quotemeta(lc $nick)))) {
+ $list++;
+ } elsif($regex) {
+ $list++ if $_->{url} =~ /$regex/i;
+ } elsif($channel) {
+ $list++ if $_->{channel} =~ /^[#&]?$channel$/i;
+ } elsif(defined($start) && defined($end)) {
+ $list++ if $count >= $start && $count <= $end;
+ }
+
+ $list = !$list if $invert;
+
+ if($list) {
+ $listed++;
+ push @to_list, [ $count, $_ ];
+ } elsif($do_delete) {
+ push @keep_urls, $_;
+ }
+
+ $count++;
+ }
+
+ if(@to_list) { # print the list if anything's supposed to be in it
+ my $maxnick = 4;
+ my $maxchan = 7;
+
+ for(@to_list) {
+ my ($num, $u) = @$_;
+ my $len = length($u->{nick});
+ $maxnick = $len if $len > $maxnick;
+ $len = length($u->{channel});
+ $maxchan = $len if $len > $maxchan;
+ }
+
+ print_url_line($maxnick, $maxchan,
+ "#", "When", "Nick", "Channel", "URL");
+
+ for(@to_list) {
+ my ($num, $u) = @$_;
+
+ my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp}));
+ print_url_line($maxnick, $maxchan,
+ $num, $stamp, $u->{nick}, $u->{channel}, $u->{url});
+ }
+ }
+
+ if($do_delete) { # process -delete flag
+ my $deleted = scalar @urls - scalar @keep_urls;
+ if(not $deleted) {
+ print "No URLs deleted";
+ return;
+ }
+
+ my $file = get_url_log_file();
+ rename($file, "$file~") or print "Warning: can't backup log file: $!";
+
+ clear_url_log();
+ write_url_file(@keep_urls);
+
+ print $red . "These " . $deleted . " URLs have been deleted!" .
+ $color_off . " (" . @urls . " remain)";
+
+ $captured_since_delete = 0;
+ } else { # no -delete flag, show summary
+ print "Listed $listed of $count URLs";
+ }
+}
+
+sub urlm_say {
+}
+
+sub urlm_undo_delete {
+ my $yes = ($_[0] eq '-yes');
+
+ if($captured_since_delete && (not $yes)) {
+ print "urlm_undo_delete: doing this will throw away some URLs that " .
+ "were captured since the last delete. Re-run with '-yes' to do it " .
+ "anyway.";
+ return;
+ }
+
+ my $file = get_url_log_file();
+ my @oldurls = @urls;
+ clear_url_log();
+
+ if(not(rename("$file~", $file))) {
+ print "Can't restore log file: $!";
+ @urls = @oldurls;
+ write_url_file();
+ return;
+ }
+
+ @urls = read_url_file();
+ print "Restored " . @urls . " URLs from backup";
+ $captured_since_delete = 0;
+}
+
+# get_url_from_number:
+# Returns a URL hash from @urls, given the index into the array.
+# Returns undef if index is invalid or non-existent.
+# Accepts negative numbers as meaning "nth from the end of the list"
+# If index is empty string, returns the highest-numbered (most recent) URL.
+# If index is non-empty, non-numeric, then treat as a nick and open last
+# URL by that nick.
+sub get_url_from_number {
+ my $arg = shift;
+ if($arg eq '') {
+ $arg = $#urls;
+ } elsif($arg =~ /-\d+$/) {
+ $arg = $#urls + $arg;
+ }
+
+ if($arg !~ /^\d+$/) {
+# print("Non-numeric URL number '$arg'");
+# return;
+ $arg = trim($arg);
+ for(my $i = $#urls; $i >= 0; $i--) {
+ my $url = $urls[$i];
+ return $url if lc($arg) eq lc($url->{nick});
+ }
+ print("Can't find any URLs from nick '$arg'");
+ return;
+ }
+
+ if($arg > $#urls) {
+ print("No such URL number '$arg'");
+ return;
+ }
+
+ return $urls[$arg];
+}
+
+# url_open_cmd: open a URL with the given browser (auto-guesses which browser
+# to use if $browser is '').
+sub url_open_cmd {
+ my ($urlnum, $browser) = @_;
+ $browser = '' if ref $browser; # means it's a Server object
+
+ my $url = get_url_from_number($urlnum);
+ return if not $url;
+ my $link = $url->{url};
+
+ if(not $browser) { # guess browser...
+ # check for wget first...
+ if($link =~ m{/[^/]+\.(\w+)$}) {
+ my $ext = lc $1;
+ for(split " ", settings_get_str('urlm_wget_extensions')) {
+ if($ext eq lc($_)) {
+ url_open_wget_cmd($urlnum);
+ return;
+ }
+ }
+ }
+
+ # not a wget extension, check browser override patterns
+ $browser = settings_get_str('urlm_default_browser');
+ my @overrides = read_browser_overrides();
+
+OVERRIDE:
+ for(@overrides) {
+ my ($tag, $pats) = @$_;
+ for(@$pats) {
+ if($link =~ /$_/i) {
+ $browser = $tag;
+ last OVERRIDE;
+ }
+ }
+ }
+ }
+
+ my @browsers = read_browser_list();
+ my ($tag, $name, $format);
+ for(@browsers) {
+ if($_->{tag} eq $browser) {
+ ($tag, $name, $format) = ($_->{tag}, $_->{name}, $_->{command});
+ last;
+ }
+ }
+
+ echo("$name - " . $link . " (" . $url->{nick} . ")");
+
+ $link =~ s/'/%27/g; # be nice to the shell, escape single quotes
+ $link =~ s/\(/%28/g; # be nice to firefox, escape parens
+ $link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails*
+ # if the url contains any () chars!
+
+ my $cmd = sprintf($format, $link) . " &>/dev/null &";
+ system($cmd);
+}
+
+# Open with wget in an irssi window
+# TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?)
+our $wgetcount = 1;
+sub url_open_wget_cmd { # bound to /urlm_wget /wget
+ my $arg = shift;
+
+ my @args = split " ", $arg;
+
+ my $urlnum = "";
+ if(@args && ($args[-1] =~ /^\d+$/)) {
+ $urlnum = pop @args;
+ }
+
+ my $url = get_url_from_number($urlnum);
+ return if not $url;
+
+ my $more_args = join(" ", @args);
+ $more_args .= " " if $more_args;
+
+ my $dir = settings_get_str('urlm_wget_dl_dir');
+ $dir =~ s/^~/$ENV{HOME}/;
+ $dir = "." if not $dir;
+
+ my $size = settings_get_int('urlm_wget_split_size');
+
+ if($size > 0) {
+ command("/window new split");
+ command("/window size $size");
+ } else { # size == 0, means "do not split"
+ command("/window new hidden");
+ }
+
+ # find an unused window name...
+ my $name = "urlm_wget_" . ($wgetcount++);
+ while(window_find_item($name)) {
+ $name = "urlm_wget_" . ($wgetcount++);
+ }
+
+ command("/window name $name");
+
+ if(settings_get_bool('urlm_wget_autoclose')) {
+ my $delay = settings_get_int('urlm_wget_autoclose_delay');
+ if($delay) {
+ print "This window will close $delay seconds after download is done";
+ } else {
+ print "This window will close when download is done";
+ }
+ } else {
+ print "Use " . $yellow . "/window close $name" . $color_off .
+ " to close this window";
+ }
+
+ my $args = trim(settings_get_str('urlm_wget_extra_args'));
+ $args .= " " if $args;
+
+ my $wget_bin = settings_get_str('urlm_wget_path');
+ $wget_bin =~ s/^~/$ENV{HOME}/;
+ $wget_bin = "wget" if not $wget_bin;
+
+ command(
+ "/exec -nosh " .
+ "-name $name " .
+ "$wget_bin " .
+ "-P $dir " .
+ $args .
+ $more_args .
+ $url->{url});
+
+ if($size) { # if using a hidden window, leave it focused
+ command("/window last");
+ }
+}
+
+# Close a window by name. This seems like a kludge...
+sub close_window {
+ return unless window_find_item($_[0]); # don't close if already closed!
+ command("/window goto " . $_[0]);
+ command("/window close");
+}
+
+# signal handler for "exec remove", autocloses our wget windows when
+# the wget processes exit, if requested.
+sub sig_exec_remove {
+ my ($proc, $status) = @_;
+ return unless settings_get_bool('urlm_wget_autoclose');
+
+ # target_win->name will be "" if window already closed!
+ my $name = $proc->{target_win}->{name} || "";
+ return unless $name =~ /^urlm_wget_\d+$/;
+
+ return unless window_find_item($name); # don't close if already closed!
+
+ my $delay = settings_get_int('urlm_wget_autoclose_delay');
+ if($delay > 0) {
+ timeout_add_once($delay * 1000, "close_window", $name);
+ } else {
+ close_window($name);
+ }
+}
+
+# kill a process by name (send SIGTERM)
+sub kill_proc {
+ command("/exec -15 " . $_[0]);
+}
+
+# kill wget processes on manual window close!
+# catch signal "window destroyed" and figure out a way to avoid
+# adding a timeout to close the window (since it's in the middle
+# of closing now...)
+sub sig_window_destroyed {
+ my $name = $_[0]->{name} || "";
+ return unless $name =~ /^urlm_wget_\d+$/;
+
+ # use a timeout to kill the process, instead of killing it directly.
+ # why? to avoid possible race condition where sig_exec_remove()
+ # tries to close the window that triggered this call to
+ # sig_window_destroyed().
+ timeout_add_once(1000, "kill_proc", $name);
+}
+
+# find_urls: extract all URLs from the input text, returns a list
+# (which may be empty).
+# Be VERY permissive about what we consider a URL.
+# 20100614 bkw: be a little less permissive
+# 20140530 bkw: stop catching dupe http://whatever and https://whatever
+
+# original sub:
+##sub find_urls {
+## my @got = ($_[0] =~ m{(?:https?|ftp)://\S+}g);
+## push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g;
+## push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
+## s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
+## return @got;
+##}
+
+# new version:
+sub find_urls {
+ my @got;
+ while($_[0] =~ s{(?:https?|ftp)://\S+}{}) {
+ push @got, $&;
+ }
+ push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g;
+ push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
+ s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
+ return @got;
+}
+
+# Annoying bots have a tendency to do this:
+# <actual_person> check this out: http://www.blahblah.blah/path/to/stuff.html
+# <annoying_bot> Title: Stuff (at www.blahblah.blah)
+# To me, this is about the most useless function a bot can serve, plus it
+# breaks the /uo command.
+# just_domain() returns true if $1 is just the domain part of $2
+sub just_domain {
+ my ($new, $old) = @_;
+ s/^(ht|f)tps?:\/\/// for ($new, $old);
+ $old =~ s/\/.*//;
+ return 0 if $new =~ /\/./;
+ return $new eq $old;
+}
+
+# url_log: appends URL to the URL log file and to the @urls array.
+# Locks the file before writing, so should be safe even with multiple
+# instances of irssi.
+# $relog should be false if capturing a new URL from channel/msg text,
+# or true if re-logging an old URL (e.g. /ul -delete does this)
+sub url_log {
+ my($relog, $nick, $channel, $url, $stamp) = @_;
+ $nick =~ s/!.*//;
+
+ $stamp = time() unless $stamp;
+
+ return if lc $url eq lc $lasturl; # a tiny bit of protection from spam/flood
+ return if just_domain($url, $lasturl);
+
+ $lasturl = $url;
+
+ my $file = get_url_log_file();
+ open(URLLOG, ">>$file") or return;
+
+ flock(URLLOG, Fcntl::LOCK_EX);
+ seek(URLLOG, 0, 2);
+
+ print URLLOG time . " $nick $channel $url\n";
+ close(URLLOG);
+
+ push @urls, {
+ stamp => $stamp,
+ nick => $nick,
+ channel => $channel,
+ url => $url,
+ };
+
+ if(not $relog) {
+ if(not settings_get_bool('urlm_quiet_capture')) {
+ my $on = "";
+ if($channel =~ /^#/) {
+ $on = " on " . $green . $channel . $color_off;
+ }
+ echo "Captured URL #" . $#urls . " " .
+ $purple . $url . $color_off .
+ " from " . $yellow . $nick . $color_off . $on;
+ }
+# trim_url_log();
+ $captured_since_delete++;
+ }
+}
+
+# urlm_help generates /help topics for the browser commands.
+# The help for all the other commands is stored in text files in
+# ~/.irssi/help
+sub urlm_help {
+ my $arg = shift;
+ $arg = lc trim($arg);
+ my %bhelp;
+ our %urlm_help;
+
+ for(read_browser_list()) {
+ my $text = uc($_->{tag}) . " [<url#>]\n\n" .
+ "Open a URL with the external browser '" .
+ $_->{name} . "', using the command:\n" .
+ $_->{command} . "\n\n" .
+ "If [<url#>] is omitted, the most recent URL will be opened.\n";
+ $bhelp{$_->{tag}} = $text;
+ $bhelp{"urlm_open_" . $_->{tag}} = $text;
+ }
+
+ my $help = $bhelp{$arg} || return;
+ signal_stop();
+ print $help;
+}
+
+sub write_browser_overrides {
+ my @list = @{$_[0]};
+ my @strings;
+
+ for(@list) {
+ my ($tag, $pats) = @$_;
+ push @strings, join(":", $tag, @$pats);
+ }
+
+ my $setting = join("::", @strings);
+ settings_set_str('urlm_browser_overrides', $setting);
+}
+
+sub read_browser_overrides {
+ my @result;
+
+ my $list = settings_get_str('urlm_browser_overrides');
+ my @entries = split /::/, $list;
+
+ for(@entries) {
+ my @items = split /:/;
+ my $tag = shift @items;
+ push @result, [ $tag, \@items ];
+ }
+
+ return @result;
+}
+
+sub write_browser_list {
+ my @list = @{$_[0]};
+ my @strings;
+
+ for(@list) {
+ push @strings, join(":", $_->{tag}, $_->{name}, $_->{command});
+ }
+
+ my $setting = join("::", @strings);
+#print "/set urlm_browsers $setting";
+ settings_set_str('urlm_browsers', $setting);
+ signal_emit("setup changed");
+}
+
+sub read_browser_list {
+ my @result;
+
+ my $list = settings_get_str('urlm_browsers');
+ my @entries = split /::/, $list;
+
+ for(@entries) {
+ my @items = split /:/;
+ my $hash = {
+ tag => $items[0],
+ name => $items[1],
+ command => $items[2],
+ };
+
+ push @result, $hash;
+ }
+
+ return @result;
+}
+
+# commands:
+# urlm_add_browser <browser>:<fullname>:<cmd>
+sub urlm_add_browser {
+ my $arg = shift || "";
+ $arg =~ trim($arg);
+
+ if($arg !~ /^[^:]+:[^:]+:[^:]+$/) {
+ print "Usage: /urlm_add_browser tag:name:command";
+ return;
+ }
+
+ my ($tag, $name, $cmd) = split /:/, $arg;
+
+ $tag = trim($tag);
+ $tag = lc $tag;
+ $name = trim($name);
+
+ if($tag =~ /\W/) {
+ print "/urlm_add_browser: tag must consist of only " .
+ "letters, numbers, or underscores (_), not '$tag'";
+ return;
+ }
+
+ if($tag eq 'wget') {
+ print "/urlm_add_browser: 'wget' is reserved; use a different tag";
+ return;
+ }
+
+ if($cmd !~ /'[^']*\%s[^']*'/) {
+ print "/urlm_add_browser: command must contain '\%s' (single-quoted)";
+ return;
+ }
+
+ my @browsers = read_browser_list();
+ my $found = 0;
+ for(@browsers) {
+ if(lc($_->{tag}) eq $tag) {
+ print "Replaced old definition of $tag";
+ $_->{name} = $name;
+ $_->{command} = $cmd;
+ $found++;
+ last;
+ }
+ }
+
+ if(not $found) {
+ push @browsers, { tag => $tag, name => $name, command => $cmd };
+ print "push \@browsers, { tag => $tag, name => $name, command => $cmd }";
+ print "Added browser definition $tag";
+ }
+
+ write_browser_list(\@browsers);
+}
+
+# urlm_del_browser <browser>
+sub urlm_del_browser {
+ my $arg = shift || "";
+ $arg = trim($arg);
+ $arg = lc $arg;
+ return unless $arg;
+
+ # urlm_del_override() already prints "Browser not defined" if it
+ # wasn't defined, so no need to have urlm_del_browser() print it again.
+ urlm_del_override("$arg all");
+
+ my @browsers = read_browser_list();
+ my @keep_browsers;
+ my $found = 0;
+
+ for(@browsers) {
+ if(lc($_->{tag}) eq $arg) {
+ print "Deleted definition of $arg";
+ $found++;
+ } else {
+ push @keep_browsers, $_;
+ }
+ }
+
+ write_browser_list(\@keep_browsers) if $found;
+}
+
+# urlm_list_browsers
+sub urlm_list_browsers {
+ my @browsers = read_browser_list();
+ for(@browsers) {
+ print(
+ (settings_get_str('urlm_default_browser') eq $_->{tag} ?
+ "[*]" :
+ " ") .
+ "Tag: $bold_on" . $_->{tag} . "$bold_off, " .
+ "Name: $bold_on" . $_->{name} . "$bold_off, " .
+ "Command: $bold_on" . $_->{command} . "$bold_off");
+ }
+}
+
+# urlm_add_wget_ext <ext>
+sub urlm_add_wget_ext {
+ my $arg = shift || "";
+ $arg = trim($arg);
+ $arg = lc $arg;
+
+ if(not $arg) {
+ print "Usage: /urlm_add_wget_ext <extension>";
+ return;
+ }
+
+ my @list = split " ", settings_get_str('urlm_wget_extensions');
+ if(grep { $_ eq $arg } @list) {
+ print "$arg is already in the wget extensions list";
+ return;
+ }
+
+ push @list, $arg;
+
+ settings_set_str('urlm_wget_extensions', join(" ", @list));
+ command("/set urlm_wget_extensions");
+}
+
+# urlm_del_wget_ext <ext>
+sub urlm_del_wget_ext {
+ my $arg = shift || "";
+ $arg = trim($arg);
+ $arg = lc $arg;
+
+ if(not $arg) {
+ print "Usage: /urlm_del_wget_ext <extension>";
+ return;
+ }
+
+ my @list = split " ", settings_get_str('urlm_wget_extensions');
+ if(!grep { $_ eq $arg } @list) {
+ print "$arg is not in the wget extensions list";
+ return;
+ }
+
+ @list = grep { $_ ne $arg } @list;
+
+ settings_set_str('urlm_wget_extensions', join(" ", @list));
+ command("/set urlm_wget_extensions");
+}
+
+# urlm_list_overrides [<browser>]
+sub urlm_list_overrides {
+ my $arg = shift || "";
+ $arg = trim($arg);
+
+ my $found = 0;
+ my @overrides = read_browser_overrides();
+ for(@overrides) {
+ my ($browser, $pats) = @$_;
+ if($arg eq $browser || not $arg) {
+ $found++;
+ my $count = 1;
+ for(@$pats) {
+ print $browser . "[$count]: " . $_;
+ $count++;
+ }
+ }
+ }
+
+ if(not $found) {
+ if($arg) {
+ print "No overrides for browser '$arg'";
+ } else {
+ print "No browser overrides";
+ }
+ }
+}
+
+# urlm_add_override <browser> <pattern>
+sub urlm_add_override {
+ my $arg = shift || "";
+ $arg = trim($arg); # do not lc($arg), the command may need caps!
+
+ my ($browser, $pattern) = split " ", $arg;
+ $browser = lc $browser;
+ if(not ($browser and $pattern)) {
+ print "Usage: /urlm_add_override <browser> <pattern>";
+ return;
+ }
+
+ if(!grep { $_->{tag} eq $browser } read_browser_list()) {
+ print "Browser $browser not defined in browser list";
+ return;
+ }
+
+ eval "qr{$pattern}";
+ if($@) {
+ print "Pattern $pattern is not a valid Perl regex: $@";
+ return;
+ }
+
+ my @overrides = read_browser_overrides();
+ my $found = 0;
+ for(@overrides) {
+ my ($tag, $pats) = @$_;
+ next unless $tag eq $browser;
+
+ push @$pats, $pattern;
+ $found++;
+ }
+
+ if(not $found) {
+ push @overrides, [ $browser, [ $pattern ] ];
+ }
+
+ print "Added override for $browser: $pattern";
+ write_browser_overrides(\@overrides);
+}
+
+# urlm_del_override <browser> <number>|<all>
+sub urlm_del_override {
+ my $arg = shift || "";
+ $arg = trim($arg);
+ $arg = lc $arg;
+
+ my ($browser, $number) = split " ", $arg;
+
+ if(not($browser and $number)) {
+ print "Usage: /urlm_del_override <browser> <number>|all"
+ }
+
+ if(!grep { $_->{tag} eq $browser } read_browser_list()) {
+ print "Browser $browser not defined in browser list";
+ return;
+ }
+
+ if($number ne 'all' && $number !~ /^[1-9]\d*$/) {
+ print "Bad override '$number': must be a number >= 1, or 'all'";
+ return;
+ }
+
+ my @overrides = read_browser_overrides();
+ my @keep_overrides = ();
+ my $found = 0;
+ for(@overrides) {
+ my ($tag, $pats) = @$_;
+
+ if($tag ne $browser) {
+ push @keep_overrides, $_;
+ next;
+ }
+
+ $found += @$pats, next if $number eq 'all';
+
+ if($number > @$pats) {
+ print "Value $number out of range";
+ next;
+ }
+
+ $found++;
+ undef $pats->[$number - 1];
+ @$pats = grep { defined $_ } @$pats;
+
+ push @keep_overrides, $_ if @$pats;
+ }
+
+ if($found) {
+ print "Deleted $found overrides";
+ write_browser_overrides(\@keep_overrides);
+ } else {
+ print "No matching overrides";
+ }
+}
+
+sub init_colors {
+ if(settings_get_bool('urlm_use_bold')) {
+ $bold_on = "\002";
+ $bold_off = "\002";
+ } else {
+ $bold_on = "";
+ $bold_off = "";
+ }
+
+ if(settings_get_bool('urlm_use_color')) {
+ $green = "\0033";
+ $red = "\0034";
+ $yellow = "\0037";
+ $purple = "\0036";
+ $color_off = "\003";
+ } else {
+ $green = "";
+ $red = "";
+ $yellow = "";
+ $purple = "";
+ $color_off = "";
+ }
+}
+
+# init_browsers(): dynamic bindings. Each browser tag gets bound to
+# /urlm_open_$tag, and (if short commands enabled) to /$tag.
+our @bound_refs;
+sub init_browsers {
+ # for this to work, the code ref can *NOT* be stored in a "my" var
+ # I think this is a bug in irssi, or possibly perl, but maybe I'm
+ # just being dumb...
+ for(@bound_refs) {
+ command_unbind($_->[0], $_->[1]);
+ }
+ @bound_refs = ();
+
+ my @browsers = read_browser_list();
+ for(@browsers) {
+ my $code = 'sub { url_open_cmd($_[0], "'. ($_->{tag}) . '"); };';
+ my $cmd = 'urlm_open_' . $_->{tag};
+ push @bound_refs, [ $cmd, eval $code ];
+
+ # again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge
+ command_bind($cmd, $bound_refs[$#bound_refs]->[1]);
+
+ if(settings_get_bool('urlm_short_cmds')) {
+ my $shortcmd = $_->{tag};
+ push @bound_refs, [ $shortcmd, $bound_refs[$#bound_refs]->[1] ];
+ command_bind($shortcmd, $bound_refs[$#bound_refs]->[1]);
+ }
+ }
+
+ command_unbind("ul", "url_list_cmd");
+ command_unbind("uo", "url_open_cmd");
+ command_unbind("wget", "url_open_wget_cmd");
+
+ if(settings_get_bool('urlm_short_cmds')) {
+ command_bind("ul", "url_list_cmd");
+ command_bind("uo", "url_open_cmd");
+ command_bind("wget", "url_open_wget_cmd");
+ }
+}
+
+sub init_settings { # call only once, at script load!
+# Where shall we save the URL log?
+ settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog");
+
+# Where is the wget binary? Absolute path, or "wget" (searches PATH)
+ settings_add_str('urlmanager', 'urlm_wget_path', "wget");
+
+# Where should wget save files?
+ settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~");
+
+# Extra arguments to pass to wget...
+ settings_add_str('urlmanager', 'urlm_wget_extra_args', "");
+
+# Do we log URLs from /part and /quit messages? Disabled by default
+# because so many people always /quit with the same spammish URL
+# e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]"
+ settings_add_bool('urlmanager', 'urlm_log_partquit', 0);
+
+# Do we log URLs from our own public/private messages?
+ settings_add_bool('urlmanager', 'urlm_log_own', 1);
+
+# Cosmetics:
+ settings_add_bool('urlmanager', 'urlm_short_cmds', 1);
+ settings_add_bool('urlmanager', 'urlm_use_bold', 1);
+ settings_add_bool('urlmanager', 'urlm_use_color', 1);
+
+# Browser definitions. A double-colon-separated list. Each list item
+# is a single-colon separated list of (tag, name, command_format).
+# You may add browsers, and they will work as commands
+# without changing any other code.
+# The browser commands need to be non-blocking, and any stdout/err from
+# them will be ignored.
+# The %s gets replaced with the actual URL. *ALWAYS* use single-quotes
+# (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s!
+# Failure to comply is a security hole!
+ settings_add_str('urlmanager', 'urlm_browsers',
+ 'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' .
+ '::' .
+ 'ie:Internet Explorer:ie6 \'%s\'' .
+ '::' .
+ 'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' .
+ '::' .
+ 'ut:links+xterm:xterm -e "links \'%s\'"' .
+ '::' .
+ 'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i');
+
+# Default browser for /uo and /urlm_open commands
+ settings_add_str('urlmanager', 'urlm_default_browser', 'ff');
+
+# /uo and /urlm_open check this list.
+# Double-colon-separated list, each item is a single-colon-separated
+# list consisting of a browser tag and one or more patterns.
+# If a URL matches one of these
+# patterns, the browser tag will be used as the browser to open the URL
+# with, instead of the default.
+ settings_add_str('urlmanager', 'urlm_browser_overrides',
+ 'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com');
+
+# If /uo or /urlm_open get a URL ending in one of these file extensions,
+# it will be downloaded with wget instead of being opened in a browser.
+ settings_add_str('urlmanager', 'urlm_wget_extensions',
+ 'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z');
+
+# trim log to this many lines. Use with urlm_log_trim_interval and/or
+# urlm_log_trim_startup. Set to 0 to disable.
+ settings_add_int('urlmanager', 'urlm_max_log_lines', 100);
+
+# trim log to this many seconds. Use with urlm_log_trim_interval and/or
+# urlm_log_trim_startup. Set to 0 to disable.
+ settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7);
+
+# trim the log on script load.
+ settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0);
+
+# auto-trim log this often (seconds). Set to 0 to disable.
+ settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60);
+
+# these control the behavior of windows created with /urlm_wget or /wget
+ settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1);
+ settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60);
+ settings_add_int('urlmanager', 'urlm_wget_split_size', 0);
+
+# say "Captured URL #xxx http://whatever from whoever" every time a URL
+# is captured?
+ settings_add_bool('urlmanager', 'urlm_quiet_capture', 0);
+
+# TODO: support these:
+
+# channels/nicks/sites we don't want to log
+#settings_add_str('urlmanager', 'urlm_ignore_channels');
+#settings_add_str('urlmanager', 'urlm_ignore_urls');
+
+# If true, go through the entire list every time a URL is logged,
+# checking for duplicates
+#settings_add_bool('urlmanager', 'urlm_ignore_dups');
+}
+
+our $trim_timeout_tag;
+sub init_trim_timeout {
+ timeout_remove($trim_timeout_tag) if($trim_timeout_tag);
+
+ my $millis = settings_get_int('urlm_log_trim_interval') * 1000;
+ if($millis > 0) {
+ $trim_timeout_tag = timeout_add($millis, "trim_url_log", 1);
+ }
+}
+
+# apply_settings: called on signal "setup changed" (when any /set urlm_*
+# changes value).
+# Anything that depends on the values of any of the settings should be
+# (re)initialized here.
+sub apply_settings {
+ init_colors();
+ init_browsers();
+ init_trim_timeout();
+ @urls = read_url_file();
+}
+
+sub init_signals { # call only once, at script load!
+ signal_add_last("message public", "url_public");
+ signal_add_last("message private", "url_private");
+ signal_add_last("message irc notice", "url_private");
+ signal_add_last("message irc op_public", "url_private");
+ signal_add_last("message irc action", "url_private");
+ signal_add_last("dcc chat message", "url_dccmsg");
+ signal_add_last("message topic", "url_topic");
+ signal_add_last("channel joined", "url_join_topic");
+ signal_add_last("setup changed", "apply_settings");
+ signal_add_last("message part", "url_part");
+ signal_add_last("message quit", "url_quit");
+ signal_add_last("message own_public", "url_own");
+ signal_add_last("message own_private", "url_own");
+ signal_add_last("exec remove", "sig_exec_remove");
+ signal_add_last("window destroyed", "sig_window_destroyed");
+}
+
+sub init_static_binds { # call only once, at script load!
+ # These binds are always on:
+ command_bind("urlm_list", "url_list_cmd");
+ command_bind("urlm_open", "url_open_cmd");
+ command_bind("urlm_wget", "url_open_wget_cmd");
+ command_bind("urlm_add_browser", "urlm_add_browser");
+ command_bind("urlm_del_browser", "urlm_del_browser");
+ command_bind("urlm_list_browsers", "urlm_list_browsers");
+ command_bind("urlm_add_wget_ext", "urlm_add_wget_ext");
+ command_bind("urlm_del_wget_ext", "urlm_del_wget_ext");
+ command_bind("urlm_list_overrides", "urlm_list_overrides");
+ command_bind("urlm_add_override", "urlm_add_override");
+ command_bind("urlm_del_override", "urlm_del_override");
+ command_bind("urlm_trim_log", "trim_url_log");
+ command_bind("urlm_undo_delete", "urlm_undo_delete");
+ command_bind("help", "urlm_help");
+}
+
+# Add per-user help dir to help_path, if not already present.
+sub init_help_path {
+ my $dir = "$ENV{HOME}/.irssi/help";
+ my $help_path = settings_get_str('help_path');
+
+ return if grep { $_ eq $dir } split /:/, $help_path;
+
+ $help_path .= ":$dir";
+ settings_set_str('help_path', $help_path);
+
+ signal_emit('setup_changed'); # 20100614 bkw: hmmm...
+}
+
+# bind signals and commands, now that everything's defined.
+init_settings();
+init_signals();
+init_static_binds();
+init_colors();
+init_browsers();
+init_help_path();
+@urls = read_url_file();
+trim_url_log() if settings_get_bool('urlm_log_trim_startup');
+init_trim_timeout();
+
+# make sure no leftover backup is lurking from a long time ago...
+unlink(get_url_log_file() . "~");
+
+# Print a helpful message for the user on script load...
+print $bold_on . "urlmanager.pl" . $bold_off . " loaded (" . @urls .
+ " URLs), type '" . $yellow . "/help urlmanager" .
+ $color_off . "' for help.";
+
+# rest of file is POD docs
+=pod
+
+=head1 NAME
+
+urlmanager
+
+=head1 SYNOPSIS
+
+Yet another URL logger for irssi.
+
+=head1 DESCRIPTION
+
+Captures URLs in channel, privmsg, and DCC chat messages, logs them to a
+file. Provides an irssi command to list captured URLs and several commands
+to do various things with them (open in browser, download, copy to X11
+selection buffer).
+
+This documentation only includes installation instructions. For usage
+instructions, install the script and run B</urlm_help> within irssi.
+
+=head1 INSTALLATION
+
+Copy B<urlmanager.pl> to your B<~/.irssi/scripts> directory (create the
+directory if it doesn't exist). For auto-loading when irssi starts,
+create a symlink in B<~/.irssi/scripts/autorun>:
+
+=over 4
+
+mkdir -p ~/.irssi/scripts/autorun
+
+cp urlmanager.pl ~/.irssi/scripts
+
+cd ~/.irssi/scripts/autorun
+
+ln -s ../urlmanager.pl .
+
+=back
+
+=head1 CONFIGURATION
+
+All configuration is done from within irssi; read B</urlm_help>.
+=cut