diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/urlmanager.pl | 1350 |
1 file changed, 1350 insertions, 0 deletions
diff --git a/scripts/urlmanager.pl b/scripts/urlmanager.pl new file mode 100644 index 0000000..787e775 --- /dev/null +++ b/scripts/urlmanager.pl @@ -0,0 +1,1350 @@ +#!/usr/bin/perl + +# urlmanager script for irssi + +use warnings; +use strict; + +use Fcntl qw/:flock/; +use POSIX qw/strftime/; + +use Irssi qw/ + settings_add_str settings_add_bool settings_add_int + settings_get_str settings_get_bool settings_get_int + settings_set_str settings_set_bool settings_set_int + command command_bind command_unbind + signal_emit signal_add_last signal_stop + timeout_add timeout_add_once timeout_remove + window_find_item/; + +our $VERSION = "0.1"; +our %IRSSI = ( + authors => 'Urchlay', + contact => 'Urchlay on NewNet', + name => 'urlmanager', + description => + 'Captures URLs said in channel and private messages ' . + 'and saves them to a file, also adds several commands for ' . + 'listing and opening captured URLs ' . + '(based on urlgrab.pl 0.2 by David Leadbetter)', + license => 'GNU GPLv2 or later', + url => 'none', +); + +# 20110609 bkw: if irssi was started in a screen session from the console, +# then detached, then reattached in an X session, DISPLAY will not be set. +# This will confuse the user, as e.g. firefox will silently fail to run. +# It won't do any harm to set DISPLAY=:0 if it's not set, and might help... +{ + my $disp = $ENV{DISPLAY}; + $ENV{DISPLAY} = ":0" unless $disp; +} + +# Workaround for a heisenbug, see: +# http://bugs.irssi.org/index.php?do=details&task_id=242 +{ package Irssi::Nick } + +# Color constants. +# Irssi.pm doesn't include symbolic mIRC-style color names... +# NOTE: if you print e.g. $green . "12345", the "1" will be interpreted +# as the 2nd digit of the color! Only good fix is to always put a space: +# print $green . " 12345" works OK. +# Declarations only; defined in init_colors() +our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off); + +# @urls is a list of anonymous hashes, each representing one URL. 
# See read_url_file() for the hash elements stored per URL.
our @urls;

# Most-recently-posted URL (the URL only, not a hash). Only used
# for avoiding dups (see url_log).
our $lasturl = "";

# Have any URLs been captured since the last /ul -delete? This is to
# (hopefully) protect the user from losing freshly-captured URLs when
# undoing a delete (see urlm_undo_delete).
our $captured_since_delete = 0;

# echo: print text without irssi treating % chars as format codes.
# print()/Irssi::print() mangle URLs containing HTML %-escapes; using
# irssi's /echo is apparently the right way to avoid this... though we
# get a colored -!- in front of every line :(
sub echo {
    command("/echo $_") for @_;
}

# trim: remove leading/trailing whitespace.
# NOTE: this intentionally edits $_[0] in place (through the @_ alias)
# AND returns the trimmed string; at least one caller
# (urlm_add_browser's "$arg =~ trim($arg)" typo) depends on the
# in-place effect, so both behaviors are preserved.
# Fix: the old pattern used \s$ for the tail, which stripped only a
# single trailing whitespace character; \s+$ strips them all.
sub trim {
    $_[0] =~ s/^\s+|\s+$//g;
    return $_[0];
}

# read_url_file: called on script load (and after undo-delete) to load
# the log file. Returns an array of URL hashes
# (stamp/nick/channel/url), which will be empty if the file wasn't
# present or was empty.
sub read_url_file {
    my $file = get_url_log_file();
    my @got;

    # 3-arg open with a lexical handle; the old 2-arg/bareword form is
    # unsafe if the filename ever contains open()-special characters.
    open my $fh, '<', $file or return;

    # A shared lock is all a reader needs; the old code took LOCK_EX,
    # which needlessly blocked concurrent readers.
    flock($fh, LOCK_SH);

    while(my $line = <$fh>) {
        chomp $line;
        my ($stamp, $nick, $channel, $url) = split " ", $line;
        next unless defined $url;   # skip malformed/truncated lines
        push @got, {
            stamp   => $stamp,
            nick    => $nick,
            channel => $channel,
            url     => $url,
        };
    }
    close $fh;

    return @got;
}

# write_url_file: rewrite the URL log from the given list of URL
# hashes, by re-logging each one through url_log() (which also
# repopulates @urls). Callers are expected to clear the log first.
sub write_url_file {
    for my $u (@_) {
        $lasturl = "";   # defeat url_log()'s dup suppression
        url_log(1, $u->{nick}, $u->{channel}, $u->{url}, $u->{stamp});
    }
}

# trim_url_log: trim the in-memory list and on-disk log according to
# the urlm_max_log_lines / urlm_max_log_age settings (0 disables
# either limit).
sub trim_url_log {
    my $quiet = shift || 0;
    my $max_lines = settings_get_int("urlm_max_log_lines") || 0;
    my $max_age   = settings_get_int("urlm_max_log_age")   || 0;

    return unless $max_lines || $max_age; # nothing to do!

    my @keep_urls;

    if($max_age) {
        my $cutoff = time() - $max_age;
        @keep_urls = grep { $_->{stamp} >= $cutoff } @urls;
    } else {
        @keep_urls = @urls;
    }

    # keep only the most recent $max_lines entries
    if($max_lines && (@keep_urls > $max_lines)) {
        @keep_urls = @keep_urls[-$max_lines .. -1];
    }

    my $trimmed = @urls - @keep_urls;

    if($trimmed) {
        clear_url_log();
        write_url_file(@keep_urls);
        # NOTE(review): $quiet is accepted but ignored; the original
        # had "unless $quiet" commented out, so that is left as-is.
        print "Trimmed $trimmed URLs from log";
    }
}

# clear_url_log: clear the URL log, both the in-memory @urls and the
# on-disk file.
sub clear_url_log {
    my $file = get_url_log_file();
    unlink $file; # or print "Can't delete $file: $!";
    @urls = ();
}

# get_url_log_file: value of the urlm_log_file setting, with tilde
# expansion for the user's homedir.
sub get_url_log_file {
    my $file = settings_get_str('urlm_log_file');
    $file =~ s/^~/$ENV{HOME}/;
    return $file;
}

# signal handler for "message public":
# extract and log any URLs in the input text.
sub url_public {
    my ($server, $text, $nick, $hostmask, $channel) = @_;
    my @got = find_urls($text);
    url_log(0, $nick, $channel, $_) for @got;
}

# signal handler for "message own_public" and "message own_private":
# extract and log any URLs in our own messages (if urlm_log_own).
sub url_own {
    my ($server, $text, $channel) = @_;
    return unless settings_get_bool('urlm_log_own');
    my @got = find_urls($text);
    url_log(0, $server->{nick}, $channel, $_) for @got;
}

# signal handler for "message private", "message irc notice",
# "message irc op_public", "message irc action":
# extract and log any URLs in the input text; logged with our own nick
# as the "channel", since these are targeted at us.
sub url_private {
    my ($server, $text, $nick, $hostmask) = @_;
    my @got = find_urls($text);
    url_log(0, $nick, $server->{nick}, $_) for @got;
}

# signal handler for "message topic"
# extract and log any URLs in the input text.
# signal handler for "message topic":
# log any URLs found in topic changes made by other people.
sub url_topic {
    my ($server, $channel, $text, $nick, $hostmask) = @_;
    return if $nick eq $server->{nick}; # don't log own topic changes
    url_log(0, $nick, $channel, $_) for find_urls($text);
}

# signal handler for "channel joined":
# log any URLs found in the channel's existing topic.
sub url_join_topic {
    my ($chan) = @_;
    return unless $chan->{topic};
    # don't log own topic changes
    return if $chan->{topic_by} eq $chan->{server}->{nick};
    url_log(0, $chan->{topic_by}, $chan->{name}, $_)
        for find_urls($chan->{topic});
}

# signal handler for "message part":
# log any URLs found in other people's part messages (if
# urlm_log_partquit is enabled).
sub url_part {
    return unless settings_get_bool('urlm_log_partquit');
    my ($server, $channel, $nick, $hostmask, $text) = @_;
    return if $nick eq $server->{nick}; # don't log own parts (redundant?)
    url_log(0, $nick, $channel, $_) for find_urls($text);
}

# signal handler for "message quit":
# log any URLs found in other people's quit messages (if
# urlm_log_partquit is enabled); logged with our own nick as the
# "channel", like private messages, since quits have no channel.
sub url_quit {
    return unless settings_get_bool('urlm_log_partquit');
    my ($server, $nick, $hostmask, $text) = @_;
    return if $nick eq $server->{nick}; # don't log own quits (redundant?)
    url_log(0, $nick, $server->{nick}, $_) for find_urls($text);
}
# signal handler for "dcc chat message":
# extract and log any URLs in the input text.
# TODO: test this!
sub url_dccmsg {
    my ($dcc, $text) = @_;
    my @got = find_urls($text);
    url_log(0, $dcc->{nick}, $dcc->{server}->{nick}, $_) for @got;
}

# print_url_line:
# print one formatted (colorful) line of /ul output. $maxnick and
# $maxchan are column widths precomputed by the caller.
sub print_url_line {
    my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_;

    echo sprintf("%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s %s%s",
        $bold_on, $num, $bold_off,
        $green, $stamp,
        $red, $nick,
        $yellow, $channel,
        $purple, $url,
        $color_off);
}

# url_list_cmd: bound to /ul and /urlm_list.
# Lists (and with -delete, removes) captured URLs. The argument can be
# empty (last 10), "all", a number, a range "a-b", "#channel",
# "/regex/", or a nick prefix; a leading "!" inverts the match.
sub url_list_cmd {
    my $do_delete = 0;
    my $listed = 0;
    my @keep_urls;

    my $arg = shift || "";
    $arg = lc $arg;
    $arg = trim($arg);

    if($arg =~ /^-delete/) {
        $arg =~ s/^-delete\s*//;
        $do_delete++;

        if($arg eq '') {
            print "/ul -delete requires a parameter! (/ul help for details)";
            return;
        }
    }

    if(not @urls) {
        print "No URLs in list!";
        return;
    }

    my ($start, $end, $nick, $regex, $channel);
    $arg = "-" if $arg eq 'all';    # "all" == the full range

    my $invert = 0;
    if($arg =~ /^!(.*)/) {
        $arg = $1;
        $invert = 1;
    }

    if($arg eq "") {
        # default: the 10 most recent URLs
        $start = @urls - 10;
        $start = 0 if $start < 0;
        $end = $#urls;
    } elsif($arg =~ /^[&#](.*)/) {
        $channel = $1;
    } elsif($arg =~ m{^/(.*?)/?$}) {
        # Fix: the old pattern (/^\/(.*)\/?/) was greedy, so a trailing
        # slash was never stripped and silently became part of the regex.
        $regex = $1;
    } elsif($arg =~ /^\d+$/) {
        $start = $end = $arg;
    } elsif($arg =~ /^[-\d]+$/) {
        ($start, $end) = split /-/, $arg, 2;
        $start = 0 if $start eq "";
        $end = $#urls if $end eq "";
    } else {
        $nick = $arg;
    }

    my $count = 0;
    my @to_list;
    for(@urls) {
        my $list = 0;
        if($nick && (lc($_->{nick}) =~ /^\Q$nick\E/)) { # nick prefix match
            $list++;
        } elsif($regex) {
            $list++ if $_->{url} =~ /$regex/i;
        } elsif($channel) {
            $list++ if $_->{channel} =~ /^[#&]?$channel$/i;
        } elsif(defined($start) && defined($end)) {
            $list++ if $count >= $start && $count <= $end;
        }

        $list = !$list if $invert;

        if($list) {
            $listed++;
            push @to_list, [ $count, $_ ];
        } elsif($do_delete) {
            push @keep_urls, $_;
        }

        $count++;
    }

    if(@to_list) { # print the list if anything's supposed to be in it
        # column widths; minimums fit the header labels "Nick"/"Channel"
        my $maxnick = 4;
        my $maxchan = 7;

        for(@to_list) {
            my ($num, $u) = @$_;
            my $len = length($u->{nick});
            $maxnick = $len if $len > $maxnick;
            $len = length($u->{channel});
            $maxchan = $len if $len > $maxchan;
        }

        print_url_line($maxnick, $maxchan,
                "#", "When", "Nick", "Channel", "URL");

        for(@to_list) {
            my ($num, $u) = @$_;

            my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp}));
            print_url_line($maxnick, $maxchan,
                    $num, $stamp, $u->{nick}, $u->{channel}, $u->{url});
        }
    }

    if($do_delete) { # process -delete flag
        my $deleted = scalar @urls - scalar @keep_urls;
        if(not $deleted) {
            print "No URLs deleted";
            return;
        }

        # keep a backup so /urlm_undo_delete can restore it
        my $file = get_url_log_file();
        rename($file, "$file~") or print "Warning: can't backup log file: $!";

        clear_url_log();
        write_url_file(@keep_urls);

        print $red . "These " . $deleted . " URLs have been deleted!" .
            $color_off . " (" . @urls . " remain)";

        $captured_since_delete = 0;
    } else { # no -delete flag, show summary
        print "Listed $listed of $count URLs";
    }
}

# urlm_say: placeholder, intentionally empty (not bound anywhere in
# the visible code).
sub urlm_say {
}

# urlm_undo_delete: restore the log from the backup made by
# /ul -delete. Refuses (without -yes) if any URLs have been captured
# since the delete, because those would be thrown away.
sub urlm_undo_delete {
    my $arg = shift;
    $arg = "" unless defined $arg;   # avoid warning when called bare
    my $yes = ($arg eq '-yes');

    if($captured_since_delete && (not $yes)) {
        print "urlm_undo_delete: doing this will throw away some URLs that " .
            "were captured since the last delete. Re-run with '-yes' to do it " .
            "anyway.";
        return;
    }

    my $file = get_url_log_file();
    my @oldurls = @urls;
    clear_url_log();

    if(not(rename("$file~", $file))) {
        print "Can't restore log file: $!";
        # Fix: the original called write_url_file() with NO arguments
        # here, so the old URLs were never written back and were lost.
        # write_url_file() re-logs through url_log(), which also
        # repopulates @urls, so @urls must not be pre-assigned here
        # (that would double every entry).
        write_url_file(@oldurls);
        return;
    }

    @urls = read_url_file();
    print "Restored " . @urls . " URLs from backup";
    $captured_since_delete = 0;
}

# get_url_from_number:
# Returns a URL hash from @urls, given the index into the array.
# Returns undef if index is invalid or non-existent.
# Accepts negative numbers as meaning "nth from the end of the list".
# If index is the empty string, returns the most recent URL.
# If index is non-empty and non-numeric, treat it as a nick and return
# that nick's most recent URL.
sub get_url_from_number {
    my $arg = shift;
    if($arg eq '') {
        $arg = $#urls;
    } elsif($arg =~ /-\d+$/) {
        $arg = $#urls + $arg;
    }

    if($arg !~ /^\d+$/) {
        # non-numeric: search newest-to-oldest for a matching nick
        $arg = trim($arg);
        for(my $i = $#urls; $i >= 0; $i--) {
            my $url = $urls[$i];
            return $url if lc($arg) eq lc($url->{nick});
        }
        print("Can't find any URLs from nick '$arg'");
        return;
    }

    if($arg > $#urls) {
        print("No such URL number '$arg'");
        return;
    }

    return $urls[$arg];
}

# url_open_cmd: open a URL with the given browser tag. If $browser is
# '' (or a Server object from a command binding), auto-guess: wget
# extensions first, then the urlm_browser_overrides patterns, then the
# default browser.
sub url_open_cmd {
    my ($urlnum, $browser) = @_;
    $browser = '' if ref $browser; # means it's a Server object

    my $url = get_url_from_number($urlnum);
    return if not $url;
    my $link = $url->{url};

    if(not $browser) { # guess browser...
        # check for wget first: hand off if the URL's filename ends in
        # one of the configured download extensions
        if($link =~ m{/[^/]+\.(\w+)$}) {
            my $ext = lc $1;
            for(split " ", settings_get_str('urlm_wget_extensions')) {
                if($ext eq lc($_)) {
                    url_open_wget_cmd($urlnum);
                    return;
                }
            }
        }

        # not a wget extension, check browser override patterns
        $browser = settings_get_str('urlm_default_browser');
        my @overrides = read_browser_overrides();

OVERRIDE:
        for(@overrides) {
            my ($tag, $pats) = @$_;
            for(@$pats) {
                if($link =~ /$_/i) {
                    $browser = $tag;
                    last OVERRIDE;
                }
            }
        }
    }

    my @browsers = read_browser_list();
    my ($tag, $name, $format);
    for(@browsers) {
        if($_->{tag} eq $browser) {
            ($tag, $name, $format) = ($_->{tag}, $_->{name}, $_->{command});
            last;
        }
    }

    # Fix: the original fell through with undef $name/$format when the
    # tag wasn't in the browser list, then crashed in sprintf() below.
    if(not defined $format) {
        print "No browser defined for tag '$browser' (see /urlm_list_browsers)";
        return;
    }

    echo("$name - " . $link . " (" . $url->{nick} . ")");

    $link =~ s/'/%27/g; # be nice to the shell, escape single quotes
    $link =~ s/\(/%28/g; # be nice to firefox, escape parens
    $link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails*
                         # if the url contains any () chars!

    # the browser command formats single-quote the %s (see the
    # urlm_browsers setting), and single quotes were escaped above,
    # so this is shell-safe
    my $cmd = sprintf($format, $link) . " &>/dev/null &";
    system($cmd);
}

# Open with wget in an irssi window. Bound to /urlm_wget and /wget.
# Argument: optional extra wget args, optionally followed by a URL
# number (defaults to the most recent URL).
# TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?)
our $wgetcount = 1;   # counter for generating unique window names
sub url_open_wget_cmd {
    my $arg = shift;
    $arg = "" unless defined $arg;  # avoid split() warning when called bare

    my @args = split " ", $arg;

    # a trailing number is the URL number; everything else is wget args
    my $urlnum = "";
    if(@args && ($args[-1] =~ /^\d+$/)) {
        $urlnum = pop @args;
    }

    my $url = get_url_from_number($urlnum);
    return if not $url;

    my $more_args = join(" ", @args);
    $more_args .= " " if $more_args;

    my $dir = settings_get_str('urlm_wget_dl_dir');
    $dir =~ s/^~/$ENV{HOME}/;
    $dir = "." if not $dir;

    my $size = settings_get_int('urlm_wget_split_size');

    if($size > 0) {
        command("/window new split");
        command("/window size $size");
    } else { # size == 0, means "do not split"
        command("/window new hidden");
    }

    # find an unused window name...
    my $name = "urlm_wget_" . ($wgetcount++);
    while(window_find_item($name)) {
        $name = "urlm_wget_" . ($wgetcount++);
    }

    command("/window name $name");

    if(settings_get_bool('urlm_wget_autoclose')) {
        my $delay = settings_get_int('urlm_wget_autoclose_delay');
        if($delay) {
            print "This window will close $delay seconds after download is done";
        } else {
            print "This window will close when download is done";
        }
    } else {
        print "Use " . $yellow . "/window close $name" . $color_off .
            " to close this window";
    }

    my $args = trim(settings_get_str('urlm_wget_extra_args'));
    $args .= " " if $args;

    my $wget_bin = settings_get_str('urlm_wget_path');
    $wget_bin =~ s/^~/$ENV{HOME}/;
    $wget_bin = "wget" if not $wget_bin;

    command(
        "/exec -nosh " .
        "-name $name " .
        "$wget_bin " .
        "-P $dir " .
        $args .
        $more_args .
        $url->{url});

    if($size) { # if using a hidden window, leave it focused
        command("/window last");
    }
}

# Close a window by name. This seems like a kludge...
sub close_window {
    return unless window_find_item($_[0]); # don't close if already closed!
    command("/window goto " . $_[0]);
    command("/window close");
}

# signal handler for "exec remove": autocloses our wget windows when
# the wget processes exit, if requested.
sub sig_exec_remove {
    my ($proc, $status) = @_;
    return unless settings_get_bool('urlm_wget_autoclose');

    # target_win->name will be "" if window already closed!
    my $name = $proc->{target_win}->{name} || "";
    return unless $name =~ /^urlm_wget_\d+$/;

    return unless window_find_item($name); # don't close if already closed!

    my $delay = settings_get_int('urlm_wget_autoclose_delay');
    if($delay > 0) {
        timeout_add_once($delay * 1000, "close_window", $name);
    } else {
        close_window($name);
    }
}

# kill a process by name (send SIGTERM)
sub kill_proc {
    command("/exec -15 " . $_[0]);
}

# kill wget processes on manual window close!
# catch signal "window destroyed" and figure out a way to avoid
# adding a timeout to close the window (since it's in the middle
# of closing now...)
sub sig_window_destroyed {
    my $name = $_[0]->{name} || "";
    return unless $name =~ /^urlm_wget_\d+$/;

    # use a timeout to kill the process, instead of killing it directly.
    # why? to avoid possible race condition where sig_exec_remove()
    # tries to close the window that triggered this call to
    # sig_window_destroyed().
    timeout_add_once(1000, "kill_proc", $name);
}

# find_urls: extract all URLs from the input text, returns a list
# (which may be empty).
# Be VERY permissive about what we consider a URL.
# 20100614 bkw: be a little less permissive
# 20140530 bkw: stop catching dupe http://whatever and https://whatever
# Fixes relative to the previous version:
#   - work on a copy of the argument: s/// on $_[0] edited the
#     *caller's* string through the @_ alias (e.g. url_join_topic
#     passes $chan->{topic} directly, whose URLs would be deleted);
#   - capture the match instead of using $&, which imposed a global
#     regex performance penalty on older perls.
sub find_urls {
    my ($text) = @_;            # copy; do NOT modify the caller's string
    $text = "" unless defined $text;

    my @got;
    # pull out explicit-scheme URLs first, removing them from the copy
    # so the bare www./ftp. passes below can't match them again
    while($text =~ s{((?:https?|ftp)://\S+)}{}) {
        push @got, $1;
    }
    push @got, "http://$_" for $text =~ /(?:www\d*\.[^.]+\.\S+)/g;
    push @got, "ftp://$_" for $text =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
    s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
    return @got;
}

# Annoying bots have a tendency to do this:
# <actual_person> check this out: http://www.blahblah.blah/path/to/stuff.html
# <annoying_bot> Title: Stuff (at www.blahblah.blah)
# To me, this is about the most useless function a bot can serve, plus it
# breaks the /uo command.
+# just_domain() returns true if $1 is just the domain part of $2 +sub just_domain { + my ($new, $old) = @_; + s/^(ht|f)tps?:\/\/// for ($new, $old); + $old =~ s/\/.*//; + return 0 if $new =~ /\/./; + return $new eq $old; +} + +# url_log: appends URL to the URL log file and to the @urls array. +# Locks the file before writing, so should be safe even with multiple +# instances of irssi. +# $relog should be false if capturing a new URL from channel/msg text, +# or true if re-logging an old URL (e.g. /ul -delete does this) +sub url_log { + my($relog, $nick, $channel, $url, $stamp) = @_; + $nick =~ s/!.*//; + + $stamp = time() unless $stamp; + + return if lc $url eq lc $lasturl; # a tiny bit of protection from spam/flood + return if just_domain($url, $lasturl); + + $lasturl = $url; + + my $file = get_url_log_file(); + open(URLLOG, ">>$file") or return; + + flock(URLLOG, Fcntl::LOCK_EX); + seek(URLLOG, 0, 2); + + print URLLOG time . " $nick $channel $url\n"; + close(URLLOG); + + push @urls, { + stamp => $stamp, + nick => $nick, + channel => $channel, + url => $url, + }; + + if(not $relog) { + if(not settings_get_bool('urlm_quiet_capture')) { + my $on = ""; + if($channel =~ /^#/) { + $on = " on " . $green . $channel . $color_off; + } + echo "Captured URL #" . $#urls . " " . + $purple . $url . $color_off . + " from " . $yellow . $nick . $color_off . $on; + } +# trim_url_log(); + $captured_since_delete++; + } +} + +# urlm_help generates /help topics for the browser commands. +# The help for all the other commands is stored in text files in +# ~/.irssi/help +sub urlm_help { + my $arg = shift; + $arg = lc trim($arg); + my %bhelp; + our %urlm_help; + + for(read_browser_list()) { + my $text = uc($_->{tag}) . " [<url#>]\n\n" . + "Open a URL with the external browser '" . + $_->{name} . "', using the command:\n" . + $_->{command} . "\n\n" . + "If [<url#>] is omitted, the most recent URL will be opened.\n"; + $bhelp{$_->{tag}} = $text; + $bhelp{"urlm_open_" . 
$_->{tag}} = $text; + } + + my $help = $bhelp{$arg} || return; + signal_stop(); + print $help; +} + +sub write_browser_overrides { + my @list = @{$_[0]}; + my @strings; + + for(@list) { + my ($tag, $pats) = @$_; + push @strings, join(":", $tag, @$pats); + } + + my $setting = join("::", @strings); + settings_set_str('urlm_browser_overrides', $setting); +} + +sub read_browser_overrides { + my @result; + + my $list = settings_get_str('urlm_browser_overrides'); + my @entries = split /::/, $list; + + for(@entries) { + my @items = split /:/; + my $tag = shift @items; + push @result, [ $tag, \@items ]; + } + + return @result; +} + +sub write_browser_list { + my @list = @{$_[0]}; + my @strings; + + for(@list) { + push @strings, join(":", $_->{tag}, $_->{name}, $_->{command}); + } + + my $setting = join("::", @strings); +#print "/set urlm_browsers $setting"; + settings_set_str('urlm_browsers', $setting); + signal_emit("setup changed"); +} + +sub read_browser_list { + my @result; + + my $list = settings_get_str('urlm_browsers'); + my @entries = split /::/, $list; + + for(@entries) { + my @items = split /:/; + my $hash = { + tag => $items[0], + name => $items[1], + command => $items[2], + }; + + push @result, $hash; + } + + return @result; +} + +# commands: +# urlm_add_browser <browser>:<fullname>:<cmd> +sub urlm_add_browser { + my $arg = shift || ""; + $arg =~ trim($arg); + + if($arg !~ /^[^:]+:[^:]+:[^:]+$/) { + print "Usage: /urlm_add_browser tag:name:command"; + return; + } + + my ($tag, $name, $cmd) = split /:/, $arg; + + $tag = trim($tag); + $tag = lc $tag; + $name = trim($name); + + if($tag =~ /\W/) { + print "/urlm_add_browser: tag must consist of only " . 
+ "letters, numbers, or underscores (_), not '$tag'"; + return; + } + + if($tag eq 'wget') { + print "/urlm_add_browser: 'wget' is reserved; use a different tag"; + return; + } + + if($cmd !~ /'[^']*\%s[^']*'/) { + print "/urlm_add_browser: command must contain '\%s' (single-quoted)"; + return; + } + + my @browsers = read_browser_list(); + my $found = 0; + for(@browsers) { + if(lc($_->{tag}) eq $tag) { + print "Replaced old definition of $tag"; + $_->{name} = $name; + $_->{command} = $cmd; + $found++; + last; + } + } + + if(not $found) { + push @browsers, { tag => $tag, name => $name, command => $cmd }; + print "push \@browsers, { tag => $tag, name => $name, command => $cmd }"; + print "Added browser definition $tag"; + } + + write_browser_list(\@browsers); +} + +# urlm_del_browser <browser> +sub urlm_del_browser { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + return unless $arg; + + # urlm_del_override() already prints "Browser not defined" if it + # wasn't defined, so no need to have urlm_del_browser() print it again. + urlm_del_override("$arg all"); + + my @browsers = read_browser_list(); + my @keep_browsers; + my $found = 0; + + for(@browsers) { + if(lc($_->{tag}) eq $arg) { + print "Deleted definition of $arg"; + $found++; + } else { + push @keep_browsers, $_; + } + } + + write_browser_list(\@keep_browsers) if $found; +} + +# urlm_list_browsers +sub urlm_list_browsers { + my @browsers = read_browser_list(); + for(@browsers) { + print( + (settings_get_str('urlm_default_browser') eq $_->{tag} ? + "[*]" : + " ") . + "Tag: $bold_on" . $_->{tag} . "$bold_off, " . + "Name: $bold_on" . $_->{name} . "$bold_off, " . + "Command: $bold_on" . $_->{command} . 
"$bold_off"); + } +} + +# urlm_add_wget_ext <ext> +sub urlm_add_wget_ext { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + + if(not $arg) { + print "Usage: /urlm_add_wget_ext <extension>"; + return; + } + + my @list = split " ", settings_get_str('urlm_wget_extensions'); + if(grep { $_ eq $arg } @list) { + print "$arg is already in the wget extensions list"; + return; + } + + push @list, $arg; + + settings_set_str('urlm_wget_extensions', join(" ", @list)); + command("/set urlm_wget_extensions"); +} + +# urlm_del_wget_ext <ext> +sub urlm_del_wget_ext { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + + if(not $arg) { + print "Usage: /urlm_del_wget_ext <extension>"; + return; + } + + my @list = split " ", settings_get_str('urlm_wget_extensions'); + if(!grep { $_ eq $arg } @list) { + print "$arg is not in the wget extensions list"; + return; + } + + @list = grep { $_ ne $arg } @list; + + settings_set_str('urlm_wget_extensions', join(" ", @list)); + command("/set urlm_wget_extensions"); +} + +# urlm_list_overrides [<browser>] +sub urlm_list_overrides { + my $arg = shift || ""; + $arg = trim($arg); + + my $found = 0; + my @overrides = read_browser_overrides(); + for(@overrides) { + my ($browser, $pats) = @$_; + if($arg eq $browser || not $arg) { + $found++; + my $count = 1; + for(@$pats) { + print $browser . "[$count]: " . $_; + $count++; + } + } + } + + if(not $found) { + if($arg) { + print "No overrides for browser '$arg'"; + } else { + print "No browser overrides"; + } + } +} + +# urlm_add_override <browser> <pattern> +sub urlm_add_override { + my $arg = shift || ""; + $arg = trim($arg); # do not lc($arg), the command may need caps! 
+ + my ($browser, $pattern) = split " ", $arg; + $browser = lc $browser; + if(not ($browser and $pattern)) { + print "Usage: /urlm_add_override <browser> <pattern>"; + return; + } + + if(!grep { $_->{tag} eq $browser } read_browser_list()) { + print "Browser $browser not defined in browser list"; + return; + } + + eval "qr{$pattern}"; + if($@) { + print "Pattern $pattern is not a valid Perl regex: $@"; + return; + } + + my @overrides = read_browser_overrides(); + my $found = 0; + for(@overrides) { + my ($tag, $pats) = @$_; + next unless $tag eq $browser; + + push @$pats, $pattern; + $found++; + } + + if(not $found) { + push @overrides, [ $browser, [ $pattern ] ]; + } + + print "Added override for $browser: $pattern"; + write_browser_overrides(\@overrides); +} + +# urlm_del_override <browser> <number>|<all> +sub urlm_del_override { + my $arg = shift || ""; + $arg = trim($arg); + $arg = lc $arg; + + my ($browser, $number) = split " ", $arg; + + if(not($browser and $number)) { + print "Usage: /urlm_del_override <browser> <number>|all" + } + + if(!grep { $_->{tag} eq $browser } read_browser_list()) { + print "Browser $browser not defined in browser list"; + return; + } + + if($number ne 'all' && $number !~ /^[1-9]\d*$/) { + print "Bad override '$number': must be a number >= 1, or 'all'"; + return; + } + + my @overrides = read_browser_overrides(); + my @keep_overrides = (); + my $found = 0; + for(@overrides) { + my ($tag, $pats) = @$_; + + if($tag ne $browser) { + push @keep_overrides, $_; + next; + } + + $found += @$pats, next if $number eq 'all'; + + if($number > @$pats) { + print "Value $number out of range"; + next; + } + + $found++; + undef $pats->[$number - 1]; + @$pats = grep { defined $_ } @$pats; + + push @keep_overrides, $_ if @$pats; + } + + if($found) { + print "Deleted $found overrides"; + write_browser_overrides(\@keep_overrides); + } else { + print "No matching overrides"; + } +} + +sub init_colors { + if(settings_get_bool('urlm_use_bold')) { + $bold_on = 
"\002"; + $bold_off = "\002"; + } else { + $bold_on = ""; + $bold_off = ""; + } + + if(settings_get_bool('urlm_use_color')) { + $green = "\0033"; + $red = "\0034"; + $yellow = "\0037"; + $purple = "\0036"; + $color_off = "\003"; + } else { + $green = ""; + $red = ""; + $yellow = ""; + $purple = ""; + $color_off = ""; + } +} + +# init_browsers(): dynamic bindings. Each browser tag gets bound to +# /urlm_open_$tag, and (if short commands enabled) to /$tag. +our @bound_refs; +sub init_browsers { + # for this to work, the code ref can *NOT* be stored in a "my" var + # I think this is a bug in irssi, or possibly perl, but maybe I'm + # just being dumb... + for(@bound_refs) { + command_unbind($_->[0], $_->[1]); + } + @bound_refs = (); + + my @browsers = read_browser_list(); + for(@browsers) { + my $code = 'sub { url_open_cmd($_[0], "'. ($_->{tag}) . '"); };'; + my $cmd = 'urlm_open_' . $_->{tag}; + push @bound_refs, [ $cmd, eval $code ]; + + # again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge + command_bind($cmd, $bound_refs[$#bound_refs]->[1]); + + if(settings_get_bool('urlm_short_cmds')) { + my $shortcmd = $_->{tag}; + push @bound_refs, [ $shortcmd, $bound_refs[$#bound_refs]->[1] ]; + command_bind($shortcmd, $bound_refs[$#bound_refs]->[1]); + } + } + + command_unbind("ul", "url_list_cmd"); + command_unbind("uo", "url_open_cmd"); + command_unbind("wget", "url_open_wget_cmd"); + + if(settings_get_bool('urlm_short_cmds')) { + command_bind("ul", "url_list_cmd"); + command_bind("uo", "url_open_cmd"); + command_bind("wget", "url_open_wget_cmd"); + } +} + +sub init_settings { # call only once, at script load! +# Where shall we save the URL log? + settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog"); + +# Where is the wget binary? Absolute path, or "wget" (searches PATH) + settings_add_str('urlmanager', 'urlm_wget_path', "wget"); + +# Where should wget save files? 
+ settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~"); + +# Extra arguments to pass to wget... + settings_add_str('urlmanager', 'urlm_wget_extra_args', ""); + +# Do we log URLs from /part and /quit messages? Disabled by default +# because so many people always /quit with the same spammish URL +# e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]" + settings_add_bool('urlmanager', 'urlm_log_partquit', 0); + +# Do we log URLs from our own public/private messages? + settings_add_bool('urlmanager', 'urlm_log_own', 1); + +# Cosmetics: + settings_add_bool('urlmanager', 'urlm_short_cmds', 1); + settings_add_bool('urlmanager', 'urlm_use_bold', 1); + settings_add_bool('urlmanager', 'urlm_use_color', 1); + +# Browser definitions. A double-colon-separated list. Each list item +# is a single-colon separated list of (tag, name, command_format). +# You may add browsers, and they will work as commands +# without changing any other code. +# The browser commands need to be non-blocking, and any stdout/err from +# them will be ignored. +# The %s gets replaced with the actual URL. *ALWAYS* use single-quotes +# (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s! +# Failure to comply is a security hole! + settings_add_str('urlmanager', 'urlm_browsers', + 'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' . + '::' . + 'ie:Internet Explorer:ie6 \'%s\'' . + '::' . + 'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' . + '::' . + 'ut:links+xterm:xterm -e "links \'%s\'"' . + '::' . + 'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i'); + +# Default browser for /uo and /urlm_open commands + settings_add_str('urlmanager', 'urlm_default_browser', 'ff'); + +# /uo and /urlm_open check this list. +# Double-colon-separated list, each item is a single-colon-separated +# list consisting of a browser tag and one or more patterns. 
+# If a URL matches one of these +# patterns, the browser tag will be used as the browser to open the URL +# with, instead of the default. + settings_add_str('urlmanager', 'urlm_browser_overrides', + 'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com'); + +# If /uo or /urlm_open get a URL ending in one of these file extensions, +# it will be downloaded with wget instead of being opened in a browser. + settings_add_str('urlmanager', 'urlm_wget_extensions', + 'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z'); + +# trim log to this many lines. Use with urlm_log_trim_interval and/or +# urlm_log_trim_startup. Set to 0 to disable. + settings_add_int('urlmanager', 'urlm_max_log_lines', 100); + +# trim log to this many seconds. Use with urlm_log_trim_interval and/or +# urlm_log_trim_startup. Set to 0 to disable. + settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7); + +# trim the log on script load. + settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0); + +# auto-trim log this often (seconds). Set to 0 to disable. + settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60); + +# these control the behavior of windows created with /urlm_wget or /wget + settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1); + settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60); + settings_add_int('urlmanager', 'urlm_wget_split_size', 0); + +# say "Captured URL #xxx http://whatever from whoever" every time a URL +# is captured? 
+ settings_add_bool('urlmanager', 'urlm_quiet_capture', 0); + +# TODO: support these: + +# channels/nicks/sites we don't want to log +#settings_add_str('urlmanager', 'urlm_ignore_channels'); +#settings_add_str('urlmanager', 'urlm_ignore_urls'); + +# If true, go through the entire list every time a URL is logged, +# checking for duplicates +#settings_add_bool('urlmanager', 'urlm_ignore_dups'); +} + +our $trim_timeout_tag; +sub init_trim_timeout { + timeout_remove($trim_timeout_tag) if($trim_timeout_tag); + + my $millis = settings_get_int('urlm_log_trim_interval') * 1000; + if($millis > 0) { + $trim_timeout_tag = timeout_add($millis, "trim_url_log", 1); + } +} + +# apply_settings: called on signal "setup changed" (when any /set urlm_* +# changes value). +# Anything that depends on the values of any of the settings should be +# (re)initialized here. +sub apply_settings { + init_colors(); + init_browsers(); + init_trim_timeout(); + @urls = read_url_file(); +} + +sub init_signals { # call only once, at script load! + signal_add_last("message public", "url_public"); + signal_add_last("message private", "url_private"); + signal_add_last("message irc notice", "url_private"); + signal_add_last("message irc op_public", "url_private"); + signal_add_last("message irc action", "url_private"); + signal_add_last("dcc chat message", "url_dccmsg"); + signal_add_last("message topic", "url_topic"); + signal_add_last("channel joined", "url_join_topic"); + signal_add_last("setup changed", "apply_settings"); + signal_add_last("message part", "url_part"); + signal_add_last("message quit", "url_quit"); + signal_add_last("message own_public", "url_own"); + signal_add_last("message own_private", "url_own"); + signal_add_last("exec remove", "sig_exec_remove"); + signal_add_last("window destroyed", "sig_window_destroyed"); +} + +sub init_static_binds { # call only once, at script load! 
+ # These binds are always on: + command_bind("urlm_list", "url_list_cmd"); + command_bind("urlm_open", "url_open_cmd"); + command_bind("urlm_wget", "url_open_wget_cmd"); + command_bind("urlm_add_browser", "urlm_add_browser"); + command_bind("urlm_del_browser", "urlm_del_browser"); + command_bind("urlm_list_browsers", "urlm_list_browsers"); + command_bind("urlm_add_wget_ext", "urlm_add_wget_ext"); + command_bind("urlm_del_wget_ext", "urlm_del_wget_ext"); + command_bind("urlm_list_overrides", "urlm_list_overrides"); + command_bind("urlm_add_override", "urlm_add_override"); + command_bind("urlm_del_override", "urlm_del_override"); + command_bind("urlm_trim_log", "trim_url_log"); + command_bind("urlm_undo_delete", "urlm_undo_delete"); + command_bind("help", "urlm_help"); +} + +# Add per-user help dir to help_path, if not already present. +sub init_help_path { + my $dir = "$ENV{HOME}/.irssi/help"; + my $help_path = settings_get_str('help_path'); + + return if grep { $_ eq $dir } split /:/, $help_path; + + $help_path .= ":$dir"; + settings_set_str('help_path', $help_path); + + signal_emit('setup_changed'); # 20100614 bkw: hmmm... +} + +# bind signals and commands, now that everything's defined. +init_settings(); +init_signals(); +init_static_binds(); +init_colors(); +init_browsers(); +init_help_path(); +@urls = read_url_file(); +trim_url_log() if settings_get_bool('urlm_log_trim_startup'); +init_trim_timeout(); + +# make sure no leftover backup is lurking from a long time ago... +unlink(get_url_log_file() . "~"); + +# Print a helpful message for the user on script load... +print $bold_on . "urlmanager.pl" . $bold_off . " loaded (" . @urls . + " URLs), type '" . $yellow . "/help urlmanager" . + $color_off . "' for help."; + +# rest of file is POD docs +=pod + +=head1 NAME + +urlmanager + +=head1 SYNOPSIS + +Yet another URL logger for irssi. + +=head1 DESCRIPTION + +Captures URLs in channel, privmsg, and DCC chat messages, logs them to a +file. 
Provides an irssi command to list captured URLs and several commands
to do various things with them (open in browser, download, copy to X11
selection buffer).

This documentation only includes installation instructions. For usage
instructions, install the script and run B</help urlmanager> within irssi.

=head1 INSTALLATION

Copy B<urlmanager.pl> to your B<~/.irssi/scripts> directory (create the
directory if it doesn't exist). For auto-loading when irssi starts,
create a symlink in B<~/.irssi/scripts/autorun>:

=over 4

mkdir -p ~/.irssi/scripts/autorun

cp urlmanager.pl ~/.irssi/scripts

cd ~/.irssi/scripts/autorun

ln -s ../urlmanager.pl .

=back

=head1 CONFIGURATION

All configuration is done from within irssi; read B</help urlmanager>.

=cut