#!/usr/bin/perl # urlmanager script for irssi use warnings; use strict; use Fcntl qw/:flock/; use POSIX qw/strftime/; use Irssi qw/ settings_add_str settings_add_bool settings_add_int settings_get_str settings_get_bool settings_get_int settings_set_str settings_set_bool settings_set_int command command_bind command_unbind signal_emit signal_add_last signal_stop timeout_add timeout_add_once timeout_remove window_find_item/; our $VERSION = "0.1"; our %IRSSI = ( authors => 'Urchlay', contact => 'Urchlay on NewNet', name => 'urlmanager', description => 'Captures URLs said in channel and private messages ' . 'and saves them to a file, also adds several commands for ' . 'listing and opening captured URLs ' . '(based on urlgrab.pl 0.2 by David Leadbetter)', license => 'GNU GPLv2 or later', url => 'none', ); # 20110609 bkw: if irssi was started in a screen session from the console, # then detached, then reattached in an X session, DISPLAY will not be set. # This will confuse the user, as e.g. firefox will silently fail to run. # It won't do any harm to set DISPLAY=:0 if it's not set, and might help... { my $disp = $ENV{DISPLAY}; $ENV{DISPLAY} = ":0" unless $disp; } # Workaround for a heisenbug, see: # http://bugs.irssi.org/index.php?do=details&task_id=242 { package Irssi::Nick } # Color constants. # Irssi.pm doesn't include symbolic mIRC-style color names... # NOTE: if you print e.g. $green . "12345", the "1" will be interpreted # as the 2nd digit of the color! Only good fix is to always put a space: # print $green . " 12345" works OK. # Declarations only; defined in init_colors() our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off); # @urls is a list of anonymous hashes, each representing one URL. # See read_url_file for hash elements. our @urls; # Most-recently-posted URL (the URL only, not a hash). Only used # for avoiding dups (see url_log). our $lasturl = ""; # Have any URLs been captured since the last /ul -delete? This is to # (hopefully) protect the user our $captured_since_delete = 0; # Grr. Printing with print() or Irssi::print(), % chars are interpreted # as irssi formats. This causes URLs containing HTML %-escapes to come # out in weird colors. Using irssi's /echo is apparently the right way # to avoid this... though we get colored -!- in front of every line :( sub echo { command("/echo $_") for @_; } # trim leading/trailing spaces sub trim { $_[0] =~ s/(?:^\s*|\s$)//g; return $_[0]; } # read_url_file: called on script load with the log filename. # returns array of URL hashes, which will be empty if the file # wasn't present or was empty. sub read_url_file { my $file = get_url_log_file(); my @got; open URLLOG, "<$file" or return; flock(URLLOG, Fcntl::LOCK_EX); seek(URLLOG, 0, 1); while() { chomp; my @fields = split " "; push @got, { stamp => $fields[0], nick => $fields[1], channel => $fields[2], url => $fields[3], }; } close URLLOG; return @got; } # rewrite the URL log file from arguments. sub write_url_file { for(@_) { $lasturl = ""; url_log(1, $_->{nick}, $_->{channel}, $_->{url}, $_->{stamp}); } } # Trim the log according to the appropriate settings. sub trim_url_log { my $quiet = shift || 0; my $max_lines = settings_get_int("urlm_max_log_lines") || 0; my $max_age = settings_get_int("urlm_max_log_age") || 0; return unless $max_lines || $max_age; # nothing to do! my @keep_urls; my $trimmed = 0; if($max_age) { for(@urls) { if($_->{stamp} >= (time() - $max_age)) { push @keep_urls, $_; } } } else { @keep_urls = @urls; } if($max_lines && (@keep_urls > $max_lines)) { my $last = $#keep_urls; my $first = $last - $max_lines + 1; @keep_urls = @keep_urls[$first..$last]; } $trimmed = (@urls - @keep_urls); if($trimmed) { clear_url_log(); write_url_file(@keep_urls); print "Trimmed $trimmed URLs from log"; # unless $quiet; } } # Clear the URL log, both the in-memory @urls and the on-disk file. sub clear_url_log { my $file = get_url_log_file(); unlink $file; # or print "Can't delete $file: $!"; @urls = (); } # get_url_log_file: get value of our logfile setting, with # tilde expansion for user's homedir. sub get_url_log_file { my $file = settings_get_str('urlm_log_file'); $file =~ s/^~/$ENV{HOME}/; return $file; } # signal handler for "message public" # extract and log any URLs in the input text. sub url_public { my ($server, $text, $nick, $hostmask, $channel) = @_; my @got = find_urls($text); url_log(0, $nick, $channel, $_) for @got; } # signal handler for "message own_public" and "message own_private" # extract and log any URLs in the input text. sub url_own { my ($server, $text, $channel) = @_; return unless settings_get_bool('urlm_log_own'); my @got = find_urls($text); url_log(0, $server->{nick}, $channel, $_) for @got; } # signal handler for "message private", "message irc notice", # "message irc op_public", "message irc action" # extract and log any URLs in the input text. sub url_private { my ($server, $text, $nick, $hostmask) = @_; my @got = find_urls($text); url_log(0, $nick, $server->{nick}, $_) for @got; } # signal handler for "message topic" # extract and log any URLs in the input text. sub url_topic { my ($server, $channel, $text, $nick, $hostmask) = @_; return if $nick eq $server->{nick}; # don't log own topic changes my @got = find_urls($text); url_log(0, $nick, $channel, $_) for @got; } # signal handler for "channel joined" # extract and log any URLs in the channel topic. sub url_join_topic { my ($chan) = @_; return unless $chan->{topic}; # don't log own topic changes return if $chan->{topic_by} eq $chan->{server}->{nick}; my @got = find_urls($chan->{topic}); url_log(0, $chan->{topic_by}, $chan->{name}, $_) for @got; } # signal handler for "message part" # extract and log any URLs in the input text. sub url_part { return unless settings_get_bool('urlm_log_partquit'); my ($server, $channel, $nick, $hostmask, $text) = @_; return if $nick eq $server->{nick}; # don't log own parts (redundant?) my @got = find_urls($text); url_log(0, $nick, $channel, $_) for @got; } # signal handler for "message quit" # extract and log any URLs in the input text. sub url_quit { return unless settings_get_bool('urlm_log_partquit'); my ($server, $nick, $hostmask, $text) = @_; return if $nick eq $server->{nick}; # don't log own quits (redundant?) my @got = find_urls($text); url_log(0, $nick, $server->{nick}, $_) for @got; } # signal handler for "dcc chat message" # extract and log any URLs in the input text. # TODO: test this! sub url_dccmsg { my ($dcc, $text) = @_; my @got = find_urls($text); url_log(0, $dcc->{nick}, $dcc->{server}->{nick}, $_) for @got; } # print_url_line: # print one formatted (colorful) line of /ul output sub print_url_line { my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_; echo sprintf("%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s %s%s", $bold_on, $num, $bold_off, $green, $stamp, $red, $nick, $yellow, $channel, $purple, $url, $color_off); } sub url_list_cmd { # bound to /ul (sorry, this sub is a mess) my $do_delete = 0; my $listed = 0; my @keep_urls; my $arg = shift || ""; $arg = lc $arg; $arg = trim($arg); if($arg =~ /^-delete/) { $arg =~ s/^-delete\s*//; $do_delete++; if($arg eq '') { print "/ul -delete requires a parameter! (/ul help for details)"; return; } } if(not @urls) { print "No URLs in list!"; return; } my ($start, $end, $nick, $regex, $channel); $arg = "-" if $arg eq 'all'; my $invert = 0; if($arg =~ /^!(.*)/) { $arg = $1; $invert = 1; } if($arg eq "") { $start = @urls-10; $start = 0 if $start < 0; $end = $#urls; } elsif($arg =~ /^[&#](.*)/) { $channel = $1; } elsif($arg =~ /^\/(.*)\/?/) { $regex = $1; } elsif($arg =~ /^\d+$/) { $start = $end = $arg; } elsif($arg =~ /^[-\d]+$/) { ($start, $end) = split /-/, $arg, 2; $start = 0 if $start eq ""; $end = $#urls if $end eq ""; } else { $nick = $arg; } my $count = 0; my @to_list; for(@urls) { my $list = 0; if($nick && (lc($_->{nick}) =~ ("^" . quotemeta(lc $nick)))) { $list++; } elsif($regex) { $list++ if $_->{url} =~ /$regex/i; } elsif($channel) { $list++ if $_->{channel} =~ /^[#&]?$channel$/i; } elsif(defined($start) && defined($end)) { $list++ if $count >= $start && $count <= $end; } $list = !$list if $invert; if($list) { $listed++; push @to_list, [ $count, $_ ]; } elsif($do_delete) { push @keep_urls, $_; } $count++; } if(@to_list) { # print the list if anything's supposed to be in it my $maxnick = 4; my $maxchan = 7; for(@to_list) { my ($num, $u) = @$_; my $len = length($u->{nick}); $maxnick = $len if $len > $maxnick; $len = length($u->{channel}); $maxchan = $len if $len > $maxchan; } print_url_line($maxnick, $maxchan, "#", "When", "Nick", "Channel", "URL"); for(@to_list) { my ($num, $u) = @$_; my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp})); print_url_line($maxnick, $maxchan, $num, $stamp, $u->{nick}, $u->{channel}, $u->{url}); } } if($do_delete) { # process -delete flag my $deleted = scalar @urls - scalar @keep_urls; if(not $deleted) { print "No URLs deleted"; return; } my $file = get_url_log_file(); rename($file, "$file~") or print "Warning: can't backup log file: $!"; clear_url_log(); write_url_file(@keep_urls); print $red . "These " . $deleted . " URLs have been deleted!" . $color_off . " (" . @urls . " remain)"; $captured_since_delete = 0; } else { # no -delete flag, show summary print "Listed $listed of $count URLs"; } } sub urlm_say { } sub urlm_undo_delete { my $yes = ($_[0] eq '-yes'); if($captured_since_delete && (not $yes)) { print "urlm_undo_delete: doing this will throw away some URLs that " . "were captured since the last delete. Re-run with '-yes' to do it " . "anyway."; return; } my $file = get_url_log_file(); my @oldurls = @urls; clear_url_log(); if(not(rename("$file~", $file))) { print "Can't restore log file: $!"; @urls = @oldurls; write_url_file(); return; } @urls = read_url_file(); print "Restored " . @urls . " URLs from backup"; $captured_since_delete = 0; } # get_url_from_number: # Returns a URL hash from @urls, given the index into the array. # Returns undef if index is invalid or non-existent. # Accepts negative numbers as meaning "nth from the end of the list" # If index is empty string, returns the highest-numbered (most recent) URL. # If index is non-empty, non-numeric, then treat as a nick and open last # URL by that nick. sub get_url_from_number { my $arg = shift; if($arg eq '') { $arg = $#urls; } elsif($arg =~ /-\d+$/) { $arg = $#urls + $arg; } if($arg !~ /^\d+$/) { # print("Non-numeric URL number '$arg'"); # return; $arg = trim($arg); for(my $i = $#urls; $i >= 0; $i--) { my $url = $urls[$i]; return $url if lc($arg) eq lc($url->{nick}); } print("Can't find any URLs from nick '$arg'"); return; } if($arg > $#urls) { print("No such URL number '$arg'"); return; } return $urls[$arg]; } # url_open_cmd: open a URL with the given browser (auto-guesses which browser # to use if $browser is ''). sub url_open_cmd { my ($urlnum, $browser) = @_; $browser = '' if ref $browser; # means it's a Server object my $url = get_url_from_number($urlnum); return if not $url; my $link = $url->{url}; if(not $browser) { # guess browser... # check for wget first... if($link =~ m{/[^/]+\.(\w+)$}) { my $ext = lc $1; for(split " ", settings_get_str('urlm_wget_extensions')) { if($ext eq lc($_)) { url_open_wget_cmd($urlnum); return; } } } # not a wget extension, check browser override patterns $browser = settings_get_str('urlm_default_browser'); my @overrides = read_browser_overrides(); OVERRIDE: for(@overrides) { my ($tag, $pats) = @$_; for(@$pats) { if($link =~ /$_/i) { $browser = $tag; last OVERRIDE; } } } } my @browsers = read_browser_list(); my ($tag, $name, $format); for(@browsers) { if($_->{tag} eq $browser) { ($tag, $name, $format) = ($_->{tag}, $_->{name}, $_->{command}); last; } } echo("$name - " . $link . " (" . $url->{nick} . ")"); $link =~ s/'/%27/g; # be nice to the shell, escape single quotes $link =~ s/\(/%28/g; # be nice to firefox, escape parens $link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails* # if the url contains any () chars! my $cmd = sprintf($format, $link) . " &>/dev/null &"; system($cmd); } # Open with wget in an irssi window # TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?) our $wgetcount = 1; sub url_open_wget_cmd { # bound to /urlm_wget /wget my $arg = shift; my @args = split " ", $arg; my $urlnum = ""; if(@args && ($args[-1] =~ /^\d+$/)) { $urlnum = pop @args; } my $url = get_url_from_number($urlnum); return if not $url; my $more_args = join(" ", @args); $more_args .= " " if $more_args; my $dir = settings_get_str('urlm_wget_dl_dir'); $dir =~ s/^~/$ENV{HOME}/; $dir = "." if not $dir; my $size = settings_get_int('urlm_wget_split_size'); if($size > 0) { command("/window new split"); command("/window size $size"); } else { # size == 0, means "do not split" command("/window new hidden"); } # find an unused window name... my $name = "urlm_wget_" . ($wgetcount++); while(window_find_item($name)) { $name = "urlm_wget_" . ($wgetcount++); } command("/window name $name"); if(settings_get_bool('urlm_wget_autoclose')) { my $delay = settings_get_int('urlm_wget_autoclose_delay'); if($delay) { print "This window will close $delay seconds after download is done"; } else { print "This window will close when download is done"; } } else { print "Use " . $yellow . "/window close $name" . $color_off . " to close this window"; } my $args = trim(settings_get_str('urlm_wget_extra_args')); $args .= " " if $args; my $wget_bin = settings_get_str('urlm_wget_path'); $wget_bin =~ s/^~/$ENV{HOME}/; $wget_bin = "wget" if not $wget_bin; command( "/exec -nosh " . "-name $name " . "$wget_bin " . "-P $dir " . $args . $more_args . $url->{url}); if($size) { # if using a hidden window, leave it focused command("/window last"); } } # Close a window by name. This seems like a kludge... sub close_window { return unless window_find_item($_[0]); # don't close if already closed! command("/window goto " . $_[0]); command("/window close"); } # signal handler for "exec remove", autocloses our wget windows when # the wget processes exit, if requested. sub sig_exec_remove { my ($proc, $status) = @_; return unless settings_get_bool('urlm_wget_autoclose'); # target_win->name will be "" if window already closed! my $name = $proc->{target_win}->{name} || ""; return unless $name =~ /^urlm_wget_\d+$/; return unless window_find_item($name); # don't close if already closed! my $delay = settings_get_int('urlm_wget_autoclose_delay'); if($delay > 0) { timeout_add_once($delay * 1000, "close_window", $name); } else { close_window($name); } } # kill a process by name (send SIGTERM) sub kill_proc { command("/exec -15 " . $_[0]); } # kill wget processes on manual window close! # catch signal "window destroyed" and figure out a way to avoid # adding a timeout to close the window (since it's in the middle # of closing now...) sub sig_window_destroyed { my $name = $_[0]->{name} || ""; return unless $name =~ /^urlm_wget_\d+$/; # use a timeout to kill the process, instead of killing it directly. # why? to avoid possible race condition where sig_exec_remove() # tries to close the window that triggered this call to # sig_window_destroyed(). timeout_add_once(1000, "kill_proc", $name); } # find_urls: extract all URLs from the input text, returns a list # (which may be empty). # Be VERY permissive about what we consider a URL. # 20100614 bkw: be a little less permissive # 20140530 bkw: stop catching dupe http://whatever and https://whatever # original sub: ##sub find_urls { ## my @got = ($_[0] =~ m{(?:https?|ftp)://\S+}g); ## push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g; ## push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g; ## s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation ## return @got; ##} # new version: sub find_urls { my @got; while($_[0] =~ s{(?:https?|ftp)://\S+}{}) { push @got, $&; } push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g; push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g; s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation return @got; } # Annoying bots have a tendency to do this: # check this out: http://www.blahblah.blah/path/to/stuff.html # Title: Stuff (at www.blahblah.blah) # To me, this is about the most useless function a bot can serve, plus it # breaks the /uo command. # just_domain() returns true if $1 is just the domain part of $2 sub just_domain { my ($new, $old) = @_; s/^(ht|f)tps?:\/\/// for ($new, $old); $old =~ s/\/.*//; return 0 if $new =~ /\/./; return $new eq $old; } # url_log: appends URL to the URL log file and to the @urls array. # Locks the file before writing, so should be safe even with multiple # instances of irssi. # $relog should be false if capturing a new URL from channel/msg text, # or true if re-logging an old URL (e.g. /ul -delete does this) sub url_log { my($relog, $nick, $channel, $url, $stamp) = @_; $nick =~ s/!.*//; $stamp = time() unless $stamp; return if lc $url eq lc $lasturl; # a tiny bit of protection from spam/flood return if just_domain($url, $lasturl); $lasturl = $url; my $file = get_url_log_file(); open(URLLOG, ">>$file") or return; flock(URLLOG, Fcntl::LOCK_EX); seek(URLLOG, 0, 2); print URLLOG time . " $nick $channel $url\n"; close(URLLOG); push @urls, { stamp => $stamp, nick => $nick, channel => $channel, url => $url, }; if(not $relog) { if(not settings_get_bool('urlm_quiet_capture')) { my $on = ""; if($channel =~ /^#/) { $on = " on " . $green . $channel . $color_off; } echo "Captured URL #" . $#urls . " " . $purple . $url . $color_off . " from " . $yellow . $nick . $color_off . $on; } # trim_url_log(); $captured_since_delete++; } } # urlm_help generates /help topics for the browser commands. # The help for all the other commands is stored in text files in # ~/.irssi/help sub urlm_help { my $arg = shift; $arg = lc trim($arg); my %bhelp; our %urlm_help; for(read_browser_list()) { my $text = uc($_->{tag}) . " []\n\n" . "Open a URL with the external browser '" . $_->{name} . "', using the command:\n" . $_->{command} . "\n\n" . "If [] is omitted, the most recent URL will be opened.\n"; $bhelp{$_->{tag}} = $text; $bhelp{"urlm_open_" . $_->{tag}} = $text; } my $help = $bhelp{$arg} || return; signal_stop(); print $help; } sub write_browser_overrides { my @list = @{$_[0]}; my @strings; for(@list) { my ($tag, $pats) = @$_; push @strings, join(":", $tag, @$pats); } my $setting = join("::", @strings); settings_set_str('urlm_browser_overrides', $setting); } sub read_browser_overrides { my @result; my $list = settings_get_str('urlm_browser_overrides'); my @entries = split /::/, $list; for(@entries) { my @items = split /:/; my $tag = shift @items; push @result, [ $tag, \@items ]; } return @result; } sub write_browser_list { my @list = @{$_[0]}; my @strings; for(@list) { push @strings, join(":", $_->{tag}, $_->{name}, $_->{command}); } my $setting = join("::", @strings); #print "/set urlm_browsers $setting"; settings_set_str('urlm_browsers', $setting); signal_emit("setup changed"); } sub read_browser_list { my @result; my $list = settings_get_str('urlm_browsers'); my @entries = split /::/, $list; for(@entries) { my @items = split /:/; my $hash = { tag => $items[0], name => $items[1], command => $items[2], }; push @result, $hash; } return @result; } # commands: # urlm_add_browser :: sub urlm_add_browser { my $arg = shift || ""; $arg =~ trim($arg); if($arg !~ /^[^:]+:[^:]+:[^:]+$/) { print "Usage: /urlm_add_browser tag:name:command"; return; } my ($tag, $name, $cmd) = split /:/, $arg; $tag = trim($tag); $tag = lc $tag; $name = trim($name); if($tag =~ /\W/) { print "/urlm_add_browser: tag must consist of only " . "letters, numbers, or underscores (_), not '$tag'"; return; } if($tag eq 'wget') { print "/urlm_add_browser: 'wget' is reserved; use a different tag"; return; } if($cmd !~ /'[^']*\%s[^']*'/) { print "/urlm_add_browser: command must contain '\%s' (single-quoted)"; return; } my @browsers = read_browser_list(); my $found = 0; for(@browsers) { if(lc($_->{tag}) eq $tag) { print "Replaced old definition of $tag"; $_->{name} = $name; $_->{command} = $cmd; $found++; last; } } if(not $found) { push @browsers, { tag => $tag, name => $name, command => $cmd }; print "push \@browsers, { tag => $tag, name => $name, command => $cmd }"; print "Added browser definition $tag"; } write_browser_list(\@browsers); } # urlm_del_browser sub urlm_del_browser { my $arg = shift || ""; $arg = trim($arg); $arg = lc $arg; return unless $arg; # urlm_del_override() already prints "Browser not defined" if it # wasn't defined, so no need to have urlm_del_browser() print it again. urlm_del_override("$arg all"); my @browsers = read_browser_list(); my @keep_browsers; my $found = 0; for(@browsers) { if(lc($_->{tag}) eq $arg) { print "Deleted definition of $arg"; $found++; } else { push @keep_browsers, $_; } } write_browser_list(\@keep_browsers) if $found; } # urlm_list_browsers sub urlm_list_browsers { my @browsers = read_browser_list(); for(@browsers) { print( (settings_get_str('urlm_default_browser') eq $_->{tag} ? "[*]" : " ") . "Tag: $bold_on" . $_->{tag} . "$bold_off, " . "Name: $bold_on" . $_->{name} . "$bold_off, " . "Command: $bold_on" . $_->{command} . "$bold_off"); } } # urlm_add_wget_ext sub urlm_add_wget_ext { my $arg = shift || ""; $arg = trim($arg); $arg = lc $arg; if(not $arg) { print "Usage: /urlm_add_wget_ext "; return; } my @list = split " ", settings_get_str('urlm_wget_extensions'); if(grep { $_ eq $arg } @list) { print "$arg is already in the wget extensions list"; return; } push @list, $arg; settings_set_str('urlm_wget_extensions', join(" ", @list)); command("/set urlm_wget_extensions"); } # urlm_del_wget_ext sub urlm_del_wget_ext { my $arg = shift || ""; $arg = trim($arg); $arg = lc $arg; if(not $arg) { print "Usage: /urlm_del_wget_ext "; return; } my @list = split " ", settings_get_str('urlm_wget_extensions'); if(!grep { $_ eq $arg } @list) { print "$arg is not in the wget extensions list"; return; } @list = grep { $_ ne $arg } @list; settings_set_str('urlm_wget_extensions', join(" ", @list)); command("/set urlm_wget_extensions"); } # urlm_list_overrides [] sub urlm_list_overrides { my $arg = shift || ""; $arg = trim($arg); my $found = 0; my @overrides = read_browser_overrides(); for(@overrides) { my ($browser, $pats) = @$_; if($arg eq $browser || not $arg) { $found++; my $count = 1; for(@$pats) { print $browser . "[$count]: " . $_; $count++; } } } if(not $found) { if($arg) { print "No overrides for browser '$arg'"; } else { print "No browser overrides"; } } } # urlm_add_override sub urlm_add_override { my $arg = shift || ""; $arg = trim($arg); # do not lc($arg), the command may need caps! my ($browser, $pattern) = split " ", $arg; $browser = lc $browser; if(not ($browser and $pattern)) { print "Usage: /urlm_add_override "; return; } if(!grep { $_->{tag} eq $browser } read_browser_list()) { print "Browser $browser not defined in browser list"; return; } eval "qr{$pattern}"; if($@) { print "Pattern $pattern is not a valid Perl regex: $@"; return; } my @overrides = read_browser_overrides(); my $found = 0; for(@overrides) { my ($tag, $pats) = @$_; next unless $tag eq $browser; push @$pats, $pattern; $found++; } if(not $found) { push @overrides, [ $browser, [ $pattern ] ]; } print "Added override for $browser: $pattern"; write_browser_overrides(\@overrides); } # urlm_del_override | sub urlm_del_override { my $arg = shift || ""; $arg = trim($arg); $arg = lc $arg; my ($browser, $number) = split " ", $arg; if(not($browser and $number)) { print "Usage: /urlm_del_override |all" } if(!grep { $_->{tag} eq $browser } read_browser_list()) { print "Browser $browser not defined in browser list"; return; } if($number ne 'all' && $number !~ /^[1-9]\d*$/) { print "Bad override '$number': must be a number >= 1, or 'all'"; return; } my @overrides = read_browser_overrides(); my @keep_overrides = (); my $found = 0; for(@overrides) { my ($tag, $pats) = @$_; if($tag ne $browser) { push @keep_overrides, $_; next; } $found += @$pats, next if $number eq 'all'; if($number > @$pats) { print "Value $number out of range"; next; } $found++; undef $pats->[$number - 1]; @$pats = grep { defined $_ } @$pats; push @keep_overrides, $_ if @$pats; } if($found) { print "Deleted $found overrides"; write_browser_overrides(\@keep_overrides); } else { print "No matching overrides"; } } sub init_colors { if(settings_get_bool('urlm_use_bold')) { $bold_on = "\002"; $bold_off = "\002"; } else { $bold_on = ""; $bold_off = ""; } if(settings_get_bool('urlm_use_color')) { $green = "\0033"; $red = "\0034"; $yellow = "\0037"; $purple = "\0036"; $color_off = "\003"; } else { $green = ""; $red = ""; $yellow = ""; $purple = ""; $color_off = ""; } } # init_browsers(): dynamic bindings. Each browser tag gets bound to # /urlm_open_$tag, and (if short commands enabled) to /$tag. our @bound_refs; sub init_browsers { # for this to work, the code ref can *NOT* be stored in a "my" var # I think this is a bug in irssi, or possibly perl, but maybe I'm # just being dumb... for(@bound_refs) { command_unbind($_->[0], $_->[1]); } @bound_refs = (); my @browsers = read_browser_list(); for(@browsers) { my $code = 'sub { url_open_cmd($_[0], "'. ($_->{tag}) . '"); };'; my $cmd = 'urlm_open_' . $_->{tag}; push @bound_refs, [ $cmd, eval $code ]; # again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge command_bind($cmd, $bound_refs[$#bound_refs]->[1]); if(settings_get_bool('urlm_short_cmds')) { my $shortcmd = $_->{tag}; push @bound_refs, [ $shortcmd, $bound_refs[$#bound_refs]->[1] ]; command_bind($shortcmd, $bound_refs[$#bound_refs]->[1]); } } command_unbind("ul", "url_list_cmd"); command_unbind("uo", "url_open_cmd"); command_unbind("wget", "url_open_wget_cmd"); if(settings_get_bool('urlm_short_cmds')) { command_bind("ul", "url_list_cmd"); command_bind("uo", "url_open_cmd"); command_bind("wget", "url_open_wget_cmd"); } } sub init_settings { # call only once, at script load! # Where shall we save the URL log? settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog"); # Where is the wget binary? Absolute path, or "wget" (searches PATH) settings_add_str('urlmanager', 'urlm_wget_path', "wget"); # Where should wget save files? settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~"); # Extra arguments to pass to wget... settings_add_str('urlmanager', 'urlm_wget_extra_args', ""); # Do we log URLs from /part and /quit messages? Disabled by default # because so many people always /quit with the same spammish URL # e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]" settings_add_bool('urlmanager', 'urlm_log_partquit', 0); # Do we log URLs from our own public/private messages? settings_add_bool('urlmanager', 'urlm_log_own', 1); # Cosmetics: settings_add_bool('urlmanager', 'urlm_short_cmds', 1); settings_add_bool('urlmanager', 'urlm_use_bold', 1); settings_add_bool('urlmanager', 'urlm_use_color', 1); # Browser definitions. A double-colon-separated list. Each list item # is a single-colon separated list of (tag, name, command_format). # You may add browsers, and they will work as commands # without changing any other code. # The browser commands need to be non-blocking, and any stdout/err from # them will be ignored. # The %s gets replaced with the actual URL. *ALWAYS* use single-quotes # (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s! # Failure to comply is a security hole! settings_add_str('urlmanager', 'urlm_browsers', 'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' . '::' . 'ie:Internet Explorer:ie6 \'%s\'' . '::' . 'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' . '::' . 'ut:links+xterm:xterm -e "links \'%s\'"' . '::' . 'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i'); # Default browser for /uo and /urlm_open commands settings_add_str('urlmanager', 'urlm_default_browser', 'ff'); # /uo and /urlm_open check this list. # Double-colon-separated list, each item is a single-colon-separated # list consisting of a browser tag and one or more patterns. # If a URL matches one of these # patterns, the browser tag will be used as the browser to open the URL # with, instead of the default. settings_add_str('urlmanager', 'urlm_browser_overrides', 'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com'); # If /uo or /urlm_open get a URL ending in one of these file extensions, # it will be downloaded with wget instead of being opened in a browser. settings_add_str('urlmanager', 'urlm_wget_extensions', 'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z'); # trim log to this many lines. Use with urlm_log_trim_interval and/or # urlm_log_trim_startup. Set to 0 to disable. settings_add_int('urlmanager', 'urlm_max_log_lines', 100); # trim log to this many seconds. Use with urlm_log_trim_interval and/or # urlm_log_trim_startup. Set to 0 to disable. settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7); # trim the log on script load. settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0); # auto-trim log this often (seconds). Set to 0 to disable. settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60); # these control the behavior of windows created with /urlm_wget or /wget settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1); settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60); settings_add_int('urlmanager', 'urlm_wget_split_size', 0); # say "Captured URL #xxx http://whatever from whoever" every time a URL # is captured? settings_add_bool('urlmanager', 'urlm_quiet_capture', 0); # TODO: support these: # channels/nicks/sites we don't want to log #settings_add_str('urlmanager', 'urlm_ignore_channels'); #settings_add_str('urlmanager', 'urlm_ignore_urls'); # If true, go through the entire list every time a URL is logged, # checking for duplicates #settings_add_bool('urlmanager', 'urlm_ignore_dups'); } our $trim_timeout_tag; sub init_trim_timeout { timeout_remove($trim_timeout_tag) if($trim_timeout_tag); my $millis = settings_get_int('urlm_log_trim_interval') * 1000; if($millis > 0) { $trim_timeout_tag = timeout_add($millis, "trim_url_log", 1); } } # apply_settings: called on signal "setup changed" (when any /set urlm_* # changes value). # Anything that depends on the values of any of the settings should be # (re)initialized here. sub apply_settings { init_colors(); init_browsers(); init_trim_timeout(); @urls = read_url_file(); } sub init_signals { # call only once, at script load! signal_add_last("message public", "url_public"); signal_add_last("message private", "url_private"); signal_add_last("message irc notice", "url_private"); signal_add_last("message irc op_public", "url_private"); signal_add_last("message irc action", "url_private"); signal_add_last("dcc chat message", "url_dccmsg"); signal_add_last("message topic", "url_topic"); signal_add_last("channel joined", "url_join_topic"); signal_add_last("setup changed", "apply_settings"); signal_add_last("message part", "url_part"); signal_add_last("message quit", "url_quit"); signal_add_last("message own_public", "url_own"); signal_add_last("message own_private", "url_own"); signal_add_last("exec remove", "sig_exec_remove"); signal_add_last("window destroyed", "sig_window_destroyed"); } sub init_static_binds { # call only once, at script load! # These binds are always on: command_bind("urlm_list", "url_list_cmd"); command_bind("urlm_open", "url_open_cmd"); command_bind("urlm_wget", "url_open_wget_cmd"); command_bind("urlm_add_browser", "urlm_add_browser"); command_bind("urlm_del_browser", "urlm_del_browser"); command_bind("urlm_list_browsers", "urlm_list_browsers"); command_bind("urlm_add_wget_ext", "urlm_add_wget_ext"); command_bind("urlm_del_wget_ext", "urlm_del_wget_ext"); command_bind("urlm_list_overrides", "urlm_list_overrides"); command_bind("urlm_add_override", "urlm_add_override"); command_bind("urlm_del_override", "urlm_del_override"); command_bind("urlm_trim_log", "trim_url_log"); command_bind("urlm_undo_delete", "urlm_undo_delete"); command_bind("help", "urlm_help"); } # Add per-user help dir to help_path, if not already present. sub init_help_path { my $dir = "$ENV{HOME}/.irssi/help"; my $help_path = settings_get_str('help_path'); return if grep { $_ eq $dir } split /:/, $help_path; $help_path .= ":$dir"; settings_set_str('help_path', $help_path); signal_emit('setup changed'); } # bind signals and commands, now that everything's defined. init_settings(); init_signals(); init_static_binds(); init_colors(); init_browsers(); init_help_path(); @urls = read_url_file(); trim_url_log() if settings_get_bool('urlm_log_trim_startup'); init_trim_timeout(); # make sure no leftover backup is lurking from a long time ago... unlink(get_url_log_file() . "~"); # Print a helpful message for the user on script load... print $bold_on . "urlmanager.pl" . $bold_off . " loaded (" . @urls . " URLs), type '" . $yellow . "/help urlmanager" . $color_off . "' for help."; # rest of file is POD docs =pod =head1 NAME urlmanager =head1 SYNOPSIS Yet another URL logger for irssi. =head1 DESCRIPTION Captures URLs in channel, privmsg, and DCC chat messages, logs them to a file. Provides an irssi command to list captured URLs and several commands to do various things with them (open in browser, download, copy to X11 selection buffer). This documentation only includes installation instructions. For usage instructions, install the script and run B within irssi. =head1 INSTALLATION Copy B to your B<~/.irssi/scripts> directory (create the directory if it doesn't exist). For auto-loading when irssi starts, create a symlink in B<~/.irssi/scripts/autorun>: =over 4 mkdir -p ~/.irssi/scripts/autorun cp urlmanager.pl ~/.irssi/scripts cd ~/.irssi/scripts/autorun ln -s ../urlmanager.pl . =back =head1 CONFIGURATION All configuration is done from within irssi; read B. =cut