diff options
author | B. Watson <yalhcru@gmail.com> | 2015-08-12 04:30:10 -0400 |
---|---|---|
committer | B. Watson <yalhcru@gmail.com> | 2015-08-12 04:30:10 -0400 |
commit | 4ef0f5708e20509e427c706acedff6a22bf0faaa (patch) | |
tree | d12262b419530f40a3ef7cd24998f047ed443ebf | |
download | irssi-urlmanager-4ef0f5708e20509e427c706acedff6a22bf0faaa.tar.gz |
initial commit
35 files changed, 1852 insertions, 0 deletions
@@ -0,0 +1,10 @@ +Urchlay's url manager plugin for irssi. no README yet, for now see the +POD docs: + +$ perldoc scripts/urlmanager.pl + +Quickstart: + +mkdir -p ~/.irssi/scripts ~/.irssi/help +cp scripts/* ~/.irssi/scripts +cp help/* ~/.irssi/help diff --git a/help/urlm_add_browser b/help/urlm_add_browser new file mode 100644 index 0000000..d2c8327 --- /dev/null +++ b/help/urlm_add_browser @@ -0,0 +1,36 @@ + +URLM_ADD_BROWSER <tag>:<name>:<cmd-format> + +Define a new browser. The arguments must be separated by : (colon) +characters, since <name> and <cmd-format> may contain spaces (<cmd-format> +generally *requires* spaces). + +<tag> is a short unique identifier for the browser. Examples are "ff" for +Firefox or "moz" for Mozilla. The tag will be used to define a new irssi +command /urlm_open_<tag>, and (if urlm_short_cmds is ON) a new /<tag> +command. Tags must consist of only letters, numbers, or underscores, +and the special tag "wget" is reserved. + +<name> is the full human-readable name of the browser. It may contain +any characters you like, except for colons, and is only used for +identification purposes (e.g. /urlm_list_browsers output). + +<cmd-format> is the sprintf() format used to generate the full command +line required to run the browser. In simpler terms, it is the command +that runs the browser, with %s in place of the URL. It may contain +any characters other than colons. + +Examples: + /urlm_add_browser ff:Firefox:firefox -remote 'openurl(%s,new-tab)' + /urlm_add_browser ie:Internet Explorer:wine iexplore.exe '%s' + +The above examples define new /urlm_open_ff and /urlm_open_ie +commands. If urlm_short_cmds is ON, they also define new /ff and /ie +commands. + +Note: in <cmd-format>, the %s must occur inside a set of single-quotes. +It need not be the only thing inside the quotes, however (see the firefox +example). This is because a shell is used to spawn the external program. 
+The URLM_ADD_BROWSER command checks for the quotes, and refuses to +allow a <cmd-format> that's missing the quotes or the %s. + diff --git a/help/urlm_add_override b/help/urlm_add_override new file mode 100644 index 0000000..167808f --- /dev/null +++ b/help/urlm_add_override @@ -0,0 +1,15 @@ + +URLM_ADD_OVERRIDE <browser> <pattern> + +Add an override, for use with /urlm_open (q.v.) + +<browser> is the "tag", previously defined with URLM_ADD_BROWSER. + +<pattern> is a Perl regular expression (regex). Any URL matching the +regex (case-insensitive) will be open with <browser>. + +It's probably best to avoid conflicting/overlapping patterns, although +it can be done if you remember that the regexes are checked against each +URL in the order they are displayed in /urlm_list_overrides (which is +in fact the order in which they were defined). + diff --git a/help/urlm_browser_overrides b/help/urlm_browser_overrides new file mode 100644 index 0000000..1088280 --- /dev/null +++ b/help/urlm_browser_overrides @@ -0,0 +1,6 @@ + +Setting: URLM_BROWSER_OVERRIDES (private) + + Do not change directly; use /urlm_add_override, /urlm_del_override, + and /urlm_list_overrides instead. + diff --git a/help/urlm_browsers b/help/urlm_browsers new file mode 100644 index 0000000..eb89f27 --- /dev/null +++ b/help/urlm_browsers @@ -0,0 +1,6 @@ + +Setting: URLM_BROWSERS (private) + + Do not change directly; use /urlm_add_browser, /urlm_del_browser, + and /urlm_list_browsers instead. + diff --git a/help/urlm_default_browser b/help/urlm_default_browser new file mode 100644 index 0000000..d4d0448 --- /dev/null +++ b/help/urlm_default_browser @@ -0,0 +1,5 @@ + +Setting: URLM_DEFAULT_BROWSER (default: ff) + +Self-explanatory. One of the browser tags from URLM_LIST_BROWSERS. 
+ diff --git a/help/urlm_del_browser b/help/urlm_del_browser new file mode 100644 index 0000000..4afd163 --- /dev/null +++ b/help/urlm_del_browser @@ -0,0 +1,12 @@ + +URLM_DEL_BROWSER <tag> + +Delete a browser from the browser-definition list. +<tag> must have been previously defined with URLM_ADD_BROWSER. + +Deleting a browser also deletes any browser overrides defined for +that browser. + +Example: to delete the "ie" definition (see above): + /urlm_del_browser ie + diff --git a/help/urlm_del_override b/help/urlm_del_override new file mode 100644 index 0000000..a1d03a9 --- /dev/null +++ b/help/urlm_del_override @@ -0,0 +1,7 @@ + +URLM_DEL_OVERRIDE <browser> <number>|all + +Delete an override. <number> is the number given in /urlm_list_overrides. +If "all" is used instead of a number, all overrides for <browser> will +be deleted. + diff --git a/help/urlm_list b/help/urlm_list new file mode 100644 index 0000000..6ec14ae --- /dev/null +++ b/help/urlm_list @@ -0,0 +1,55 @@ + +URLM_LIST [-delete] [<list>] + +Alternate command name: UL + +(Each command has a short, easy-to-type, but cryptic alternate name. If +you don't like cryptic command names, "/set urlm_short_cmds off" will +disable the short names). + +List URLs captured from channel and privmsg text. With no <list>, lists +the last 10 URLs. With -delete, lists and deletes listed URLS (the <list> is +required with -delete). + +If <list> is given, it may be: + +<number> + List URL #number only (example: /urlm_list 10) + +[<start>]-[<end>] + List URLs from #start to #end (example: /urlm_list 20-30) + if start omitted, beginning of list assumed. + if end omitted, end of list assumed. + "/urlm_list 10-" list from #10 to the end of the list, + "/urlm_list -" lists all URLs (same as "/urlm_list all"), + "/urlm_list -10" lists from start-of-list to #10). + +all + Lists all URLs. "/urlm_list all" is the same as "urlm_list -". + +<nick> + List URLs posted by user <nick>. 
+ +<#channel> or <&channel> + List URLs posted in <channel> (example: /urlm_list #irssi). + +</urlmatch> + List URLs matching <urlmatch> (example: /urlm_list /google.com). + +Any <list> may be preceded with ! to invert its sense. Examples: + /urlm_list !/yahoo.com - List all URLs not matching yahoo.com + /urlm_list !bob - List all URLs not posted by nick "bob" + /urlm_list !#badchannel - List all URLs not posted in #badchannel + +Note: <nick>, <#channel>, </urlmatch> are all treated as case- + insensitive regular expressions. <nick> and <#channel> matches are + anchored at the start and end of the match (as though they were + prefixed with "^" and followed by "$"). </urlmatch> matches are + not anchored. + +Warning: BE CAREFUL with the -delete option! There is no confirmation, +and only one level of undo. It is recommended that you first run /urlm_list +<list> without the -delete option to be sure which items the -delete will +affect. If you've just deleted some URLs and want to restore them, run +/urlm_undo_delete + diff --git a/help/urlm_list_browsers b/help/urlm_list_browsers new file mode 100644 index 0000000..692b19e --- /dev/null +++ b/help/urlm_list_browsers @@ -0,0 +1,11 @@ + +URLM_LIST_BROWSERS + +Show the browser definition list. Items are listed in the same +format as used to define them: <browser>:<name>:<cmd-format>. The +default browser is marked with [*]. + +Note: urlm_add_browser, urlm_del_browser, and urlm_list_browsers store +the actual browser list as a string in the setting urlm_browsers. The +format of this setting is prickly; you shouldn't modify it directly. + diff --git a/help/urlm_list_overrides b/help/urlm_list_overrides new file mode 100644 index 0000000..ee1850c --- /dev/null +++ b/help/urlm_list_overrides @@ -0,0 +1,6 @@ + +URLM_LIST_OVERRIDES [<browser>] + +List override patterns for <browser> (optional: default w/no argument +is to list all overrides for all browsers). 
+ diff --git a/help/urlm_log_file b/help/urlm_log_file new file mode 100644 index 0000000..3a1201d --- /dev/null +++ b/help/urlm_log_file @@ -0,0 +1,16 @@ + +Setting: URLM_LOG_FILE (default: ~/.irssi/urllog) [*] + +This is where URLs are stored. The file is updated immediately after +each URL is captured, and after any URL(s) are deleted. urlmanager +uses file locking (via flock()), so it's safe to run multiple instances +of irssi with the script loaded. + +The log file need not exist when the script is first loaded; it will +be created as soon as the first URL is captured. The directory for the +log file must already exist, though. Tilde expansion (~ meaning home +directory) is supported. + +When this setting is changed, the in-memory URL log is cleared and +repopulated from the new file. + diff --git a/help/urlm_log_own b/help/urlm_log_own new file mode 100644 index 0000000..39178ad --- /dev/null +++ b/help/urlm_log_own @@ -0,0 +1,6 @@ + +Setting: URLM_LOG_OWN + +Boolean; whether or not to capture and log URLs from your own public and +private messages. + diff --git a/help/urlm_log_partquit b/help/urlm_log_partquit new file mode 100644 index 0000000..885a2b7 --- /dev/null +++ b/help/urlm_log_partquit @@ -0,0 +1,7 @@ + +Setting: URLM_LOG_PARTQUIT (boolean, default: off) + +Capture URLs from /part and /quit messages. This is off by default +because so many people use IRC clients that include the client's web +site URL in the quit and part messages (a mild form of spam). + diff --git a/help/urlm_log_trim_interval b/help/urlm_log_trim_interval new file mode 100644 index 0000000..7d13cb9 --- /dev/null +++ b/help/urlm_log_trim_interval @@ -0,0 +1,24 @@ + +Setting: URLM_LOG_TRIM_INTERVAL + + How often (in seconds) do you want urlmanager to check the log and + trim it according to your urlm_max_log_lines and/or urlm_max_log_age + settings? Default is 3600 seconds (1 hour), which is probably OK for + most users. 
If you're in a lot of channels and exchange URLs with lots + of people, you might want to decrease this to keep the log from growing + too much. + + Note: urlm_log_trim_interval has no effect unless one or both of the + urlm_max_log_lines or urlm_max_log_age settings are set to non-zero + values. + + Note: Even with this setting set to zero, log trimming still occurs + at startup (unless urlm_log_trim_startup is OFF), and whenever a URL + is captured. To completely disable log trimming, set both + urlm_max_log_lines and urlm_max_log_age to zero. With these settings, + even manual trimming with /urlm_trim_log will do nothing. + + The lower you set this setting, the more CPU and disk access it requires + (unless you set it to zero, of course). Probably it's a bad idea to + use a value lower than 60 seconds here, under any conditions. + diff --git a/help/urlm_log_trim_startup b/help/urlm_log_trim_startup new file mode 100644 index 0000000..f33be64 --- /dev/null +++ b/help/urlm_log_trim_startup @@ -0,0 +1,7 @@ + +Setting: URLM_LOG_TRIM_STARTUP + + Boolean; controls whether urlmanager trims the log on startup. Has no + effect unless one or both of urlm_max_log_lines or urlm_max_log_age + are set to a non-zero value. + diff --git a/help/urlm_max_log_age b/help/urlm_max_log_age new file mode 100644 index 0000000..9c342ae --- /dev/null +++ b/help/urlm_max_log_age @@ -0,0 +1,17 @@ + +Setting: URLM_MAX_LOG_AGE + + Maximum age (in seconds) for URLs in the log. Set to zero to disable + age-based trimming. Any URLs older than this will be removed from the + log whenever the log is trimmed. 
This happens: + + - At startup (unless urlm_log_trim_startup is OFF) + - Every urlm_log_trim_interval seconds (if urlm_log_trim_interval + is non-zero) + - Any time a URL is captured + - When /urlm_trim_log is run manually + + This means that old URLs persist in the log for up to urlm_log_trim_interval + seconds past their expiration time; if this annoys you, decrease the + urlm_log_trim_interval to reduce the problem. + diff --git a/help/urlm_max_log_lines b/help/urlm_max_log_lines new file mode 100644 index 0000000..68e932c --- /dev/null +++ b/help/urlm_max_log_lines @@ -0,0 +1,11 @@ + +Setting: URLM_MAX_LOG_LINES + + Maximum number of URLs to keep in the log. May be set to zero to + disable trimming by length (in which case, trimming by age may still + be used or not, as desired). + + When set to non-zero, the log will be trimmed to this size by discarding + older (lower-numbered) URLs. See below for explanation of when trimming + is done. + diff --git a/help/urlm_open b/help/urlm_open new file mode 100644 index 0000000..1aac2c2 --- /dev/null +++ b/help/urlm_open @@ -0,0 +1,29 @@ + +URLM_OPEN [<arg>] - Open URL using "best" browser for the URL + +Alternate command name: UO + +Opens a URL according to the following rules: + +- If the URL's file extension (e.g. .zip) is found in urlm_wget_extensions, + the URL is downloaded with wget. (wget is run in an irssi window, so + you can monitor its progress) + +- Otherwise, if the URL matches any of the patterns in urlm_browser_overrides, + the URL is opened with the matching browser. (This is useful in cases + where e.g. you have Firefox as the default browser, but want all + youtube.com pages to open in Internet Explorer). See the + /urlm_add_override and /urlm_del_override commands for details. + +- Otherwise, the default browser (urlm_default_browser setting) is used. + +If <arg> is omitted, the default is to open the most recently captured URL. 
+If <arg> is provided, it must be a single numeric URL number (from the +output of /UL). Negative numbers are allowed, and are interpreted as +counting from the end of the list (so -1 means the second most recent +captured URL). + +Note: you do not need to define wget as a browser. urlmanager will use +its own internal wget support to run wget in a window if the file +extension is listed in urlm_wget_extensions. + diff --git a/help/urlm_quiet_capture b/help/urlm_quiet_capture new file mode 100644 index 0000000..eba5da2 --- /dev/null +++ b/help/urlm_quiet_capture @@ -0,0 +1,16 @@ + +Setting: URLM_QUIET_CAPTURE + +Boolean; whether or not to print "Captured URL #xx http://whatever from nick" +in the current window every time a URL is captured. Default: ON + +I can't think of a reason I'd ever want to turn this off, but maybe +it drives other people crazy... one warning: if you turn this setting off, +your default URL for "/urlm_open" may change between the time you notice +a URL in channel, and the time you try to open it (e.g. because someone in +another channel pasted another URL after the one you saw). + +Future versions of urlmanager may support an option to allow printing of +each captured URL in the window where it was captured, instead of the +current window. + diff --git a/help/urlm_short_cmds b/help/urlm_short_cmds new file mode 100644 index 0000000..3b15d48 --- /dev/null +++ b/help/urlm_short_cmds @@ -0,0 +1,14 @@ + +Setting: URLM_SHORT_CMDS (boolean, default: on) + +Whether or not to bind short command names, such as /ff as an alias +for /urlm_open_ff. The only commands that get short-name aliases are +/urlm_open (alias /uo) and the /urlm_open_* commands (which get the +browser tags as their short aliases). + +When this option is off, every command defined by urlmanager begins +with the string "urlm_", which acts as a sort of a namespace. 
Some +people might hate "polluting" the rest of the command namespace, or +maybe they already have a /uo command defined in another script. If +you're one of these people, /set urlm_short_cmds off. + diff --git a/help/urlm_trim_log b/help/urlm_trim_log new file mode 100644 index 0000000..59d3cf8 --- /dev/null +++ b/help/urlm_trim_log @@ -0,0 +1,12 @@ + +URLM_TRIM_LOG + +Manually trim the log file, according to the urlm_max_log_lines and +urlm_max_log_age settings (q.v.) + +Normally you won't need this command: instead you'll use the +urlm_log_trim_startup and/or urlm_log_trim_interval settings to +automatically keep the log size manageable. /urlm_trim_log might be +useful for recovering from an attack by floodbots that send URLs +to a channel... + diff --git a/help/urlm_undo_delete b/help/urlm_undo_delete new file mode 100644 index 0000000..1fbfea9 --- /dev/null +++ b/help/urlm_undo_delete @@ -0,0 +1,11 @@ + +URLM_UNDO_DELETE [-yes] + +Undo the last /urlm_list -delete operation. There is only one level of +undo, and you can't undo the undo. + +If any URLs have been captured since the last -delete, /urlm_undo_delete +will warn of this fact and refuse to restore, unless -yes is given. If +you override the warning with -yes, you WILL lose any URLs that have been +captured since the delete. + diff --git a/help/urlm_use_bold b/help/urlm_use_bold new file mode 100644 index 0000000..6c41e0b --- /dev/null +++ b/help/urlm_use_bold @@ -0,0 +1,7 @@ + +Setting: URLM_USE_BOLD (boolean, default: on) + +Whether or not you want bold in your /urlm_list output (and a few +other places within urlmanager). + + diff --git a/help/urlm_use_color b/help/urlm_use_color new file mode 100644 index 0000000..c2e453b --- /dev/null +++ b/help/urlm_use_color @@ -0,0 +1,12 @@ + +Setting: URLM_USE_COLOR (boolean, default: on) + +Whether or not you want color in your /urlm_list output. The author +finds the colors useful, but you might hate it. 
+ +Note that urlmanager never sends text to the server (e.g. to a channel +or a query). With bold/color enabled, only you will see them, so you +won't be violating any "no mirc colors" channel rules. + +This setting has no effect if irssi's hide_colors is set to ON. + diff --git a/help/urlm_wget b/help/urlm_wget new file mode 100644 index 0000000..8ed7039 --- /dev/null +++ b/help/urlm_wget @@ -0,0 +1,29 @@ + +URLM_WGET [<wget-args>] [<arg>] + +Alternate command name: WGET + +Downloads a URL with wget. <arg> is treated the same as /urlm_open (see +above). <wget-args>, if present, are passed to the wget process as-is +(see also the urlm_wget_extra_args setting). + +When urlmanager runs wget, it creates a new irssi window (split or hidden) +named urlm_wget_<number> (where <number> is a unique serial number generated +by urlmanager). This window behaves as a normal irssi window (shows up +in /window list, can be closed with /window close, etc). By default, +wget windows are created as hidden windows (change with urlm_wget_split_size +setting), which are automatically closed 60 seconds after the wget process +completes (change with urlm_wget_autoclose and urlm_wget_autoclose_delay +settings). + +To cancel a wget download, you may close its window while wget is still +running. This will kill the wget process, leaving any partially-downloaded +files behind (which may be resumed with wget's -c option). + +Note: If you need to pass a numeric argument to /urlm_wget as its last +argument, do not separate it from its command switch with a space. That +is, instead of "/urlm_wget -T 30", use "/urlm_wget -T30" (or its long +version, "/urlm_wget --timeout=30"). The reason for this restriction +is that urlmanager will interpret the last argument as a URL number, +if it's numeric. 
+ diff --git a/help/urlm_wget_autoclose b/help/urlm_wget_autoclose new file mode 100644 index 0000000..42af64f --- /dev/null +++ b/help/urlm_wget_autoclose @@ -0,0 +1,13 @@ + +Setting: URLM_WGET_AUTOCLOSE + +Boolean; whether or not to automatically close windows created by running +wget (via /wget or /urlm_wget). Default: ON + +With this setting OFF, you'll have to manually close windows created +by /wget or /urlm_wget. + +Changing this setting only affects wget windows created after the change. +Any existing wget windows will still be autoclosed if the old value was +ON, or else they will not be autoclosed if the old value was OFF. + diff --git a/help/urlm_wget_autoclose_delay b/help/urlm_wget_autoclose_delay new file mode 100644 index 0000000..206657b --- /dev/null +++ b/help/urlm_wget_autoclose_delay @@ -0,0 +1,10 @@ + +Setting: URLM_WGET_AUTOCLOSE_DELAY + +Integer; how long to wait after a wget download is complete, before auto- +closing the window. No effect if urlm_wget_autoclose is OFF. + +With urlm_wget_autoclose ON, set urlm_wget_autoclose_delay to zero to +immediately close wget windows, or to a number of seconds to delay +before closing wget windows. + diff --git a/help/urlm_wget_dl_dir b/help/urlm_wget_dl_dir new file mode 100644 index 0000000..da06e82 --- /dev/null +++ b/help/urlm_wget_dl_dir @@ -0,0 +1,14 @@ + +Setting: URLM_WGET_DL_DIR (default: ~) [*] + +This is where files downloaded with wget will be saved. Tilde expansion +is supported. If this directory does not exist, it will be created by +wget, when it is run for the first time. + +[*] Normally, the log file and download directory should be an absolute +path, or relative to $HOME (with ~ expansion). If a relative path is +used (without ~), it will be resolved relative to the working directory +where irssi was started. + +FIXME: this setting may not contain whitespace characters (spaces or tabs). 
+ diff --git a/help/urlm_wget_extensions b/help/urlm_wget_extensions new file mode 100644 index 0000000..ec87f9c --- /dev/null +++ b/help/urlm_wget_extensions @@ -0,0 +1,23 @@ + +Setting: URLM_WGET_EXTENSIONS (default: tar zip atr bas xex exe dcm car z gz torrent) + +Space-separated list of filename extensions. When /urlm_open is used on +a URL ending in one of these, the file will be downloaded with wget, +running in an irssi window. To disable wget, you may set this list to +an empty string, or use one of your defined browsers to open such files. + +If you want to use wget without running it in an irssi window, clear the +extension list, then define a browser like so: + +# download in background (no controlling terminal, no progress reports) +/urlm_add_browser dl:Download with wget:wget -b '%s' + +# download in new screen window (irssi must be running under GNU screen) +/urlm_add_browser dl:Download with wget:screen wget '%s' + +# download in new X window (irssi must be running under X) +/urlm_add_browser dl:Download with wget:xterm -e "wget '%s';echo 'press Enter to close window';read junk" + +Note that you may not define a browser tag as 'wget' (the examples above +all use 'dl' instead). + diff --git a/help/urlm_wget_extra_args b/help/urlm_wget_extra_args new file mode 100644 index 0000000..1ee593f --- /dev/null +++ b/help/urlm_wget_extra_args @@ -0,0 +1,5 @@ + +Setting: URLM_WGET_EXTRA_ARGS (default: <none>) + +Extra arguments to be passed to wget verbatim. Default is no arguments. + diff --git a/help/urlm_wget_path b/help/urlm_wget_path new file mode 100644 index 0000000..260a921 --- /dev/null +++ b/help/urlm_wget_path @@ -0,0 +1,9 @@ + +Setting: URLM_WGET_PATH (default: wget) + +The path to the wget binary. Either an absolute path such as /usr/bin/wget, +or the string wget (the default) to search the $PATH. Tilde expansion +is supported (e.g. /set urlm_wget_path ~/bin/wget) + +FIXME: this setting may not contain whitespace characters (spaces or tabs). 
+ diff --git a/help/urlm_wget_split_size b/help/urlm_wget_split_size new file mode 100644 index 0000000..63beb07 --- /dev/null +++ b/help/urlm_wget_split_size @@ -0,0 +1,11 @@ + +Setting: URLM_WGET_SPLIT_SIZE + +Integer; how tall (in screen lines) to make split windows created by +/wget or /urlm_wget. Set to zero (the default) to use hidden (full-sized) +windows instead of split windows. + +Try not to set this too high; if urlmanager is unable to resize a window +to this size, the window will remain at the default size (as used +by the /window split command). + diff --git a/help/urlmanager b/help/urlmanager new file mode 100644 index 0000000..f979ad9 --- /dev/null +++ b/help/urlmanager @@ -0,0 +1,30 @@ + +URLMANAGER + +Commands: all urlmanager commands are prefixed with /urlm_ except the +short browser aliases (if urlm_short_cmds is enabled). To see the list +of commands, type "/help urlm". + +Settings: urlmanager's behaviour is controlled by quite a few settings. +All urlmanager settings are prefixed with "urlm_". +To see a full list of them, use "/set urlm". + +Log File: + +URLs are stored in a file (filename set with urlm_log_file), one URL +per line. + +Each line is a space-separated list: + +timestamp nick channel url + +This file may be edited (carefully) with a standard text editor, or +removed (to clear the URL list). If you edit or delete the file, reload +the script with "/run urlmanager.pl". (it's probably a +good idea to trim the file when it gets above a few hundred lines). + +Notes: +- The timestamp is expressed in seconds since the epoch. +- For a URL received in a private /msg or dcc chat, "channel" will + be the sending user. 
+ diff --git a/scripts/urlmanager.pl b/scripts/urlmanager.pl new file mode 100644 index 0000000..787e775 --- /dev/null +++ b/scripts/urlmanager.pl @@ -0,0 +1,1350 @@ +#!/usr/bin/perl + +# urlmanager script for irssi + +use warnings; +use strict; + +use Fcntl qw/:flock/; +use POSIX qw/strftime/; + +use Irssi qw/ + settings_add_str settings_add_bool settings_add_int + settings_get_str settings_get_bool settings_get_int + settings_set_str settings_set_bool settings_set_int + command command_bind command_unbind + signal_emit signal_add_last signal_stop + timeout_add timeout_add_once timeout_remove + window_find_item/; + +our $VERSION = "0.1"; +our %IRSSI = ( + authors => 'Urchlay', + contact => 'Urchlay on NewNet', + name => 'urlmanager', + description => + 'Captures URLs said in channel and private messages ' . + 'and saves them to a file, also adds several commands for ' . + 'listing and opening captured URLs ' . + '(based on urlgrab.pl 0.2 by David Leadbetter)', + license => 'GNU GPLv2 or later', + url => 'none', +); + +# 20110609 bkw: if irssi was started in a screen session from the console, +# then detached, then reattached in an X session, DISPLAY will not be set. +# This will confuse the user, as e.g. firefox will silently fail to run. +# It won't do any harm to set DISPLAY=:0 if it's not set, and might help... +{ + my $disp = $ENV{DISPLAY}; + $ENV{DISPLAY} = ":0" unless $disp; +} + +# Workaround for a heisenbug, see: +# http://bugs.irssi.org/index.php?do=details&task_id=242 +{ package Irssi::Nick } + +# Color constants. +# Irssi.pm doesn't include symbolic mIRC-style color names... +# NOTE: if you print e.g. $green . "12345", the "1" will be interpreted +# as the 2nd digit of the color! Only good fix is to always put a space: +# print $green . " 12345" works OK. +# Declarations only; defined in init_colors() +our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off); + +# @urls is a list of anonymous hashes, each representing one URL. 
# See read_url_file for hash elements.
our @urls;

# Most-recently-posted URL (the URL only, not a hash). Only used
# for avoiding dups (see url_log).
our $lasturl = "";

# Have any URLs been captured since the last /ul -delete? This is to
# (hopefully) protect the user from silently losing them when the
# delete is undone.
our $captured_since_delete = 0;

# Grr. Printing with print() or Irssi::print(), % chars are interpreted
# as irssi formats. This causes URLs containing HTML %-escapes to come
# out in weird colors. Using irssi's /echo is apparently the right way
# to avoid this... though we get colored -!- in front of every line :(
#
# echo(@lines): send each argument to irssi's /echo command, one per call.
sub echo {
    command("/echo $_") for @_;
}

# trim: strip ALL leading and trailing whitespace.
# The argument is modified in place ($_[0] aliases the caller's variable,
# so pass a variable, not a read-only literal) and the trimmed string is
# also returned.
sub trim {
    # Two anchored substitutions. The previous single pattern ended in
    # \s$ and therefore removed only ONE trailing whitespace character.
    $_[0] =~ s/^\s+//;
    $_[0] =~ s/\s+$//;
    return $_[0];
}

# read_url_file: called on script load with the log filename.
# returns array of URL hashes, which will be empty if the file
# wasn't present or was empty.
#
# Each hash has the keys: stamp, nick, channel, url (the four
# whitespace-separated fields of one log line, in that order).
sub read_url_file {
    my $file = get_url_log_file();
    my @got;

    # Three-argument open with a lexical handle, so the filename is never
    # interpreted as a mode string. An unopenable file simply means
    # "no URLs": return an empty list.
    open(my $fh, '<', $file) or return;

    # A reader only needs a shared lock. An exclusive lock on a handle
    # opened for reading fails on platforms where flock() is emulated
    # with fcntl().
    flock($fh, Fcntl::LOCK_SH());

    # Zero-offset seek relative to the current position (whence 1);
    # kept from the original code.
    seek($fh, 0, 1);

    while (my $line = <$fh>) {
        chomp $line;
        my @fields = split " ", $line;

        # Blank or truncated lines carry no URL; skip them instead of
        # storing a record full of undefs.
        next if @fields < 4;

        push @got, {
            stamp   => $fields[0],
            nick    => $fields[1],
            channel => $fields[2],
            url     => $fields[3],
        };
    }
    close $fh;

    return @got;
}

# rewrite the URL log file from arguments.
# Each argument is a URL hash as produced by read_url_file.
sub write_url_file {
    for my $entry (@_) {
        # Reset before every call; presumably this keeps url_log's
        # duplicate check from dropping an entry -- confirm in url_log.
        $lasturl = "";
        url_log(1, $entry->{nick}, $entry->{channel}, $entry->{url},
            $entry->{stamp});
    }
}

# Trim the log according to the appropriate settings.
# Entries older than urlm_max_log_age seconds are dropped first, then
# only the newest urlm_max_log_lines entries are kept. A zero setting
# disables that kind of trimming.
# The optional argument ("quiet") is accepted but currently has no
# effect, because the "unless $quiet" below is commented out.
sub trim_url_log {
    my $quiet = shift || 0;
    my $max_lines = settings_get_int("urlm_max_log_lines") || 0;
    my $max_age = settings_get_int("urlm_max_log_age") || 0;

    return unless $max_lines || $max_age; # nothing to do!

    my @keep_urls;
    my $trimmed = 0;

    if ($max_age) {
        my $cutoff = time() - $max_age;
        @keep_urls = grep { $_->{stamp} >= $cutoff } @urls;
    } else {
        @keep_urls = @urls;
    }

    if ($max_lines && (@keep_urls > $max_lines)) {
        # keep only the last (newest) $max_lines entries
        my $last = $#keep_urls;
        my $first = $last - $max_lines + 1;
        @keep_urls = @keep_urls[$first..$last];
    }

    $trimmed = (@urls - @keep_urls);

    if ($trimmed) {
        clear_url_log();
        write_url_file(@keep_urls);
        print "Trimmed $trimmed URLs from log"; # unless $quiet;
    }
}

# Clear the URL log, both the in-memory @urls and the on-disk file.
sub clear_url_log {
    my $file = get_url_log_file();
    unlink $file; # or print "Can't delete $file: $!";
    @urls = ();
}

# get_url_log_file: get value of our logfile setting, with
# tilde expansion for user's homedir.
sub get_url_log_file {
    my $file = settings_get_str('urlm_log_file');
    $file =~ s/^~/$ENV{HOME}/;
    return $file;
}

# signal handler for "message public"
# extract and log any URLs in the input text.
sub url_public {
    my ($server, $text, $nick, $hostmask, $channel) = @_;
    my @got = find_urls($text);
    url_log(0, $nick, $channel, $_) for @got;
}

# signal handler for "message own_public" and "message own_private"
# extract and log any URLs in the input text.
# Does nothing unless the urlm_log_own setting is on.
sub url_own {
    my ($server, $text, $channel) = @_;
    return unless settings_get_bool('urlm_log_own');
    my @got = find_urls($text);
    url_log(0, $server->{nick}, $channel, $_) for @got;
}

# signal handler for "message private", "message irc notice",
# "message irc op_public", "message irc action"
# extract and log any URLs in the input text.
# Our own nick is recorded in the "channel" field.
sub url_private {
    my ($server, $text, $nick, $hostmask) = @_;
    my @got = find_urls($text);
    url_log(0, $nick, $server->{nick}, $_) for @got;
}

# signal handler for "message topic"
# extract and log any URLs in the input text.
sub url_topic {
    # Arguments arrive in the order used by the "message topic" signal.
    my ($server, $channel, $text, $nick, $hostmask) = @_;

    # don't log own topic changes
    if ($nick eq $server->{nick}) {
        return;
    }

    foreach my $found (find_urls($text)) {
        url_log(0, $nick, $channel, $found);
    }
}

# signal handler for "channel joined"
# Logs every URL found in the topic of the channel just joined,
# attributed to whoever set that topic.
sub url_join_topic {
    my $joined = shift;

    my $topic = $joined->{topic};
    return unless $topic;

    # don't log own topic changes
    my $setter = $joined->{topic_by};
    return if $setter eq $joined->{server}->{nick};

    foreach my $found (find_urls($topic)) {
        url_log(0, $setter, $joined->{name}, $found);
    }
}

# signal handler for "message part"
# Logs URLs from a part message, but only when urlm_log_partquit is on.
sub url_part {
    if (!settings_get_bool('urlm_log_partquit')) {
        return;
    }

    my ($server, $channel, $nick, $hostmask, $text) = @_;

    # don't log own parts (redundant?)
    if ($nick eq $server->{nick}) {
        return;
    }

    foreach my $found (find_urls($text)) {
        url_log(0, $nick, $channel, $found);
    }
}

# signal handler for "message quit"
# Logs URLs from a quit message, but only when urlm_log_partquit is on.
# Our own nick is recorded in the "channel" field.
sub url_quit {
    if (!settings_get_bool('urlm_log_partquit')) {
        return;
    }

    my ($server, $nick, $hostmask, $text) = @_;
    my $own_nick = $server->{nick};

    # don't log own quits (redundant?)
    if ($nick eq $own_nick) {
        return;
    }

    foreach my $found (find_urls($text)) {
        url_log(0, $nick, $own_nick, $found);
    }
}

# signal handler for "dcc chat message"
# extract and log any URLs in the input text.
# TODO: test this!
sub url_dccmsg {
    # $dcc is the DCC chat record; a DCC chat has no channel, so our own
    # nick on that server is logged in the channel column.
    my ($dcc, $text) = @_;
    url_log(0, $dcc->{nick}, $dcc->{server}->{nick}, $_) for find_urls($text);
}

# print_url_line:
# print one formatted (colorful) line of /ul output
# $maxnick and $maxchan are the column widths for the nick and channel
# fields; the remaining arguments are the five column values.
sub print_url_line {
    my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_;

    echo(sprintf("%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s %s%s",
        $bold_on, $num, $bold_off,
        $green, $stamp,
        $red, $nick,
        $yellow, $channel,
        $purple, $url,
        $color_off));
}

# url_list_cmd: bound to /urlm_list and /ul
# Lists (or, with a leading "-delete", deletes) the URLs selected by the
# argument:
#   (empty)     the last 10 URLs
#   all         every URL
#   N           URL number N
#   N-M, N-, -M a range of URL numbers
#   #chan &chan URLs captured on that channel
#   /regex/     URLs matching the (case-insensitive) regex
#   anything else is a nick prefix
# A leading "!" inverts the selection.
sub url_list_cmd {
    my $arg = shift;
    $arg = "" unless defined $arg; # "0" is a valid URL number, keep it
    $arg = trim(lc $arg);

    my $do_delete = 0;
    if($arg =~ s/^-delete\s*//) {
        $do_delete = 1;

        if($arg eq '') {
            print "/ul -delete requires a parameter! (/ul help for details)";
            return;
        }
    }

    if(not @urls) {
        print "No URLs in list!";
        return;
    }

    $arg = "-" if $arg eq 'all';

    my $invert = 0;
    if($arg =~ /^!(.*)/) {
        $arg = $1;
        $invert = 1;
    }

    # exactly one of these selectors gets set
    my ($start, $end, $nick, $regex, $channel);

    if($arg eq "") {
        $start = @urls - 10;
        $start = 0 if $start < 0;
        $end = $#urls;
    } elsif($arg =~ /^[&#](.*)/) {
        $channel = $1;
    } elsif($arg =~ m{^/(.*?)/?$}) {
        # non-greedy, so an optional closing slash is really stripped
        $regex = $1;
    } elsif($arg =~ /^\d+$/) {
        $start = $end = $arg;
    } elsif($arg =~ /^(\d*)-(\d*)$/) {
        $start = $1 ne "" ? $1 : 0;
        $end = $2 ne "" ? $2 : $#urls;
    } else {
        $nick = $arg;
    }

    # compile the user's regex once, and refuse a broken one instead of
    # dying in the middle of the listing
    my $regex_re;
    if(defined $regex && $regex ne '') {
        $regex_re = eval { qr/$regex/i };
        if(not $regex_re) {
            print "Bad regex '$regex': $@";
            return;
        }
    }

    my $listed = 0;
    my $count = 0;
    my @to_list;   # [ url number, url hash ] pairs that were selected
    my @keep_urls; # url hashes that were NOT selected (survive -delete)

    for my $u (@urls) {
        my $list = 0;
        if($nick) {
            # $nick is already lowercase; match it as a prefix
            $list = 1 if index(lc($u->{nick}), $nick) == 0;
        } elsif($regex_re) {
            $list = 1 if $u->{url} =~ $regex_re;
        } elsif(defined $channel && $channel ne '') {
            $list = 1 if $u->{channel} =~ /^[#&]?\Q$channel\E$/i;
        } elsif(defined($start) && defined($end)) {
            $list = 1 if $count >= $start && $count <= $end;
        }

        $list = !$list if $invert;

        if($list) {
            $listed++;
            push @to_list, [ $count, $u ];
        } elsif($do_delete) {
            push @keep_urls, $u;
        }

        $count++;
    }

    if(@to_list) { # print the list if anything's supposed to be in it
        my $maxnick = 4; # minimum widths: length of "Nick" / "Channel"
        my $maxchan = 7;

        for my $entry (@to_list) {
            my $u = $entry->[1];
            my $len = length($u->{nick});
            $maxnick = $len if $len > $maxnick;
            $len = length($u->{channel});
            $maxchan = $len if $len > $maxchan;
        }

        print_url_line($maxnick, $maxchan,
            "#", "When", "Nick", "Channel", "URL");

        for my $entry (@to_list) {
            my ($num, $u) = @$entry;

            my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp}));
            print_url_line($maxnick, $maxchan,
                $num, $stamp, $u->{nick}, $u->{channel}, $u->{url});
        }
    }

    if($do_delete) { # process -delete flag
        my $deleted = @urls - @keep_urls;
        if(not $deleted) {
            print "No URLs deleted";
            return;
        }

        # keep the old log as "<file>~" so /urlm_undo_delete can restore it
        my $file = get_url_log_file();
        rename($file, "$file~") or print "Warning: can't backup log file: $!";

        clear_url_log();
        write_url_file(@keep_urls);

        print $red . "These " . $deleted . " URLs have been deleted!" .
            $color_off . " (" . @urls . " remain)";

        $captured_since_delete = 0;
    } else { # no -delete flag, show summary
        print "Listed $listed of $count URLs";
    }
}

# placeholder, not implemented
sub urlm_say {
}

# urlm_undo_delete: bound to /urlm_undo_delete
# Restores the "<logfile>~" backup made by the last /ul -delete.
# Refuses (unless given '-yes') when URLs were captured since that delete,
# because they would be thrown away.
sub urlm_undo_delete {
    my $yes = (defined $_[0] && $_[0] eq '-yes');

    if($captured_since_delete && (not $yes)) {
        print "urlm_undo_delete: doing this will throw away some URLs that " .
            "were captured since the last delete. Re-run with '-yes' to do it " .
            "anyway.";
        return;
    }

    my $file = get_url_log_file();
    my @oldurls = @urls;
    clear_url_log();

    if(not(rename("$file~", $file))) {
        print "Can't restore log file: $!";
        # NOTE(review): write_url_file() is called with no arguments here
        # but with a URL list in url_list_cmd() -- confirm this really
        # rewrites the log from @urls.
        @urls = @oldurls;
        write_url_file();
        return;
    }

    @urls = read_url_file();
    print "Restored " . @urls . " URLs from backup";
    $captured_since_delete = 0;
}

# get_url_from_number:
# Returns a URL hash from @urls, given the index into the array.
# Returns undef (after printing a message) if index is invalid or
# non-existent.
# Accepts negative numbers as an offset back from the most recent URL
# (-1 is the one before the most recent).
# If index is empty string, returns the highest-numbered (most recent) URL.
# If index is non-empty, non-numeric, then treat as a nick and return the
# last URL by that nick.
sub get_url_from_number {
    my $arg = shift;
    $arg = '' unless defined $arg;
    $arg = trim($arg);

    if(not @urls) {
        print("No URLs in list!");
        return;
    }

    return $urls[-1] if $arg eq '';

    # anchored: only a pure "-N" is an offset, "foo-1" is a nick
    if($arg =~ /^-\d+$/) {
        my $idx = $#urls + $arg;
        if($idx < 0) {
            print("No such URL number '$arg'");
            return;
        }
        return $urls[$idx];
    }

    if($arg =~ /^\d+$/) {
        if($arg > $#urls) {
            print("No such URL number '$arg'");
            return;
        }
        return $urls[$arg];
    }

    # not a number: newest URL posted by that nick (case-insensitive)
    for(my $i = $#urls; $i >= 0; $i--) {
        my $url = $urls[$i];
        return $url if lc($arg) eq lc($url->{nick});
    }
    print("Can't find any URLs from nick '$arg'");
    return;
}

# url_open_cmd: open a URL with the given browser (auto-guesses which browser
# to use if $browser is '').
# Bound to /urlm_open and /uo (where irssi passes a Server object as the
# second argument), and called with a browser tag by the per-browser
# commands.
sub url_open_cmd {
    my ($urlnum, $browser) = @_;
    $browser = '' if ref $browser; # means it's a Server object

    my $url = get_url_from_number($urlnum);
    return if not $url;
    my $link = $url->{url};

    if(not $browser) { # guess browser...
        # check for wget first...
        if($link =~ m{/[^/]+\.(\w+)$}) {
            my $ext = lc $1;
            for(split " ", settings_get_str('urlm_wget_extensions')) {
                if($ext eq lc($_)) {
                    # url_open_wget_cmd() only understands a plain trailing
                    # number, so hand it the index of the URL we already
                    # resolved (the selector may have been a nick or -N).
                    my ($idx) = grep { $urls[$_] == $url } 0 .. $#urls;
                    url_open_wget_cmd($idx);
                    return;
                }
            }
        }

        # not a wget extension, check browser override patterns
        $browser = settings_get_str('urlm_default_browser');

OVERRIDE:
        for my $override (read_browser_overrides()) {
            my ($tag, $pats) = @$override;
            for my $pat (@$pats) {
                if($link =~ /$pat/i) {
                    $browser = $tag;
                    last OVERRIDE;
                }
            }
        }
    }

    my ($name, $format);
    for(read_browser_list()) {
        if($_->{tag} eq $browser) {
            ($name, $format) = ($_->{name}, $_->{command});
            last;
        }
    }

    # without a command format there is nothing sensible to run
    if(not defined $format) {
        print "Browser $browser not defined in browser list";
        return;
    }

    echo("$name - " . $link . " (" . $url->{nick} . ")");

    $link =~ s/'/%27/g;  # be nice to the shell, escape single quotes
    $link =~ s/\(/%28/g; # be nice to firefox, escape parens
    $link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails*
                         # if the url contains any () chars!

    # system() runs this through /bin/sh, which need not be bash: use the
    # portable spelling of "discard stdout and stderr" instead of "&>".
    my $cmd = sprintf($format, $link) . " >/dev/null 2>&1 &";
    system($cmd);
}

# Open with wget in an irssi window
# TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?)
# The argument is a list of extra wget options, optionally followed by a
# URL number; without a number the most recent URL is downloaded.
our $wgetcount = 1;
sub url_open_wget_cmd { # bound to /urlm_wget /wget
    my $arg = shift;
    $arg = "" unless defined $arg;

    my @args = split " ", $arg;

    my $urlnum = "";
    if(@args && ($args[-1] =~ /^\d+$/)) {
        $urlnum = pop @args;
    }

    my $url = get_url_from_number($urlnum);
    return if not $url;

    my $more_args = join(" ", @args);
    $more_args .= " " if $more_args;

    my $dir = settings_get_str('urlm_wget_dl_dir');
    $dir =~ s/^~/$ENV{HOME}/;
    $dir = "." if not $dir;

    my $size = settings_get_int('urlm_wget_split_size');

    if($size > 0) {
        command("/window new split");
        command("/window size $size");
    } else { # size == 0, means "do not split"
        command("/window new hidden");
    }

    # find an unused window name...
    my $name = "urlm_wget_" . ($wgetcount++);
    while(window_find_item($name)) {
        $name = "urlm_wget_" . ($wgetcount++);
    }

    command("/window name $name");

    if(settings_get_bool('urlm_wget_autoclose')) {
        my $delay = settings_get_int('urlm_wget_autoclose_delay');
        if($delay) {
            print "This window will close $delay seconds after download is done";
        } else {
            print "This window will close when download is done";
        }
    } else {
        print "Use " . $yellow . "/window close $name" . $color_off .
            " to close this window";
    }

    my $args = trim(settings_get_str('urlm_wget_extra_args'));
    $args .= " " if $args;

    my $wget_bin = settings_get_str('urlm_wget_path');
    $wget_bin =~ s/^~/$ENV{HOME}/;
    $wget_bin = "wget" if not $wget_bin;

    # the exec is named after the window, so both can be found again by
    # that name (see sig_exec_remove() and kill_proc())
    command(
        "/exec -nosh " .
        "-name $name " .
        "$wget_bin " .
        "-P $dir " .
        $args .
        $more_args .
        $url->{url});

    if($size) { # split window: go back; a hidden window is left focused
        command("/window last");
    }
}

# Close a window by name. This seems like a kludge...
sub close_window {
    return unless window_find_item($_[0]); # don't close if already closed!
    command("/window goto " . $_[0]);
    command("/window close");
}

# signal handler for "exec remove", autocloses our wget windows when
# the wget processes exit, if requested.
sub sig_exec_remove {
    my ($proc, $status) = @_;
    return unless settings_get_bool('urlm_wget_autoclose');

    # target_win->name will be "" if window already closed!
    my $name = $proc->{target_win}->{name} || "";
    return unless $name =~ /^urlm_wget_\d+$/;

    return unless window_find_item($name); # don't close if already closed!

    my $delay = settings_get_int('urlm_wget_autoclose_delay');
    if($delay > 0) {
        timeout_add_once($delay * 1000, "close_window", $name);
    } else {
        close_window($name);
    }
}

# kill a process by name (send SIGTERM)
sub kill_proc {
    command("/exec -15 " . $_[0]);
}

# kill wget processes on manual window close!
# catch signal "window destroyed" and figure out a way to avoid
# adding a timeout to close the window (since it's in the middle
# of closing now...)
# Only windows named urlm_wget_<number> (our wget windows) are handled.
sub sig_window_destroyed {
    my $name = $_[0]->{name} || "";
    return unless $name =~ /^urlm_wget_\d+$/;

    # use a timeout to kill the process, instead of killing it directly.
    # why? to avoid possible race condition where sig_exec_remove()
    # tries to close the window that triggered this call to
    # sig_window_destroyed().
    timeout_add_once(1000, "kill_proc", $name);
}

# find_urls: extract all URLs from the input text, returns a list
# (which may be empty).
# Be VERY permissive about what we consider a URL.
# 20100614 bkw: be a little less permissive
# 20140530 bkw: stop catching dupe http://whatever and https://whatever

# original sub:
##sub find_urls {
##    my @got = ($_[0] =~ m{(?:https?|ftp)://\S+}g);
##    push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g;
##    push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
##    s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
##    return @got;
##}

# new version:
# Works on a private copy of the text: the elements of @_ alias the
# caller's variables, so editing $_[0] would change the caller's data and
# would die outright on a read-only value such as a string literal.
sub find_urls {
    my ($text) = @_;
    $text = '' unless defined $text;

    my @got;

    # Cut each URL that has an explicit scheme out of the text, so the
    # bare www./ftp. patterns below cannot pick up the same URL again.
    while($text =~ s{((?:https?|ftp)://\S+)}{}) {
        push @got, $1;
    }

    # scheme-less host names: guess the scheme from the host prefix
    push @got, "http://$_" for $text =~ /(?:www\d*\.[^.]+\.\S+)/g;
    push @got, "ftp://$_" for $text =~ /(?:ftp\d*\.[^.]+\.\S+)/g;

    s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
    return @got;
}

# Annoying bots have a tendency to do this:
# <actual_person> check this out: http://www.blahblah.blah/path/to/stuff.html
# <annoying_bot> Title: Stuff (at www.blahblah.blah)
# To me, this is about the most useless function a bot can serve, plus it
# breaks the /uo command.
# just_domain() returns true if its first argument is just the domain part
# of its second argument (ignoring any http://, https://, ftp:// or
# ftps:// prefix on either).
sub just_domain {
    my ($new, $old) = @_;
    for($new, $old) {
        $_ = '' unless defined $_;
        s/^(ht|f)tps?:\/\///;
    }
    $old =~ s/\/.*//;          # keep only the host part of the old URL
    return 0 if $new =~ /\/./; # the new URL has a path of its own
    return $new eq $old;
}

# url_log: appends URL to the URL log file and to the @urls array.
# Locks the file before writing, so should be safe even with multiple
# instances of irssi.
# $relog should be false if capturing a new URL from channel/msg text,
# or true if re-logging an old URL (e.g. /ul -delete does this)
# $stamp is the capture time (epoch seconds); defaults to now.
sub url_log {
    my($relog, $nick, $channel, $url, $stamp) = @_;
    $nick =~ s/!.*//; # reduce nick!user@host to the bare nick

    $stamp = time() unless $stamp;

    my $previous = defined $lasturl ? $lasturl : '';
    return if lc $url eq lc $previous; # a tiny bit of protection from spam/flood
    return if just_domain($url, $previous);

    $lasturl = $url;

    my $file = get_url_log_file();
    open(my $log, '>>', $file) or return;

    flock($log, Fcntl::LOCK_EX);
    seek($log, 0, 2); # another instance may have appended while we waited

    # write the entry's own stamp, so re-logged URLs keep their original
    # capture time in the file as well as in @urls
    print $log $stamp . " $nick $channel $url\n";
    close($log);

    push @urls, {
        stamp => $stamp,
        nick => $nick,
        channel => $channel,
        url => $url,
    };

    if(not $relog) {
        if(not settings_get_bool('urlm_quiet_capture')) {
            my $on = "";
            if($channel =~ /^#/) {
                $on = " on " . $green . $channel . $color_off;
            }
            echo("Captured URL #" . $#urls . " " .
                $purple . $url . $color_off .
                " from " . $yellow . $nick . $color_off . $on);
        }
#       trim_url_log();
        $captured_since_delete++;
    }
}

# urlm_help generates /help topics for the browser commands.
# The help for all the other commands is stored in text files in
# ~/.irssi/help
# Bound to /help: when the topic is a browser tag or urlm_open_<tag>, the
# generated text is printed and the signal is stopped; for any other topic
# this returns without doing anything.
sub urlm_help {
    my $arg = shift;
    $arg = lc trim($arg);
    my %bhelp;

    for(read_browser_list()) {
        my $text = uc($_->{tag}) . " [<url#>]\n\n" .
            "Open a URL with the external browser '" .
            $_->{name} . "', using the command:\n" .
            $_->{command} . "\n\n" .
            "If [<url#>] is omitted, the most recent URL will be opened.\n";
        $bhelp{$_->{tag}} = $text;
        $bhelp{"urlm_open_" . $_->{tag}} = $text;
    }

    my $help = $bhelp{$arg} || return;
    signal_stop();
    print $help;
}

# write_browser_overrides: store a list of [ tag, [ patterns ] ] pairs
# (passed as an array ref) in the urlm_browser_overrides setting, as
# "tag:pat:pat::tag:pat".
sub write_browser_overrides {
    my @list = @{$_[0]};
    my @strings;

    for(@list) {
        my ($tag, $pats) = @$_;
        push @strings, join(":", $tag, @$pats);
    }

    my $setting = join("::", @strings);
    settings_set_str('urlm_browser_overrides', $setting);
}

# read_browser_overrides: parse the urlm_browser_overrides setting into a
# list of [ tag, [ patterns ] ] pairs.
sub read_browser_overrides {
    my @result;

    my $list = settings_get_str('urlm_browser_overrides');
    my @entries = split /::/, $list;

    for(@entries) {
        my @items = split /:/;
        my $tag = shift @items;
        push @result, [ $tag, \@items ];
    }

    return @result;
}

# write_browser_list: store a list of { tag, name, command } hashes (passed
# as an array ref) in the urlm_browsers setting, as
# "tag:name:command::tag:name:command", then emit "setup changed" so the
# per-browser commands get rebound.
sub write_browser_list {
    my @list = @{$_[0]};
    my @strings;

    for(@list) {
        push @strings, join(":", $_->{tag}, $_->{name}, $_->{command});
    }

    my $setting = join("::", @strings);
#print "/set urlm_browsers $setting";
    settings_set_str('urlm_browsers', $setting);
    signal_emit("setup changed");
}

# read_browser_list: parse the urlm_browsers setting into a list of
# { tag, name, command } hashes.
sub read_browser_list {
    my @result;

    my $list = settings_get_str('urlm_browsers');
    my @entries = split /::/, $list;

    for(@entries) {
        my @items = split /:/;
        my $hash = {
            tag => $items[0],
            name => $items[1],
            command => $items[2],
        };

        push @result, $hash;
    }

    return @result;
}

# commands:
# urlm_add_browser <browser>:<fullname>:<cmd>
# Adds a browser definition, or replaces the one with the same tag.
sub urlm_add_browser {
    my $arg = shift || "";
    $arg = trim($arg);

    if($arg !~ /^[^:]+:[^:]+:[^:]+$/) {
        print "Usage: /urlm_add_browser tag:name:command";
        return;
    }

    my ($tag, $name, $cmd) = split /:/, $arg;

    $tag = trim($tag);
    $tag = lc $tag;
    $name = trim($name);

    if($tag =~ /\W/) {
        print "/urlm_add_browser: tag must consist of only " .
            "letters, numbers, or underscores (_), not '$tag'";
        return;
    }

    if($tag eq 'wget') {
        print "/urlm_add_browser: 'wget' is reserved; use a different tag";
        return;
    }

    # the URL is pasted into a shell command, so the %s has to sit inside
    # single quotes
    if($cmd !~ /'[^']*\%s[^']*'/) {
        print "/urlm_add_browser: command must contain '\%s' (single-quoted)";
        return;
    }

    my @browsers = read_browser_list();
    my $found = 0;
    for(@browsers) {
        if(lc($_->{tag}) eq $tag) {
            print "Replaced old definition of $tag";
            $_->{name} = $name;
            $_->{command} = $cmd;
            $found++;
            last;
        }
    }

    if(not $found) {
        push @browsers, { tag => $tag, name => $name, command => $cmd };
        print "Added browser definition $tag";
    }

    write_browser_list(\@browsers);
}

# urlm_del_browser <browser>
# Deletes a browser definition and all of its overrides.
sub urlm_del_browser {
    my $arg = shift || "";
    $arg = trim($arg);
    $arg = lc $arg;
    return unless $arg;

    # urlm_del_override() already prints "Browser not defined" if it
    # wasn't defined, so no need to have urlm_del_browser() print it again.
    urlm_del_override("$arg all");

    my @browsers = read_browser_list();
    my @keep_browsers;
    my $found = 0;

    for(@browsers) {
        if(lc($_->{tag}) eq $arg) {
            print "Deleted definition of $arg";
            $found++;
        } else {
            push @keep_browsers, $_;
        }
    }

    write_browser_list(\@keep_browsers) if $found;
}

# urlm_list_browsers
# Prints one line per defined browser; the default browser is marked [*].
sub urlm_list_browsers {
    my $default = settings_get_str('urlm_default_browser');
    for(read_browser_list()) {
        print(
            ($default eq $_->{tag} ?
                "[*]" :
                "   ") .
            "Tag: $bold_on" . $_->{tag} . "$bold_off, " .
            "Name: $bold_on" . $_->{name} . "$bold_off, " .
            "Command: $bold_on" . $_->{command} . "$bold_off");
    }
}

# urlm_add_wget_ext <ext>
# Adds a file extension to the urlm_wget_extensions setting.
sub urlm_add_wget_ext {
    my $arg = shift || "";
    $arg = trim($arg);
    $arg = lc $arg;

    if(not $arg) {
        print "Usage: /urlm_add_wget_ext <extension>";
        return;
    }

    my @list = split " ", settings_get_str('urlm_wget_extensions');
    if(grep { $_ eq $arg } @list) {
        print "$arg is already in the wget extensions list";
        return;
    }

    push @list, $arg;

    settings_set_str('urlm_wget_extensions', join(" ", @list));
    command("/set urlm_wget_extensions"); # show the new value
}

# urlm_del_wget_ext <ext>
# Removes a file extension from the urlm_wget_extensions setting.
sub urlm_del_wget_ext {
    my $arg = shift || "";
    $arg = trim($arg);
    $arg = lc $arg;

    if(not $arg) {
        print "Usage: /urlm_del_wget_ext <extension>";
        return;
    }

    my @list = split " ", settings_get_str('urlm_wget_extensions');
    if(!grep { $_ eq $arg } @list) {
        print "$arg is not in the wget extensions list";
        return;
    }

    @list = grep { $_ ne $arg } @list;

    settings_set_str('urlm_wget_extensions', join(" ", @list));
    command("/set urlm_wget_extensions"); # show the new value
}

# urlm_list_overrides [<browser>]
# Prints the numbered override patterns of one browser, or of all of them.
sub urlm_list_overrides {
    my $arg = shift || "";
    $arg = trim($arg);

    my $found = 0;
    my @overrides = read_browser_overrides();
    for(@overrides) {
        my ($browser, $pats) = @$_;
        if($arg eq $browser || not $arg) {
            $found++;
            my $count = 1;
            for(@$pats) {
                print $browser . "[$count]: " . $_;
                $count++;
            }
        }
    }

    if(not $found) {
        if($arg) {
            print "No overrides for browser '$arg'";
        } else {
            print "No browser overrides";
        }
    }
}

# urlm_add_override <browser> <pattern>
# Makes URLs matching <pattern> (a Perl regex) open with <browser> instead
# of the default browser.
sub urlm_add_override {
    my $arg = shift || "";
    $arg = trim($arg); # do not lc($arg), the command may need caps!

    my ($browser, $pattern) = split " ", $arg;
    $browser = lc $browser if defined $browser;
    if(not ($browser and $pattern)) {
        print "Usage: /urlm_add_override <browser> <pattern>";
        return;
    }

    if(!grep { $_->{tag} eq $browser } read_browser_list()) {
        print "Browser $browser not defined in browser list";
        return;
    }

    # ':' separates the fields of the stored setting, so a pattern
    # containing one would come back as several broken patterns
    if($pattern =~ /:/) {
        print "Pattern $pattern must not contain ':' (use \\x3a instead)";
        return;
    }

    # compile inside a block eval: this checks the syntax without running
    # the user's text as Perl code
    if(not eval { qr/$pattern/; 1 }) {
        print "Pattern $pattern is not a valid Perl regex: $@";
        return;
    }

    my @overrides = read_browser_overrides();
    my $found = 0;
    for(@overrides) {
        my ($tag, $pats) = @$_;
        next unless $tag eq $browser;

        push @$pats, $pattern;
        $found++;
    }

    if(not $found) {
        push @overrides, [ $browser, [ $pattern ] ];
    }

    print "Added override for $browser: $pattern";
    write_browser_overrides(\@overrides);
}

# urlm_del_override <browser> <number>|<all>
# Deletes one override pattern of a browser (numbered from 1, as printed
# by /urlm_list_overrides), or all of them.
sub urlm_del_override {
    my $arg = shift || "";
    $arg = trim($arg);
    $arg = lc $arg;

    my ($browser, $number) = split " ", $arg;

    if(not($browser and $number)) {
        print "Usage: /urlm_del_override <browser> <number>|all";
        return;
    }

    if(!grep { $_->{tag} eq $browser } read_browser_list()) {
        print "Browser $browser not defined in browser list";
        return;
    }

    if($number ne 'all' && $number !~ /^[1-9]\d*$/) {
        print "Bad override '$number': must be a number >= 1, or 'all'";
        return;
    }

    my @overrides = read_browser_overrides();
    my @keep_overrides = ();
    my $found = 0;
    for(@overrides) {
        my ($tag, $pats) = @$_;

        if($tag ne $browser) {
            push @keep_overrides, $_;
            next;
        }

        if($number eq 'all') { # drop the browser's whole entry
            $found += @$pats;
            next;
        }

        if($number > @$pats) {
            print "Value $number out of range";
            push @keep_overrides, $_; # nothing deleted, keep the entry
            next;
        }

        $found++;
        splice(@$pats, $number - 1, 1);

        push @keep_overrides, $_ if @$pats;
    }

    if($found) {
        print "Deleted $found overrides";
        write_browser_overrides(\@keep_overrides);
    } else {
        print "No matching overrides";
    }
}

# init_colors: (re)load the formatting codes used in our output from the
# urlm_use_bold and urlm_use_color settings. When a setting is OFF, the
# corresponding variables are set to empty strings.
sub init_colors {
    if(settings_get_bool('urlm_use_bold')) {
        $bold_on = "\002";
        $bold_off = "\002";
    } else {
        $bold_on = "";
        $bold_off = "";
    }

    if(settings_get_bool('urlm_use_color')) {
        $green = "\0033";
        $red = "\0034";
        $yellow = "\0037";
        $purple = "\0036";
        $color_off = "\003";
    } else {
        $green = "";
        $red = "";
        $yellow = "";
        $purple = "";
        $color_off = "";
    }
}

# init_browsers(): dynamic bindings. Each browser tag gets bound to
# /urlm_open_$tag, and (if short commands enabled) to /$tag.
# @bound_refs remembers every [ command, code ref ] pair bound here, so
# the next call can unbind them again.
our @bound_refs;
sub init_browsers {
    # for this to work, the code ref can *NOT* be stored in a "my" var
    # I think this is a bug in irssi, or possibly perl, but maybe I'm
    # just being dumb...
    for(@bound_refs) {
        command_unbind($_->[0], $_->[1]);
    }
    @bound_refs = ();

    for my $browser (read_browser_list()) {
        # A closure over $tag, not a string eval: the tag comes from a
        # user-editable setting and must never be compiled as code.
        my $tag = $browser->{tag};
        my $cmd = 'urlm_open_' . $tag;
        push @bound_refs, [ $cmd, sub { url_open_cmd($_[0], $tag); } ];

        # again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge
        command_bind($cmd, $bound_refs[$#bound_refs]->[1]);

        if(settings_get_bool('urlm_short_cmds')) {
            push @bound_refs, [ $tag, $bound_refs[$#bound_refs]->[1] ];
            command_bind($tag, $bound_refs[$#bound_refs]->[1]);
        }
    }

    command_unbind("ul", "url_list_cmd");
    command_unbind("uo", "url_open_cmd");
    command_unbind("wget", "url_open_wget_cmd");

    if(settings_get_bool('urlm_short_cmds')) {
        command_bind("ul", "url_list_cmd");
        command_bind("uo", "url_open_cmd");
        command_bind("wget", "url_open_wget_cmd");
    }
}

sub init_settings { # call only once, at script load!
# Where shall we save the URL log?
    settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog");

# Where is the wget binary? Absolute path, or "wget" (searches PATH)
    settings_add_str('urlmanager', 'urlm_wget_path', "wget");

# Where should wget save files?
    settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~");

# Extra arguments to pass to wget...
    settings_add_str('urlmanager', 'urlm_wget_extra_args', "");

# Do we log URLs from /part and /quit messages? Disabled by default
# because so many people always /quit with the same spammish URL
# e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]"
    settings_add_bool('urlmanager', 'urlm_log_partquit', 0);

# Do we log URLs from our own public/private messages?
    settings_add_bool('urlmanager', 'urlm_log_own', 1);

# Cosmetics:
    settings_add_bool('urlmanager', 'urlm_short_cmds', 1);
    settings_add_bool('urlmanager', 'urlm_use_bold', 1);
    settings_add_bool('urlmanager', 'urlm_use_color', 1);

# Browser definitions. A double-colon-separated list. Each list item
# is a single-colon separated list of (tag, name, command_format).
# You may add browsers, and they will work as commands
# without changing any other code.
# The browser commands need to be non-blocking, and any stdout/err from
# them will be ignored.
# The %s gets replaced with the actual URL. *ALWAYS* use single-quotes
# (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s!
# Failure to comply is a security hole!
# (For that reason the "ut" entry passes links and the URL to xterm -e as
# separate words, rather than as one double-quoted string, inside which
# the single quotes would not protect the URL.)
    settings_add_str('urlmanager', 'urlm_browsers',
        'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' .
        '::' .
        'ie:Internet Explorer:ie6 \'%s\'' .
        '::' .
        'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' .
        '::' .
        'ut:links+xterm:xterm -e links \'%s\'' .
        '::' .
        'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i');

# Default browser for /uo and /urlm_open commands
    settings_add_str('urlmanager', 'urlm_default_browser', 'ff');

# /uo and /urlm_open check this list.
# Double-colon-separated list, each item is a single-colon-separated
# list consisting of a browser tag and one or more patterns.
# If a URL matches one of these
# patterns, the browser tag will be used as the browser to open the URL
# with, instead of the default.
    settings_add_str('urlmanager', 'urlm_browser_overrides',
        'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com');

# If /uo or /urlm_open get a URL ending in one of these file extensions,
# it will be downloaded with wget instead of being opened in a browser.
    settings_add_str('urlmanager', 'urlm_wget_extensions',
        'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z');

# trim log to this many lines. Use with urlm_log_trim_interval and/or
# urlm_log_trim_startup. Set to 0 to disable.
    settings_add_int('urlmanager', 'urlm_max_log_lines', 100);

# trim log to this many seconds. Use with urlm_log_trim_interval and/or
# urlm_log_trim_startup. Set to 0 to disable.
    settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7);

# trim the log on script load.
    settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0);

# auto-trim log this often (seconds). Set to 0 to disable.
    settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60);

# these control the behavior of windows created with /urlm_wget or /wget
    settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1);
    settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60);
    settings_add_int('urlmanager', 'urlm_wget_split_size', 0);

# say "Captured URL #xxx http://whatever from whoever" every time a URL
# is captured?
    settings_add_bool('urlmanager', 'urlm_quiet_capture', 0);

# TODO: support these:

# channels/nicks/sites we don't want to log
#settings_add_str('urlmanager', 'urlm_ignore_channels');
#settings_add_str('urlmanager', 'urlm_ignore_urls');

# If true, go through the entire list every time a URL is logged,
# checking for duplicates
#settings_add_bool('urlmanager', 'urlm_ignore_dups');
}

# init_trim_timeout: (re)start the periodic log trim according to the
# urlm_log_trim_interval setting (seconds; 0 disables it).
our $trim_timeout_tag;
sub init_trim_timeout {
    timeout_remove($trim_timeout_tag) if($trim_timeout_tag);

    my $millis = settings_get_int('urlm_log_trim_interval') * 1000;
    if($millis > 0) {
        $trim_timeout_tag = timeout_add($millis, "trim_url_log", 1);
    }
}

# apply_settings: called on signal "setup changed" (when any /set urlm_*
# changes value).
# Anything that depends on the values of any of the settings should be
# (re)initialized here.
sub apply_settings {
    init_colors();
    init_browsers();
    init_trim_timeout();
    @urls = read_url_file();
}

sub init_signals { # call only once, at script load!
    signal_add_last("message public", "url_public");
    signal_add_last("message private", "url_private");
    signal_add_last("message irc notice", "url_private");
    signal_add_last("message irc op_public", "url_private");
    signal_add_last("message irc action", "url_private");
    signal_add_last("dcc chat message", "url_dccmsg");
    signal_add_last("message topic", "url_topic");
    signal_add_last("channel joined", "url_join_topic");
    signal_add_last("setup changed", "apply_settings");
    signal_add_last("message part", "url_part");
    signal_add_last("message quit", "url_quit");
    signal_add_last("message own_public", "url_own");
    signal_add_last("message own_private", "url_own");
    signal_add_last("exec remove", "sig_exec_remove");
    signal_add_last("window destroyed", "sig_window_destroyed");
}

sub init_static_binds { # call only once, at script load!
    # These binds are always on:
    command_bind("urlm_list", "url_list_cmd");
    command_bind("urlm_open", "url_open_cmd");
    command_bind("urlm_wget", "url_open_wget_cmd");
    command_bind("urlm_add_browser", "urlm_add_browser");
    command_bind("urlm_del_browser", "urlm_del_browser");
    command_bind("urlm_list_browsers", "urlm_list_browsers");
    command_bind("urlm_add_wget_ext", "urlm_add_wget_ext");
    command_bind("urlm_del_wget_ext", "urlm_del_wget_ext");
    command_bind("urlm_list_overrides", "urlm_list_overrides");
    command_bind("urlm_add_override", "urlm_add_override");
    command_bind("urlm_del_override", "urlm_del_override");
    command_bind("urlm_trim_log", "trim_url_log");
    command_bind("urlm_undo_delete", "urlm_undo_delete");
    command_bind("help", "urlm_help");
}

# Add per-user help dir to help_path, if not already present.
sub init_help_path {
    my $dir = "$ENV{HOME}/.irssi/help";
    my $help_path = settings_get_str('help_path');

    return if grep { $_ eq $dir } split /:/, $help_path;

    $help_path .= ":$dir";
    settings_set_str('help_path', $help_path);

    # 20100614 bkw: hmmm...
    # The signal is named "setup changed" (with a space), as in
    # write_browser_list() and init_signals().
    signal_emit('setup changed');
}

# bind signals and commands, now that everything's defined.
init_settings();
init_signals();
init_static_binds();
init_colors();
init_browsers();
init_help_path();
@urls = read_url_file();
trim_url_log() if settings_get_bool('urlm_log_trim_startup');
init_trim_timeout();

# make sure no leftover backup is lurking from a long time ago...
unlink(get_url_log_file() . "~");

# Print a helpful message for the user on script load...
print $bold_on . "urlmanager.pl" . $bold_off . " loaded (" . @urls .
    " URLs), type '" . $yellow . "/help urlmanager" .
    $color_off . "' for help.";

# rest of file is POD docs

=pod

=head1 NAME

urlmanager

=head1 SYNOPSIS

Yet another URL logger for irssi.

=head1 DESCRIPTION

Captures URLs in channel, privmsg, and DCC chat messages, logs them to a
file. Provides an irssi command to list captured URLs and several commands
to do various things with them (open in browser, download, copy to X11
selection buffer).

This documentation only includes installation instructions. For usage
instructions, install the script and run B</help urlmanager> within irssi.

=head1 INSTALLATION

Copy B<urlmanager.pl> to your B<~/.irssi/scripts> directory (create the
directory if it doesn't exist). For auto-loading when irssi starts,
create a symlink in B<~/.irssi/scripts/autorun>:

=over 4

mkdir -p ~/.irssi/scripts/autorun

cp urlmanager.pl ~/.irssi/scripts

cd ~/.irssi/scripts/autorun

ln -s ../urlmanager.pl .

=back

=head1 CONFIGURATION

All configuration is done from within irssi; read B</help urlmanager>.

=cut