aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorB. Watson <yalhcru@gmail.com>2015-08-12 04:30:10 -0400
committerB. Watson <yalhcru@gmail.com>2015-08-12 04:30:10 -0400
commit4ef0f5708e20509e427c706acedff6a22bf0faaa (patch)
treed12262b419530f40a3ef7cd24998f047ed443ebf
downloadirssi-urlmanager-4ef0f5708e20509e427c706acedff6a22bf0faaa.tar.gz
initial commit
-rw-r--r--README10
-rw-r--r--help/urlm_add_browser36
-rw-r--r--help/urlm_add_override15
-rw-r--r--help/urlm_browser_overrides6
-rw-r--r--help/urlm_browsers6
-rw-r--r--help/urlm_default_browser5
-rw-r--r--help/urlm_del_browser12
-rw-r--r--help/urlm_del_override7
-rw-r--r--help/urlm_list55
-rw-r--r--help/urlm_list_browsers11
-rw-r--r--help/urlm_list_overrides6
-rw-r--r--help/urlm_log_file16
-rw-r--r--help/urlm_log_own6
-rw-r--r--help/urlm_log_partquit7
-rw-r--r--help/urlm_log_trim_interval24
-rw-r--r--help/urlm_log_trim_startup7
-rw-r--r--help/urlm_max_log_age17
-rw-r--r--help/urlm_max_log_lines11
-rw-r--r--help/urlm_open29
-rw-r--r--help/urlm_quiet_capture16
-rw-r--r--help/urlm_short_cmds14
-rw-r--r--help/urlm_trim_log12
-rw-r--r--help/urlm_undo_delete11
-rw-r--r--help/urlm_use_bold7
-rw-r--r--help/urlm_use_color12
-rw-r--r--help/urlm_wget29
-rw-r--r--help/urlm_wget_autoclose13
-rw-r--r--help/urlm_wget_autoclose_delay10
-rw-r--r--help/urlm_wget_dl_dir14
-rw-r--r--help/urlm_wget_extensions23
-rw-r--r--help/urlm_wget_extra_args5
-rw-r--r--help/urlm_wget_path9
-rw-r--r--help/urlm_wget_split_size11
-rw-r--r--help/urlmanager30
-rw-r--r--scripts/urlmanager.pl1350
35 files changed, 1852 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..c993e6a
--- /dev/null
+++ b/README
@@ -0,0 +1,10 @@
+Urchlay's url manager plugin for irssi. no README yet, for now see the
+POD docs:
+
+$ perldoc scripts/urlmanager.pl
+
+Quickstart:
+
+mkdir -p ~/.irssi/scripts ~/.irssi/help
+cp scripts/* ~/.irssi/scripts
+cp help/* ~/.irssi/help
diff --git a/help/urlm_add_browser b/help/urlm_add_browser
new file mode 100644
index 0000000..d2c8327
--- /dev/null
+++ b/help/urlm_add_browser
@@ -0,0 +1,36 @@
+
+URLM_ADD_BROWSER <tag>:<name>:<cmd-format>
+
+Define a new browser. The arguments must be separated by : (colon)
+characters, since <name> and <cmd-format> may contain spaces (<cmd-format>
+generally *requires* spaces).
+
+<tag> is a short unique identifier for the browser. Examples are "ff" for
+Firefox or "moz" for Mozilla. The tag will be used to define a new irssi
+command /urlm_open_<tag>, and (if urlm_short_cmds is ON) a new /<tag>
+command. Tags must consist of only letters, numbers, or underscores,
+and the special tag "wget" is reserved.
+
+<name> is the full human-readable name of the browser. It may contain
+any characters you like, except for colons, and is only used for
+identification purposes (e.g. /urlm_list_browsers output).
+
+<cmd-format> is the sprintf() format used to generate the full command
+line required to run the browser. In simpler terms, it is the command
+that runs the browser, with %s in place of the URL. It may contain
+any characters other than colons.
+
+Examples:
+ /urlm_add_browser ff:Firefox:firefox -remote 'openurl(%s,new-tab)'
+ /urlm_add_browser ie:Internet Explorer:wine iexplore.exe '%s'
+
+The above examples define new /urlm_open_ff and /urlm_open_ie
+commands. If urlm_short_cmds is ON, they also define new /ff and /ie
+commands.
+
+Note: in <cmd-format>, the %s must occur inside a set of single-quotes.
+It need not be the only thing inside the quotes, however (see the firefox
+example). This is because a shell is used to spawn the external program.
+The URLM_ADD_BROWSER command checks for the quotes, and refuses to
+allow a <cmd-format> that's missing the quotes or the %s.
+
diff --git a/help/urlm_add_override b/help/urlm_add_override
new file mode 100644
index 0000000..167808f
--- /dev/null
+++ b/help/urlm_add_override
@@ -0,0 +1,15 @@
+
+URLM_ADD_OVERRIDE <browser> <pattern>
+
+Add an override, for use with /urlm_open (q.v.)
+
+<browser> is the "tag", previously defined with URLM_ADD_BROWSER.
+
+<pattern> is a Perl regular expression (regex). Any URL matching the
+regex (case-insensitive) will be opened with <browser>.
+
+It's probably best to avoid conflicting/overlapping patterns, although
+it can be done if you remember that the regexes are checked against each
+URL in the order they are displayed in /urlm_list_overrides (which is
+in fact the order in which they were defined).
+
diff --git a/help/urlm_browser_overrides b/help/urlm_browser_overrides
new file mode 100644
index 0000000..1088280
--- /dev/null
+++ b/help/urlm_browser_overrides
@@ -0,0 +1,6 @@
+
+Setting: URLM_BROWSER_OVERRIDES (private)
+
+ Do not change directly; use /urlm_add_override, /urlm_del_override,
+ and /urlm_list_overrides instead.
+
diff --git a/help/urlm_browsers b/help/urlm_browsers
new file mode 100644
index 0000000..eb89f27
--- /dev/null
+++ b/help/urlm_browsers
@@ -0,0 +1,6 @@
+
+Setting: URLM_BROWSERS (private)
+
+ Do not change directly; use /urlm_add_browser, /urlm_del_browser,
+ and /urlm_list_browsers instead.
+
diff --git a/help/urlm_default_browser b/help/urlm_default_browser
new file mode 100644
index 0000000..d4d0448
--- /dev/null
+++ b/help/urlm_default_browser
@@ -0,0 +1,5 @@
+
+Setting: URLM_DEFAULT_BROWSER (default: ff)
+
+Self-explanatory. One of the browser tags from URLM_LIST_BROWSERS.
+
diff --git a/help/urlm_del_browser b/help/urlm_del_browser
new file mode 100644
index 0000000..4afd163
--- /dev/null
+++ b/help/urlm_del_browser
@@ -0,0 +1,12 @@
+
+URLM_DEL_BROWSER <tag>
+
+Delete a browser from the browser-definition list.
+<tag> must have been previously defined with URLM_ADD_BROWSER.
+
+Deleting a browser also deletes any browser overrides defined for
+that browser.
+
+Example: to delete the "ie" definition (see /urlm_add_browser):
+ /urlm_del_browser ie
+
diff --git a/help/urlm_del_override b/help/urlm_del_override
new file mode 100644
index 0000000..a1d03a9
--- /dev/null
+++ b/help/urlm_del_override
@@ -0,0 +1,7 @@
+
+URLM_DEL_OVERRIDE <browser> <number>|all
+
+Delete an override. <number> is the number given in /urlm_list_overrides.
+If "all" is used instead of a number, all overrides for <browser> will
+be deleted.
+
diff --git a/help/urlm_list b/help/urlm_list
new file mode 100644
index 0000000..6ec14ae
--- /dev/null
+++ b/help/urlm_list
@@ -0,0 +1,55 @@
+
+URLM_LIST [-delete] [<list>]
+
+Alternate command name: UL
+
+(Each command has a short, easy-to-type, but cryptic alternate name. If
+you don't like cryptic command names, "/set urlm_short_cmds off" will
+disable the short names).
+
+List URLs captured from channel and privmsg text. With no <list>, lists
+the last 10 URLs. With -delete, lists and deletes listed URLs (the <list> is
+required with -delete).
+
+If <list> is given, it may be:
+
+<number>
+ List URL #number only (example: /urlm_list 10)
+
+[<start>]-[<end>]
+ List URLs from #start to #end (example: /urlm_list 20-30)
+ if start omitted, beginning of list assumed.
+ if end omitted, end of list assumed.
+ "/urlm_list 10-" lists from #10 to the end of the list,
+ "/urlm_list -" lists all URLs (same as "/urlm_list all"),
+ "/urlm_list -10" lists from start-of-list to #10.
+
+all
+ Lists all URLs. "/urlm_list all" is the same as "/urlm_list -".
+
+<nick>
+ List URLs posted by user <nick>.
+
+<#channel> or <&channel>
+ List URLs posted in <channel> (example: /urlm_list #irssi).
+
+</urlmatch>
+ List URLs matching <urlmatch> (example: /urlm_list /google.com).
+
+Any <list> may be preceded with ! to invert its sense. Examples:
+ /urlm_list !/yahoo.com - List all URLs not matching yahoo.com
+ /urlm_list !bob - List all URLs not posted by nick "bob"
+ /urlm_list !#badchannel - List all URLs not posted in #badchannel
+
+Note: <nick>, <#channel>, </urlmatch> are all treated as case-
+ insensitive regular expressions. <nick> and <#channel> matches are
+ anchored at the start and end of the match (as though they were
+ prefixed with "^" and followed by "$"). </urlmatch> matches are
+ not anchored.
+
+Warning: BE CAREFUL with the -delete option! There is no confirmation,
+and only one level of undo. It is recommended that you first run /urlm_list
+<list> without the -delete option to be sure which items the -delete will
+affect. If you've just deleted some URLs and want to restore them, run
+/urlm_undo_delete
+
diff --git a/help/urlm_list_browsers b/help/urlm_list_browsers
new file mode 100644
index 0000000..692b19e
--- /dev/null
+++ b/help/urlm_list_browsers
@@ -0,0 +1,11 @@
+
+URLM_LIST_BROWSERS
+
+Show the browser definition list. Items are listed in the same
+format as used to define them: <browser>:<name>:<cmd-format>. The
+default browser is marked with [*].
+
+Note: urlm_add_browser, urlm_del_browser, and urlm_list_browsers store
+the actual browser list as a string in the setting urlm_browsers. The
+format of this setting is prickly; you shouldn't modify it directly.
+
diff --git a/help/urlm_list_overrides b/help/urlm_list_overrides
new file mode 100644
index 0000000..ee1850c
--- /dev/null
+++ b/help/urlm_list_overrides
@@ -0,0 +1,6 @@
+
+URLM_LIST_OVERRIDES [<browser>]
+
+List override patterns for <browser> (optional: default w/no argument
+is to list all overrides for all browsers).
+
diff --git a/help/urlm_log_file b/help/urlm_log_file
new file mode 100644
index 0000000..3a1201d
--- /dev/null
+++ b/help/urlm_log_file
@@ -0,0 +1,16 @@
+
+Setting: URLM_LOG_FILE (default: ~/.irssi/urllog) [*]
+
+This is where URLs are stored. The file is updated immediately after
+each URL is captured, and after any URL(s) are deleted. urlmanager
+uses file locking (via flock()), so it's safe to run multiple instances
+of irssi with the script loaded.
+
+The log file need not exist when the script is first loaded; it will
+be created as soon as the first URL is captured. The directory for the
+log file must already exist, though. Tilde expansion (~ meaning home
+directory) is supported.
+
+When this setting is changed, the in-memory URL log is cleared and
+repopulated from the new file.
+
diff --git a/help/urlm_log_own b/help/urlm_log_own
new file mode 100644
index 0000000..39178ad
--- /dev/null
+++ b/help/urlm_log_own
@@ -0,0 +1,6 @@
+
+Setting: URLM_LOG_OWN
+
+Boolean; whether or not to capture and log URLs from your own public and
+private messages.
+
diff --git a/help/urlm_log_partquit b/help/urlm_log_partquit
new file mode 100644
index 0000000..885a2b7
--- /dev/null
+++ b/help/urlm_log_partquit
@@ -0,0 +1,7 @@
+
+Setting: URLM_LOG_PARTQUIT (boolean, default: off)
+
+Capture URLs from /part and /quit messages. This is off by default
+because so many people use IRC clients that include the client's web
+site URL in the quit and part messages (a mild form of spam).
+
diff --git a/help/urlm_log_trim_interval b/help/urlm_log_trim_interval
new file mode 100644
index 0000000..7d13cb9
--- /dev/null
+++ b/help/urlm_log_trim_interval
@@ -0,0 +1,24 @@
+
+Setting: URLM_LOG_TRIM_INTERVAL
+
+ How often (in seconds) do you want urlmanager to check the log and
+ trim it according to your urlm_max_log_lines and/or urlm_max_log_age
+ settings? Default is 3600 seconds (1 hour), which is probably OK for
+ most users. If you're in a lot of channels and exchange URLs with lots
+ of people, you might want to decrease this to keep the log from growing
+ too much.
+
+ Note: urlm_log_trim_interval has no effect unless one or both of the
+ urlm_max_log_lines or urlm_max_log_age settings are set to non-zero
+ values.
+
+ Note: Even with this setting set to zero, log trimming still occurs
+ at startup (unless urlm_log_trim_startup is OFF), and whenever a URL
+ is captured. To completely disable log trimming, set both
+ urlm_max_log_lines and urlm_max_log_age to zero. With these settings,
+ even manual trimming with /urlm_trim_log will do nothing.
+
+ The lower you set this setting, the more CPU and disk access it requires
+ (unless you set it to zero, of course). Probably it's a bad idea to
+ use a value lower than 60 seconds here, under any conditions.
+
diff --git a/help/urlm_log_trim_startup b/help/urlm_log_trim_startup
new file mode 100644
index 0000000..f33be64
--- /dev/null
+++ b/help/urlm_log_trim_startup
@@ -0,0 +1,7 @@
+
+Setting: URLM_LOG_TRIM_STARTUP
+
+ Boolean; controls whether urlmanager trims the log on startup. Has no
+ effect unless one or both of urlm_max_log_lines or urlm_max_log_age
+ are set to a non-zero value.
+
diff --git a/help/urlm_max_log_age b/help/urlm_max_log_age
new file mode 100644
index 0000000..9c342ae
--- /dev/null
+++ b/help/urlm_max_log_age
@@ -0,0 +1,17 @@
+
+Setting: URLM_MAX_LOG_AGE
+
+ Maximum age (in seconds) for URLs in the log. Set to zero to disable
+ age-based trimming. Any URLs older than this will be removed from the
+ log whenever the log is trimmed. This happens:
+
+ - At startup (unless urlm_log_trim_startup is OFF)
+ - Every urlm_log_trim_interval seconds (if urlm_log_trim_interval
+ is non-zero)
+ - Any time a URL is captured
+ - When /urlm_trim_log is run manually
+
+ This means that old URLs persist in the log for up to urlm_log_trim_interval
+ seconds past their expiration time; if this annoys you, decrease the
+ urlm_log_trim_interval to reduce the problem.
+
diff --git a/help/urlm_max_log_lines b/help/urlm_max_log_lines
new file mode 100644
index 0000000..68e932c
--- /dev/null
+++ b/help/urlm_max_log_lines
@@ -0,0 +1,11 @@
+
+Setting: URLM_MAX_LOG_LINES
+
+ Maximum number of URLs to keep in the log. May be set to zero to
+ disable trimming by length (in which case, trimming by age may still
+ be used or not, as desired).
+
+ When set to non-zero, the log will be trimmed to this size by discarding
+ older (lower-numbered) URLs. See urlm_max_log_age for explanation of when
+ trimming is done.
+
diff --git a/help/urlm_open b/help/urlm_open
new file mode 100644
index 0000000..1aac2c2
--- /dev/null
+++ b/help/urlm_open
@@ -0,0 +1,29 @@
+
+URLM_OPEN [<arg>] - Open URL using "best" browser for the URL
+
+Alternate command name: UO
+
+Opens a URL according to the following rules:
+
+- If the URL's file extension (e.g. .zip) is found in urlm_wget_extensions,
+ the URL is downloaded with wget. (wget is run in an irssi window, so
+ you can monitor its progress)
+
+- Otherwise, if the URL matches any of the patterns in urlm_browser_overrides,
+ the URL is opened with the matching browser. (This is useful in cases
+ where e.g. you have Firefox as the default browser, but want all
+ youtube.com pages to open in Internet Explorer). See the
+ /urlm_add_override and /urlm_del_override commands for details.
+
+- Otherwise, the default browser (urlm_default_browser setting) is used.
+
+If <arg> is omitted, the default is to open the most recently captured URL.
+If <arg> is provided, it must be a single numeric URL number (from the
+output of /UL). Negative numbers are allowed, and are interpreted as
+counting from the end of the list (so -1 means the second most recent
+captured URL).
+
+Note: you do not need to define wget as a browser. urlmanager will use
+its own internal wget support to run wget in a window if the file
+extension is listed in urlm_wget_extensions.
+
diff --git a/help/urlm_quiet_capture b/help/urlm_quiet_capture
new file mode 100644
index 0000000..eba5da2
--- /dev/null
+++ b/help/urlm_quiet_capture
@@ -0,0 +1,16 @@
+
+Setting: URLM_QUIET_CAPTURE
+
+Boolean; whether or not to print "Captured URL #xx http://whatever from nick"
+in the current window every time a URL is captured. Default: ON
+
+I can't think of a reason I'd ever want to turn this off, but maybe
+it drives other people crazy... one warning: if you turn this setting off,
+your default URL for "/urlm_open" may change between the time you notice
+a URL in channel, and the time you try to open it (e.g. because someone in
+another channel pasted another URL after the one you saw).
+
+Future versions of urlmanager may support an option to allow printing of
+each captured URL in the window where it was captured, instead of the
+current window.
+
diff --git a/help/urlm_short_cmds b/help/urlm_short_cmds
new file mode 100644
index 0000000..3b15d48
--- /dev/null
+++ b/help/urlm_short_cmds
@@ -0,0 +1,14 @@
+
+Setting: URLM_SHORT_CMDS (boolean, default: on)
+
+Whether or not to bind short command names, such as /ff as an alias
+for /urlm_open_ff. The only commands that get short-name aliases are
+/urlm_open (alias /uo) and the /urlm_open_* commands (which get the
+browser tags as their short aliases).
+
+When this option is off, every command defined by urlmanager begins
+with the string "urlm_", which acts as a sort of a namespace. Some
+people might hate "polluting" the rest of the command namespace, or
+maybe they already have a /uo command defined in another script. If
+you're one of these people, /set urlm_short_cmds off.
+
diff --git a/help/urlm_trim_log b/help/urlm_trim_log
new file mode 100644
index 0000000..59d3cf8
--- /dev/null
+++ b/help/urlm_trim_log
@@ -0,0 +1,12 @@
+
+URLM_TRIM_LOG
+
+Manually trim the log file, according to the urlm_max_log_lines and
+urlm_max_log_age settings (q.v.)
+
+Normally you won't need this command: instead you'll use the
+urlm_log_trim_startup and/or urlm_log_trim_interval settings to
+automatically keep the log size manageable. /urlm_trim_log might be
+useful for recovering from an attack by floodbots that send URLs
+to a channel...
+
diff --git a/help/urlm_undo_delete b/help/urlm_undo_delete
new file mode 100644
index 0000000..1fbfea9
--- /dev/null
+++ b/help/urlm_undo_delete
@@ -0,0 +1,11 @@
+
+URLM_UNDO_DELETE [-yes]
+
+Undo the last /urlm_list -delete operation. There is only one level of
+undo, and you can't undo the undo.
+
+If any URLs have been captured since the last -delete, /urlm_undo_delete
+will warn of this fact and refuse to restore, unless -yes is given. If
+you override the warning with -yes, you WILL lose any URLs that have been
+captured since the delete.
+
diff --git a/help/urlm_use_bold b/help/urlm_use_bold
new file mode 100644
index 0000000..6c41e0b
--- /dev/null
+++ b/help/urlm_use_bold
@@ -0,0 +1,7 @@
+
+Setting: URLM_USE_BOLD (boolean, default: on)
+
+Whether or not you want bold in your /urlm_list output (and a few
+other places within urlmanager).
+
+
diff --git a/help/urlm_use_color b/help/urlm_use_color
new file mode 100644
index 0000000..c2e453b
--- /dev/null
+++ b/help/urlm_use_color
@@ -0,0 +1,12 @@
+
+Setting: URLM_USE_COLOR (boolean, default: on)
+
+Whether or not you want color in your /urlm_list output. The author
+finds the colors useful, but you might hate it.
+
+Note that urlmanager never sends text to the server (e.g. to a channel
+or a query). With bold/color enabled, only you will see them, so you
+won't be violating any "no mirc colors" channel rules.
+
+This setting has no effect if irssi's hide_colors is set to ON.
+
diff --git a/help/urlm_wget b/help/urlm_wget
new file mode 100644
index 0000000..8ed7039
--- /dev/null
+++ b/help/urlm_wget
@@ -0,0 +1,29 @@
+
+URLM_WGET [<wget-args>] [<arg>]
+
+Alternate command name: WGET
+
+Downloads a URL with wget. <arg> is treated the same as for /urlm_open
+(q.v.). <wget-args>, if present, are passed to the wget process as-is
+(see also the urlm_wget_extra_args setting).
+
+When urlmanager runs wget, it creates a new irssi window (split or hidden)
+named urlm_wget_<number> (where <number> is a unique serial number generated
+by urlmanager). This window behaves as a normal irssi window (shows up
+in /window list, can be closed with /window close, etc). By default,
+wget windows are created as hidden windows (change with urlm_wget_split_size
+setting), which are automatically closed 60 seconds after the wget process
+completes (change with urlm_wget_autoclose and urlm_wget_autoclose_delay
+settings).
+
+To cancel a wget download, you may close its window while wget is still
+running. This will kill the wget process, leaving any partially-downloaded
+files behind (which may be resumed with wget's -c option).
+
+Note: If you need to pass a numeric argument to /urlm_wget as its last
+argument, do not separate it from its command switch with a space. That
+is, instead of "/urlm_wget -T 30", use "/urlm_wget -T30" (or its long
+version, "/urlm_wget --timeout=30"). The reason for this restriction
+is that urlmanager will interpret the last argument as a URL number,
+if it's numeric.
+
diff --git a/help/urlm_wget_autoclose b/help/urlm_wget_autoclose
new file mode 100644
index 0000000..42af64f
--- /dev/null
+++ b/help/urlm_wget_autoclose
@@ -0,0 +1,13 @@
+
+Setting: URLM_WGET_AUTOCLOSE
+
+Boolean; whether or not to automatically close windows created by running
+wget (via /wget or /urlm_wget). Default: ON
+
+With this setting OFF, you'll have to manually close windows created
+by /wget or /urlm_wget.
+
+Changing this setting only affects wget windows created after the change.
+Any existing wget windows will still be autoclosed if the old value was
+ON, or else they will not be autoclosed if the old value was OFF.
+
diff --git a/help/urlm_wget_autoclose_delay b/help/urlm_wget_autoclose_delay
new file mode 100644
index 0000000..206657b
--- /dev/null
+++ b/help/urlm_wget_autoclose_delay
@@ -0,0 +1,10 @@
+
+Setting: URLM_WGET_AUTOCLOSE_DELAY
+
+Integer; how long to wait after a wget download is complete, before auto-
+closing the window. No effect if urlm_wget_autoclose is OFF.
+
+With urlm_wget_autoclose ON, set urlm_wget_autoclose_delay to zero to
+immediately close wget windows, or to a number of seconds to delay
+before closing wget windows.
+
diff --git a/help/urlm_wget_dl_dir b/help/urlm_wget_dl_dir
new file mode 100644
index 0000000..da06e82
--- /dev/null
+++ b/help/urlm_wget_dl_dir
@@ -0,0 +1,14 @@
+
+Setting: URLM_WGET_DL_DIR (default: ~) [*]
+
+This is where files downloaded with wget will be saved. Tilde expansion
+is supported. If this directory does not exist, it will be created by
+wget, when it is run for the first time.
+
+[*] Normally, the log file and download directory should be an absolute
+path, or relative to $HOME (with ~ expansion). If a relative path is
+used (without ~), it will be resolved relative to the working directory
+where irssi was started.
+
+FIXME: this setting may not contain whitespace characters (spaces or tabs).
+
diff --git a/help/urlm_wget_extensions b/help/urlm_wget_extensions
new file mode 100644
index 0000000..ec87f9c
--- /dev/null
+++ b/help/urlm_wget_extensions
@@ -0,0 +1,23 @@
+
+Setting: URLM_WGET_EXTENSIONS (default: tar zip atr bas xex exe dcm car z gz torrent)
+
+Space-separated list of filename extensions. When /urlm_open is used on
+a URL ending in one of these, the file will be downloaded with wget,
+running in an irssi window. To disable wget, you may set this list to
+an empty string, or use one of your defined browsers to open such files.
+
+If you want to use wget without running it in an irssi window, clear the
+extension list, then define a browser like so:
+
+# download in background (no controlling terminal, no progress reports)
+/urlm_add_browser dl:Download with wget:wget -b '%s'
+
+# download in new screen window (irssi must be running under GNU screen)
+/urlm_add_browser dl:Download with wget:screen wget '%s'
+
+# download in new X window (irssi must be running under X)
+/urlm_add_browser dl:Download with wget:xterm -e "wget '%s';echo 'press Enter to close window';read junk"
+
+Note that you may not define a browser tag as 'wget' (the examples above
+all use 'dl' instead).
+
diff --git a/help/urlm_wget_extra_args b/help/urlm_wget_extra_args
new file mode 100644
index 0000000..1ee593f
--- /dev/null
+++ b/help/urlm_wget_extra_args
@@ -0,0 +1,5 @@
+
+Setting: URLM_WGET_EXTRA_ARGS (default: <none>)
+
+Extra arguments to be passed to wget verbatim. Default is no arguments.
+
diff --git a/help/urlm_wget_path b/help/urlm_wget_path
new file mode 100644
index 0000000..260a921
--- /dev/null
+++ b/help/urlm_wget_path
@@ -0,0 +1,9 @@
+
+Setting: URLM_WGET_PATH (default: wget)
+
+The path to the wget binary. Either an absolute path such as /usr/bin/wget,
+or the string wget (the default) to search the $PATH. Tilde expansion
+is supported (e.g. /set urlm_wget_path ~/bin/wget)
+
+FIXME: this setting may not contain whitespace characters (spaces or tabs).
+
diff --git a/help/urlm_wget_split_size b/help/urlm_wget_split_size
new file mode 100644
index 0000000..63beb07
--- /dev/null
+++ b/help/urlm_wget_split_size
@@ -0,0 +1,11 @@
+
+Setting: URLM_WGET_SPLIT_SIZE
+
+Integer; how tall (in screen lines) to make split windows created by
+/wget or /urlm_wget. Set to zero (the default) to use hidden (full-sized)
+windows instead of split windows.
+
+Try not to set this too high; if urlmanager is unable to resize a window
+to this size, the window will remain at the default size (as used
+by the /window split command).
+
diff --git a/help/urlmanager b/help/urlmanager
new file mode 100644
index 0000000..f979ad9
--- /dev/null
+++ b/help/urlmanager
@@ -0,0 +1,30 @@
+
+URLMANAGER
+
+Commands: all urlmanager commands are prefixed with /urlm_ except the
+short browser aliases (if urlm_short_cmds is enabled). To see the list
+of commands, type "/help urlm".
+
+Settings: urlmanager's behaviour is controlled by quite a few settings.
+All urlmanager settings are prefixed with "urlm_".
+To see a full list of them, use "/set urlm".
+
+Log File:
+
+URLs are stored in a file (filename set with urlm_log_file), one URL
+per line.
+
+Each line is a space-separated list:
+
+timestamp nick channel url
+
+This file may be edited (carefully) with a standard text editor, or
+removed (to clear the URL list). If you edit or delete the file, reload
+the script with "/run urlmanager.pl". (it's probably a
+good idea to trim the file when it gets above a few hundred lines).
+
+Notes:
+- The timestamp is expressed in seconds since the epoch.
+- For a URL received in a private /msg or dcc chat, "channel" will
+ be the sending user.
+
diff --git a/scripts/urlmanager.pl b/scripts/urlmanager.pl
new file mode 100644
index 0000000..787e775
--- /dev/null
+++ b/scripts/urlmanager.pl
@@ -0,0 +1,1350 @@
+#!/usr/bin/perl
+
+# urlmanager script for irssi
+
+use warnings;
+use strict;
+
+use Fcntl qw/:flock/;
+use POSIX qw/strftime/;
+
+use Irssi qw/
+ settings_add_str settings_add_bool settings_add_int
+ settings_get_str settings_get_bool settings_get_int
+ settings_set_str settings_set_bool settings_set_int
+ command command_bind command_unbind
+ signal_emit signal_add_last signal_stop
+ timeout_add timeout_add_once timeout_remove
+ window_find_item/;
+
+our $VERSION = "0.1";
+our %IRSSI = (
+ authors => 'Urchlay',
+ contact => 'Urchlay on NewNet',
+ name => 'urlmanager',
+ description =>
+ 'Captures URLs said in channel and private messages ' .
+ 'and saves them to a file, also adds several commands for ' .
+ 'listing and opening captured URLs ' .
+ '(based on urlgrab.pl 0.2 by David Leadbetter)',
+ license => 'GNU GPLv2 or later',
+ url => 'none',
+);
+
+# 20110609 bkw: if irssi was started in a screen session from the console,
+# then detached, then reattached in an X session, DISPLAY will not be set.
+# This will confuse the user, as e.g. firefox will silently fail to run.
+# It won't do any harm to set DISPLAY=:0 if it's not set, and might help...
+{
+    # Nothing usable in DISPLAY (unset, empty or "0"): assume display :0.
+    $ENV{DISPLAY} = ":0" if not $ENV{DISPLAY};
+}
+
+# Workaround for a heisenbug, see:
+# http://bugs.irssi.org/index.php?do=details&task_id=242
+{ package Irssi::Nick }
+
+# Color constants.
+# Irssi.pm doesn't include symbolic mIRC-style color names...
+# NOTE: if you print e.g. $green . "12345", the "1" will be interpreted
+# as the 2nd digit of the color! Only good fix is to always put a space:
+# print $green . " 12345" works OK.
+# Declarations only; defined in init_colors()
+our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off);
+
+# @urls is a list of anonymous hashes, each representing one URL.
+# See read_url_file for hash elements.
+our @urls;
+
+# Most-recently-posted URL (the URL only, not a hash). Only used
+# for avoiding dups (see url_log).
+our $lasturl = "";
+
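+# Have any URLs been captured since the last /ul -delete? This is to
+# (hopefully) protect the user from losing them: urlm_undo_delete refuses
+# to restore the backup (unless given -yes) while this is non-zero.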
+our $captured_since_delete = 0;
+
+# Grr. Printing with print() or Irssi::print(), % chars are interpreted
+# as irssi formats. This causes URLs containing HTML %-escapes to come
+# out in weird colors. Using irssi's /echo is apparently the right way
+# to avoid this... though we get colored -!- in front of every line :(
+sub echo {
+    # one irssi /echo command per argument, in order
+    foreach my $line (@_) {
+        command("/echo $line");
+    }
+}
+
+# trim: strip ALL leading and trailing whitespace from the argument.
+# The argument is modified in place (through the @_ alias) and the trimmed
+# string is also returned, so both trim($x) and $x = trim($x) work.
+# An undefined argument yields the empty string.
+sub trim {
+    return "" unless defined $_[0];
+    $_[0] =~ s/^\s+//;
+    # \s+ (not \s): a single \s only ever removed the last trailing
+    # whitespace character
+    $_[0] =~ s/\s+$//;
+    return $_[0];
+}
+
+# read_url_file: load the URL log named by the urlm_log_file setting.
+# Returns a list of URL hashes (keys: stamp, nick, channel, url), one per
+# log line, in file order. Returns an empty list if the file can't be
+# opened. Lines with fewer than four fields (blank lines, damage from
+# hand-editing) are skipped rather than turned into entries with
+# undefined fields.
+sub read_url_file {
+    my $file = get_url_log_file();
+    my @got;
+
+    # 3-arg open with a lexical handle: the file name can't be taken for
+    # an open mode, and the handle can't collide with another sub's.
+    open(my $log, '<', $file) or return;
+    flock($log, Fcntl::LOCK_EX);
+
+    # a named line variable keeps the caller's $_ intact
+    while(my $line = <$log>) {
+        chomp $line;
+        my @fields = split " ", $line;
+        next if @fields < 4;
+        push @got, {
+            stamp => $fields[0],
+            nick => $fields[1],
+            channel => $fields[2],
+            url => $fields[3],
+        };
+    }
+    close $log;
+
+    return @got;
+}
+
+# write_url_file: re-log each URL hash passed in, in order, by handing
+# its nick/channel/url/stamp to url_log() in "relog" mode.
+sub write_url_file {
+    foreach my $entry (@_) {
+        # clear the last-URL memory first, so url_log()'s same-as-last
+        # check doesn't discard consecutive identical entries
+        $lasturl = "";
+        url_log(1, @{$entry}{qw/nick channel url stamp/});
+    }
+}
+
+# trim_url_log: apply the urlm_max_log_age and urlm_max_log_lines limits
+# to @urls; if any entries fall outside them, clear the log and re-log the
+# survivors. Takes one optional "quiet" argument, which is accepted but
+# currently has no effect (the summary line is always printed).
+sub trim_url_log {
+    my $quiet = shift || 0;
+    my $line_limit = settings_get_int("urlm_max_log_lines") || 0;
+    my $age_limit = settings_get_int("urlm_max_log_age") || 0;
+
+    # both limits disabled: nothing to do!
+    return if not ($line_limit || $age_limit);
+
+    my @survivors = @urls;
+    if($age_limit) {
+        # drop everything stamped longer ago than the age limit
+        @survivors = grep { $_->{stamp} >= (time() - $age_limit) } @urls;
+    }
+
+    # still too many? keep only the newest $line_limit entries
+    if($line_limit && (@survivors > $line_limit)) {
+        @survivors = @survivors[-$line_limit .. -1];
+    }
+
+    my $dropped = @urls - @survivors;
+    return unless $dropped;
+
+    clear_url_log();
+    write_url_file(@survivors);
+    print "Trimmed $dropped URLs from log"; # unless $quiet;
+}
+
+# clear_url_log: forget every URL. Removes the log file from disk (a
+# failure to remove it is ignored) and empties the in-memory @urls.
+sub clear_url_log {
+    unlink(get_url_log_file()); # or print "Can't delete $file: $!";
+    @urls = ();
+}
+
+# get_url_log_file: returns the urlm_log_file setting with a leading "~"
+# replaced by $ENV{HOME}.
+sub get_url_log_file {
+    (my $path = settings_get_str('urlm_log_file')) =~ s/^~/$ENV{HOME}/;
+    return $path;
+}
+
+# url_public: signal handler for "message public".
+# Logs every URL found in the message text under the sender's nick and
+# the channel it was said in.
+sub url_public {
+    my ($server, $text, $nick, $hostmask, $channel) = @_;
+    foreach my $url (find_urls($text)) {
+        url_log(0, $nick, $channel, $url);
+    }
+}
+
+# url_own: signal handler for "message own_public" and
+# "message own_private". Does nothing unless urlm_log_own is enabled;
+# otherwise logs every URL in the text under our own nick.
+sub url_own {
+    my ($server, $text, $channel) = @_;
+    settings_get_bool('urlm_log_own') or return;
+    foreach my $url (find_urls($text)) {
+        url_log(0, $server->{nick}, $channel, $url);
+    }
+}
+
+# url_private: signal handler for "message private",
+# "message irc notice", "message irc op_public" and "message irc action".
+# Logs every URL in the text under the sender's nick; our own nick
+# ($server->{nick}) is stored in the channel field.
+sub url_private {
+    my ($server, $text, $nick, $hostmask) = @_;
+    foreach my $url (find_urls($text)) {
+        url_log(0, $nick, $server->{nick}, $url);
+    }
+}
+
+# url_topic: signal handler for "message topic".
+# Logs every URL in the new topic text, except when we set the topic
+# ourselves.
+sub url_topic {
+    my ($server, $channel, $text, $nick, $hostmask) = @_;
+    if($nick eq $server->{nick}) {
+        return; # don't log own topic changes
+    }
+    foreach my $url (find_urls($text)) {
+        url_log(0, $nick, $channel, $url);
+    }
+}
+
+# url_join_topic: signal handler for "channel joined".
+# Logs every URL in the joined channel's topic under the nick recorded in
+# topic_by, unless there is no topic or we set it ourselves.
+sub url_join_topic {
+    my ($chan) = @_;
+    $chan->{topic} or return;
+    if($chan->{topic_by} eq $chan->{server}->{nick}) {
+        return; # don't log own topic changes
+    }
+    foreach my $url (find_urls($chan->{topic})) {
+        url_log(0, $chan->{topic_by}, $chan->{name}, $url);
+    }
+}
+
+# url_part: signal handler for "message part".
+# Only active when urlm_log_partquit is enabled. Logs every URL in the
+# part message under the parting nick and channel, ignoring our own parts.
+sub url_part {
+    settings_get_bool('urlm_log_partquit') or return;
+    my ($server, $channel, $nick, $hostmask, $text) = @_;
+    if($nick eq $server->{nick}) {
+        return; # don't log own parts (redundant?)
+    }
+    foreach my $url (find_urls($text)) {
+        url_log(0, $nick, $channel, $url);
+    }
+}
+
+# url_quit: signal handler for "message quit".
+# Only active when urlm_log_partquit is enabled. Logs every URL in the
+# quit message under the quitting nick (our own nick goes in the channel
+# field), ignoring our own quits.
+sub url_quit {
+    settings_get_bool('urlm_log_partquit') or return;
+    my ($server, $nick, $hostmask, $text) = @_;
+    if($nick eq $server->{nick}) {
+        return; # don't log own quits (redundant?)
+    }
+    foreach my $url (find_urls($text)) {
+        url_log(0, $nick, $server->{nick}, $url);
+    }
+}
+
+# url_dccmsg: signal handler for "dcc chat message".
+# Logs every URL in the text under the DCC peer's nick; the nick of the
+# DCC's server record is stored in the channel field.
+# TODO: test this!
+sub url_dccmsg {
+    my ($dcc, $text) = @_;
+    foreach my $url (find_urls($text)) {
+        url_log(0, $dcc->{nick}, $dcc->{server}->{nick}, $url);
+    }
+}
+
+# print_url_line:
+# echo one formatted (colorful) line of /ul output. $maxnick and $maxchan
+# are the field widths used for the nick and channel columns; the other
+# arguments are the column values themselves.
+sub print_url_line {
+    my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_;
+
+    my $format = "%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s %s%s";
+    my @columns = (
+        $bold_on, $num, $bold_off,
+        $green, $stamp,
+        $red, $nick,
+        $yellow, $channel,
+        $purple, $url,
+        $color_off,
+    );
+
+    echo(sprintf($format, @columns));
+}
+
+# url_list_cmd: list (and optionally delete) logged URLs.
+#
+# The argument is lowercased and trimmed. A leading "-delete" means the
+# selected URLs are listed and then removed from the log (a backup of the
+# log file is kept as "<logfile>~"); -delete requires a selector.
+# Selector forms (a leading "!" inverts the selection):
+#   (empty)       the last 10 URLs
+#   all           every URL
+#   N             URL number N
+#   N-M  N-  -M   a range of URL numbers
+#   #chan &chan   URLs logged for that channel (literal name match)
+#   /regex/       URLs matching the regex, case-insensitively; the
+#                 closing slash is optional and not part of the regex
+#   other text    URLs whose nick starts with that text
+sub url_list_cmd { # bound to /ul (sorry, this sub is a mess)
+    my $do_delete = 0;
+    my $listed = 0;
+    my @keep_urls;
+
+    my $arg = shift || "";
+    $arg = lc $arg;
+
+    $arg = trim($arg);
+
+    if($arg =~ /^-delete/) {
+        $arg =~ s/^-delete\s*//;
+        $do_delete++;
+
+        if($arg eq '') {
+            print "/ul -delete requires a parameter! (/ul help for details)";
+            return;
+        }
+    }
+
+    if(not @urls) {
+        print "No URLs in list!";
+        return;
+    }
+
+    my ($start, $end, $nick, $regex, $channel);
+    $arg = "-" if $arg eq 'all';
+
+    my $invert = 0;
+    if($arg =~ /^!(.*)/) {
+        $arg = $1;
+        $invert = 1;
+    }
+
+    if($arg eq "") {
+        $start = @urls-10;
+        $start = 0 if $start < 0;
+        $end = $#urls;
+    } elsif($arg =~ /^[&#](.*)/) {
+        $channel = $1;
+    } elsif($arg =~ m{^/(.*?)/?\z}) {
+        # non-greedy capture, so an optional closing "/" is dropped
+        # instead of becoming part of the regex
+        $regex = $1;
+    } elsif($arg =~ /^\d+$/) {
+        $start = $end = $arg;
+    } elsif($arg =~ /^[-\d]+$/) {
+        ($start, $end) = split /-/, $arg, 2;
+        $start = 0 if $start eq "";
+        $end = $#urls if $end eq "";
+    } else {
+        $nick = $arg;
+    }
+
+    # Compile the user's regex once, up front, so a typo is reported
+    # cleanly instead of dying in the middle of the loop below.
+    my $url_re;
+    if(defined($regex) && $regex ne "") {
+        $url_re = eval { qr/$regex/i };
+        if(not defined $url_re) {
+            print "Bad regex '$regex': $@";
+            return;
+        }
+    }
+
+    # Channel names are matched literally (\Q..\E): they may contain
+    # characters such as "+" or "." that are special in a regex.
+    my $chan_re;
+    if(defined($channel) && $channel ne "") {
+        $chan_re = qr/^[#&]?\Q$channel\E$/i;
+    }
+
+    my $count = 0;
+    my @to_list;
+    for my $u (@urls) {
+        my $list = 0;
+        if($nick) {
+            $list++ if lc($u->{nick}) =~ ("^" . quotemeta(lc $nick));
+        } elsif($url_re) {
+            $list++ if $u->{url} =~ $url_re;
+        } elsif($chan_re) {
+            $list++ if $u->{channel} =~ $chan_re;
+        } elsif(defined($start) && defined($end)) {
+            $list++ if $count >= $start && $count <= $end;
+        }
+
+        $list = !$list if $invert;
+
+        if($list) {
+            $listed++;
+            push @to_list, [ $count, $u ];
+        } elsif($do_delete) {
+            # with -delete, everything NOT selected survives
+            push @keep_urls, $u;
+        }
+
+        $count++;
+    }
+
+    if(@to_list) { # print the list if anything's supposed to be in it
+        # column widths: at least as wide as the "Nick"/"Channel" headers
+        my $maxnick = 4;
+        my $maxchan = 7;
+
+        for(@to_list) {
+            my ($num, $u) = @$_;
+            my $len = length($u->{nick});
+            $maxnick = $len if $len > $maxnick;
+            $len = length($u->{channel});
+            $maxchan = $len if $len > $maxchan;
+        }
+
+        print_url_line($maxnick, $maxchan,
+            "#", "When", "Nick", "Channel", "URL");
+
+        for(@to_list) {
+            my ($num, $u) = @$_;
+
+            my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp}));
+            print_url_line($maxnick, $maxchan,
+                $num, $stamp, $u->{nick}, $u->{channel}, $u->{url});
+        }
+    }
+
+    if($do_delete) { # process -delete flag
+        my $deleted = scalar @urls - scalar @keep_urls;
+        if(not $deleted) {
+            print "No URLs deleted";
+            return;
+        }
+
+        # keep the old log as "<logfile>~" so urlm_undo_delete can restore it
+        my $file = get_url_log_file();
+        rename($file, "$file~") or print "Warning: can't backup log file: $!";
+
+        clear_url_log();
+        write_url_file(@keep_urls);
+
+        print $red . "These " . $deleted . " URLs have been deleted!" .
+            $color_off . " (" . @urls . " remain)";
+
+        $captured_since_delete = 0;
+    } else { # no -delete flag, show summary
+        print "Listed $listed of $count URLs";
+    }
+}
+
+# urlm_say: empty stub; it has no body and does nothing when called.
+sub urlm_say {
+}
+
+# urlm_undo_delete: restore the URL log from the "<logfile>~" backup that
+# /ul -delete leaves behind.
+# Argument: "-yes" to go ahead even though URLs have been captured since
+# the last delete (they are not in the backup, so restoring drops them).
+# If the backup can't be moved into place, an error is printed and both
+# the current log file and @urls are left exactly as they were.
+sub urlm_undo_delete {
+    my $arg = defined($_[0]) ? $_[0] : "";
+    $arg =~ s/^\s+//;
+    $arg =~ s/\s+$//;
+    my $yes = ($arg eq '-yes');
+
+    if($captured_since_delete && (not $yes)) {
+        print "urlm_undo_delete: doing this will throw away some URLs that " .
+            "were captured since the last delete. Re-run with '-yes' to do it " .
+            "anyway.";
+        return;
+    }
+
+    my $file = get_url_log_file();
+
+    # rename() replaces an existing $file, so the current log is only
+    # discarded once the backup is actually in place. (Deleting it first
+    # lost the log for good whenever the backup was missing.)
+    if(not(rename("$file~", $file))) {
+        print "Can't restore log file: $!";
+        return;
+    }
+
+    @urls = read_url_file();
+    print "Restored " . @urls . " URLs from backup";
+    $captured_since_delete = 0;
+}
+
+# get_url_from_number:
+# Returns a URL hash from @urls, selected by the (whitespace-trimmed)
+# argument. Prints a message and returns nothing if there is no match.
+#   ""      the highest-numbered (most recent) URL
+#   N       URL number N (index into @urls)
+#   -N      the URL N places before the most recent one ($#urls - N)
+#   other   treated as a nick (case-insensitive, whole-nick match):
+#           the most recent URL from that nick
+sub get_url_from_number {
+    my $arg = shift;
+    $arg = "" unless defined $arg;
+    # trim before any of the numeric tests, so "5 " is still a number
+    $arg =~ s/^\s+//;
+    $arg =~ s/\s+$//;
+    my $asked = $arg;
+
+    if(not @urls) {
+        print("No URLs in list!");
+        return;
+    }
+
+    if($arg eq '') {
+        $arg = $#urls;
+    } elsif($arg =~ /^-\d+$/) {
+        # anchored at both ends: a nick such as "bob-1" is not a number
+        $arg = $#urls + $arg;
+        if($arg < 0) {
+            print("No such URL number '$asked'");
+            return;
+        }
+    }
+
+    if($arg !~ /^\d+$/) {
+        # not a number: search backwards for the newest URL from that nick
+        for(my $i = $#urls; $i >= 0; $i--) {
+            my $url = $urls[$i];
+            return $url if lc($arg) eq lc($url->{nick});
+        }
+        print("Can't find any URLs from nick '$arg'");
+        return;
+    }
+
+    if($arg > $#urls) {
+        print("No such URL number '$arg'");
+        return;
+    }
+
+    return $urls[$arg];
+}
+
+# url_open_cmd: open a URL with the given browser (auto-guesses which browser
+# to use if $browser is '').
+# $urlnum is any selector get_url_from_number() accepts. $browser is a
+# browser tag; a reference (irssi passes a Server object when this is
+# called as a plain command) counts as "no browser given".
+# Guessing order: wget if the URL's file extension is listed in
+# urlm_wget_extensions, else the first browser override whose pattern
+# matches the URL, else urlm_default_browser.
+# Prints an error and does nothing if the chosen tag isn't in the browser
+# list.
+sub url_open_cmd {
+    my ($urlnum, $browser) = @_;
+    $browser = '' if ref $browser; # means it's a Server object
+
+    my $url = get_url_from_number($urlnum);
+    return if not $url;
+    my $link = $url->{url};
+
+    if(not $browser) { # guess browser...
+        # check for wget first...
+        if($link =~ m{/[^/]+\.(\w+)$}) {
+            my $ext = lc $1;
+            for my $wget_ext (split " ", settings_get_str('urlm_wget_extensions')) {
+                if($ext eq lc($wget_ext)) {
+                    # Hand wget the resolved index, not the raw selector:
+                    # url_open_wget_cmd() only understands plain numbers
+                    # and would treat a nick or "-1" as an extra wget
+                    # argument and fetch the newest URL instead.
+                    my ($index) = grep { $urls[$_] == $url } 0 .. $#urls;
+                    url_open_wget_cmd($index);
+                    return;
+                }
+            }
+        }
+
+        # not a wget extension, check browser override patterns
+        $browser = settings_get_str('urlm_default_browser');
+        my @overrides = read_browser_overrides();
+
+OVERRIDE:
+        for(@overrides) {
+            my ($tag, $pats) = @$_;
+            for(@$pats) {
+                if($link =~ /$_/i) {
+                    $browser = $tag;
+                    last OVERRIDE;
+                }
+            }
+        }
+    }
+
+    my ($name, $format);
+    for(read_browser_list()) {
+        if($_->{tag} eq $browser) {
+            ($name, $format) = ($_->{name}, $_->{command});
+            last;
+        }
+    }
+
+    # unknown tag: don't go on to run an empty command line
+    if(not defined $format) {
+        print "Browser '$browser' is not defined (see /urlm_list_browsers)";
+        return;
+    }
+
+    echo("$name - " . $link . " (" . $url->{nick} . ")");
+
+    $link =~ s/'/%27/g; # be nice to the shell, escape single quotes
+    $link =~ s/\(/%28/g; # be nice to firefox, escape parens
+    $link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails*
+                         # if the url contains any () chars!
+
+    # ">/dev/null 2>&1" rather than the bash-only "&>/dev/null": system()
+    # runs the command with /bin/sh, which need not be bash.
+    my $cmd = sprintf($format, $link) . " >/dev/null 2>&1 &";
+    system($cmd);
+}
+
+# Open with wget in an irssi window
+# TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?)
+#
+# url_open_wget_cmd: download a logged URL with wget, run via irssi's
+# /exec in a newly created window.
+# Argument: a string of words. If the last word is all digits it is taken
+# as the URL number (otherwise the most recent URL is used); the remaining
+# words are passed to wget as extra arguments, after the ones from the
+# urlm_wget_extra_args setting.
+# Settings read: urlm_wget_dl_dir, urlm_wget_split_size,
+# urlm_wget_autoclose, urlm_wget_autoclose_delay, urlm_wget_extra_args,
+# urlm_wget_path.
+#
+# $wgetcount: counter used to build the "urlm_wget_<n>" window names.
+our $wgetcount = 1;
+sub url_open_wget_cmd { # bound to /urlm_wget /wget
+ my $arg = shift;
+
+ my @args = split " ", $arg;
+
+ # an all-digit last word is the URL number; anything else stays in @args
+ my $urlnum = "";
+ if(@args && ($args[-1] =~ /^\d+$/)) {
+ $urlnum = pop @args;
+ }
+
+ my $url = get_url_from_number($urlnum);
+ return if not $url;
+
+ my $more_args = join(" ", @args);
+ $more_args .= " " if $more_args;
+
+ # download directory: leading ~ becomes $HOME, empty means current dir
+ my $dir = settings_get_str('urlm_wget_dl_dir');
+ $dir =~ s/^~/$ENV{HOME}/;
+ $dir = "." if not $dir;
+
+ my $size = settings_get_int('urlm_wget_split_size');
+
+ if($size > 0) {
+ command("/window new split");
+ command("/window size $size");
+ } else { # size == 0, means "do not split"
+ command("/window new hidden");
+ }
+
+ # find an unused window name...
+ my $name = "urlm_wget_" . ($wgetcount++);
+ while(window_find_item($name)) {
+ $name = "urlm_wget_" . ($wgetcount++);
+ }
+
+ command("/window name $name");
+
+ # tell the user how/when the new window goes away
+ if(settings_get_bool('urlm_wget_autoclose')) {
+ my $delay = settings_get_int('urlm_wget_autoclose_delay');
+ if($delay) {
+ print "This window will close $delay seconds after download is done";
+ } else {
+ print "This window will close when download is done";
+ }
+ } else {
+ print "Use " . $yellow . "/window close $name" . $color_off .
+ " to close this window";
+ }
+
+ my $args = trim(settings_get_str('urlm_wget_extra_args'));
+ $args .= " " if $args;
+
+ # wget binary: leading ~ becomes $HOME, empty falls back to "wget"
+ my $wget_bin = settings_get_str('urlm_wget_path');
+ $wget_bin =~ s/^~/$ENV{HOME}/;
+ $wget_bin = "wget" if not $wget_bin;
+
+ # the /exec process gets the same name as the window
+ command(
+ "/exec -nosh " .
+ "-name $name " .
+ "$wget_bin " .
+ "-P $dir " .
+ $args .
+ $more_args .
+ $url->{url});
+
+ if($size) { # if using a hidden window, leave it focused
+ command("/window last");
+ }
+}
+
+# close_window: close the irssi window with the given name, by switching
+# to it and closing the active window. This seems like a kludge...
+sub close_window {
+    my ($name) = @_;
+    if(window_find_item($name)) { # don't close if already closed!
+        command("/window goto " . $name);
+        command("/window close");
+    }
+}
+
+# sig_exec_remove: signal handler for "exec remove". When
+# urlm_wget_autoclose is on and the finished process was running in one of
+# our urlm_wget_<n> windows, closes that window -- immediately, or after
+# urlm_wget_autoclose_delay seconds if that is greater than zero.
+sub sig_exec_remove {
+    my ($proc, $status) = @_;
+    settings_get_bool('urlm_wget_autoclose') or return;
+
+    # target_win->name will be "" if window already closed!
+    my $name = $proc->{target_win}->{name} || "";
+    return if $name !~ /^urlm_wget_\d+$/;
+
+    return if not window_find_item($name); # don't close if already closed!
+
+    my $delay = settings_get_int('urlm_wget_autoclose_delay');
+    if($delay <= 0) {
+        close_window($name);
+        return;
+    }
+
+    timeout_add_once($delay * 1000, "close_window", $name);
+}
+
+# kill_proc: send SIGTERM (signal 15) to the /exec process with the given
+# name.
+sub kill_proc {
+    my ($name) = @_;
+    command("/exec -15 " . $name);
+}
+
+# sig_window_destroyed: signal handler for "window destroyed".
+# Kills the wget process belonging to a urlm_wget_<n> window when that
+# window is closed by hand; windows with any other name are ignored.
+sub sig_window_destroyed {
+    my ($window) = @_;
+    my $name = $window->{name} || "";
+
+    if($name =~ /^urlm_wget_\d+$/) {
+        # Kill via a one-second timeout rather than directly: this avoids
+        # a possible race where sig_exec_remove() tries to close the very
+        # window whose destruction triggered this call.
+        timeout_add_once(1000, "kill_proc", $name);
+    }
+}
+
+# find_urls: extract all URLs from the input text, returns a list
+# (which may be empty).
+# Be VERY permissive about what we consider a URL.
+# 20100614 bkw: be a little less permissive
+# 20140530 bkw: stop catching dupe http://whatever and https://whatever
+
+# original sub:
+##sub find_urls {
+## my @got = ($_[0] =~ m{(?:https?|ftp)://\S+}g);
+## push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g;
+## push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
+## s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
+## return @got;
+##}
+
+# new version:
+# find_urls($text): returns the list of URLs found in $text (possibly
+# empty). Explicit http://, https:// and ftp:// URLs come first, followed
+# by bare www*. names (returned with "http://" prepended) and bare ftp*.
+# names (with "ftp://" prepended); trailing punctuation is stripped.
+# Works on a private copy of the text: the caller's string is never
+# modified. An undefined argument yields an empty list.
+sub find_urls {
+    my $text = defined($_[0]) ? $_[0] : "";
+    my @got;
+
+    # Cut explicit-scheme URLs out of the copy as they're found, so the
+    # www./ftp. patterns below can't pick up the same URL a second time.
+    while($text =~ s{((?:https?|ftp)://\S+)}{}) {
+        push @got, $1;
+    }
+
+    push @got, "http://$_" for $text =~ /(?:www\d*\.[^.]+\.\S+)/g;
+    push @got, "ftp://$_" for $text =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
+    s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
+    return @got;
+}
+
+# Annoying bots have a tendency to do this:
+# <actual_person> check this out: http://www.blahblah.blah/path/to/stuff.html
+# <annoying_bot> Title: Stuff (at www.blahblah.blah)
+# To me, this is about the most useless function a bot can serve, plus it
+# breaks the /uo command.
+# just_domain($new, $old) returns true if $new is just the domain part of
+# $old. The scheme (http/https/ftp/ftps), letter case, and a single
+# trailing "/" on $new are ignored.
+sub just_domain {
+    my ($new, $old) = @_;
+    s/^(ht|f)tps?:\/\/// for ($new, $old);
+    $old =~ s/\/.*//;            # keep only the host part of $old
+    return 0 if $new =~ /\/./;   # $new has a real path: not just a domain
+    # "www.foo.com/" names the same thing as "www.foo.com"; without this
+    # the trailing slash got past the check above but failed the compare
+    $new =~ s/\/$//;
+    return lc($new) eq lc($old);
+}
+
+# url_log: appends URL to the URL log file and to the @urls array.
+# Locks the file before writing, so should be safe even with multiple
+# instances of irssi.
+# Arguments: ($relog, $nick, $channel, $url, $stamp)
+# $relog should be false if capturing a new URL from channel/msg text,
+# or true if re-logging an old URL (e.g. /ul -delete does this)
+# $nick may be a full nick!user@host mask; everything from "!" on is cut.
+# $stamp is the capture time in epoch seconds; if omitted (or false) the
+# current time is used.
+# The URL is silently dropped if it equals the previously logged URL
+# (ignoring case), if it is just that URL's domain, or if the log file
+# can't be opened.
+sub url_log {
+    my($relog, $nick, $channel, $url, $stamp) = @_;
+    $nick =~ s/!.*//;
+
+    $stamp = time() unless $stamp;
+
+    return if lc $url eq lc $lasturl; # a tiny bit of protection from spam/flood
+    return if just_domain($url, $lasturl);
+
+    $lasturl = $url;
+
+    my $file = get_url_log_file();
+    open(my $log, '>>', $file) or return;
+
+    flock($log, Fcntl::LOCK_EX);
+    seek($log, 0, 2); # another instance may have appended while we waited
+
+    # Write $stamp, not the current time: a re-logged URL must keep its
+    # original timestamp on disk, or the age-based trim never expires it
+    # and the "When" column is wrong after the next reload.
+    print {$log} "$stamp $nick $channel $url\n";
+    close($log);
+
+    push @urls, {
+        stamp => $stamp,
+        nick => $nick,
+        channel => $channel,
+        url => $url,
+    };
+
+    if(not $relog) {
+        if(not settings_get_bool('urlm_quiet_capture')) {
+            my $on = "";
+            if($channel =~ /^#/) {
+                $on = " on " . $green . $channel . $color_off;
+            }
+            echo "Captured URL #" . $#urls . " " .
+                $purple . $url . $color_off .
+                " from " . $yellow . $nick . $color_off . $on;
+        }
+#        trim_url_log();
+        $captured_since_delete++;
+    }
+}
+
+# urlm_help: answers help requests for the browser commands, whose help
+# text is generated from the browser list (for both "<tag>" and
+# "urlm_open_<tag>"). For any other topic it returns without doing
+# anything; on a match it stops the signal and prints the generated text.
+# The help for all the other commands is stored in text files in
+# ~/.irssi/help
+sub urlm_help {
+    my $arg = shift;
+    $arg = lc trim($arg);
+    my %bhelp;
+    our %urlm_help;
+
+    foreach my $browser (read_browser_list()) {
+        my $text = join("",
+            uc($browser->{tag}), " [<url#>]\n\n",
+            "Open a URL with the external browser '",
+            $browser->{name}, "', using the command:\n",
+            $browser->{command}, "\n\n",
+            "If [<url#>] is omitted, the most recent URL will be opened.\n");
+        $bhelp{$browser->{tag}} = $text;
+        $bhelp{"urlm_open_" . $browser->{tag}} = $text;
+    }
+
+    my $help = $bhelp{$arg};
+    return unless $help;
+
+    signal_stop();
+    print $help;
+}
+
+# write_browser_overrides: store a list of [ tag, [ patterns... ] ] pairs
+# (passed as an array ref) in the urlm_browser_overrides setting, encoded
+# as "tag:pat:pat::tag:pat...".
+sub write_browser_overrides {
+    my @list = @{$_[0]};
+
+    my @encoded = map {
+        my ($tag, $pats) = @$_;
+        join(":", $tag, @$pats);
+    } @list;
+
+    settings_set_str('urlm_browser_overrides', join("::", @encoded));
+}
+
+# read_browser_overrides: decode the urlm_browser_overrides setting into a
+# list of [ tag, [ patterns... ] ] pairs, in the order they are stored.
+sub read_browser_overrides {
+    my @result;
+
+    foreach my $entry (split /::/, settings_get_str('urlm_browser_overrides')) {
+        # first field is the browser tag, the rest are its patterns
+        my @patterns = split /:/, $entry;
+        my $tag = shift @patterns;
+        push @result, [ $tag, \@patterns ];
+    }
+
+    return @result;
+}
+
+# write_browser_list: store a list of browser hashes (keys tag, name,
+# command; passed as an array ref) in the urlm_browsers setting, encoded
+# as "tag:name:command::tag:name:command...", then emit "setup changed".
+sub write_browser_list {
+    my @list = @{$_[0]};
+
+    my @encoded = map {
+        join(":", $_->{tag}, $_->{name}, $_->{command})
+    } @list;
+
+    settings_set_str('urlm_browsers', join("::", @encoded));
+    signal_emit("setup changed");
+}
+
+# read_browser_list: decode the urlm_browsers setting into a list of
+# browser hashes with the keys tag, name and command (the first three
+# colon-separated fields of each "::"-separated entry).
+sub read_browser_list {
+    my @result;
+
+    foreach my $entry (split /::/, settings_get_str('urlm_browsers')) {
+        my @fields = split /:/, $entry;
+        push @result, {
+            tag => $fields[0],
+            name => $fields[1],
+            command => $fields[2],
+        };
+    }
+
+    return @result;
+}
+
+# commands:
+# urlm_add_browser <browser>:<fullname>:<cmd>
+# Adds a browser definition, or replaces the existing one with the same
+# tag. The tag is lowercased and may contain only letters, digits and
+# underscores; "wget" is reserved. The command must contain a
+# single-quoted %s, which is where the URL goes. None of the three fields
+# may contain a colon.
+sub urlm_add_browser {
+    my $arg = shift || "";
+    # assignment, not "=~": matching $arg against its own trimmed text
+    # treated the whole argument as a regex
+    $arg = trim($arg);
+
+    if($arg !~ /^[^:]+:[^:]+:[^:]+$/) {
+        print "Usage: /urlm_add_browser tag:name:command";
+        return;
+    }
+
+    my ($tag, $name, $cmd) = split /:/, $arg;
+
+    $tag = trim($tag);
+    $tag = lc $tag;
+    $name = trim($name);
+
+    if($tag =~ /\W/) {
+        print "/urlm_add_browser: tag must consist of only " .
+            "letters, numbers, or underscores (_), not '$tag'";
+        return;
+    }
+
+    if($tag eq 'wget') {
+        print "/urlm_add_browser: 'wget' is reserved; use a different tag";
+        return;
+    }
+
+    # the URL is substituted into a shell command line, so the %s has to
+    # sit inside single quotes
+    if($cmd !~ /'[^']*\%s[^']*'/) {
+        print "/urlm_add_browser: command must contain '\%s' (single-quoted)";
+        return;
+    }
+
+    my @browsers = read_browser_list();
+    my $found = 0;
+    for(@browsers) {
+        if(lc($_->{tag}) eq $tag) {
+            print "Replaced old definition of $tag";
+            $_->{name} = $name;
+            $_->{command} = $cmd;
+            $found++;
+            last;
+        }
+    }
+
+    if(not $found) {
+        push @browsers, { tag => $tag, name => $name, command => $cmd };
+        print "Added browser definition $tag";
+    }
+
+    write_browser_list(\@browsers);
+}
+
+# urlm_del_browser <browser>
+# Removes the browser definition with the given tag (compared
+# case-insensitively), after first deleting all of its overrides.
+sub urlm_del_browser {
+    my $tag = shift || "";
+    $tag = lc trim($tag);
+    return if not $tag;
+
+    # urlm_del_override() already prints "Browser not defined" if it
+    # wasn't defined, so no need to have urlm_del_browser() print it again.
+    urlm_del_override("$tag all");
+
+    my @browsers = read_browser_list();
+    my @remaining = grep { lc($_->{tag}) ne $tag } @browsers;
+    my $removed = @browsers - @remaining;
+
+    # one message per definition removed
+    print "Deleted definition of $tag" for 1 .. $removed;
+
+    write_browser_list(\@remaining) if $removed;
+}
+
+# urlm_list_browsers
+# Prints one line per defined browser (tag, name, command); the entry
+# whose tag equals urlm_default_browser is marked with [*].
+sub urlm_list_browsers {
+    my $default = settings_get_str('urlm_default_browser');
+
+    foreach my $browser (read_browser_list()) {
+        my $marker = " ";
+        $marker = "[*]" if $default eq $browser->{tag};
+
+        print($marker .
+            "Tag: $bold_on" . $browser->{tag} . "$bold_off, " .
+            "Name: $bold_on" . $browser->{name} . "$bold_off, " .
+            "Command: $bold_on" . $browser->{command} . "$bold_off");
+    }
+}
+
+# urlm_add_wget_ext <ext>
+# Appends the (lowercased) extension to the space-separated
+# urlm_wget_extensions setting, unless it is already listed, then runs
+# "/set urlm_wget_extensions".
+sub urlm_add_wget_ext {
+    my $ext = shift || "";
+    $ext = lc trim($ext);
+
+    unless($ext) {
+        print "Usage: /urlm_add_wget_ext <extension>";
+        return;
+    }
+
+    my @exts = split " ", settings_get_str('urlm_wget_extensions');
+    foreach my $known (@exts) {
+        next if $known ne $ext;
+        print "$ext is already in the wget extensions list";
+        return;
+    }
+
+    settings_set_str('urlm_wget_extensions', join(" ", @exts, $ext));
+    command("/set urlm_wget_extensions");
+}
+
+# urlm_del_wget_ext <ext>
+# Removes the (lowercased) extension from the space-separated
+# urlm_wget_extensions setting, if it is listed, then runs
+# "/set urlm_wget_extensions".
+sub urlm_del_wget_ext {
+    my $ext = shift || "";
+    $ext = lc trim($ext);
+
+    unless($ext) {
+        print "Usage: /urlm_del_wget_ext <extension>";
+        return;
+    }
+
+    my @exts = split " ", settings_get_str('urlm_wget_extensions');
+    my @kept = grep { $_ ne $ext } @exts;
+
+    # nothing was filtered out: the extension wasn't there
+    if(@kept == @exts) {
+        print "$ext is not in the wget extensions list";
+        return;
+    }
+
+    settings_set_str('urlm_wget_extensions', join(" ", @kept));
+    command("/set urlm_wget_extensions");
+}
+
+# urlm_list_overrides [<browser>]
+# Prints the override patterns, numbered from 1 per browser, for the
+# given browser tag, or for every browser if no tag is given.
+sub urlm_list_overrides {
+    my $wanted = shift || "";
+    $wanted = trim($wanted);
+
+    my $found = 0;
+    foreach my $entry (read_browser_overrides()) {
+        my ($browser, $pats) = @$entry;
+        next if $wanted && $wanted ne $browser;
+
+        $found++;
+        foreach my $i (0 .. $#$pats) {
+            print $browser . "[" . ($i + 1) . "]: " . $pats->[$i];
+        }
+    }
+
+    return if $found;
+
+    if($wanted) {
+        print "No overrides for browser '$wanted'";
+    } else {
+        print "No browser overrides";
+    }
+}
+
+# urlm_add_override <browser> <pattern>
+# Adds a URL pattern (a Perl regex, matched case-insensitively when a URL
+# is opened) to the override list of a defined browser.
+# The pattern must compile as a regex and must not contain ":", because
+# overrides are stored as a colon-separated setting; write \x3a to match
+# a literal colon.
+sub urlm_add_override {
+    my $arg = shift || "";
+    $arg = trim($arg); # do not lc($arg), the command may need caps!
+
+    my ($browser, $pattern) = split " ", $arg;
+    if(not ($browser and $pattern)) {
+        print "Usage: /urlm_add_override <browser> <pattern>";
+        return;
+    }
+    $browser = lc $browser;
+
+    if(!grep { $_->{tag} eq $browser } read_browser_list()) {
+        print "Browser $browser not defined in browser list";
+        return;
+    }
+
+    # a ":" would be split into two patterns (or a new entry) on the
+    # next read of the setting
+    if($pattern =~ /:/) {
+        print "Pattern $pattern may not contain ':' (use \\x3a instead)";
+        return;
+    }
+
+    # Block eval: compiles the pattern the same way it is later used,
+    # without running user-typed text through the Perl parser as code.
+    eval { qr/$pattern/i };
+    if($@) {
+        print "Pattern $pattern is not a valid Perl regex: $@";
+        return;
+    }
+
+    my @overrides = read_browser_overrides();
+    my $found = 0;
+    for(@overrides) {
+        my ($tag, $pats) = @$_;
+        next unless $tag eq $browser;
+
+        push @$pats, $pattern;
+        $found++;
+    }
+
+    if(not $found) {
+        push @overrides, [ $browser, [ $pattern ] ];
+    }
+
+    print "Added override for $browser: $pattern";
+    write_browser_overrides(\@overrides);
+}
+
+# urlm_del_override <browser> <number>|<all>
+# Deletes one override pattern (numbered from 1, as printed by
+# urlm_list_overrides) of a defined browser, or all of them with "all".
+sub urlm_del_override {
+    my $arg = shift || "";
+    $arg = trim($arg);
+    $arg = lc $arg;
+
+    my ($browser, $number) = split " ", $arg;
+
+    if(not($browser and $number)) {
+        print "Usage: /urlm_del_override <browser> <number>|all";
+        return; # was missing: execution carried on with undefined values
+    }
+
+    if(!grep { $_->{tag} eq $browser } read_browser_list()) {
+        print "Browser $browser not defined in browser list";
+        return;
+    }
+
+    if($number ne 'all' && $number !~ /^[1-9]\d*$/) {
+        print "Bad override '$number': must be a number >= 1, or 'all'";
+        return;
+    }
+
+    my @overrides = read_browser_overrides();
+    my @keep_overrides = ();
+    my $found = 0;
+    for(@overrides) {
+        my ($tag, $pats) = @$_;
+
+        # other browsers' overrides are kept untouched
+        if($tag ne $browser) {
+            push @keep_overrides, $_;
+            next;
+        }
+
+        # "all": count the patterns and drop the whole entry
+        $found += @$pats, next if $number eq 'all';
+
+        if($number > @$pats) {
+            print "Value $number out of range";
+            # nothing deleted from this entry, so it must survive a
+            # rewrite triggered by another entry with the same tag
+            push @keep_overrides, $_;
+            next;
+        }
+
+        $found++;
+        undef $pats->[$number - 1];
+        @$pats = grep { defined $_ } @$pats;
+
+        # an entry left with no patterns is dropped altogether
+        push @keep_overrides, $_ if @$pats;
+    }
+
+    if($found) {
+        print "Deleted $found overrides";
+        write_browser_overrides(\@keep_overrides);
+    } else {
+        print "No matching overrides";
+    }
+}
+
+# init_colors: (re)define the bold/color globals from the urlm_use_bold
+# and urlm_use_color settings. A disabled feature gets empty strings, so
+# callers can concatenate the globals unconditionally.
+sub init_colors {
+    my $bold = settings_get_bool('urlm_use_bold') ? "\002" : "";
+    ($bold_on, $bold_off) = ($bold, $bold);
+
+    my @codes = ("", "", "", "", "");
+    if(settings_get_bool('urlm_use_color')) {
+        # \003 followed by a color digit; a bare \003 switches color off
+        @codes = ("\0033", "\0034", "\0037", "\0036", "\003");
+    }
+
+    ($green, $red, $yellow, $purple, $color_off) = @codes;
+}
+
+# init_browsers(): dynamic bindings. Each browser tag gets bound to
+# /urlm_open_$tag, and (if short commands enabled) to /$tag.
+# All bindings made by the previous call are removed first, so this can be
+# re-run whenever the browser list or urlm_short_cmds changes.
+# The short aliases /ul, /uo and /wget are also unbound here, and bound
+# again only if urlm_short_cmds is enabled.
+#
+# @bound_refs: list of [ command name, code ref ] pairs for every dynamic
+# binding currently in place.
+our @bound_refs;
+sub init_browsers {
+ # for this to work, the code ref can *NOT* be stored in a "my" var
+ # I think this is a bug in irssi, or possibly perl, but maybe I'm
+ # just being dumb...
+ for(@bound_refs) {
+ command_unbind($_->[0], $_->[1]);
+ }
+ @bound_refs = ();
+
+ my @browsers = read_browser_list();
+ for(@browsers) {
+ # build the handler from source text, with the tag as a string literal
+ my $code = 'sub { url_open_cmd($_[0], "'. ($_->{tag}) . '"); };';
+ my $cmd = 'urlm_open_' . $_->{tag};
+ push @bound_refs, [ $cmd, eval $code ];
+
+ # again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge
+ command_bind($cmd, $bound_refs[$#bound_refs]->[1]);
+
+ if(settings_get_bool('urlm_short_cmds')) {
+ # /$tag shares the code ref just bound to /urlm_open_$tag
+ my $shortcmd = $_->{tag};
+ push @bound_refs, [ $shortcmd, $bound_refs[$#bound_refs]->[1] ];
+ command_bind($shortcmd, $bound_refs[$#bound_refs]->[1]);
+ }
+ }
+
+ # drop the fixed short aliases; re-add them below only if still wanted
+ command_unbind("ul", "url_list_cmd");
+ command_unbind("uo", "url_open_cmd");
+ command_unbind("wget", "url_open_wget_cmd");
+
+ if(settings_get_bool('urlm_short_cmds')) {
+ command_bind("ul", "url_list_cmd");
+ command_bind("uo", "url_open_cmd");
+ command_bind("wget", "url_open_wget_cmd");
+ }
+}
+
+# init_settings: register every urlm_* setting, with its default value,
+# in the 'urlmanager' settings section.
+sub init_settings { # call only once, at script load!
+# Where shall we save the URL log?
+ settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog");
+
+# Where is the wget binary? Absolute path, or "wget" (searches PATH)
+ settings_add_str('urlmanager', 'urlm_wget_path', "wget");
+
+# Where should wget save files?
+ settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~");
+
+# Extra arguments to pass to wget...
+ settings_add_str('urlmanager', 'urlm_wget_extra_args', "");
+
+# Do we log URLs from /part and /quit messages? Disabled by default
+# because so many people always /quit with the same spammish URL
+# e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]"
+ settings_add_bool('urlmanager', 'urlm_log_partquit', 0);
+
+# Do we log URLs from our own public/private messages?
+ settings_add_bool('urlmanager', 'urlm_log_own', 1);
+
+# Cosmetics:
+# urlm_short_cmds: also bind the short aliases (/ul, /uo, /wget and one
+# command per browser tag).
+# urlm_use_bold / urlm_use_color: use bold / mIRC color codes in output.
+ settings_add_bool('urlmanager', 'urlm_short_cmds', 1);
+ settings_add_bool('urlmanager', 'urlm_use_bold', 1);
+ settings_add_bool('urlmanager', 'urlm_use_color', 1);
+
+# Browser definitions. A double-colon-separated list. Each list item
+# is a single-colon separated list of (tag, name, command_format).
+# You may add browsers, and they will work as commands
+# without changing any other code.
+# The browser commands need to be non-blocking, and any stdout/err from
+# them will be ignored.
+# The %s gets replaced with the actual URL. *ALWAYS* use single-quotes
+# (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s!
+# Failure to comply is a security hole!
+ settings_add_str('urlmanager', 'urlm_browsers',
+ 'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' .
+ '::' .
+ 'ie:Internet Explorer:ie6 \'%s\'' .
+ '::' .
+ 'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' .
+ '::' .
+ 'ut:links+xterm:xterm -e "links \'%s\'"' .
+ '::' .
+ 'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i');
+
+# Default browser for /uo and /urlm_open commands
+ settings_add_str('urlmanager', 'urlm_default_browser', 'ff');
+
+# /uo and /urlm_open check this list.
+# Double-colon-separated list, each item is a single-colon-separated
+# list consisting of a browser tag and one or more patterns.
+# If a URL matches one of these
+# patterns, the browser tag will be used as the browser to open the URL
+# with, instead of the default.
+ settings_add_str('urlmanager', 'urlm_browser_overrides',
+ 'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com');
+
+# If /uo or /urlm_open get a URL ending in one of these file extensions,
+# it will be downloaded with wget instead of being opened in a browser.
+ settings_add_str('urlmanager', 'urlm_wget_extensions',
+ 'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z');
+
+# trim log to this many lines. Use with urlm_log_trim_interval and/or
+# urlm_log_trim_startup. Set to 0 to disable.
+ settings_add_int('urlmanager', 'urlm_max_log_lines', 100);
+
+# trim log to this many seconds. Use with urlm_log_trim_interval and/or
+# urlm_log_trim_startup. Set to 0 to disable.
+# (the default, 86400*7, is seven days)
+ settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7);
+
+# trim the log on script load.
+ settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0);
+
+# auto-trim log this often (seconds). Set to 0 to disable.
+# (the default, 60*60, is one hour)
+ settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60);
+
+# these control the behavior of windows created with /urlm_wget or /wget
+# autoclose: close the window once the download's process has exited;
+# autoclose_delay: seconds to wait before closing it;
+# split_size: height of the split window, 0 = use a hidden window instead.
+ settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1);
+ settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60);
+ settings_add_int('urlmanager', 'urlm_wget_split_size', 0);
+
+# say "Captured URL #xxx http://whatever from whoever" every time a URL
+# is captured? (set this to ON to suppress the message)
+ settings_add_bool('urlmanager', 'urlm_quiet_capture', 0);
+
+# TODO: support these:
+
+# channels/nicks/sites we don't want to log
+#settings_add_str('urlmanager', 'urlm_ignore_channels');
+#settings_add_str('urlmanager', 'urlm_ignore_urls');
+
+# If true, go through the entire list every time a URL is logged,
+# checking for duplicates
+#settings_add_bool('urlmanager', 'urlm_ignore_dups');
+}
+
+# Tag of the currently registered log-trim timeout, or undef when no
+# timeout is active.
+our $trim_timeout_tag;
+
+# init_trim_timeout: (re)arm the periodic log trim.
+# Removes any timeout registered by a previous call, then registers
+# trim_url_log with timeout_add using urlm_log_trim_interval (seconds,
+# converted to milliseconds). An interval of 0 or less leaves the
+# timer disabled.
+sub init_trim_timeout {
+    if($trim_timeout_tag) {
+        timeout_remove($trim_timeout_tag);
+        # Forget the tag once it has been removed. Otherwise, with the
+        # interval set to 0, the next call would try to remove the same
+        # (already removed) timeout again.
+        undef $trim_timeout_tag;
+    }
+
+    my $millis = settings_get_int('urlm_log_trim_interval') * 1000;
+    if($millis > 0) {
+        $trim_timeout_tag = timeout_add($millis, "trim_url_log", 1);
+    }
+}
+
+# apply_settings: handler for the "setup changed" signal (registered in
+# init_signals).
+# Everything whose state is derived from a setting gets rebuilt here:
+# colors, the browser list, the log-trim timer, and finally the
+# in-memory URL list, which is re-read from the log file.
+sub apply_settings {
+    # Run the re-initializers one after another, in this fixed order.
+    for my $reinit (\&init_colors, \&init_browsers, \&init_trim_timeout) {
+        $reinit->();
+    }
+
+    @urls = read_url_file();
+}
+
+# init_signals: hook every signal the script listens to.
+# Must be called exactly once, at script load. Each entry below is a
+# [signal name, handler sub name] pair; the pairs are registered with
+# signal_add_last in the order listed.
+sub init_signals {
+    my @hooks = (
+        [ "message public",        "url_public"           ],
+        [ "message private",       "url_private"          ],
+        [ "message irc notice",    "url_private"          ],
+        [ "message irc op_public", "url_private"          ],
+        [ "message irc action",    "url_private"          ],
+        [ "dcc chat message",      "url_dccmsg"           ],
+        [ "message topic",         "url_topic"            ],
+        [ "channel joined",        "url_join_topic"       ],
+        [ "setup changed",         "apply_settings"       ],
+        [ "message part",          "url_part"             ],
+        [ "message quit",          "url_quit"             ],
+        [ "message own_public",    "url_own"              ],
+        [ "message own_private",   "url_own"              ],
+        [ "exec remove",           "sig_exec_remove"      ],
+        [ "window destroyed",      "sig_window_destroyed" ],
+    );
+
+    for my $hook (@hooks) {
+        signal_add_last($hook->[0], $hook->[1]);
+    }
+}
+
+# init_static_binds: bind the commands that are always available.
+# Must be called exactly once, at script load. Each entry below is a
+# [command name, handler sub name] pair; the pairs are bound with
+# command_bind in the order listed. Note that "help" is bound too, so
+# urlm_help gets to see /help invocations.
+sub init_static_binds {
+    my @commands = (
+        [ "urlm_list",           "url_list_cmd"        ],
+        [ "urlm_open",           "url_open_cmd"        ],
+        [ "urlm_wget",           "url_open_wget_cmd"   ],
+        [ "urlm_add_browser",    "urlm_add_browser"    ],
+        [ "urlm_del_browser",    "urlm_del_browser"    ],
+        [ "urlm_list_browsers",  "urlm_list_browsers"  ],
+        [ "urlm_add_wget_ext",   "urlm_add_wget_ext"   ],
+        [ "urlm_del_wget_ext",   "urlm_del_wget_ext"   ],
+        [ "urlm_list_overrides", "urlm_list_overrides" ],
+        [ "urlm_add_override",   "urlm_add_override"   ],
+        [ "urlm_del_override",   "urlm_del_override"   ],
+        [ "urlm_trim_log",       "trim_url_log"        ],
+        [ "urlm_undo_delete",    "urlm_undo_delete"    ],
+        [ "help",                "urlm_help"           ],
+    );
+
+    for my $entry (@commands) {
+        command_bind($entry->[0], $entry->[1]);
+    }
+}
+
+# Add per-user help dir to help_path, if not already present.
+# help_path is a colon-separated list of directories. When
+# $HOME/.irssi/help is not one of its entries, it is appended, the
+# setting is updated, and "setup changed" is emitted so that handlers
+# of that signal (including this script's apply_settings) run.
+sub init_help_path {
+    my $dir = "$ENV{HOME}/.irssi/help";
+    my $help_path = settings_get_str('help_path');
+    $help_path = '' unless defined $help_path;
+
+    return if grep { $_ eq $dir } split /:/, $help_path;
+
+    # Don't produce a leading ':' (an empty path entry) when help_path
+    # was empty to begin with.
+    $help_path = length($help_path) ? "$help_path:$dir" : $dir;
+    settings_set_str('help_path', $help_path);
+
+    # The signal name is "setup changed", with a space (the same name
+    # init_signals binds apply_settings to). With an underscore the
+    # emit would not reach any handler.
+    signal_emit('setup changed');
+}
+
+# Script start-up. Every sub is defined by now, so register settings,
+# signals and commands, then build the state derived from the settings.
+init_settings();
+init_signals();
+init_static_binds();
+init_colors();
+init_browsers();
+init_help_path();
+
+# Load the logged URLs, optionally trim the log right away, and start
+# the periodic trim timer.
+@urls = read_url_file();
+if(settings_get_bool('urlm_log_trim_startup')) {
+    trim_url_log();
+}
+init_trim_timeout();
+
+# A backup file ("<log file>~") may be left over from an earlier run;
+# get rid of it.
+unlink(get_url_log_file() . "~");
+
+# Greet the user with the number of loaded URLs and a pointer to the
+# help command.
+print join('',
+    $bold_on, "urlmanager.pl", $bold_off,
+    " loaded (", scalar(@urls), " URLs), type '",
+    $yellow, "/help urlmanager", $color_off,
+    "' for help.");
+
+# rest of file is POD docs
+=pod
+
+=head1 NAME
+
+urlmanager
+
+=head1 SYNOPSIS
+
+Yet another URL logger for irssi.
+
+=head1 DESCRIPTION
+
+Captures URLs in channel, privmsg, and DCC chat messages, logs them to a
+file. Provides an irssi command to list captured URLs and several commands
+to do various things with them (open in browser, download, copy to X11
+selection buffer).
+
+This documentation only includes installation instructions. For usage
+instructions, install the script and run B</help urlmanager> within irssi.
+
+=head1 INSTALLATION
+
+Copy B<urlmanager.pl> to your B<~/.irssi/scripts> directory (create the
+directory if it doesn't exist). For auto-loading when irssi starts,
+create a symlink in B<~/.irssi/scripts/autorun>:
+
+=over 4
+
+mkdir -p ~/.irssi/scripts/autorun
+
+cp urlmanager.pl ~/.irssi/scripts
+
+cd ~/.irssi/scripts/autorun
+
+ln -s ../urlmanager.pl .
+
+=back
+
+=head1 CONFIGURATION
+
+All configuration is done from within irssi; read B</help urlmanager>.
+
+=cut