From 4ef0f5708e20509e427c706acedff6a22bf0faaa Mon Sep 17 00:00:00 2001
From: "B. Watson" <yalhcru@gmail.com>
Date: Wed, 12 Aug 2015 04:30:10 -0400
Subject: initial commit

---
 README                         |   10 +
 help/urlm_add_browser          |   36 ++
 help/urlm_add_override         |   15 +
 help/urlm_browser_overrides    |    6 +
 help/urlm_browsers             |    6 +
 help/urlm_default_browser      |    5 +
 help/urlm_del_browser          |   12 +
 help/urlm_del_override         |    7 +
 help/urlm_list                 |   55 ++
 help/urlm_list_browsers        |   11 +
 help/urlm_list_overrides       |    6 +
 help/urlm_log_file             |   16 +
 help/urlm_log_own              |    6 +
 help/urlm_log_partquit         |    7 +
 help/urlm_log_trim_interval    |   24 +
 help/urlm_log_trim_startup     |    7 +
 help/urlm_max_log_age          |   17 +
 help/urlm_max_log_lines        |   11 +
 help/urlm_open                 |   29 +
 help/urlm_quiet_capture        |   16 +
 help/urlm_short_cmds           |   14 +
 help/urlm_trim_log             |   12 +
 help/urlm_undo_delete          |   11 +
 help/urlm_use_bold             |    7 +
 help/urlm_use_color            |   12 +
 help/urlm_wget                 |   29 +
 help/urlm_wget_autoclose       |   13 +
 help/urlm_wget_autoclose_delay |   10 +
 help/urlm_wget_dl_dir          |   14 +
 help/urlm_wget_extensions      |   23 +
 help/urlm_wget_extra_args      |    5 +
 help/urlm_wget_path            |    9 +
 help/urlm_wget_split_size      |   11 +
 help/urlmanager                |   30 +
 scripts/urlmanager.pl          | 1350 ++++++++++++++++++++++++++++++++++++++++
 35 files changed, 1852 insertions(+)
 create mode 100644 README
 create mode 100644 help/urlm_add_browser
 create mode 100644 help/urlm_add_override
 create mode 100644 help/urlm_browser_overrides
 create mode 100644 help/urlm_browsers
 create mode 100644 help/urlm_default_browser
 create mode 100644 help/urlm_del_browser
 create mode 100644 help/urlm_del_override
 create mode 100644 help/urlm_list
 create mode 100644 help/urlm_list_browsers
 create mode 100644 help/urlm_list_overrides
 create mode 100644 help/urlm_log_file
 create mode 100644 help/urlm_log_own
 create mode 100644 help/urlm_log_partquit
 create mode 100644 help/urlm_log_trim_interval
 create mode 100644 help/urlm_log_trim_startup
 create mode 100644 help/urlm_max_log_age
 create mode 100644 help/urlm_max_log_lines
 create mode 100644 help/urlm_open
 create mode 100644 help/urlm_quiet_capture
 create mode 100644 help/urlm_short_cmds
 create mode 100644 help/urlm_trim_log
 create mode 100644 help/urlm_undo_delete
 create mode 100644 help/urlm_use_bold
 create mode 100644 help/urlm_use_color
 create mode 100644 help/urlm_wget
 create mode 100644 help/urlm_wget_autoclose
 create mode 100644 help/urlm_wget_autoclose_delay
 create mode 100644 help/urlm_wget_dl_dir
 create mode 100644 help/urlm_wget_extensions
 create mode 100644 help/urlm_wget_extra_args
 create mode 100644 help/urlm_wget_path
 create mode 100644 help/urlm_wget_split_size
 create mode 100644 help/urlmanager
 create mode 100644 scripts/urlmanager.pl
diff --git a/README b/README
new file mode 100644
index 0000000..c993e6a
--- /dev/null
+++ b/README
@@ -0,0 +1,10 @@
+Urchlay's url manager plugin for irssi. no README yet, for now see the
+POD docs:
+
+$ perldoc scripts/urlmanager.pl
+
+Quickstart:
+
+mkdir -p ~/.irssi/scripts ~/.irssi/help
+cp scripts/* ~/.irssi/scripts
+cp help/* ~/.irssi/help
diff --git a/help/urlm_add_browser b/help/urlm_add_browser
new file mode 100644
index 0000000..d2c8327
--- /dev/null
+++ b/help/urlm_add_browser
@@ -0,0 +1,36 @@
+
+URLM_ADD_BROWSER <tag>:<name>:<cmd-format>
+
+Define a new browser. The arguments must be separated by : (colon)
+characters, since <name> and <cmd-format> may contain spaces (<cmd-format>
+generally *requires* spaces).
+
+<tag> is a short unique identifier for the browser. Examples are "ff" for
+Firefox or "moz" for Mozilla. The tag will be used to define a new irssi
+command /urlm_open_<tag>, and (if urlm_short_cmds is ON) a new /<tag>
+command. Tags must consist of only letters, numbers, or underscores,
+and the special tag "wget" is reserved.
+
+<name> is the full human-readable name of the browser. It may contain
+any characters you like, except for colons, and is only used for
+identification purposes (e.g. /urlm_list_browsers output).
+
+<cmd-format> is the sprintf() format used to generate the full command
+line required to run the browser. In simpler terms, it is the command
+that runs the browser, with %s in place of the URL. It may contain
+any characters other than colons.
+
+Examples:
+  /urlm_add_browser ff:Firefox:firefox -remote 'openurl(%s,new-tab)'
+  /urlm_add_browser ie:Internet Explorer:wine iexplore.exe '%s'
+
+The above examples define new /urlm_open_ff and /urlm_open_ie
+commands. If urlm_short_cmds is ON, they also define new /ff and /ie
+commands.
+
+Note: in <cmd-format>, the %s must occur inside a set of single-quotes.
+It need not be the only thing inside the quotes, however (see the firefox
+example). This is because a shell is used to spawn the external program.
+The URLM_ADD_BROWSER command checks for the quotes, and refuses to
+allow a <cmd-format> that's missing the quotes or the %s.
+
diff --git a/help/urlm_add_override b/help/urlm_add_override
new file mode 100644
index 0000000..167808f
--- /dev/null
+++ b/help/urlm_add_override
@@ -0,0 +1,15 @@
+
+URLM_ADD_OVERRIDE <browser> <pattern>
+
+Add an override, for use with /urlm_open (q.v.)
+
+<browser> is the "tag", previously defined with URLM_ADD_BROWSER.
+
+<pattern> is a Perl regular expression (regex). Any URL matching the
+regex (case-insensitive) will be opened with <browser>.
+
+It's probably best to avoid conflicting/overlapping patterns, although
+it can be done if you remember that the regexes are checked against each
+URL in the order they are displayed in /urlm_list_overrides (which is
+in fact the order in which they were defined).
+
diff --git a/help/urlm_browser_overrides b/help/urlm_browser_overrides
new file mode 100644
index 0000000..1088280
--- /dev/null
+++ b/help/urlm_browser_overrides
@@ -0,0 +1,6 @@
+
+Setting: URLM_BROWSER_OVERRIDES (private)
+
+  Do not change directly; use /urlm_add_override, /urlm_del_override,
+  and /urlm_list_overrides instead.
+
diff --git a/help/urlm_browsers b/help/urlm_browsers
new file mode 100644
index 0000000..eb89f27
--- /dev/null
+++ b/help/urlm_browsers
@@ -0,0 +1,6 @@
+
+Setting: URLM_BROWSERS (private)
+
+  Do not change directly; use /urlm_add_browser, /urlm_del_browser,
+  and /urlm_list_browsers instead.
+
diff --git a/help/urlm_default_browser b/help/urlm_default_browser
new file mode 100644
index 0000000..d4d0448
--- /dev/null
+++ b/help/urlm_default_browser
@@ -0,0 +1,5 @@
+
+Setting: URLM_DEFAULT_BROWSER (default: ff)
+
+Self-explanatory. One of the browser tags from URLM_LIST_BROWSERS.
+
diff --git a/help/urlm_del_browser b/help/urlm_del_browser
new file mode 100644
index 0000000..4afd163
--- /dev/null
+++ b/help/urlm_del_browser
@@ -0,0 +1,12 @@
+
+URLM_DEL_BROWSER <tag>
+
+Delete a browser from the browser-definition list.
+<tag> must have been previously defined with URLM_ADD_BROWSER.
+
+Deleting a browser also deletes any browser overrides defined for
+taÒ}rowser.
+
+Example: to delete the "ie" definition (see above):
+  /urlm_del_browser ie
+
diff --git a/help/urlm_del_override b/help/urlm_del_override
new file mode 100644
index 0000000..a1d03a9
--- /dev/null
+++ b/help/urlm_del_override
@@ -0,0 +1,7 @@
+
+URLM_DEL_OVERRIDE <browser> <number>|all
+
+Delete an override. <number> is the number given in /urlm_list_overrides.
+If "all" is used instead of a number, all overrides for <browser> will
+be deleted.
+
diff --git a/help/urlm_list b/help/urlm_list
new file mode 100644
index 0000000..6ec14ae
--- /dev/null
+++ b/help/urlm_list
@@ -0,0 +1,55 @@
+
+URLM_LIST [-delete] [<list>]
+
+Alternate command name: UL
+
+(Each command has a short, easy-to-type, but cryptic alternate name. If
+you don't like cryptic command names, "/set urlm_short_cmds off" will
+disable the short names).
+
+List URLs captured from channel and privmsg text. With no <list>, lists
+the last 10 URLs. With -delete, lists and deletes the listed URLs (the <list> is
+required with -delete).
+
+If <list> is given, it may be:
+
+<number>
+  List URL #number only (example: /urlm_list 10)
+
+[<start>]-[<end>]
+  List URLs from #start to #end (example: /urlm_list 20-30)
+  if start omitted, beginning of list assumed.
+  if end omitted, end of list assumed.
+  ("/urlm_list 10-" lists from #10 to the end of the list,
+  "/urlm_list -" lists all URLs (same as "/urlm_list all"),
+  "/urlm_list -10" lists from start-of-list to #10).
+
+all
+  Lists all URLs. "/urlm_list all" is the same as "/urlm_list -".
+
+<nick>
+  List URLs posted by user <nick>.
+
+<#channel> or <&channel>
+  List URLs posted in <channel> (example: /urlm_list #irssi).
+
+</urlmatch>
+  List URLs matching <urlmatch> (example: /urlm_list /google.com).
+
+Any <list> may be preceded with ! to invert its sense. Examples:
+  /urlm_list !/yahoo.com  - List all URLs not matching yahoo.com
+  /urlm_list !bob         - List all URLs not posted by nick "bob"
+  /urlm_list !#badchannel - List all URLs not posted in #badchannel
+
+Note: <nick>, <#channel>, </urlmatch> are all treated as case-
+  insensitive regular expressions. <nick> and <#channel> matches are
+  anchored at the start and end of the match (as though they were
+  prefixed with "^" and followed by "$").  </urlmatch> matches are
+  not anchored.
+
+Warning: BE CAREFUL with the -delete option! There is no confirmation,
+and only one level of undo. It is recommended that you first run /urlm_list
+<list> without the -delete option to be sure which items the -delete will
+affect. If you've just deleted some URLs and want to restore them, run
+/urlm_undo_delete
+
diff --git a/help/urlm_list_browsers b/help/urlm_list_browsers
new file mode 100644
index 0000000..692b19e
--- /dev/null
+++ b/help/urlm_list_browsers
@@ -0,0 +1,11 @@
+
+URLM_LIST_BROWSERS
+
+Show the browser definition list. Items are listed in the same
+format as used to define them: <browser>:<name>:<cmd-format>. The
+default browser is marked with [*].
+
+Note: urlm_add_browser, urlm_del_browser, and urlm_list_browsers store
+the actual browser list as a string in the setting urlm_browsers. The
+format of this setting is prickly; you shouldn't modify it directly.
+
diff --git a/help/urlm_list_overrides b/help/urlm_list_overrides
new file mode 100644
index 0000000..ee1850c
--- /dev/null
+++ b/help/urlm_list_overrides
@@ -0,0 +1,6 @@
+
+URLM_LIST_OVERRIDES [<browser>]
+
+List override patterns for <browser> (optional: default w/no argument
+is to list all overrides for all browsers).
+
diff --git a/help/urlm_log_file b/help/urlm_log_file
new file mode 100644
index 0000000..3a1201d
--- /dev/null
+++ b/help/urlm_log_file
@@ -0,0 +1,16 @@
+
+Setting: URLM_LOG_FILE (default: ~/.irssi/urllog) [*]
+
+This is where URLs are stored. The file is updated immediately after
+each URL is captured, and after any URL(s) are deleted. urlmanager
+uses file locking (via flock()), so it's safe to run multiple instances
+of irssi with the script loaded.
+
+The log file need not exist when the script is first loaded; it will
+be created as soon as the first URL is captured. The directory for the
+log file must already exist, though. Tilde expansion (~ meaning home
+directory) is supported.
+
+When this setting is changed, the in-memory URL log is cleared and
+repopulated from the new file.
+
diff --git a/help/urlm_log_own b/help/urlm_log_own
new file mode 100644
index 0000000..39178ad
--- /dev/null
+++ b/help/urlm_log_own
@@ -0,0 +1,6 @@
+
+Setting: URLM_LOG_OWN
+
+Boolean; whether or not to capture and log URLs from your own public and
+private messages.
+
diff --git a/help/urlm_log_partquit b/help/urlm_log_partquit
new file mode 100644
index 0000000..885a2b7
--- /dev/null
+++ b/help/urlm_log_partquit
@@ -0,0 +1,7 @@
+
+Setting: URLM_LOG_PARTQUIT (boolean, default: off)
+
+Capture URLs from /part and /quit messages. This is off by default
+because so many people use IRC clients that include the client's web
+site URL in the quit and part messages (a mild form of spam).
+
diff --git a/help/urlm_log_trim_interval b/help/urlm_log_trim_interval
new file mode 100644
index 0000000..7d13cb9
--- /dev/null
+++ b/help/urlm_log_trim_interval
@@ -0,0 +1,24 @@
+
+Setting: URLM_LOG_TRIM_INTERVAL
+
+  How often (in seconds) do you want urlmanager to check the log and
+  trim it according to your urlm_max_log_lines and/or urlm_max_log_age
+  settings? Default is 3600 seconds (1 hour), which is probably OK for
+  most users. If you're in a lot of channels and exchange URLs with lots
+  of people, you might want to decrease this to keep the log from growing
+  too much.
+
+  Note: urlm_log_trim_interval has no effect unless one or both of the
+  urlm_max_log_lines or urlm_max_log_age settings are set to non-zero
+  values.
+
+  Note: Even with this setting set to zero, log trimming still occurs
+  at startup (unless urlm_log_trim_startup is OFF), and whenever a URL
+  is captured. To completely disable log trimming, set both
+  urlm_max_log_lines and urlm_max_log_age to zero. With these settings,
+  even manual trimming with /urlm_trim_log will do nothing.
+
+  The lower you set this setting, the more CPU and disk access it requires
+  (unless you set it to zero, of course). Probably it's a bad idea to
+  use a value lower than 60 seconds here, under any conditions.
+
diff --git a/help/urlm_log_trim_startup b/help/urlm_log_trim_startup
new file mode 100644
index 0000000..f33be64
--- /dev/null
+++ b/help/urlm_log_trim_startup
@@ -0,0 +1,7 @@
+
+Setting: URLM_LOG_TRIM_STARTUP
+
+  Boolean; controls whether urlmanager trims the log on startup. Has no
+  effect unless one or both of urlm_max_log_lines or urlm_max_log_age
+  are set to a non-zero value.
+
diff --git a/help/urlm_max_log_age b/help/urlm_max_log_age
new file mode 100644
index 0000000..9c342ae
--- /dev/null
+++ b/help/urlm_max_log_age
@@ -0,0 +1,17 @@
+
+Setting: URLM_MAX_LOG_AGE
+
+  Maximum age (in seconds) for URLs in the log. Set to zero to disable
+  age-based trimming. Any URLs older than this will be removed from the
+  log whenever the log is trimmed. This happens:
+
+  - At startup (unless urlm_log_trim_startup is OFF)
+  - Every urlm_log_trim_interval seconds (if urlm_log_trim_interval
+    is non-zero)
+  - Any time a URL is captured
+  - When /urlm_trim_log is run manually
+
+  This means that old URLs persist in the log for up to urlm_log_trim_interval
+  seconds past their expiration time; if this annoys you, decrease the
+  urlm_log_trim_interval to reduce the problem.
+
diff --git a/help/urlm_max_log_lines b/help/urlm_max_log_lines
new file mode 100644
index 0000000..68e932c
--- /dev/null
+++ b/help/urlm_max_log_lines
@@ -0,0 +1,11 @@
+
+Setting: URLM_MAX_LOG_LINES
+
+  Maximum number of URLs to keep in the log. May be set to zero to
+  disable trimming by length (in which case, trimming by age may still
+  be used or not, as desired).
+
+  When set to non-zero, the log will be trimmed to this size by discarding
+  older (lower-numbered) URLs. See below for explanation of when trimming
+  is done.
+
diff --git a/help/urlm_open b/help/urlm_open
new file mode 100644
index 0000000..1aac2c2
--- /dev/null
+++ b/help/urlm_open
@@ -0,0 +1,29 @@
+
+URLM_OPEN [<arg>]    - Open URL using "best" browser for the URL
+
+Alternate command name: UO
+
+Opens a URL according to the following rules:
+
+- If the URL's file extension (e.g. .zip) is found in urlm_wget_extensions,
+  the URL is downloaded with wget. (wget is run in an irssi window, so
+  you can monitor its progress)
+
+- Otherwise, if the URL matches any of the patterns in urlm_browser_overrides,
+  the URL is opened with the matching browser. (This is useful in cases
+  where e.g. you have Firefox as the default browser, but want all
+  youtube.com pages to open in Internet Explorer). See the
+  /urlm_add_override and /urlm_del_override commands for details.
+
+- Otherwise, the default browser (urlm_default_browser setting) is used.
+
+If <arg> is omitted, the default is to open the most recently captured URL.
+If <arg> is provided, it must be a single numeric URL number (from the
+output of /UL). Negative numbers are allowed, and are interpreted as
+counting from the end of the list (so -1 means the second most recent
+captured URL).
+
+Note: you do not need to define wget as a browser. urlmanager will use
+its own internal wget support to run wget in a window if the file
+extension is listed in urlm_wget_extensions.
+
diff --git a/help/urlm_quiet_capture b/help/urlm_quiet_capture
new file mode 100644
index 0000000..eba5da2
--- /dev/null
+++ b/help/urlm_quiet_capture
@@ -0,0 +1,16 @@
+
+Setting: URLM_QUIET_CAPTURE
+
+Boolean; whether or not to print "Captured URL #xx http://whatever from nick"
+in the current window every time a URL is captured. Default: ON
+
+I can't think of a reason I'd ever want to turn this off, but maybe
+it drives other people crazy... one warning: if you turn this setting off,
+your default URL for "/urlm_open" may change between the time you notice
+a URL in channel, and the time you try to open it (e.g. because someone in
+another channel pasted another URL after the one you saw).
+
+Future versions of urlmanager may support an option to allow printing of
+each captured URL in the window where it was captured, instead of the
+current window.
+
diff --git a/help/urlm_short_cmds b/help/urlm_short_cmds
new file mode 100644
index 0000000..3b15d48
--- /dev/null
+++ b/help/urlm_short_cmds
@@ -0,0 +1,14 @@
+
+Setting: URLM_SHORT_CMDS (boolean, default: on)
+
+Whether or not to bind short command names, such as /ff as an alias
+for /urlm_open_ff. The only commands that get short-name aliases are
+/urlm_open (alias /uo) and the /urlm_open_* commands (which get the
+browser tags as their short aliases).
+
+When this option is off, every command defined by urlmanager begins
+with the string "urlm_", which acts as a sort of a namespace. Some
+people might hate "polluting" the rest of the command namespace, or
+maybe they already have a /uo command defined in another script. If
+you're one of these people, /set urlm_short_cmds off.
+
diff --git a/help/urlm_trim_log b/help/urlm_trim_log
new file mode 100644
index 0000000..59d3cf8
--- /dev/null
+++ b/help/urlm_trim_log
@@ -0,0 +1,12 @@
+
+URLM_TRIM_LOG
+
+Manually trim the log file, according to the urlm_max_log_lines and
+urlm_max_log_age settings (q.v.)
+
+Normally you won't need this command: instead you'll use the
+urlm_log_trim_startup and/or urlm_log_trim_interval settings to
+automatically keep the log size manageable. /urlm_trim_log might be
+useful for recovering from an attack by floodbots that send URLs
+to a channel...
+
diff --git a/help/urlm_undo_delete b/help/urlm_undo_delete
new file mode 100644
index 0000000..1fbfea9
--- /dev/null
+++ b/help/urlm_undo_delete
@@ -0,0 +1,11 @@
+
+URLM_UNDO_DELETE [-yes]
+
+Undo the last /urlm_list -delete operation. There is only one level of
+undo, and you can't undo the undo.
+
+If any URLs have been captured since the last -delete, /urlm_undo_delete
+will warn of this fact and refuse to restore, unless -yes is given. If
+you override the warning with -yes, you WILL lose any URLs that have been
+captured since the delete.
+
diff --git a/help/urlm_use_bold b/help/urlm_use_bold
new file mode 100644
index 0000000..6c41e0b
--- /dev/null
+++ b/help/urlm_use_bold
@@ -0,0 +1,7 @@
+
+Setting: URLM_USE_BOLD (boolean, default: on)
+
+Whether or not you want bold in your /urlm_list output (and a few
+other places within urlmanager).
+
+
diff --git a/help/urlm_use_color b/help/urlm_use_color
new file mode 100644
index 0000000..c2e453b
--- /dev/null
+++ b/help/urlm_use_color
@@ -0,0 +1,12 @@
+
+Setting: URLM_USE_COLOR (boolean, default: on)
+
+Whether or not you want color in your /urlm_list output. The author
+finds the colors useful, but you might hate it.
+
+Note that urlmanager never sends text to the server (e.g. to a channel
+or a query). With bold/color enabled, only you will see them, so you
+won't be violating any "no mirc colors" channel rules.
+
+This setting has no effect if irssi's hide_colors is set to ON.
+
diff --git a/help/urlm_wget b/help/urlm_wget
new file mode 100644
index 0000000..8ed7039
--- /dev/null
+++ b/help/urlm_wget
@@ -0,0 +1,29 @@
+
+URLM_WGET [<wget-args>] [<arg>]
+
+Alternate command name: WGET
+
+Downloads a URL with wget. <arg> is treated the same as /urlm_open (see
+above). <wget-args>, if present, are passed to the wget process as-is
+(see also the urlm_wget_extra_args setting).
+
+When urlmanager runs wget, it creates a new irssi window (split or hidden)
+named urlm_wget_<number> (where <number> is a unique serial number generated
+by urlmanager). This window behaves as a normal irssi window (shows up
+in /window list, can be closed with /window close, etc). By default,
+wget windows are created as hidden windows (change with urlm_wget_split_size
+setting), which are automatically closed 60 seconds after the wget process
+completes (change with urlm_wget_autoclose and urlm_wget_autoclose_delay
+settings).
+
+To cancel a wget download, you may close its window while wget is still
+running. This will kill the wget process, leaving any partially-downloaded
+files behind (which may be resumed with wget's -c option).
+
+Note: If you need to pass a numeric argument to /urlm_wget as its last
+argument, do not separate it from its command switch with a space. That
+is, instead of "/urlm_wget -T 30", use "/urlm_wget -T30" (or its long
+version, "/urlm_wget --timeout=30"). The reason for this restriction
+is that urlmanager will interpret the last argument as a URL number,
+if it's numeric.
+
diff --git a/help/urlm_wget_autoclose b/help/urlm_wget_autoclose
new file mode 100644
index 0000000..42af64f
--- /dev/null
+++ b/help/urlm_wget_autoclose
@@ -0,0 +1,13 @@
+
+Setting: URLM_WGET_AUTOCLOSE
+
+Boolean; whether or not to automatically close windows created by running
+wget (via /wget or /urlm_wget). Default: ON
+
+With this setting OFF, you'll have to manually close windows created
+by /wget or /urlm_wget.
+
+Changing this setting only affects wget windows created after the change.
+Any existing wget windows will still be autoclosed if the old value was
+ON, or else they will not be autoclosed if the old value was OFF.
+
diff --git a/help/urlm_wget_autoclose_delay b/help/urlm_wget_autoclose_delay
new file mode 100644
index 0000000..206657b
--- /dev/null
+++ b/help/urlm_wget_autoclose_delay
@@ -0,0 +1,10 @@
+
+Setting: URLM_WGET_AUTOCLOSE_DELAY
+
+Integer; how long to wait after a wget download is complete, before auto-
+closing the window. No effect if urlm_wget_autoclose is OFF.
+
+With urlm_wget_autoclose ON, set urlm_wget_autoclose_delay to zero to
+immediately close wget windows, or to a number of seconds to delay
+before closing wget windows.
+
diff --git a/help/urlm_wget_dl_dir b/help/urlm_wget_dl_dir
new file mode 100644
index 0000000..da06e82
--- /dev/null
+++ b/help/urlm_wget_dl_dir
@@ -0,0 +1,14 @@
+
+Setting: URLM_WGET_DL_DIR (default: ~) [*]
+
+This is where files downloaded with wget will be saved. Tilde expansion
+is supported. If this directory does not exist, it will be created by
+wget, when it is run for the first time.
+
+[*] Normally, the log file and download directory should be an absolute
+path, or relative to $HOME (with ~ expansion). If a relative path is
+used (without ~), it will be resolved relative to the working directory
+where irssi was started.
+
+FIXME: this setting may not contain whitespace characters (spaces or tabs).
+
diff --git a/help/urlm_wget_extensions b/help/urlm_wget_extensions
new file mode 100644
index 0000000..ec87f9c
--- /dev/null
+++ b/help/urlm_wget_extensions
@@ -0,0 +1,23 @@
+
+Setting: URLM_WGET_EXTENSIONS (default: tar zip atr bas xex exe dcm car z gz torrent)
+
+Space-separated list of filename extensions. When /urlm_open is used on
+a URL ending in one of these, the file will be downloaded with wget,
+running in an irssi window. To disable wget, you may set this list to
+an empty string, or use one of your defined browsers to open such files.
+
+If you want to use wget without running it in an irssi window, clear the
+extension list, then define a browser like so:
+
+# download in background (no controlling terminal, no progress reports)
+/urlm_add_browser dl:Download with wget:wget -b '%s'
+
+# download in new screen window (irssi must be running under GNU screen)
+/urlm_add_browser dl:Download with wget:screen wget '%s'
+
+# download in new X window (irssi must be running under X)
+/urlm_add_browser dl:Download with wget:xterm -e "wget '%s';echo 'press Enter to close window';read junk"
+
+Note that you may not define a browser tag as 'wget' (the examples above
+all use 'dl' instead).
+
diff --git a/help/urlm_wget_extra_args b/help/urlm_wget_extra_args
new file mode 100644
index 0000000..1ee593f
--- /dev/null
+++ b/help/urlm_wget_extra_args
@@ -0,0 +1,5 @@
+
+Setting: URLM_WGET_EXTRA_ARGS (default: <none>)
+
+Extra arguments to be passed to wget verbatim. Default is no arguments.
+
diff --git a/help/urlm_wget_path b/help/urlm_wget_path
new file mode 100644
index 0000000..260a921
--- /dev/null
+++ b/help/urlm_wget_path
@@ -0,0 +1,9 @@
+
+Setting: URLM_WGET_PATH (default: wget)
+
+The path to the wget binary. Either an absolute path such as /usr/bin/wget,
+or the string wget (the default) to search the $PATH. Tilde expansion
+is supported (e.g. /set urlm_wget_path ~/bin/wget)
+
+FIXME: this setting may not contain whitespace characters (spaces or tabs).
+
diff --git a/help/urlm_wget_split_size b/help/urlm_wget_split_size
new file mode 100644
index 0000000..63beb07
--- /dev/null
+++ b/help/urlm_wget_split_size
@@ -0,0 +1,11 @@
+
+Setting: URLM_WGET_SPLIT_SIZE
+
+Integer; how tall (in screen lines) to make split windows created by
+/wget or /urlm_wget. Set to zero (the default) to use hidden (full-sized)
+windows instead of split windows.
+
+Try not to set this too high; if urlmanager is unable to resize a window
+to this size, the window will remain at the default size (as used
+by the /window split command).
+
diff --git a/help/urlmanager b/help/urlmanager
new file mode 100644
index 0000000..f979ad9
--- /dev/null
+++ b/help/urlmanager
@@ -0,0 +1,30 @@
+
+URLMANAGER
+
+Commands: all urlmanager commands are prefixed with /urlm_ except the
+short browser aliases (if urlm_short_cmds is enabled). To see the list
+of commands, type "/help urlm".
+
+Settings: urlmanager's behaviour is controlled by quite a few settings.
+All urlmanager settings are prefixed with "urlm_".
+To see a full list of them, use "/set urlm".
+
+Log File:
+
+URLs are stored in a file (filename set with urlm_log_file), one URL
+per line.
+
+Each line is a space-separated list:
+
+timestamp nick channel url
+
+This file may be edited (carefully) with a standard text editor, or
+removed (to clear the URL list). If you edit or delete the file, reload
+the script with "/run urlmanager.pl". (it's probably a
+good idea to trim the file when it gets above a few hundred lines).
+
+Notes:
+- The timestamp is expressed in seconds since the epoch.
+- For a URL received in a private /msg or dcc chat, "channel" will
+  be the sending user.
+
diff --git a/scripts/urlmanager.pl b/scripts/urlmanager.pl
new file mode 100644
index 0000000..787e775
--- /dev/null
+++ b/scripts/urlmanager.pl
@@ -0,0 +1,1350 @@
+#!/usr/bin/perl
+
+# urlmanager script for irssi
+
+use warnings;
+use strict;
+
+use Fcntl qw/:flock/;
+use POSIX qw/strftime/;
+
+use Irssi qw/
+	settings_add_str settings_add_bool settings_add_int
+	settings_get_str settings_get_bool settings_get_int
+	settings_set_str settings_set_bool settings_set_int
+	command command_bind command_unbind
+	signal_emit signal_add_last signal_stop
+	timeout_add timeout_add_once timeout_remove
+	window_find_item/;
+
+our $VERSION = "0.1";
+our %IRSSI = (
+	authors     => 'Urchlay',
+	contact     => 'Urchlay on NewNet',
+	name        => 'urlmanager',
+	description =>
+		'Captures URLs said in channel and private messages ' .
+		'and saves them to a file, also adds several commands for ' .
+		'listing and opening captured URLs ' .
+		'(based on urlgrab.pl 0.2 by David Leadbetter)',
+	license     => 'GNU GPLv2 or later',
+	url         => 'none',
+);
+
+# 20110609 bkw: if irssi was started in a screen session from the console,
+# then detached, then reattached in an X session, DISPLAY will not be set.
+# This will confuse the user, as e.g. firefox will silently fail to run.
+# It won't do any harm to set DISPLAY=:0 if it's not set, and might help...
+{
+	# fall back to display :0 when DISPLAY is unset (or empty)
+	$ENV{DISPLAY} ||= ":0";
+}
+
+# Workaround for a heisenbug, see:
+# http://bugs.irssi.org/index.php?do=details&task_id=242
+{ package Irssi::Nick }
+
+# Color constants.
+# Irssi.pm doesn't include symbolic mIRC-style color names...
+# NOTE: if you print e.g. $green . "12345", the "1" will be interpreted
+# as the 2nd digit of the color! Only good fix is to always put a space:
+# print $green . " 12345" works OK.
+# Declarations only; defined in init_colors()
+our ($bold_on, $bold_off, $green, $red, $yellow, $purple, $color_off);
+
+# @urls is a list of anonymous hashes, each representing one URL.
+# See read_url_file for hash elements.
+our @urls;
+
+# Most-recently-posted URL (the URL only, not a hash). Only used
+# for avoiding dups (see url_log).
+our $lasturl = "";
+
+# Have any URLs been captured since the last /ul -delete? If so,
+# /urlm_undo_delete warns (and wants -yes) before discarding them.
+our $captured_since_delete = 0;
+
+# echo(@lines): show each argument as one line of output, using irssi's
+# /echo command rather than print() or Irssi::print(). Those interpret
+# % chars as irssi formats, so URLs containing HTML %-escapes come out
+# in weird colors. The price: a colored -!- in front of every line :(
+sub echo {
+	foreach my $line (@_) { command("/echo $line"); }
+}
+
+# trim leading/trailing whitespace from $_[0] IN PLACE; returns the result
+sub trim {
+	$_[0] =~ s/^\s+|\s+$//g; # was \s$ - removed only ONE trailing char
+	return $_[0];
+}
+
+# read_url_file: read the log file named by get_url_log_file().
+# Returns a list of URL hashes (keys: stamp, nick, channel, url), which
+# will be empty if the file wasn't present or was empty.
+sub read_url_file {
+	my $file = get_url_log_file();
+	my @got;
+
+	# 3-arg open + lexical handle: the filename can't be read as a mode
+	open(my $fh, '<', $file) or return;
+	flock($fh, LOCK_SH); # readers only need a shared lock
+	while(my $line = <$fh>) {
+		my @fields = split " ", $line; # also discards the newline
+		next if @fields < 4; # skip blank/hand-mangled lines
+		push @got, {
+			stamp => $fields[0],
+			nick => $fields[1],
+			channel => $fields[2],
+			url => $fields[3],
+		};
+	}
+	close $fh;
+
+	return @got;
+}
+
+# Re-log each URL hash given; $lasturl is blanked so url_log won't skip it.
+sub write_url_file {
+	foreach my $entry (@_) {
+		$lasturl = "";
+		url_log(1, @{$entry}{qw/nick channel url stamp/});
+	}
+}
+
+# trim_url_log([$quiet]): apply urlm_max_log_age (seconds) and
+# urlm_max_log_lines to @urls; if any URLs fall outside them, rewrite
+# the log with only the survivors and print how many were trimmed.
+# $quiet is read but currently unused (the check is commented out).
+sub trim_url_log {
+	my $quiet = shift || 0;
+	my $line_limit = settings_get_int("urlm_max_log_lines") || 0;
+	my $age_limit = settings_get_int("urlm_max_log_age") || 0;
+
+	# both limits disabled: nothing to do!
+	return if !$line_limit && !$age_limit;
+
+	# age pass: keep only URLs stamped within the last $age_limit seconds
+	my @survivors = @urls;
+	if($age_limit) {
+		@survivors = grep { $_->{stamp} >= (time() - $age_limit) } @urls;
+	}
+
+	# size pass: of those, keep only the newest $line_limit
+	# (a negative slice index counts from the end of the array)
+	if($line_limit && (@survivors > $line_limit)) {
+		@survivors = @survivors[-$line_limit .. -1];
+	}
+
+	# anything left out by either pass counts as trimmed
+	my $dropped = @urls - @survivors;
+	return unless $dropped;
+
+	# clear_url_log() empties @urls and removes the file;
+	# write_url_file() then re-logs each survivor.
+	clear_url_log();
+	write_url_file(@survivors);
+
+	print "Trimmed $dropped URLs from log"; # unless $quiet;
+}
+
+# Clear the URL log: delete the on-disk file, then empty @urls.
+sub clear_url_log {
+	# a failed unlink goes unreported
+	unlink get_url_log_file();
+	@urls = ();
+}
+
+# get_url_log_file: return the urlm_log_file setting, with a leading
+# tilde replaced by the value of $ENV{HOME}.
+sub get_url_log_file {
+	my $path = settings_get_str('urlm_log_file');
+	$path =~ s{^~}{$ENV{HOME}};
+	return $path;
+}
+
+# "message public" signal handler: log every URL in the channel text.
+sub url_public {
+	my ($server, $text, $nick, $hostmask, $channel) = @_;
+	foreach my $found (find_urls($text)) {
+		url_log(0, $nick, $channel, $found);
+	}
+}
+
+# Signal handler for "message own_public" and "message own_private":
+# when urlm_log_own is on, log the URLs in text we sent ourselves.
+sub url_own {
+	my ($server, $text, $channel) = @_;
+	if(settings_get_bool('urlm_log_own')) {
+		url_log(0, $server->{nick}, $channel, $_) foreach find_urls($text);
+	}
+}
+
+# Signal handler for "message private", "message irc notice",
+# "message irc op_public" and "message irc action": log the URLs in the
+# text, recording our own nick in the "channel" field.
+sub url_private {
+	my ($server, $text, $nick, $hostmask) = @_;
+	my $me = $server->{nick};
+	url_log(0, $nick, $me, $_) foreach find_urls($text);
+}
+
+# "message topic" signal handler: log the URLs in a newly-set topic,
+# unless we are the one who set it.
+sub url_topic {
+	my ($server, $channel, $text, $nick, $hostmask) = @_;
+	unless($nick eq $server->{nick}) { # own topic changes aren't logged
+		url_log(0, $nick, $channel, $_) foreach find_urls($text);
+	}
+}
+
+# "channel joined" signal handler: log URLs in the channel's topic.
+sub url_join_topic {
+	my ($chan) = @_;
+	return unless $chan->{topic};
+	# topic_by may be unset and might carry a !user@host suffix: keep the nick
+	(my $setter = $chan->{topic_by} || "") =~ s/!.*//;
+	return if $setter eq $chan->{server}->{nick}; # don't log own topic
+	my $topic = $chan->{topic}; # a copy, in case find_urls() modifies it
+	url_log(0, $setter, $chan->{name}, $_) for find_urls($topic);
+}
+
+# "message part" signal handler: when urlm_log_partquit is enabled, log
+# the URLs in someone else's part message.
+sub url_part {
+	my ($server, $channel, $nick, $hostmask, $text) = @_;
+	return if !settings_get_bool('urlm_log_partquit');
+	# our own parts are never logged (redundant?)
+	return if $server->{nick} eq $nick;
+	url_log(0, $nick, $channel, $_) foreach find_urls($text);
+}
+
+# "message quit" signal handler: when urlm_log_partquit is enabled, log
+# the URLs in someone else's quit message; "channel" is our own nick.
+sub url_quit {
+	my ($server, $nick, $hostmask, $text) = @_;
+	return if !settings_get_bool('urlm_log_partquit');
+	my $me = $server->{nick};
+	return if $me eq $nick; # our own quit: skip (redundant?)
+	url_log(0, $nick, $me, $_) foreach find_urls($text);
+}
+
+# "dcc chat message" signal handler: log the URLs in the text, with the
+# dcc object's nick as nick and our nick on its server as "channel".
+# TODO: test this!
+sub url_dccmsg {
+	my ($dcc, $text) = @_;
+	my ($peer, $me) = ($dcc->{nick}, $dcc->{server}->{nick});
+	url_log(0, $peer, $me, $_) foreach find_urls($text);
+}
+
+# print_url_line($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url):
+# echo one formatted (colorful) line of /ul output. $maxnick and
+# $maxchan are the column widths for the nick and channel fields.
+sub print_url_line {
+	my ($maxnick, $maxchan, $num, $stamp, $nick, $channel, $url) = @_;
+
+	# the two widths are spliced into the template before sprintf sees it
+	my $template = "%s%3s%s %s %11s%s %${maxnick}s%s %${maxchan}s%s  %s%s";
+	my @fields = (
+		$bold_on, $num, $bold_off, $green, $stamp, $red, $nick,
+		$yellow, $channel, $purple, $url, $color_off);
+	echo(sprintf($template, @fields));
+}
+
+sub url_list_cmd { # bound to /ul: list (or, with -delete, remove) URLs
+	my $do_delete = 0;
+	my $listed = 0;
+	my @keep_urls;
+
+	my $arg = shift || "";
+	# Selector syntax: "" = last 10, N, N-M, all, #chan/&chan, /regex/,
+	# anything else = nick prefix; a leading "!" inverts the selection.
+	$arg = trim($arg); # NOT lowercased: lc would turn \D, \S, \W into \d, \s, \w
+
+	if($arg =~ /^-delete/i) {
+		$arg =~ s/^-delete\s*//i;
+		$do_delete++;
+
+		if($arg eq '') {
+			print "/ul -delete requires a parameter! (/ul help for details)";
+			return;
+		}
+	}
+
+	if(not @urls) {
+		print "No URLs in list!";
+		return;
+	}
+
+	my ($start, $end, $nick, $regex, $channel);
+	$arg = "-" if lc($arg) eq 'all';
+
+	my $invert = 0;
+	if($arg =~ /^!(.*)/) {
+		$arg = $1;
+		$invert = 1;
+	}
+
+	if($arg eq "") {
+		$start = @urls-10;
+		$start = 0 if $start < 0;
+		$end = $#urls;
+	} elsif($arg =~ /^[&#](.*)/) {
+		$channel = $1;
+	} elsif($arg =~ m{^/(.*?)/?$}) { # non-greedy: closing "/" isn't captured
+		$regex = $1;
+	} elsif($arg =~ /^\d+$/) {
+		$start = $end = $arg;
+	} elsif($arg =~ /^(\d*)-(\d*)$/) { # exactly one "-": N-M, N-, -M or -
+		($start, $end) = ($1, $2);
+		$start = 0 if $start eq "";
+		$end = $#urls if $end eq "";
+	} else {
+		$nick = $arg;
+	}
+	if(defined $regex && !defined eval { qr/$regex/ }) { print "/ul: bad regex '$regex': $@"; return; }
+	my $count = 0;
+	my @to_list;
+	for(@urls) {
+		my $list = 0;
+		if($nick && (lc($_->{nick}) =~ ("^" . quotemeta(lc $nick)))) {
+			$list++;
+		} elsif($regex) {
+			$list++ if $_->{url} =~ /$regex/i;
+		} elsif($channel) {
+			$list++ if $_->{channel} =~ /^[#&]?\Q$channel\E$/i; # literal match
+		} elsif(defined($start) && defined($end)) {
+			$list++ if $count >= $start && $count <= $end;
+		}
+
+		$list = !$list if $invert;
+
+		if($list) {
+			$listed++;
+			push @to_list, [ $count, $_ ];
+		} elsif($do_delete) {
+			push @keep_urls, $_;
+		}
+
+		$count++;
+	}
+
+	if(@to_list) { # print the list if anything's supposed to be in it
+		my $maxnick = 4;
+		my $maxchan = 7;
+
+		for(@to_list) {
+			my ($num, $u) = @$_;
+			my $len = length($u->{nick});
+			$maxnick = $len if $len > $maxnick;
+			$len = length($u->{channel});
+			$maxchan = $len if $len > $maxchan;
+		}
+
+		print_url_line($maxnick, $maxchan,
+				"#", "When", "Nick", "Channel", "URL");
+
+		for(@to_list) {
+			my ($num, $u) = @$_;
+
+			my $stamp = strftime("%m/%d-%H:%M", localtime($u->{stamp}));
+			print_url_line($maxnick, $maxchan,
+					$num, $stamp, $u->{nick}, $u->{channel}, $u->{url});
+		}
+	}
+
+	if($do_delete) { # process -delete flag
+		my $deleted = scalar @urls - scalar @keep_urls;
+		if(not $deleted) {
+			print "No URLs deleted";
+			return;
+		}
+
+		my $file = get_url_log_file();
+		rename($file, "$file~") or print "Warning: can't backup log file: $!";
+
+		clear_url_log();
+		write_url_file(@keep_urls);
+
+		print $red . "These " . $deleted . " URLs have been deleted!" .
+			$color_off . " (" . @urls . " remain)";
+
+		$captured_since_delete = 0;
+	} else { # no -delete flag, show summary
+		print "Listed $listed of $count URLs";
+	}
+}
+
+sub urlm_say { # placeholder: the body is empty, so calling it does nothing
+}
+
+# urlm_undo_delete [-yes]: replace the URL list with the backup log left
+# by /ul -delete; wants -yes if URLs were captured since that delete.
+sub urlm_undo_delete {
+	my $yes = (($_[0] || '') eq '-yes');
+
+	if($captured_since_delete && (not $yes)) {
+		print "urlm_undo_delete: doing this will throw away some URLs that " .
+			"were captured since the last delete. Re-run with '-yes' to do it " .
+			"anyway.";
+		return;
+	}
+
+	# rename() swaps the backup in atomically and changes nothing when it
+	# fails, so a missing backup can no longer cost us the current log
+	# (the old code unlinked it first and never rewrote it).
+	my $file = get_url_log_file();
+	if(not(rename("$file~", $file))) {
+		print "Can't restore log file: $!";
+		return;
+	}
+
+	@urls = read_url_file();
+	print "Restored " . @urls . " URLs from backup";
+	$captured_since_delete = 0;
+}
+
+# get_url_from_number($index):
+# Returns a URL hash from @urls, or nothing (after printing a message)
+# if the list is empty or the index doesn't resolve to an entry.
+# Empty string: the highest-numbered (most recent) URL.
+# Non-negative integer N: entry N. "-N": N places before the most
+# recent entry (so -1 is the second-newest).
+# Anything else: a nick; returns the newest URL logged from that nick.
+sub get_url_from_number {
+	my $arg = shift;
+	$arg = '' unless defined $arg;
+	$arg = trim($arg);
+	if(not @urls) { print("No URLs in list!"); return; }
+
+	if($arg eq '') {
+		$arg = $#urls;
+	} elsif($arg =~ /^-\d+$/) { # anchored: a nick like "foo-1" is no offset
+		my $wanted = $arg;
+		$arg = $#urls + $arg;
+		if($arg < 0) { print("No such URL number '$wanted'"); return; }
+	} elsif($arg !~ /^\d+$/) { # not a number: newest URL from that nick
+		for my $url (reverse @urls) {
+			return $url if lc($arg) eq lc($url->{nick});
+		}
+		print("Can't find any URLs from nick '$arg'");
+		return;
+	}
+
+	if($arg > $#urls) {
+		print("No such URL number '$arg'");
+		return;
+	}
+
+	return $urls[$arg];
+}
+
+# url_open_cmd($urlnum, $browser): open a URL with the given browser tag.
+# An empty $browser means guess: wget, then overrides, then the default.
+sub url_open_cmd {
+	my ($urlnum, $browser) = @_;
+	$browser = '' if ref $browser; # means it's a Server object
+	my $url = get_url_from_number($urlnum);
+	return if not $url;
+	my $link = $url->{url};
+	if(not $browser) { # guess browser, checking for wget first...
+		if($link =~ m{/[^/]+\.(\w+)$}) {
+			my $ext = lc $1;
+			for(split " ", settings_get_str('urlm_wget_extensions')) {
+				next unless $ext eq lc($_);
+				# /wget only takes a plain number (no nick, no -N): pass the index
+				my ($idx) = grep { $urls[$_] == $url } 0..$#urls;
+				url_open_wget_cmd($idx);
+				return;
+			}
+		}
+
+		# not a wget extension, check browser override patterns
+		$browser = settings_get_str('urlm_default_browser');
+		my @overrides = read_browser_overrides();
+OVERRIDE:
+		for(@overrides) {
+			my ($tag, $pats) = @$_;
+			for(@$pats) {
+				if($link =~ /$_/i) {
+					$browser = $tag;
+					last OVERRIDE;
+				}
+			}
+		}
+	}
+
+	my @browsers = read_browser_list();
+	my ($tag, $name, $format);
+	for(@browsers) {
+		if($_->{tag} eq $browser) {
+			($tag, $name, $format) = ($_->{tag}, $_->{name}, $_->{command});
+			last;
+		}
+	}
+	if(not defined $format) { # unknown tag: don't run an empty command
+		print "Browser '$browser' not defined in browser list";
+		return;
+	}
+	echo("$name - " . $link . " (" . $url->{nick} . ")");
+
+	$link =~ s/'/%27/g;  # be nice to the shell, escape single quotes
+	$link =~ s/\(/%28/g; # be nice to firefox, escape parens
+	$link =~ s/\)/%29/g; # firefox -remote 'openURL(url,new-tab)' *fails*
+	                     # if the url contains any () chars!
+	# ">/dev/null 2>&1", not "&>": system() runs /bin/sh, maybe not bash
+	my $cmd = sprintf($format, $link) . " >/dev/null 2>&1 &";
+	system($cmd);
+}
+
+# Open with wget in an irssi window; $arg is "[extra wget args] [url#]"
+# TODO: Maybe allow for using "fetch" instead of wget? (does anyone care?)
+our $wgetcount = 1;
+sub url_open_wget_cmd {     # bound to /urlm_wget /wget
+	my $arg = shift;
+
+	my @args = split " ", $arg;
+
+	my $urlnum = "";
+	if(@args && ($args[-1] =~ /^\d+$/)) { # a trailing number selects the URL
+		$urlnum = pop @args;
+	}
+
+	my $url = get_url_from_number($urlnum); # "" is passed when no number given
+	return if not $url;
+
+	my $more_args = join(" ", @args); # whatever is left goes to wget verbatim
+	$more_args .= " " if $more_args;
+
+	my $dir = settings_get_str('urlm_wget_dl_dir');
+	$dir =~ s/^~/$ENV{HOME}/;
+	$dir = "." if not $dir;
+
+	my $size = settings_get_int('urlm_wget_split_size');
+
+	if($size > 0) {
+		command("/window new split");
+		command("/window size $size");
+	} else { # size == 0, means "do not split"
+		command("/window new hidden");
+	}
+
+	# find an unused window name...
+	my $name = "urlm_wget_" . ($wgetcount++);
+	while(window_find_item($name)) {
+		$name = "urlm_wget_" . ($wgetcount++);
+	}
+
+	command("/window name $name");
+
+	if(settings_get_bool('urlm_wget_autoclose')) {
+		my $delay = settings_get_int('urlm_wget_autoclose_delay');
+		if($delay) {
+			print "This window will close $delay seconds after download is done";
+		} else {
+			print "This window will close when download is done";
+		}
+	} else {
+		print "Use " . $yellow . "/window close $name" . $color_off .
+			" to close this window";
+	}
+
+	my $args = trim(settings_get_str('urlm_wget_extra_args'));
+	$args .= " " if $args;
+
+	my $wget_bin = settings_get_str('urlm_wget_path');
+	$wget_bin =~ s/^~/$ENV{HOME}/;
+	$wget_bin = "wget" if not $wget_bin;
+
+	# the /exec is given the same name as the window
+	command(
+			"/exec -nosh " .
+			"-name $name " .
+			"$wget_bin " .
+			"-P $dir " .
+			$args .
+			$more_args .
+			$url->{url});
+	if($size) { # split window: go back; a hidden window is left focused
+		command("/window last");
+	}
+}
+
+# Close a window by name (if it still exists): go to it, then close it.
+sub close_window {
+	my ($name) = @_;
+	return if !window_find_item($name); # already closed: nothing to do
+	command($_) for ("/window goto $name", "/window close");
+}
+
+# "exec remove" signal handler: when urlm_wget_autoclose is on and the
+# finished process belonged to one of our urlm_wget_N windows, close
+# that window - at once, or after urlm_wget_autoclose_delay seconds.
+sub sig_exec_remove {
+	my ($proc, $status) = @_;
+	return if !settings_get_bool('urlm_wget_autoclose');
+
+	# target_win->name will be "" if window already closed!
+	my $win_name = $proc->{target_win}->{name} || "";
+	return if $win_name !~ /^urlm_wget_\d+$/;
+	return if !window_find_item($win_name); # already gone
+
+	my $seconds = settings_get_int('urlm_wget_autoclose_delay');
+	if($seconds <= 0) {
+		close_window($win_name);
+		return;
+	}
+	timeout_add_once($seconds * 1000, "close_window", $win_name);
+}
+
+# kill a process by its /exec name (sends signal 15, i.e. SIGTERM)
+sub kill_proc {
+	command(join " ", "/exec -15", $_[0]);
+}
+
+# "window destroyed" signal handler: when one of our urlm_wget_N
+# windows is destroyed, kill the wget process of the same name.
+# The kill is deferred by a second (timeout) rather than done right
+# here, to avoid a possible race in which sig_exec_remove() tries to
+# close the very window whose destruction triggered this handler.
+sub sig_window_destroyed {
+	my ($win) = @_;
+	my $win_name = $win->{name} || "";
+
+	# only windows we named ourselves are of interest
+	if($win_name =~ /^urlm_wget_\d+$/) {
+		timeout_add_once(1000, "kill_proc", $win_name);
+	}
+}
+
+# find_urls: extract all URLs from the input text, returns a list
+# (which may be empty).
+# Be VERY permissive about what we consider a URL.
+# 20100614 bkw: be a little less permissive
+# 20140530 bkw: stop catching dupe http://whatever and https://whatever
+
+# original sub:
+##sub find_urls {
+##	my @got = ($_[0] =~ m{(?:https?|ftp)://\S+}g);
+##	push @got, "http://$_" for $_[0] =~ /(?:www\d*\.[^.]+\.\S+)/g;
+##	push @got, "ftp://$_" for $_[0] =~ /(?:ftp\d*\.[^.]+\.\S+)/g;
+##	s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
+##	return @got;
+##}
+
+# new version: find_urls($text) returns every URL found in $text.
+sub find_urls {
+	my $text = defined $_[0] ? $_[0] : ""; # copy: don't eat the caller's string
+	my @got;
+	push @got, $1 while $text =~ s{((?:https?|ftp)://\S+)}{}; # cut out, so they can't re-match below
+	# [^.\s]: a "URL" spanning whitespace would corrupt the space-separated log
+	push @got, "http://$_" for $text =~ /(?:www\d*\.[^.\s]+\.\S+)/g;
+	push @got, "ftp://$_" for $text =~ /(?:ftp\d*\.[^.\s]+\.\S+)/g;
+	s/[>'",.:;!?)]+$// for @got; # remove trailing punctuation
+	return @got;
+}
+
+# Annoying bots have a tendency to do this:
+# <actual_person> check this out: http://www.blahblah.blah/path/to/stuff.html
+# <annoying_bot> Title: Stuff (at www.blahblah.blah)
+# which is useless, and gets in the way of the /uo command.
+# just_domain($new, $old): true if $new, scheme aside, is exactly the
+# host part of $old and carries no path of its own.
+sub just_domain {
+	my ($candidate, $previous) = @_;
+	for my $u ($candidate, $previous) { $u =~ s{^(ht|f)tps?://}{}; }
+	return 0 if $candidate =~ m{/.}; # it has a path of its own
+	$previous =~ s{/.*}{}; # keep only the host part
+	return $candidate eq $previous;
+}
+
+# url_log($relog, $nick, $channel, $url, [$stamp]): append a URL to the
+# URL log file and to the @urls array. Locks the file before writing,
+# so should be safe even with multiple instances of irssi.
+# $relog should be false if capturing a new URL from channel/msg text,
+# or true if re-logging an old URL (e.g. /ul -delete does this).
+# $stamp defaults to now. Repeats of the previous URL are skipped.
+sub url_log {
+	my($relog, $nick, $channel, $url, $stamp) = @_;
+	$nick =~ s/!.*//;
+	$stamp = time() unless $stamp;
+
+	return if lc $url eq lc $lasturl; # a tiny bit of protection from spam/flood
+	return if just_domain($url, $lasturl);
+	$lasturl = $url;
+
+	my $file = get_url_log_file();
+	open(my $fh, '>>', $file) or return;
+	flock($fh, LOCK_EX);
+	seek($fh, 0, 2); # 2 = SEEK_END, in case the file grew while we waited
+
+	# $stamp, not time(): a re-logged URL has to keep its original time
+	# on disk, or it looks brand new (to urlm_max_log_age) after a reload
+	print $fh "$stamp $nick $channel $url\n";
+	close($fh);
+
+	push @urls, {
+		stamp => $stamp,
+		nick => $nick,
+		channel => $channel,
+		url => $url,
+	};
+
+	if(not $relog) {
+		if(not settings_get_bool('urlm_quiet_capture')) {
+			my $on = "";
+			if($channel =~ /^#/) {
+				$on = " on " . $green . $channel . $color_off;
+			}
+			echo "Captured URL #" . $#urls . " " .
+				$purple . $url . $color_off .
+				" from " .  $yellow . $nick . $color_off . $on;
+		}
+#		trim_url_log();
+		$captured_since_delete++;
+	}
+}
+
+# urlm_help($topic): print generated /help text for the browser commands
+# (<tag> and urlm_open_<tag>) and stop the signal. Help for all the other
+# commands is stored in text files in ~/.irssi/help.
+sub urlm_help {
+	my $arg = shift;
+	$arg = lc trim($arg);
+	our %urlm_help;
+
+	my %topic_text;
+	foreach my $browser (read_browser_list()) {
+		my ($tag, $name, $command) = @{$browser}{qw/tag name command/};
+		my $text = join("",
+			uc($tag), " [<url#>]\n\n",
+			"Open a URL with the external browser '$name', using the command:\n",
+			$command, "\n\n",
+			"If [<url#>] is omitted, the most recent URL will be opened.\n");
+		$topic_text{$_} = $text for ($tag, "urlm_open_" . $tag);
+	}
+
+	my $help = $topic_text{$arg} || return;
+	signal_stop();
+	print $help;
+}
+
+# write_browser_overrides(\@list): save the override list - an array of
+# [ tag, [ pattern, ... ] ] pairs - to the urlm_browser_overrides
+# setting, as "tag:pattern:pattern" groups joined by "::".
+sub write_browser_overrides {
+	my ($list) = @_;
+
+	my @groups = map {
+		my ($tag, $pats) = @$_;
+		join(":", $tag, @$pats);
+	} @$list;
+	settings_set_str('urlm_browser_overrides', join("::", @groups));
+}
+
+# read_browser_overrides: parse the urlm_browser_overrides setting
+# ("tag:pattern:pattern" groups joined by "::") into a list of
+# [ tag, [ pattern, ... ] ] pairs, in the order they appear.
+sub read_browser_overrides {
+	my $setting = settings_get_str('urlm_browser_overrides');
+	my @groups = split /::/, $setting;
+
+	# within a group, the first item is the tag and the rest are
+	# that tag's patterns
+	return map {
+		my ($tag, @pats) = split /:/, $_;
+		[ $tag, \@pats ];
+	} @groups;
+}
+
+# write_browser_list(\@list): save the browser definitions (hashes with
+# tag, name and command keys) to the urlm_browsers setting as
+# "tag:name:command" groups joined by "::", then emit "setup changed".
+sub write_browser_list {
+	my ($list) = @_;
+
+	my @groups = map { join(":", @{$_}{qw/tag name command/}) } @$list;
+	settings_set_str('urlm_browsers', join("::", @groups));
+
+	# "setup changed" is the signal apply_settings() is hooked to,
+	# so the new list takes effect right away
+	signal_emit("setup changed");
+}
+
+# read_browser_list: parse the urlm_browsers setting ("tag:name:command"
+# groups joined by "::") into a list of hashes with the keys tag, name
+# and command. Fields missing from a group come back undefined.
+# Returns the list in the order the groups appear in the setting.
+sub read_browser_list {
+	my @result;
+
+	my $list = settings_get_str('urlm_browsers');
+	my @entries = split /::/, $list;
+
+	for(@entries) {
+		# limit 3: a single ":" inside the command (e.g. in a URL) stays
+		# part of the command instead of truncating it
+		my ($tag, $name, $command) = split /:/, $_, 3;
+		push @result, { tag => $tag, name => $name, command => $command };
+	}
+
+	return @result;
+}
+
+# commands:
+# urlm_add_browser <browser>:<fullname>:<cmd>
+# Add a browser definition, or replace the one with the same tag.
+sub urlm_add_browser {
+	my $arg = shift || "";
+	$arg = trim($arg); # was "$arg =~ trim($arg)": used the input as a regex
+
+	if($arg !~ /^[^:]+:[^:]+:[^:]+$/) {
+		print "Usage: /urlm_add_browser tag:name:command";
+		return;
+	}
+
+	my ($tag, $name, $cmd) = split /:/, $arg;
+
+	$tag = trim($tag);
+	$tag = lc $tag;
+	$name = trim($name);
+	$cmd = trim($cmd);
+	if($tag !~ /^\w+$/) { # also rejects a tag that is empty after trimming
+		print "/urlm_add_browser: tag must consist of only " .
+			"letters, numbers, or underscores (_), not '$tag'";
+		return;
+	}
+
+	if($tag eq 'wget') {
+		print "/urlm_add_browser: 'wget' is reserved; use a different tag";
+		return;
+	}
+
+	if($cmd !~ /'[^']*\%s[^']*'/) {
+		print "/urlm_add_browser: command must contain '\%s' (single-quoted)";
+		return;
+	}
+
+	my @browsers = read_browser_list();
+	my $found = 0;
+	for(@browsers) {
+		if(lc($_->{tag}) eq $tag) {
+			print "Replaced old definition of $tag";
+			$_->{name} = $name;
+			$_->{command} = $cmd;
+			$found++;
+			last;
+		}
+	}
+
+	if(not $found) {
+		push @browsers, { tag => $tag, name => $name, command => $cmd };
+		print "Added browser definition $tag";
+	}
+
+	write_browser_list(\@browsers);
+}
+
+# urlm_del_browser <browser>
+# Delete the browser definition(s) whose tag matches <browser>
+# (case-insensitively), together with all overrides for that tag.
+sub urlm_del_browser {
+	my $tag = shift || "";
+	$tag = lc trim($tag);
+	return if !$tag;
+
+	# urlm_del_override() already prints "Browser not defined" if it
+	# wasn't defined, so there's no need to print that again here.
+	urlm_del_override("$tag all");
+
+	# sort each definition into "going" or "staying"
+	my (@doomed, @survivors);
+	foreach my $def (read_browser_list()) {
+		if(lc($def->{tag}) eq $tag) {
+			push @doomed, $def;
+		} else {
+			push @survivors, $def;
+		}
+	}
+
+	# one message per deleted definition; rewrite the list only if needed
+	print "Deleted definition of $tag" for @doomed;
+	write_browser_list(\@survivors) if @doomed;
+}
+
+# urlm_list_browsers: print one line per defined browser - tag, name
+# and command - with "[*]" in front of the urlm_default_browser.
+sub urlm_list_browsers {
+	foreach my $def (read_browser_list()) {
+		my $is_default = settings_get_str('urlm_default_browser') eq $def->{tag};
+		my $marker = $is_default ? "[*]" : "   ";
+		print join("",
+			$marker,
+			"Tag: $bold_on", $def->{tag}, "$bold_off, ",
+			"Name: $bold_on", $def->{name}, "$bold_off, ",
+			"Command: $bold_on", $def->{command}, "$bold_off");
+	}
+}
+
+# urlm_add_wget_ext <ext>
+# Add an extension (lowercased) to the urlm_wget_extensions setting,
+# unless it is already listed, then run "/set urlm_wget_extensions".
+sub urlm_add_wget_ext {
+	my $ext = shift || "";
+	$ext = lc trim($ext);
+
+	unless($ext) {
+		print "Usage: /urlm_add_wget_ext <extension>";
+		return;
+	}
+
+	my @known = split " ", settings_get_str('urlm_wget_extensions');
+	foreach my $have (@known) {
+		next if $have ne $ext;
+		print "$ext is already in the wget extensions list";
+		return;
+	}
+
+	settings_set_str('urlm_wget_extensions', join(" ", @known, $ext));
+	command("/set urlm_wget_extensions");
+}
+
+# urlm_del_wget_ext <ext>
+# Remove an extension (compared lowercased) from the
+# urlm_wget_extensions setting, then run "/set urlm_wget_extensions".
+sub urlm_del_wget_ext {
+	my $ext = shift || "";
+	$ext = lc trim($ext);
+
+	unless($ext) {
+		print "Usage: /urlm_del_wget_ext <extension>";
+		return;
+	}
+
+	my @before = split " ", settings_get_str('urlm_wget_extensions');
+	my @after = grep { $_ ne $ext } @before;
+	if(@after == @before) { # nothing was filtered out
+		print "$ext is not in the wget extensions list";
+		return;
+	}
+
+	settings_set_str('urlm_wget_extensions', join(" ", @after));
+	command("/set urlm_wget_extensions");
+}
+
+# urlm_list_overrides [<browser>]
+# Print the override patterns of the given browser tag, or of every
+# browser if no tag is given, numbering each browser's patterns from 1.
+sub urlm_list_overrides {
+	my $wanted = shift || "";
+	$wanted = trim($wanted);
+
+	my $shown = 0;
+	foreach my $entry (read_browser_overrides()) {
+		my ($browser, $pats) = @$entry;
+		# with a tag given, every other browser is passed over
+		next if $wanted && $wanted ne $browser;
+		$shown++;
+
+		my $number = 0;
+		foreach my $pat (@$pats) {
+			$number++;
+			print join("", $browser, "[$number]: ", $pat);
+		}
+	}
+	return if $shown;
+
+	# nothing was listed: say why
+	my $complaint = "No browser overrides";
+	$complaint = "No overrides for browser '$wanted'" if $wanted;
+	print $complaint;
+}
+
+# urlm_add_override <browser> <pattern>: send URLs matching regex <pattern> to <browser>
+sub urlm_add_override {
+	my $arg = shift || "";
+	$arg = trim($arg); # do not lc($arg), the pattern may need caps!
+	my ($browser, $pattern) = split " ", $arg;
+	$browser = lc($browser || ""); # avoids a warning when nothing was given
+	if(not ($browser and $pattern)) {
+		print "Usage: /urlm_add_override <browser> <pattern>";
+		return;
+	}
+
+	if(!grep { $_->{tag} eq $browser } read_browser_list()) {
+		print "Browser $browser not defined in browser list";
+		return;
+	}
+
+	# ":" separates items in the stored setting; it would split the pattern
+	if($pattern =~ /:/) {
+		print "Pattern $pattern must not contain ':' (write \\x3a instead)";
+		return;
+	}
+
+	# block eval: the old string eval "qr{$pattern}" could execute Perl code
+	if(not defined eval { qr/$pattern/ }) {
+		print "Pattern $pattern is not a valid Perl regex: $@";
+		return;
+	}
+
+	my @overrides = read_browser_overrides();
+	my $found = 0;
+	for(@overrides) {
+		my ($tag, $pats) = @$_;
+		next unless $tag eq $browser;
+		push @$pats, $pattern;
+		$found++;
+	}
+	push @overrides, [ $browser, [ $pattern ] ] unless $found;
+	print "Added override for $browser: $pattern";
+	write_browser_overrides(\@overrides);
+}
+
+# urlm_del_override <browser> <number>|<all>
+# Delete override pattern <number> (1-based, as shown by
+# /urlm_list_overrides) of <browser>, or every one of them with "all".
+sub urlm_del_override {
+	my $arg = shift || "";
+	$arg = lc trim($arg);
+
+	my ($browser, $number) = split " ", $arg;
+	if(not($browser and $number)) {
+		print "Usage: /urlm_del_override <browser> <number>|all";
+		return; # was missing: execution fell through with undef values
+	}
+
+	if(!grep { $_->{tag} eq $browser } read_browser_list()) {
+		print "Browser $browser not defined in browser list";
+		return;
+	}
+
+	if($number ne 'all' && $number !~ /^[1-9]\d*$/) {
+		print "Bad override '$number': must be a number >= 1, or 'all'";
+		return;
+	}
+
+	my @overrides = read_browser_overrides();
+	my @keep_overrides = ();
+	my $found = 0;
+	for(@overrides) {
+		my ($tag, $pats) = @$_;
+
+		if($tag ne $browser) {
+			push @keep_overrides, $_;
+			next;
+		}
+
+		$found += @$pats, next if $number eq 'all';
+
+		if($number > @$pats) {
+			print "Value $number out of range";
+			push @keep_overrides, $_; # leave this entry as it was
+			next;
+		}
+
+		$found++;
+		splice(@$pats, $number - 1, 1);
+		push @keep_overrides, $_ if @$pats;
+	}
+
+	if($found) {
+		print "Deleted $found overrides";
+		write_browser_overrides(\@keep_overrides);
+	} else {
+		print "No matching overrides";
+	}
+}
+
+# init_colors: (re)define the formatting strings declared near the top
+# of the script, from the urlm_use_bold and urlm_use_color settings.
+# Each one is a control-character sequence when its setting is on, or
+# the empty string when it is off, so callers can always concatenate
+# them without checking the settings themselves.
+sub init_colors {
+	# look both settings up first
+	my $use_bold = settings_get_bool('urlm_use_bold');
+	my $use_color = settings_get_bool('urlm_use_color');
+
+	# bold on and bold off are one and the same code
+	# (it acts as a toggle)
+	$bold_on = $use_bold ? "\002" : "";
+	$bold_off = $use_bold ? "\002" : "";
+
+	# a color is \003 followed by a digit;
+	# a bare \003 switches color off again
+	$green = $use_color ? "\0033" : "";
+	$red = $use_color ? "\0034" : "";
+	$yellow = $use_color ? "\0037" : "";
+	$purple = $use_color ? "\0036" : "";
+	$color_off = $use_color ? "\003" : "";
+}
+
+# init_browsers(): dynamic bindings. Each browser tag gets bound to
+# /urlm_open_$tag, and (if short commands enabled) to /$tag.
+our @bound_refs;
+sub init_browsers {
+	# for this to work, the code ref can *NOT* be stored in a "my" var
+	# I think this is a bug in irssi, or possibly perl, but maybe I'm
+	# just being dumb...
+	for(@bound_refs) {
+		command_unbind($_->[0], $_->[1]);
+	}
+	@bound_refs = ();
+
+	my @browsers = read_browser_list();
+	for(@browsers) {
+		# tag is pasted into eval'd source: allow word chars only (as urlm_add_browser does)
+		next unless defined $_->{tag} && $_->{tag} =~ /^\w+\z/;
+		my $code = 'sub { url_open_cmd($_[0], "'. ($_->{tag}) . '"); };';
+		my $cmd = 'urlm_open_' . $_->{tag};
+		push @bound_refs, [ $cmd, eval $code ];
+		# again, no "my" vars, hence the ugly $bound_refs[$#bound_refs] kludge
+		command_bind($cmd, $bound_refs[$#bound_refs]->[1]);
+		if(settings_get_bool('urlm_short_cmds')) {
+			my $shortcmd = $_->{tag};
+			push @bound_refs, [ $shortcmd, $bound_refs[$#bound_refs]->[1] ];
+			command_bind($shortcmd, $bound_refs[$#bound_refs]->[1]);
+		}
+	}
+
+	command_unbind("ul", "url_list_cmd");
+	command_unbind("uo", "url_open_cmd");
+	command_unbind("wget", "url_open_wget_cmd");
+
+	if(settings_get_bool('urlm_short_cmds')) {
+		command_bind("ul", "url_list_cmd");
+		command_bind("uo", "url_open_cmd");
+		command_bind("wget", "url_open_wget_cmd");
+	}
+}
+
+sub init_settings { # call only once, at script load!
+# Where shall we save the URL log?
+	settings_add_str('urlmanager', 'urlm_log_file', "~/.irssi/urllog");
+
+# Where is the wget binary? Absolute path, or "wget" (searches PATH)
+	settings_add_str('urlmanager', 'urlm_wget_path', "wget");
+
+# Where should wget save files?
+	settings_add_str('urlmanager', 'urlm_wget_dl_dir', "~");
+
+# Extra arguments to pass to wget...
+	settings_add_str('urlmanager', 'urlm_wget_extra_args', "");
+
+# Do we log URLs from /part and /quit messages? Disabled by default
+# because so many people always /quit with the same spammish URL
+# e.g. "nimrod has quit [Quit: Try StupidIRC (http://someircclient.com)]"
+	settings_add_bool('urlmanager', 'urlm_log_partquit', 0);
+
+# Do we log URLs from our own public/private messages?
+	settings_add_bool('urlmanager', 'urlm_log_own', 1);
+
+# Cosmetics:
+	settings_add_bool('urlmanager', 'urlm_short_cmds', 1);
+	settings_add_bool('urlmanager', 'urlm_use_bold', 1);
+	settings_add_bool('urlmanager', 'urlm_use_color', 1);
+
+# Browser definitions. A double-colon-separated list. Each list item
+# is a single-colon separated list of (tag, name, command_format).
+# You may add browsers; they work as commands with no other code changes.
+# The browser commands need to be non-blocking, and any stdout/err from
+# them will be ignored.
+# The %s gets replaced with the actual URL. *ALWAYS* use single-quotes
+# (like '%s'). *NEVER* omit the quotes or use double-quotes around the %s,
+# not even around a '%s': inside "..." the shell still expands $(...)!
+# Failure to comply is a security hole!
+	settings_add_str('urlmanager', 'urlm_browsers',
+			'ff:Firefox:firefox -remote \'openurl(%s,new-tab)\'' .
+			'::' .
+			'ie:Internet Explorer:ie6 \'%s\'' .
+			'::' .
+			'us:links+screen:[ "$TERM" = "screen" ] && screen links \'%s\'' .
+			'::' .
+			'ut:links+xterm:xterm -e links \'%s\'' .
+			'::' .
+			'ux:Copy to X Clipboard:echo -n \'%s\'|xsel -i');
+
+# Default browser for /uo and /urlm_open commands
+	settings_add_str('urlmanager', 'urlm_default_browser', 'ff');
+
+# /uo and /urlm_open check this list.
+# Double-colon-separated list, each item is a single-colon-separated
+# list consisting of a browser tag and one or more patterns.
+# If a URL matches one of these
+# patterns, the browser tag will be used as the browser to open the URL
+# with, instead of the default.
+	settings_add_str('urlmanager', 'urlm_browser_overrides',
+			'ie:/[^/]*video\.google\.com:/[^/]*youtube\.com:/[^/]*gametrailers\.com');
+
+# If /uo or /urlm_open get a URL ending in one of these file extensions,
+# it will be downloaded with wget instead of being opened in a browser.
+	settings_add_str('urlmanager', 'urlm_wget_extensions',
+			'tar zip atr bas xex exe dcm car z gz rom cas torrent rar 7z');
+
+# trim log to this many lines. Use with urlm_log_trim_interval and/or
+# urlm_log_trim_startup. Set to 0 to disable.
+	settings_add_int('urlmanager', 'urlm_max_log_lines', 100);
+
+# trim log to this many seconds. Use with urlm_log_trim_interval and/or
+# urlm_log_trim_startup. Set to 0 to disable.
+	settings_add_int('urlmanager', 'urlm_max_log_age', 86400*7);
+
+# trim the log on script load.
+	settings_add_bool('urlmanager', 'urlm_log_trim_startup', 0);
+
+# auto-trim log this often (seconds). Set to 0 to disable.
+	settings_add_int('urlmanager', 'urlm_log_trim_interval', 60*60);
+
+# these control the behavior of windows created with /urlm_wget or /wget
+	settings_add_bool('urlmanager', 'urlm_wget_autoclose', 1);
+	settings_add_int('urlmanager', 'urlm_wget_autoclose_delay', 60);
+	settings_add_int('urlmanager', 'urlm_wget_split_size', 0);
+
+# say "Captured URL #xxx http://whatever from whoever" every time a URL
+# is captured?
+	settings_add_bool('urlmanager', 'urlm_quiet_capture', 0);
+
+# TODO: support these:
+
+# channels/nicks/sites we don't want to log
+#settings_add_str('urlmanager', 'urlm_ignore_channels');
+#settings_add_str('urlmanager', 'urlm_ignore_urls');
+
+# If true, go through the entire list every time a URL is logged,
+# checking for duplicates
+#settings_add_bool('urlmanager', 'urlm_ignore_dups');
+}
+
+our $trim_timeout_tag; # tag of the armed auto-trim timeout; undef when none
+sub init_trim_timeout { # (re)arm the timer from urlm_log_trim_interval
+	timeout_remove($trim_timeout_tag) if($trim_timeout_tag);
+	undef $trim_timeout_tag; # forget it, or a stale tag is removed next time
+	my $millis = settings_get_int('urlm_log_trim_interval') * 1000;
+	if($millis > 0) { # setting is in seconds; 0 leaves auto-trim disabled
+		$trim_timeout_tag = timeout_add($millis, "trim_url_log", 1);
+	}
+}
+
+# apply_settings: handler for the "setup changed" signal (emitted when
+# any /set urlm_* changes value).
+# State that is derived from the value of a setting gets (re)built
+# here; the URL list is re-read from the URL file as the last step.
+sub apply_settings {
+	for my $reinit (\&init_colors, \&init_browsers, \&init_trim_timeout) {
+		$reinit->();
+	}
+	@urls = read_url_file();
+}
+
+sub init_signals { # call only once, at script load!
+	# One row per handler sub: its name, then the signal(s) hooked to it.
+	for my $hook (
+		[ url_public => "message public" ],
+		[ url_private => "message private", "message irc notice",
+			"message irc op_public", "message irc action" ],
+		[ url_dccmsg => "dcc chat message" ],
+		[ url_topic => "message topic" ],
+		[ url_join_topic => "channel joined" ],
+		[ apply_settings => "setup changed" ],
+		[ url_part => "message part" ],
+		[ url_quit => "message quit" ],
+		[ url_own => "message own_public", "message own_private" ],
+		[ sig_exec_remove => "exec remove" ],
+		[ sig_window_destroyed => "window destroyed" ],
+	) { my ($fn, @sigs) = @$hook; signal_add_last($_, $fn) for @sigs; }
+}
+
+sub init_static_binds { # call only once, at script load!
+	# These binds are always on. First, the commands whose handler sub
+	# is named differently from the command itself:
+	command_bind("urlm_list", "url_list_cmd");
+	command_bind("urlm_open", "url_open_cmd");
+	command_bind("urlm_wget", "url_open_wget_cmd");
+	# Then the commands that are handled by a sub of the very same name:
+	command_bind($_, $_) for qw(
+		urlm_add_browser urlm_del_browser urlm_list_browsers
+		urlm_add_wget_ext urlm_del_wget_ext
+		urlm_list_overrides urlm_add_override urlm_del_override
+	);
+	command_bind("urlm_trim_log", "trim_url_log");
+	command_bind("urlm_undo_delete", "urlm_undo_delete");
+	# Finally, urlm_help is bound to the "help" command as well:
+	command_bind("help", "urlm_help");
+}
+
+# Add per-user help dir to help_path, if not already present.
+sub init_help_path {
+	my $dir = "$ENV{HOME}/.irssi/help";
+	my $help_path = settings_get_str('help_path'); # colon-separated dirs
+
+	return if grep { $_ eq $dir } split /:/, $help_path;
+
+	$help_path .= ":$dir";
+	settings_set_str('help_path', $help_path);
+
+	signal_emit('setup changed'); # irssi's signal name has a space, not '_'
+}
+
+# Script load: every sub is defined by now, so settings, signals and
+init_settings();
+init_signals();
+init_static_binds();
+init_colors();
+init_browsers();
+init_help_path();
+@urls = read_url_file();
+settings_get_bool('urlm_log_trim_startup') and trim_url_log();
+init_trim_timeout();
+
+# Delete a backup copy ("<URL log file>~") that may have been left behind.
+unlink(get_url_log_file() . "~");
+
+# Greet the user with the number of URLs read and a pointer to the help.
+print "${bold_on}urlmanager.pl${bold_off} loaded (" .
+	scalar(@urls) . " URLs), type '" .
+	"${yellow}/help urlmanager${color_off}' for help.";
+
+# rest of file is POD docs
+=pod
+
+=head1 NAME
+
+urlmanager
+
+=head1 SYNOPSIS
+
+Yet another URL logger for irssi.
+
+=head1 DESCRIPTION
+
+Captures URLs in channel, privmsg, and DCC chat messages, logs them to a
+file. Provides an irssi command to list captured URLs and several commands
+to do various things with them (open in browser, download, copy to X11
+selection buffer).
+
+This documentation only includes installation instructions. For usage
+instructions, install the script and run B</help urlmanager> within irssi.
+
+=head1 INSTALLATION
+
+Copy B<urlmanager.pl> to your B<~/.irssi/scripts> directory (create the
+directory if it doesn't exist). For auto-loading when irssi starts,
+create a symlink in B<~/.irssi/scripts/autorun>:
+
+=over 4
+
+mkdir -p ~/.irssi/scripts/autorun
+
+cp urlmanager.pl ~/.irssi/scripts
+
+cd ~/.irssi/scripts/autorun
+
+ln -s ../urlmanager.pl .
+
+=back
+
+=head1 CONFIGURATION
+
+All configuration is done from within irssi; read B</help urlmanager>.
+=cut
-- 
cgit v1.2.3