aboutsummaryrefslogtreecommitdiff
path: root/sbosrcarch.conf
diff options
context:
space:
mode:
authorB. Watson <yalhcru@gmail.com>2015-10-20 19:06:43 -0400
committerB. Watson <yalhcru@gmail.com>2015-10-20 19:06:43 -0400
commit42f6d70256292db0da49fb3480eedf772de6182b (patch)
treedcb68d9a00df6890d3b7ac318bc9c4ae06ed40ff /sbosrcarch.conf
parent3c7cb8acab004585ae8184444024f0a2ec6298e1 (diff)
downloadsbostuff-42f6d70256292db0da49fb3480eedf772de6182b.tar.gz
sbosrcarch.conf: $curl*, @blacklist and @whitelist
Diffstat (limited to 'sbosrcarch.conf')
-rw-r--r--sbosrcarch.conf141
1 files changed, 125 insertions, 16 deletions
diff --git a/sbosrcarch.conf b/sbosrcarch.conf
index 311f99b..042ae2e 100644
--- a/sbosrcarch.conf
+++ b/sbosrcarch.conf
@@ -4,10 +4,16 @@
# highlighting while editing this file, it's not a standalone perl
# script.
-# This file must be called either sbosrcarch.conf or .sbosrcarch.conf,
-# and located in current directory, $HOME, /etc/sbosrcarch, or /etc.
-# It's parsed by perl, so it needs to be valid perl code. If in doubt,
-# try 'perl -c sbosrcarch.conf' to check the syntax.
+# This file is usually called either sbosrcarch.conf or .sbosrcarch.conf,
+# and located in current directory, $HOME, /etc/sbosrcarch, or /etc. You
+# can also use 'sbosrcarch -c config-file'.
+
+# This file is parsed by perl, so it needs to be valid perl code. If in
+# doubt, try 'perl -c sbosrcarch.conf' to check the syntax.
+
+# Options documented as 'required' have no default values. sbosrcarch
+# will abort if any of them are missing from the config file. Other
+# options will default to the documented default values.
# Rest of file is config values and (hopefully) explanatory comments.
@@ -22,11 +28,11 @@ $sbogiturl = "git://slackbuilds.org/slackbuilds.git";
# this via 'git clone' if it doesn't already exist. Should stay on master
# branch. This script will take care of pulling from SBo git, so this
# dir shouldn't be your working repo that you use for any other purpose.
+# This can be located anywhere. It's slightly more efficient to locate
+# it on the same filesystem as $archivedir, but not critically so.
-# This must be located on the same filesystem (aka same mount point)
-# as $archivedir!
-
-$sbogitdir = "/home/urchlay/sbo-master/";
+#$sbogitdir = "/home/urchlay/sbo-master/";
+$sbogitdir = "/tmp/sbo-master/";
# Branch to use, normally master (only change for testing purposes).
#$sbogitbranch = "master"; # TODO: implement
@@ -45,7 +51,7 @@ $archivedir = "/home/urchlay/sboarchive";
# 'sbosrcarch create' after editing this config. If you decrease it,
# run 'sbosrcarch trim' to get rid of files that are now over the limit.
-$maxfilemegs = 1;
+$maxfilemegs = 0.1;
## $symlinks (boolean, 0 or 1, optional, default 0)
# 0 = use hard links for by-md5 tree, 1 = symlinks.
@@ -62,6 +68,74 @@ $maxfilemegs = 1;
$symlinks = 0;
+## %user_agent_overrides (hash, optional, keys = regexes, values = strings)
+# Most download sites work better if the HTTP user agent header is
+# set to a normal browser (see $wgetrc_contents above). But some sites
+# "helpfully" redirect to an HTML page if using a browser, so list them
+# here.
+
+%user_agent_overrides = (
+ qr/(?:sourceforge|sf)\.net/ => 'wget',
+);
+
+## @retry_head_urls (array, optional, elements = regexes)
+# A few "cloud" type services (notably github) fail to deliver a
+# Content-Length in the initial attempt to get the file size. The
+# next time the request is tried, the Content-Length is usually there.
+# So we retry these requests, for sites known to do this.
+@retry_head_urls = (
+ qr/github\.com/
+);
+
+## $use_curl (boolean, 0 or 1, optional, default 1)
+# 1 = use curl for HTTP and HTTPS downloads. 0 = use wget.
+# curl seems a bit more reliable than wget, but the wget code in
+# sbosrcarch is better-tested. This option doesn't affect FTP downloads;
+# they're always done with perl's Net::FTP module.
+# At some point in the future, the wget code is likely to go away (when
+# the script author gets familiar enough with curl).
+
+# One major difference here: when using curl, sbosrcarch never does an
+# actual HEAD request (instead, it uses "curl --head -X GET" to send a
+# GET request, but exit curl immediately after the headers are retrieved).
+# The wget code first sends a HEAD, then (if it fails) a GET... but there's
+# no way to tell wget to stop after the headers, so it downloads a chunk
+# of the file even if we decide it's too large.
+
+# If the above is TL;DR for you, just stick with the default.
+
+$use_curl = 1;
+
+##### curl options (only used if $use_curl is true)
+
+## $curl (string, optional, default "curl")
+# Path to curl binary. Absolute paths will be used as-is, otherwise $PATH
+# will be searched.
+
+$curl = "curl";
+
+## $curlopts (string, required if $use_curl is true, no default)
+# Options to pass to curl. Recommended set is:
+# -K/dev/null - makes curl ignore any ~/.curlrc
+# --insecure - allows downloading when SSL cert can't be validated
+# -L - follow HTTP redirects
+# -sS - silent operation, except actual error messages
+# --connect-timeout 60 - means what it says
+# Depending on whether curl is being used to determine file size or
+# actually download a file, other options will be added to these (but
+# nothing you should have to mess with).
+
+$curlopts = qw(
+ -K/dev/null
+ --insecure
+ --connect-timeout 60
+ --head
+ -L
+ -sS
+);
+
+##### wget options (only used if $use_curl is false)
+
## $wget (string, optional, default "wget")
# Path to wget binary. Absolute paths will be used as-is, otherwise $PATH
# will be searched.
@@ -119,12 +193,47 @@ check_certificate = off
content_disposition = off
EOF
-## %user_agent_overrides (hash, optional, keys = regexes, values = strings)
-# Most download sites work better if the HTTP user agent header is
-# set to a normal browser (see $wgetrc_contents above). But some sites
-# "helpfully" redirect to an HTML page if using a browser, so list them
-# here.
+## whitelist (optional, array of strings, default is empty)
-%user_agent_overrides = (
- qr/(?:sourceforge|sf)\.net/ => 'wget',
+# The whitelist is a list of categories or category/prgnam that you
+# want to always mirror, regardless of file size limits. If you're a
+# SBo maintainer, you might want to list your own builds (and their
+# dependencies) here.
+
+# Example: if you maintain the system/foo and system/bar builds at SBo:
+# @whitelist = qw(
+# system/foo
+# system/bar
+# );
+
+@whitelist = qw(
+);
+
+## blacklist (optional, array of strings, default is empty)
+
+# The blacklist is a list of categories or category/prgnam that you want
+# to NEVER mirror.
+
+# Example: if you think games are frivolous, you can do this:
+# @blacklist = qw(
+# games
+# );
+
+# This config file ships with development/jdk in @blacklist because
+# it's impossible to download the jdk source anyway (you need cookies
+# and javascript, and have to agree to the license terms interactively).
+# Removing it will just result in sbosrcarch downloading an HTML page
+# and deleting it because the md5sum doesn't match the actual source.
+@blacklist = qw(
+ development/jdk
);
+
+# whitelist and blacklist are only applied to 'create' and 'update' modes.
+# The other modes (add, rm, purge, trim) don't use them.
+
+# In create and update, for each build, the whitelist and blacklist are
+# both checked. If a category is listed in one list, but a build inside
+# the category is listed in the other, the build is more specific than
+# the category so it "wins". Listing the same build or category in both
+# lists is the same as not listing it in either (except that a warning
+# will be printed).