#!/usr/bin/perl

## Config file for sbosrcarch. The #! line above is just for syntax
# highlighting while editing this file, it's not a standalone perl
# script.

# This file is usually called either sbosrcarch.conf or .sbosrcarch.conf,
# and located in the current directory, $HOME, /etc/sbosrcarch, or /etc.
# You can also use 'sbosrcarch -c config-file'.

# This file is parsed by perl, so it needs to be valid perl code. If in
# doubt, try 'perl -c sbosrcarch.conf' to check the syntax.

# Options documented as 'required' have no default values. sbosrcarch
# will abort if any of them are missing from the config file. Other
# options will default to the documented default values.

# Rest of file is config values and (hopefully) explanatory comments.

## $sbogiturl (string, required)
# slackbuilds.org's master git URL (used with 'git clone').
# Unlikely that this will ever need to be changed.
$sbogiturl = "git://slackbuilds.org/slackbuilds.git";

## $sbogitdir (string, filesystem path, required)
# Location of local copy of SBo git clone. 'sbosrcarch create' will create
# this via 'git clone' if it doesn't already exist. Should stay on the
# master branch. This script will take care of pulling from SBo git, so
# this dir shouldn't be your working repo that you use for any other
# purpose. This can be located anywhere. It's slightly more efficient to
# locate it on the same filesystem as $archivedir, but not critically so.
$sbogitdir = "/home/urchlay/sbo-master/";
#$sbogitdir = "/tmp/sbo-master/";

# Branch to use, normally master (only change for testing purposes).
#$sbogitbranch = "master"; # TODO: implement

## $archivedir (string, filesystem path, required)
# Location of the archive (which you will serve by e.g. apache).
# This must be located on the same filesystem as $sbogitdir unless
# $symlinks is set to 1.
$archivedir = "/home/urchlay/sboarchive";

## $maxfilemegs (positive real number, optional, default 10)
# Max file size, in megabytes (real ones, 2**20 bytes). Doesn't have to
# be an integer. Set to 0 for "no limit". Files larger than this
# (according to HTTP HEAD or FTP SIZE) won't be downloaded. If you
# increase this, re-run 'sbosrcarch create' after editing this config.
# If you decrease it, run 'sbosrcarch trim' to get rid of files that
# are now over the limit.
#$maxfilemegs = 0.1;
$maxfilemegs = 1;

## $symlinks (boolean, 0 or 1, optional, default 0)
# 0 = use hard links for the by-md5 tree, 1 = use symlinks.
# Which should you use? Well, if other people are going to rsync your
# repo, hardlinks are more expensive (see the -a and -H options in the
# rsync man page). If disk space is at a premium, symlinks eat a tiny
# bit more space (but I mean *tiny*)... and you'll have to make sure
# your web server follows symlinks if you use them.
# If you change this for an existing archive, run 'sbosrcarch purge
# --rebuild' to re-create the by-md5 tree with the new link type,
# otherwise you'll end up with a mix of hard and soft links (no harm
# done, but it's ugly).
$symlinks = 0;

## %user_agent_overrides (hash, optional, keys = regexes, values = strings)
# Most download sites work better if the HTTP user agent header is
# set to a normal browser (see $wgetrc_contents below). But some sites
# "helpfully" redirect to an HTML page if using a browser, so list them
# here.
%user_agent_overrides = (
    qr/(?:sourceforge|sf)\.net/ => 'wget',
    qr/www\.dropbox\.com/       => 'Wget/1.14 (linux-gnu)',
);
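# For example, if some hypothetical mirror at example-mirror.net (not a
# real entry, just an illustration) only serves the raw file to
# wget-like clients, you'd extend the hash like so:
#%user_agent_overrides = (
#    qr/(?:sourceforge|sf)\.net/ => 'wget',
#    qr/www\.dropbox\.com/       => 'Wget/1.14 (linux-gnu)',
#    qr/example-mirror\.net/     => 'Wget/1.14 (linux-gnu)',
#);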
## @retry_head_urls (array, optional, elements = regexes)
# A few "cloud" type services (notably github) fail to deliver a
# Content-Length in the initial attempt to get the file size. The
# next time the request is tried, the Content-Length is usually there.
# So we retry these requests, for sites known to do this.
@retry_head_urls = ( qr/github\.com/ );

## $use_curl (boolean, 0 or 1, optional, default 1)
# 1 = use curl for HTTP and HTTPS downloads. 0 = use wget.
# curl seems a bit more reliable than wget, but the wget code in
# sbosrcarch is better-tested. This option doesn't affect FTP downloads;
# they're always done with perl's Net::FTP module.
# At some point in the future, the wget code is likely to go away (when
# the script author gets familiar enough with curl).
# One major difference here: when using curl, sbosrcarch never does an
# actual HEAD request (instead, it uses "curl --head -X GET" to send a
# GET request, but exits curl immediately after the headers are
# retrieved). The wget code first sends a HEAD, then (if it fails) a
# GET... but there's no way to tell wget to stop after the headers, so
# it downloads a chunk of the file even if we decide it's too large.
# If the above is TL;DR for you, just stick with the default.
$use_curl = 1;

##### curl options (only used if $use_curl is true)

## $curl (string, optional, default "curl")
# Path to the curl binary. Absolute paths will be used as-is, otherwise
# $PATH will be searched.
$curl = "curl";

## $curlopts (string, required if $use_curl is true, no default)
# Options to pass to curl. Recommended set is:
#  -K/dev/null          - makes curl ignore any ~/.curlrc
#  --insecure           - allows downloading when SSL cert can't be validated
#  -L                   - follow HTTP redirects
#  -sS                  - silent operation, except actual error messages
#  --connect-timeout 60 - means what it says
# Depending on whether curl is being used to determine file size or
# actually download a file, other options will be added to these (but
# nothing you should have to mess with).
$curlopts = "-K/dev/null --insecure -L -sS --connect-timeout 60";
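# To sanity-check $curl and $curlopts by hand, you can run the same
# kind of size-check command sbosrcarch builds from them (the
# "--head -X GET" part is what the $use_curl comments above describe;
# the URL is just a placeholder):
#   curl -K/dev/null --insecure -L -sS --connect-timeout 60 \
#        --head -X GET http://example.com/some-source.tar.gz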
##### wget options (only used if $use_curl is false)

## $wget (string, optional, default "wget")
# Path to the wget binary. Absolute paths will be used as-is, otherwise
# $PATH will be searched.
$wget = "wget";

## $wgetargs (string, optional, default "")
# Extra arguments to pass to wget. We're already creating a config file
# and using it in place of .wgetrc and /etc/wgetrc, so you don't need
# to list --config here.
$wgetargs = "";

# If your wget is older than version 1.14 or so, sbosrcarch will complain
# that it doesn't support the --config option. In that case, the
# $wgetrc_contents below won't be used. You can either copy
# $wgetrc_contents to ~/.wgetrc, or use $wgetargs to set the config
# options on the command line. Something like this:
#$wgetargs =
#    "--timeout=30 ".
#    "--user-agent='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' ".
#    "--no-check-certificate ".
#    "--no-content-disposition";
# Unfortunately there's no --no-robots option. Upgrading wget is a
# better solution; you can compile it with e.g.
# --prefix=/home/you/wget.new, and set
# $wget = "/home/you/wget.new/bin/wget" above.

## $wgetrc_contents (string, optional, see "man wget" and/or the
# comments in /etc/wgetrc for more information)
# We don't trust the system-wide or user wgetrc, so we provide our own.
# The check_certificate = off might be controversial. My take on it is
# that it's better to download the file even if the server has a crappy
# self-signed certificate, or one from a brand-new CA that wget doesn't
# know about yet. These are just publicly-available static files; they
# might just as well be served with plain HTTP. Feel free to change it
# if you disagree.
# For user_agent, I picked an ancient version of Firefox. Probably no
# need to change it, but see %user_agent_overrides above.
# content_disposition needs to stay off. Don't change it. If you do,
# don't complain when things break.
# Might want to add this here:
#timeout = 30
# The settings below are the essentials described in the comments above
# (the user_agent string is just one example of an old Firefox UA; the
# exact version doesn't matter much).
$wgetrc_contents = <<EOF;
robots = off
check_certificate = off
content_disposition = off
user_agent = Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0
EOF
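# For the curious, a rough sketch of how $wgetrc_contents gets used
# (this illustrates the idea, it's not a copy of the script's actual
# code): the contents are written to a temp file which is handed to
# wget via --config, so the system-wide and user wgetrc never apply.
#
#   use File::Temp qw(tempfile);
#   my ($fh, $tmprc) = tempfile(UNLINK => 1);
#   print $fh $wgetrc_contents;
#   close($fh);
#   system("$wget $wgetargs --config=$tmprc '$url'");  # $url: file to fetch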