#!/usr/bin/perl

## Config file for sbosrcarch. The #! line above is just for syntax
# highlighting while editing this file, it's not a standalone perl
# script.

# This file is usually called either sbosrcarch.conf or .sbosrcarch.conf,
# and located in the current directory, $HOME, /etc/sbosrcarch, or /etc.
# You can also use 'sbosrcarch -c config-file'.

# This file is parsed by perl, so it needs to be valid perl code. If in
# doubt, try 'perl -c sbosrcarch.conf' to check the syntax.

# Options documented as 'required' have no default values. sbosrcarch
# will abort if any of them are missing from the config file. Other
# options will default to the documented default values.

# Rest of file is config values and (hopefully) explanatory comments.

## $sbogiturl (string, required)
# slackbuilds.org's master git URL (used with 'git clone').
# Unlikely that this will ever need to be changed.
$sbogiturl = "git://slackbuilds.org/slackbuilds.git";

## $sbogitdir (string, filesystem path, required)
# Location of local copy of SBo git clone. 'sbosrcarch create' will create
# this via 'git clone' if it doesn't already exist. Should stay on the
# master branch. This script will take care of pulling from SBo git, so
# this dir shouldn't be your working repo that you use for any other
# purpose. This can be located anywhere. It's slightly more efficient to
# locate it on the same filesystem as $archivedir, but not critically so.
$sbogitdir = "/home/urchlay/sbo-master/";
#$sbogitdir = "/tmp/sbo-master/";

# Branch to use, normally master (only change for testing purposes).
#$sbogitbranch = "master"; # TODO: implement

## $archivedir (string, filesystem path, required)
# Location of the archive (which you will serve by e.g. apache).
# This must be located on the same filesystem as $sbogitdir unless
# $symlinks is set to 1.
$archivedir = "/home/urchlay/sboarchive";

## $maxfilemegs (positive real number, optional, default 10)
# Max file size, in megabytes (real ones, 2**20 bytes). Doesn't have to
# be an integer. Set to 0 for "no limit". Files larger than this
# (according to HTTP HEAD or FTP SIZE) won't be downloaded. If you
# increase this, re-run 'sbosrcarch create' after editing this config.
# If you decrease it, run 'sbosrcarch trim' to get rid of files that
# are now over the limit.
#$maxfilemegs = 0.1;
$maxfilemegs = 1;

## $symlinks (boolean, 0 or 1, optional, default 0)
# 0 = use hard links for the by-md5 tree, 1 = use symlinks.
# Which should you use? Well, if other people are going to rsync your
# repo, hardlinks are more expensive (see the -a and -H options in the
# rsync man page). If disk space is at a premium, symlinks eat a tiny
# bit more space (but I mean *tiny*)... and you'll have to make sure
# your web server follows symlinks if you use them.
# If you change this for an existing archive, run 'sbosrcarch purge
# --rebuild' to re-create the by-md5 tree with the new link type,
# otherwise you'll end up with a mix of hard and soft links (no harm
# done, but it's ugly).
$symlinks = 0;

## %user_agent_overrides (hash, optional, keys = regexes, values = strings)
# Most download sites work better if the HTTP user agent header is
# set to a normal browser (see $wgetrc_contents below). But some sites
# "helpfully" redirect to an HTML page if using a browser, so list them
# here.
%user_agent_overrides = (
    qr/(?:sourceforge|sf)\.net/ => 'wget',
    qr/www\.dropbox\.com/       => 'Wget/1.14 (linux-gnu)',
);
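# For example, if some hypothetical mirror at example-mirror.net (not a
# real entry, just an illustration) only serves the raw file to
# wget-like clients, you'd extend the hash like so:
#%user_agent_overrides = (
#    qr/(?:sourceforge|sf)\.net/ => 'wget',
#    qr/www\.dropbox\.com/       => 'Wget/1.14 (linux-gnu)',
#    qr/example-mirror\.net/     => 'Wget/1.14 (linux-gnu)',
#);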
## @retry_head_urls (array, optional, elements = regexes)
# A few "cloud" type services (notably github) fail to deliver a
# Content-Length in the initial attempt to get the file size. The
# next time the request is tried, the Content-Length is usually there.
# So we retry these requests, for sites known to do this.
@retry_head_urls = ( qr/github\.com/ );

## $use_curl (boolean, 0 or 1, optional, default 1)
# 1 = use curl for HTTP and HTTPS downloads. 0 = use wget.
# curl seems a bit more reliable than wget, but the wget code in
# sbosrcarch is better-tested. This option doesn't affect FTP downloads;
# they're always done with perl's Net::FTP module.
# At some point in the future, the wget code is likely to go away (when
# the script author gets familiar enough with curl).
# One major difference here: when using curl, sbosrcarch never does an
# actual HEAD request (instead, it uses "curl --head -X GET" to send a
# GET request, but exits curl immediately after the headers are
# retrieved). The wget code first sends a HEAD, then (if it fails) a
# GET... but there's no way to tell wget to stop after the headers, so
# it downloads a chunk of the file even if we decide it's too large.
# If the above is TL;DR for you, just stick with the default.
$use_curl = 1;

##### curl options (only used if $use_curl is true)

## $curl (string, optional, default "curl")
# Path to the curl binary. Absolute paths will be used as-is, otherwise
# $PATH will be searched.
$curl = "curl";

## $curlopts (string, required if $use_curl is true, no default)
# Options to pass to curl. Recommended set is:
#  -K/dev/null          - makes curl ignore any ~/.curlrc
#  --insecure           - allows downloading when SSL cert can't be validated
#  -L                   - follow HTTP redirects
#  -sS                  - silent operation, except actual error messages
#  --connect-timeout 60 - means what it says
# Depending on whether curl is being used to determine file size or
# actually download a file, other options will be added to these (but
# nothing you should have to mess with).
$curlopts = "-K/dev/null --insecure -L -sS --connect-timeout 60";
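# To sanity-check $curl and $curlopts by hand, you can run the same
# kind of size-check command sbosrcarch builds from them (the
# "--head -X GET" part is what the $use_curl comments above describe;
# the URL is just a placeholder):
#   curl -K/dev/null --insecure -L -sS --connect-timeout 60 \
#        --head -X GET http://example.com/some-source.tar.gz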
##### wget options (only used if $use_curl is false)

## $wget (string, optional, default "wget")
# Path to the wget binary. Absolute paths will be used as-is, otherwise
# $PATH will be searched.
$wget = "wget";

## $wgetargs (string, optional, default "")
# Extra arguments to pass to wget. We're already creating a config file
# and using it in place of .wgetrc and /etc/wgetrc, so you don't need
# to list --config here.
$wgetargs = "";

# If your wget is older than version 1.14 or so, sbosrcarch will complain
# that it doesn't support the --config option. In that case, the
# $wgetrc_contents below won't be used. You can either copy
# $wgetrc_contents to ~/.wgetrc, or use $wgetargs to set the config
# options on the command line. Something like this:
#$wgetargs =
#    "--timeout=30 ".
#    "--user-agent='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' ".
#    "--no-check-certificate ".
#    "--no-content-disposition";
# Unfortunately there's no --no-robots option. Upgrading wget is a
# better solution; you can compile it with e.g.
# --prefix=/home/you/wget.new, and set
# $wget = "/home/you/wget.new/bin/wget" above.

## $wgetrc_contents (string, optional, see "man wget" and/or the
# comments in /etc/wgetrc for more information)
# We don't trust the system-wide or user wgetrc, so we provide our own.
# The check_certificate = off might be controversial. My take on it is
# that it's better to download the file even if the server has a crappy
# self-signed certificate, or one from a brand-new CA that wget doesn't
# know about yet. These are just publicly-available static files; they
# might just as well be served with plain HTTP. Feel free to change it
# if you disagree.
# For user_agent, I picked an ancient version of Firefox. Probably no
# need to change it, but see %user_agent_overrides above.
# content_disposition needs to stay off. Don't change it. If you do,
# don't complain when things break.
# Might want to add this here:
#timeout = 30
# The settings below are the essentials described in the comments above
# (the user_agent string is just one example of an old Firefox UA; the
# exact version doesn't matter much).
$wgetrc_contents = <<EOF;
robots = off
check_certificate = off
content_disposition = off
user_agent = Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0
EOF
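# For the curious, a rough sketch of how $wgetrc_contents gets used
# (this illustrates the idea, it's not a copy of the script's actual
# code): the contents are written to a temp file which is handed to
# wget via --config, so the system-wide and user wgetrc never apply.
#
#   use File::Temp qw(tempfile);
#   my ($fh, $tmprc) = tempfile(UNLINK => 1);
#   print $fh $wgetrc_contents;
#   close($fh);
#   system("$wget $wgetargs --config=$tmprc '$url'");  # $url: file to fetch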