sbosrcarch.conf


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264

#!/usr/bin/perl

## Config file for sbosrcarch. The #! line above is just for syntax
# highlighting while editing this file, it's not a standalone perl
# script.

# This file is usually called either sbosrcarch.conf or .sbosrcarch.conf,
# and located in current directory, $HOME, /etc/sbosrcarch, or /etc. You
# can also use 'sbosrcarch -c config-file'.

# This file is parsed by perl, so it needs to be valid perl code. If in
# doubt, try 'perl -c sbosrcarch.conf' to check the syntax.

# Options documented as 'required' have no default values. sbosrcarch
# will abort if any of them are missing from the config file. Other
# options will default to the documented default values.

# Rest of file is config values and (hopefully) explanatory comments.

## $sbogiturl (string, required)
# slackbuilds.org's master git URL (used with 'git clone').
# Unlikely that this will ever need to be changed.
$sbogiturl = "git://slackbuilds.org/slackbuilds.git";

## $sbogitdir (string, filesystem path, required)

# Location of local copy of SBo git clone. 'sbosrcarch create' will create
# this via 'git clone' if it doesn't already exist. Should stay on master
# branch. This script will take care of pulling from SBo git, so this
# dir shouldn't be your working repo that you use for any other purpose.
# This can be located anywhere. It's slightly more efficient to locate
# it on the same filesystem as $archivedir, but not critically so.

#$sbogitdir = "/home/urchlay/sbo-master/";
$sbogitdir = "/tmp/sbo-master/";

# Branch to use, normally master (only change for testing purposes).
# Not implemented yet, so leave this commented out.
#$sbogitbranch = "master"; # TODO: implement

## $archivedir (string, filesystem path, required)
# Location of archive (which you will serve by e.g. apache).
# This must be located on the same filesystem as $sbogitdir unless
# $symlinks is set to 1.
# NOTE(review): the $sbogitdir notes say that directory "can be located
# anywhere"; confirm which of the two statements is current.

$archivedir = "/home/urchlay/sboarchive";

## $maxfilemegs (positive real number, optional, default 10)
# Max file size, in megabytes (real ones: 2**20 bytes each). Doesn't have
# to be an integer. Set to 0 for "no limit". Files larger than this
# (according to HTTP HEAD or FTP SIZE) won't be downloaded. If you increase
# this, re-run 'sbosrcarch create' after editing this config. If you
# decrease it, run 'sbosrcarch trim' to get rid of files that are now over
# the limit.

#$maxfilemegs = 0.1;
$maxfilemegs = 0;

## $symlinks (boolean, 0 or 1, optional, default 0)
# 0 = use hard links for by-md5 tree, 1 = symlinks.

# Which should you use? Well, if other people are going to rsync your
# repo, hardlinks are more expensive (see the -a and -H options in the
# rsync man page). If disk space is at a premium, symlinks eat a tiny
# bit more space (but I mean *tiny*)... and you'll have to make sure
# your web server follows symlinks if you use them.

# If you change this for an existing archive, run 'sbosrcarch purge --rebuild'
# to re-create the by-md5 tree with the new link type, otherwise you'll
# end up with a mix of hard and soft links (no harm done, but it's ugly).

$symlinks = 0;

## %user_agent_overrides (hash, optional, keys = regexes, values = strings)
# Download sites generally behave best when the HTTP user agent header
# looks like an ordinary browser (see user_agent in $wgetrc_contents,
# further down in this file). A few sites, though, "helpfully" redirect
# a browser to an HTML page, so list those sites here, each regex paired
# with the user agent string to use for it.

%user_agent_overrides = (
    qr{(?:sourceforge|sf)\.net} => 'wget',
    qr{www\.dropbox\.com}       => 'Wget/1.14 (linux-gnu)',
);

## @retry_head_urls (array, optional, elements = regexes)
# Some "cloud" style hosts (github in particular) leave Content-Length
# out of the response to the first attempt at getting the file size, but
# usually include it when the same request is made again. Requests to
# the sites listed here, which are known to behave this way, get retried.

@retry_head_urls = (
    qr{github\.com},
);

## $use_curl (boolean, 0 or 1, optional, default 1)
# 1 = use curl for HTTP and HTTPS downloads. 0 = use wget.
# curl seems a bit more reliable than wget, but the wget code in
# sbosrcarch is better-tested. This option doesn't affect FTP downloads;
# they're always done with perl's Net::FTP module.
# At some point in the future, the wget code is likely to go away (when
# the script author gets familiar enough with curl).

# One major difference here: when using curl, sbosrcarch never does an
# actual HEAD request (instead, it uses "curl --head -X GET" to send a
# GET request, but exits curl immediately after the headers are retrieved).
# The wget code first sends a HEAD, then (if it fails) a GET... but there's
# no way to tell wget to stop after the headers, so it downloads a chunk
# of the file even if we decide it's too large.

# If the above is TL;DR for you, just stick with the default.

$use_curl = 1;

##### curl options (only used if $use_curl is true)

## $curl (string, optional, default "curl")
# Path to curl binary. Absolute paths will be used as-is, otherwise $PATH
# will be searched.

$curl = "curl";

## $curlopts (string, required if $use_curl is true, no default)
# Options to pass to curl. Recommended set is:
#  -K/dev/null  - makes curl ignore any ~/.curlrc
#  --insecure   - allows downloading when SSL cert can't be validated
#  -L           - follow HTTP redirects
#  -sS          - silent operation, except actual error messages
#  --connect-timeout 60    - means what it says
# Depending on whether curl is being used to determine file size or
# actually download a file, other options will be added to these (but
# nothing you should have to mess with).

$curlopts = "-K/dev/null --insecure -L -sS --connect-timeout 60";

##### wget options (only used if $use_curl is false)

## $wget (string, optional, default "wget")
# Path to wget binary. Absolute paths will be used as-is, otherwise $PATH
# will be searched.
$wget = "wget";

## $wgetargs (string, optional, default "")
# Extra arguments to pass to wget. We're already creating a config file
# and using it in place of .wgetrc and /etc/wgetrc, so you don't need to
# list --config here.

$wgetargs = "";

# If your wget is older than version 1.14 or so, sbosrcarch will complain
# that it doesn't support the --config option. In that case, the
# $wgetrc_contents below won't be used. You can either copy $wgetrc_contents
# to ~/.wgetrc, or use $wgetargs to set the config options on the command
# line. Something like this:

# $wgetargs =
#  "--timeout=30 ".
#  "--user-agent='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)' ".
#  "--no-check-certificate ".
#  "-e robots=off ".
#  "--no-content-disposition";

# There's not a --no-robots option, but wget's -e (--execute) option runs
# a wgetrc-style command, so "-e robots=off" as shown above covers it.
# Upgrading wget is still a better solution, and you can compile it with
# e.g. --prefix=/home/you/wget.new, and set
# $wget = "/home/you/wget.new/bin/wget" above.

## $wgetrc_contents (string, optional, see "man wget" and/or the comments in
# /etc/wgetrc for more information).

# We don't trust the system-wide or user wgetrc, so we provide our own.

# The check_certificate = off might be controversial. My take on it is
# that it's better to download the file even if the server has a crappy
# self-signed certificate, or one from a brand-new CA that wget doesn't
# know about yet. These are just publicly available static files,
# they'd just as well be served with plain HTTP. Feel free to change it
# if you disagree.

# For user_agent, the value below claims to be an ancient version of
# Internet Explorer (MSIE 6.0). Probably no need to change it, but see
# %user_agent_overrides earlier in this file.

# content_disposition needs to stay off. Don't change it. If you do, don't
# complain when things break.

# timeout = 30 is already included below; raise or lower it to taste.

# The here-document is single-quoted ('EOF') on purpose: perl copies the
# text through verbatim, so a literal $ or @ added later (a proxy URL with
# user@host, say) is not treated as a perl variable.

$wgetrc_contents = <<'EOF';
timeout = 30
robots = off
user_agent = Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
check_certificate = off
content_disposition = off
EOF

## @whitelist (optional, array of strings, default is empty)

# The whitelist is a list of categories or category/prgnam that you
# want to always mirror, regardless of file size limits. If you're a
# SBo maintainer, you might want to list your own builds (and their
# dependencies) here.

# Example: if you maintain the system/foo and system/bar builds at SBo:
# @whitelist = qw(
#   system/foo
#   system/bar
# );

@whitelist = qw(
);

## @blacklist (optional, array of strings, default is empty)

# The blacklist is a list of categories or category/prgnam that you want
# to NEVER mirror.

# Example: if you think games are frivolous, you can do this:
# @blacklist = qw(
#   games
# );

# This config file ships with development/jdk in @blacklist because
# it's impossible to download the jdk source anyway (you need cookies
# and javascript, and have to agree to the license terms interactively).
# Removing it will just result in sbosrcarch downloading an HTML page
# and deleting it because the md5sum doesn't match the actual source.

@blacklist = qw(
		development/jdk
);

# For the whitelist and blacklist, place one category/prgnam or category
# per line, between the 'qw(' and ');'. Don't use trailing slashes for
# categories (see examples).

# The whitelist and blacklist are only applied to 'create' and
# 'update' modes. The other modes (add, rm, purge, trim) don't use
# them... though check mode will report if blacklisted files are found
# (but won't rm them).

# In create and update, for each build, the whitelist and blacklist are
# both checked. If a category is listed in one list, but a build inside
# the category is listed in the other, the build is more specific than
# the category so it "wins". Listing the same build or category in both
# lists is the same as not listing it in either (except that a warning
# will be printed).

# full category list, for easy copy/pasting into black/whitelist
#academic
#accessibility
#audio
#business
#desktop
#development
#games
#gis
#graphics
#ham
#haskell
#libraries
#misc
#multimedia
#network
#office
#perl
#python
#ruby
#system