about | summary | refs | log | tree | commit | diff
path: root/sbolint
diff options
context:
space:
mode:
author: B. Watson <yalhcru@gmail.com> 2020-06-10 19:42:04 -0400
committer: B. Watson <yalhcru@gmail.com> 2020-06-10 19:42:04 -0400
commit: c3238e690a1f3254d282623e047f0124206de9b9 (patch)
tree: aad4069d37a258d2d641fef67abd7a3317fde111 /sbolint
parent: fdab64c728d172419b3cc3f8a566a3fd134ab366 (diff)
download: sbostuff-c3238e690a1f3254d282623e047f0124206de9b9.tar.gz
cleanup, wip for eventual release
Diffstat (limited to 'sbolint')
-rwxr-xr-x sbolint 83
1 files changed, 72 insertions, 11 deletions
diff --git a/sbolint b/sbolint
index 590f704..0fb6e3d 100755
--- a/sbolint
+++ b/sbolint
@@ -2,6 +2,9 @@
# ChangeLog:
+# 0.3 20200420 bkw:
+# - Check github URLs for validity.
+
# 0.2 20200103 bkw:
# - Use "git rev-parse" to decide if we're in a git repo, because
# "git status" traverses the whole repo looking for untracked files.
@@ -19,7 +22,7 @@
# 0.1 20141114 bkw, Initial release.
-$VERSION="0.2";
+$VERSION="0.3";
# generate man page with:
# pod2man --stderr -r0.2 -s1 -c"SBo Maintainer Tools" sbolint > sbolint.1
@@ -300,6 +303,8 @@ $tempdir = 0;
our %info = (); # has to be global, check_info sets it, check_script needs it
# main() {
+#check_github_url("testing", $_) for @ARGV;
+#exit 0;
while(@ARGV && ($ARGV[0] =~ /^-/)) {
my $opt = shift;
@@ -842,7 +847,7 @@ sub check_info {
# use a HEAD request for homepage, even if downloading other files
if($url_head || $url_download) {
- curl_head_request($info{HOMEPAGE}) || do {
+ curl_head_request($file, $info{HOMEPAGE}) || do {
log_warning("$file: HOMEPAGE URL broken?");
};
}
@@ -877,17 +882,19 @@ sub check_dl_and_md5 {
log_error("$file: we have " . @dlurls . " $dlkey URLs but " . @md5s . " $md5key" . " values");
}
- for(@dlurls) {
- if(!check_url($_)) {
- log_error("$file: $dlkey URL '$_' doesn't look like a valid URL (http, https, or ftp)");
+ for my $u (@dlurls) {
+ if(!check_url($u)) {
+ log_error("$file: $dlkey URL '$u' doesn't look like a valid URL (http, https, or ftp)");
+ next;
}
+ #check_github_url($file, $u);
+
if($url_head) {
- for(@dlurls) {
- curl_head_request($_) || do {
- log_warning("$file: $dlkey URL '$_' broken?");
- };
- }
+ curl_head_request($file, $u) || do {
+ warn '$u is '. $u;
+ log_warning("$file: $dlkey URL '$u' broken?");
+ };
} elsif($url_download) {
warn "$SELF: -d option not yet implemented\n";
}
@@ -912,9 +919,63 @@ sub check_url {
}
sub curl_head_request {
- return !system("curl --head --location --silent --fail $_[0] >/dev/null");
+ #return !system("curl --head --location --silent --fail $_[0] >/dev/null");
+ #warn $_[1];
+ my $file = $_[0];
+ my $client_filename = $_[1];
+ $client_filename =~ s,.*/,,;
+ my $curlcmd = "curl -m20 --head --location --silent --fail $_[1]";
+ open my $pipe, "$curlcmd|";
+ #warn "$curlcmd";
+ while(<$pipe>) {
+ chomp;
+ s/\r//;
+ if(/^content-disposition:\s+attachment;\s+filename=["']?(.*?)["']?$/i) {
+ #warn $1;
+ if(defined($client_filename) && ($client_filename ne $1)) {
+ log_warning("$file: download filename varies based on content disposition: '$1' vs. '$client_filename'");
+ }
+ }
+ }
+ return close($pipe);
}
+# WIP, maybe no longer needed
+## sub check_github_url {
+## my $file = shift;
+## my $url = shift;
+## return unless $url =~ m{(https?:)//github\.com};
+##
+## if($1 eq "http:") {
+## log_warning("$file: github URL $url should be https");
+## }
+##
+## (my $expect_filename = $url) =~ s,.*/,,;
+## my(undef, undef, undef, $user, $prog, $archive, $ver, $filename) = split /\//, $url;
+## warn "user $user, prog $prog, archive $archive, ver $ver, filename $filename, expect_filename $expect_filename\n";
+##
+## # assume these are correct, for now
+## return if $user eq 'downloads';
+## return if $archive eq 'releases';
+##
+## # TODO: work out what to do about /raw/
+## return if $archive eq 'raw';
+##
+## if($archive ne 'archive') {
+## log_warning("$file: unknown github URL type: $url");
+## return;
+## }
+##
+## # OK, good URLs look like this:
+## # https://github.com/jeetsukumaran/DendroPy/archive/v4.4.0/DendroPy-4.4.0.tar.gz
+## # ...and bad ones look like this:
+## # https://github.com/haiwen/seafile-client/archive/v4.4.2.tar.gz
+## # Corrected version of the bad one would be:
+## # https://github.com/haiwen/seafile-client/archive/v4.4.2/seafile-client-4.4.2.tar.gz
+## # Notice the "v" isn't part of the version number. It's not always there,
+## # and sometimes it's a different letter (r, or g, or capital V, etc).
+## }
+
# NOT going to police the script too much. Would end up rewriting most of
# the shell, in perl. Plus, it'd become a straitjacket. Here's what I'll
# implement: