From c3238e690a1f3254d282623e047f0124206de9b9 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Wed, 10 Jun 2020 19:42:04 -0400 Subject: cleanup, wip for eventual release --- sbolint | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 11 deletions(-) (limited to 'sbolint') diff --git a/sbolint b/sbolint index 590f704..0fb6e3d 100755 --- a/sbolint +++ b/sbolint @@ -2,6 +2,9 @@ # ChangeLog: +# 0.3 20200420 bkw: +# - Check github URLs for validity. + # 0.2 20200103 bkw: # - Use "git rev-parse" to decide if we're in a git repo, because # "git status" traverses the whole repo looking for untracked files. @@ -19,7 +22,7 @@ # 0.1 20141114 bkw, Initial release. -$VERSION="0.2"; +$VERSION="0.3"; # generate man page with: # pod2man --stderr -r0.2 -s1 -c"SBo Maintainer Tools" sbolint > sbolint.1 @@ -300,6 +303,8 @@ $tempdir = 0; our %info = (); # has to be global, check_info sets it, check_script needs it # main() { +#check_github_url("testing", $_) for @ARGV; +#exit 0; while(@ARGV && ($ARGV[0] =~ /^-/)) { my $opt = shift; @@ -842,7 +847,7 @@ sub check_info { # use a HEAD request for homepage, even if downloading other files if($url_head || $url_download) { - curl_head_request($info{HOMEPAGE}) || do { + curl_head_request($file, $info{HOMEPAGE}) || do { log_warning("$file: HOMEPAGE URL broken?"); }; } @@ -877,17 +882,19 @@ sub check_dl_and_md5 { log_error("$file: we have " . @dlurls . " $dlkey URLs but " . @md5s . " $md5key" . " values"); } - for(@dlurls) { - if(!check_url($_)) { - log_error("$file: $dlkey URL '$_' doesn't look like a valid URL (http, https, or ftp)"); + for my $u (@dlurls) { + if(!check_url($u)) { + log_error("$file: $dlkey URL '$u' doesn't look like a valid URL (http, https, or ftp)"); + next; } + #check_github_url($file, $u); + if($url_head) { - for(@dlurls) { - curl_head_request($_) || do { - log_warning("$file: $dlkey URL '$_' broken?"); - }; - } + curl_head_request($file, $u) || do { + warn '$u is '. $u; + log_warning("$file: $dlkey URL '$u' broken?"); + }; } elsif($url_download) { warn "$SELF: -d option not yet implemented\n"; } @@ -912,9 +919,63 @@ sub check_url { } sub curl_head_request { - return !system("curl --head --location --silent --fail $_[0] >/dev/null"); + #return !system("curl --head --location --silent --fail $_[0] >/dev/null"); + #warn $_[1]; + my $file = $_[0]; + my $client_filename = $_[1]; + $client_filename =~ s,.*/,,; + my $curlcmd = "curl -m20 --head --location --silent --fail $_[1]"; + open my $pipe, "$curlcmd|"; + #warn "$curlcmd"; + while(<$pipe>) { + chomp; + s/\r//; + if(/^content-disposition:\s+attachment;\s+filename=["']?(.*?)["']?$/i) { + #warn $1; + if(defined($client_filename) && ($client_filename ne $1)) { + log_warning("$file: download filename varies based on content disposition: '$1' vs. '$client_filename'"); + } + } + } + return close($pipe); } +# WIP, maybe no longer needed +## sub check_github_url { +## my $file = shift; +## my $url = shift; +## return unless $url =~ m{(https?:)//github\.com}; +## +## if($1 eq "http:") { +## log_warning("$file: github URL $url should be https"); +## } +## +## (my $expect_filename = $url) =~ s,.*/,,; +## my(undef, undef, undef, $user, $prog, $archive, $ver, $filename) = split /\//, $url; +## warn "user $user, prog $prog, archive $archive, ver $ver, filename $filename, expect_filename $expect_filename\n"; +## +## # assume these are correct, for now +## return if $user eq 'downloads'; +## return if $archive eq 'releases'; +## +## # TODO: work out what to do about /raw/ +## return if $archive eq 'raw'; +## +## if($archive ne 'archive') { +## log_warning("$file: unknown github URL type: $url"); +## return; +## } +## +## # OK, good URLs look like this: +## # https://github.com/jeetsukumaran/DendroPy/archive/v4.4.0/DendroPy-4.4.0.tar.gz +## # ...and bad ones look like this: +## # https://github.com/haiwen/seafile-client/archive/v4.4.2.tar.gz +## # Corrected version of the bad one would be: +## # https://github.com/haiwen/seafile-client/archive/v4.4.2/seafile-client-4.4.2.tar.gz +## # Notice the "v" isn't part of the version number. It's not always there, +## # and sometimes it's a different letter (r, or g, or capital V, etc). +## } + # NOT going to police the script too much. Would end up rewriting most of # the shell, in perl. Plus, it'd become a straitjacket. Here's what I'll # implement: -- cgit v1.2.3