diff options
author | B. Watson <urchlay@slackware.uk> | 2024-08-02 22:14:27 -0400 |
---|---|---|
committer | B. Watson <urchlay@slackware.uk> | 2024-08-02 22:14:27 -0400 |
commit | 481fa07d1a740e22fa65bf7d3cd181d6f5ab091e (patch) | |
tree | 205fb9088f0e7f04c4a037ee58dd1bb8415ef585 /sbolint | |
parent | adce112d7e2f5d79f07f9ac9f9abaeee19778d07 (diff) | |
download | sbo-maintainer-tools-481fa07d1a740e22fa65bf7d3cd181d6f5ab091e.tar.gz |
sbolint github URL checking, WIP.
Diffstat (limited to 'sbolint')
-rwxr-xr-x | sbolint | 119 |
1 files changed, 55 insertions, 64 deletions
@@ -1199,41 +1199,21 @@ sub curl_head_request { return close($pipe); } -# WIP, maybe no longer needed -## sub check_github_url { -## my $file = shift; -## my $url = shift; -## return unless $url =~ m{(https?:)//github\.com}; -## -## if($1 eq "http:") { -## log_warning("$file: github URL $url should be https"); -## } -## -## (my $expect_filename = $url) =~ s,.*/,,; -## my(undef, undef, undef, $user, $prog, $archive, $ver, $filename) = split /\//, $url; -## warn "user $user, prog $prog, archive $archive, ver $ver, filename $filename, expect_filename $expect_filename\n"; -## -## # assume these are correct, for now -## return if $user eq 'downloads'; -## return if $archive eq 'releases'; -## -## # TODO: work out what to do about /raw/ -## return if $archive eq 'raw'; -## -## if($archive ne 'archive') { -## log_warning("$file: unknown github URL type: $url"); -## return; -## } -## -## # OK, good URLs look like this: -## # https://github.com/jeetsukumaran/DendroPy/archive/v4.4.0/DendroPy-4.4.0.tar.gz -## # ...and bad ones look like this: -## # https://github.com/haiwen/seafile-client/archive/v4.4.2.tar.gz -## # Corrected version of the bad one would be: -## # https://github.com/haiwen/seafile-client/archive/v4.4.2/seafile-client-4.4.2.tar.gz -## # Notice the "v" isn't part of the version number. It's not always there, -## # and sometimes it's a different letter (r, or g, or capital V, etc). -## } +# github is a mess... +# OK, good URLs look like this: +# https://github.com/jeetsukumaran/DendroPy/archive/v4.4.0/DendroPy-4.4.0.tar.gz +# ...and bad ones look like this: +# https://github.com/haiwen/seafile-client/archive/v4.4.2.tar.gz +# Corrected version of the bad one would be: +# https://github.com/haiwen/seafile-client/archive/v4.4.2/seafile-client-4.4.2.tar.gz +# Notice the "v" isn't part of the version number. It's not always there. +# If there's a "v" and it's immediately followed by a number, it's not part of +# the version number. If it's followed by something other than a number, e.g. +# ver-1.0 or v.1.0, it *is* part of the version number. +# Since git allows / characters in tag names, we sometimes get a URL like: +# https://github.com/jeremysalwen/lv2file/archive/upstream/0.95/lv2file-upstream-0.95.tar.gz +# ...which is perfectly valid. However, this is invalid: +# https://github.com/jeremysalwen/lv2file/archive/upstream/0.95.tar.gz sub check_github_url { my $file = shift; @@ -1241,55 +1221,66 @@ sub check_github_url { my $old_url = $url; my $new_url; my $ext; + my $tag; + my $ver; + my $dir; # do not police releases/ or raw/ URLs, only archive/ - return unless $url =~ m{github\.com/.*archive/}; + return unless $url =~ m{github\.com/[^/]*/[^/]*/archive/}; if($url =~ s,refs/tags/,,) { log_error "$file: github URLs should not have refs/tags/"; } - #https: // site/ .../ .../ archive/ - (my $proto, undef, undef, $user, $proj, undef, $tag, $filename, $extra) = split /\//, $url; + #https: // site/ .../ .../ archive/ ...everything else. + (my $proto, undef, undef, $user, $proj, undef, @parts) = split /\//, $url; log_error "$file: github URLs must be https://" unless $proto eq 'https:'; - if(!defined $filename) { - log_error "$file: github URL is non-canonical (not enough components)"; - $tag =~ s,(\.tar\.gz|\.zip)$,,; - $ext = $1; - } + my $filename = pop @parts; + $filename =~ m,(\.tar\.gz|\.zip)$,; + $ext = $1; - # TODO: this is not an error, because github projects are allowed to have / - # in their tag names. So we get this *valid* URL: - # https://github.com/zfsonlinux/zfs-auto-snapshot/archive/upstream/1.2.4/zfs-auto-snapshot-upstream-1.2.4.tar.gz - if(defined $extra) { - #log_error "$file: github URL is non-canonical (too many components)"; - log_note "$file: github URL has extra components, don't (yet) know how to check it."; - return; + if(!defined $ext) { + log_warning("$file: github URL should end in .tar.gz or .zip"); + $ext = ""; } - # TODO: commit hashes have to be complete in the filename, and may be - # truncated in the dir name after arvhive/ - - my $ver = $tag; - $ver =~ s,^v(\d),$1,; - - if(defined $filename) { - $filename =~ /(\.tar\.gz|\.zip)$/; - $ext = $1; + if(@parts == 0) { + # filename loox like: tag.tar.gz + ($tag = $filename) =~ s,(\.tar\.gz|\.zip)$,,; + log_error "$file: github URL is non-canonical (not enough components)"; + $dir = $tag; + } elsif(@parts == 1) { + $tag = $dir = $parts[0]; + if($tag =~ /^[0-9a-f]{6,}$/ && $tag !~ /^20\d{6,}/) { + # commit hash. the /^20\d{6,}/ is to exclude ISO dates like 20240402 + if(length($tag) < 40) { + # shortened, maybe the full hash is in the filename? + if($filename =~ /([0-9a-f]{40})\./) { + $tag = $1; # leave $dir alone! it's allowed to be shortened. + } else { + # if not, we give up. + log_error("$file: github commit URL needs full 40-digit commit hash in filename"); + return; + } + } + } + } else { + $tag = join("-", @parts); + $dir = join("/", @parts); } - if(!defined $ext) { - log_warning "$file: github URL not .tar.gz or .zip"; - $ext = ""; - } + ($ver = $tag) =~ s,^v(\d),$1,i; + + # TODO: commit hashes have to be complete in the filename, and may be + # truncated in the dir name after archive/ if(defined $filename && $filename ne "$proj-$ver$ext") { log_error "$file: github URL has wrong filename $filename"; } - $new_url = "https://github.com/$user/$proj/archive/$tag/$proj-$ver$ext"; + $new_url = "https://github.com/$user/$proj/archive/$dir/$proj-$ver$ext"; if($old_url ne $new_url) { log_warning("$file: canonical github URL should be: $new_url"); |