aboutsummaryrefslogtreecommitdiff
path: root/sbolint
diff options
context:
space:
mode:
authorB. Watson <urchlay@slackware.uk>2024-08-02 22:14:27 -0400
committerB. Watson <urchlay@slackware.uk>2024-08-02 22:14:27 -0400
commit481fa07d1a740e22fa65bf7d3cd181d6f5ab091e (patch)
tree205fb9088f0e7f04c4a037ee58dd1bb8415ef585 /sbolint
parentadce112d7e2f5d79f07f9ac9f9abaeee19778d07 (diff)
downloadsbo-maintainer-tools-481fa07d1a740e22fa65bf7d3cd181d6f5ab091e.tar.gz
sbolint github URL checking, WIP.
Diffstat (limited to 'sbolint')
-rwxr-xr-xsbolint119
1 files changed, 55 insertions, 64 deletions
diff --git a/sbolint b/sbolint
index 6a021e0..28b5ac9 100755
--- a/sbolint
+++ b/sbolint
@@ -1199,41 +1199,21 @@ sub curl_head_request {
return close($pipe);
}
-# WIP, maybe no longer needed
-## sub check_github_url {
-## my $file = shift;
-## my $url = shift;
-## return unless $url =~ m{(https?:)//github\.com};
-##
-## if($1 eq "http:") {
-## log_warning("$file: github URL $url should be https");
-## }
-##
-## (my $expect_filename = $url) =~ s,.*/,,;
-## my(undef, undef, undef, $user, $prog, $archive, $ver, $filename) = split /\//, $url;
-## warn "user $user, prog $prog, archive $archive, ver $ver, filename $filename, expect_filename $expect_filename\n";
-##
-## # assume these are correct, for now
-## return if $user eq 'downloads';
-## return if $archive eq 'releases';
-##
-## # TODO: work out what to do about /raw/
-## return if $archive eq 'raw';
-##
-## if($archive ne 'archive') {
-## log_warning("$file: unknown github URL type: $url");
-## return;
-## }
-##
-## # OK, good URLs look like this:
-## # https://github.com/jeetsukumaran/DendroPy/archive/v4.4.0/DendroPy-4.4.0.tar.gz
-## # ...and bad ones look like this:
-## # https://github.com/haiwen/seafile-client/archive/v4.4.2.tar.gz
-## # Corrected version of the bad one would be:
-## # https://github.com/haiwen/seafile-client/archive/v4.4.2/seafile-client-4.4.2.tar.gz
-## # Notice the "v" isn't part of the version number. It's not always there,
-## # and sometimes it's a different letter (r, or g, or capital V, etc).
-## }
+# github is a mess...
+# OK, good URLs look like this:
+# https://github.com/jeetsukumaran/DendroPy/archive/v4.4.0/DendroPy-4.4.0.tar.gz
+# ...and bad ones look like this:
+# https://github.com/haiwen/seafile-client/archive/v4.4.2.tar.gz
+# Corrected version of the bad one would be:
+# https://github.com/haiwen/seafile-client/archive/v4.4.2/seafile-client-4.4.2.tar.gz
+# Notice the "v" isn't part of the version number. It's not always there.
+# If there's a "v" and it's immediately followed by a number, it's not part of
+# the version number. If it's followed by something other than a number, e.g.
+# ver-1.0 or v.1.0, it *is* part of the version number.
+# Since git allows / characters in tag names, we sometimes get a URL like:
+# https://github.com/jeremysalwen/lv2file/archive/upstream/0.95/lv2file-upstream-0.95.tar.gz
+# ...which is perfectly valid. However, this is invalid:
+# https://github.com/jeremysalwen/lv2file/archive/upstream/0.95.tar.gz
sub check_github_url {
my $file = shift;
@@ -1241,55 +1221,66 @@ sub check_github_url {
my $old_url = $url;
my $new_url;
my $ext;
+ my $tag;
+ my $ver;
+ my $dir;
# do not police releases/ or raw/ URLs, only archive/
- return unless $url =~ m{github\.com/.*archive/};
+ return unless $url =~ m{github\.com/[^/]*/[^/]*/archive/};
if($url =~ s,refs/tags/,,) {
log_error "$file: github URLs should not have refs/tags/";
}
- #https: // site/ .../ .../ archive/
- (my $proto, undef, undef, $user, $proj, undef, $tag, $filename, $extra) = split /\//, $url;
+ #https: // site/ .../ .../ archive/ ...everything else.
+ (my $proto, undef, undef, $user, $proj, undef, @parts) = split /\//, $url;
log_error "$file: github URLs must be https://" unless $proto eq 'https:';
- if(!defined $filename) {
- log_error "$file: github URL is non-canonical (not enough components)";
- $tag =~ s,(\.tar\.gz|\.zip)$,,;
- $ext = $1;
- }
+ my $filename = pop @parts;
+ $filename =~ m,(\.tar\.gz|\.zip)$,;
+ $ext = $1;
- # TODO: this is not an error, because github projects are allowed to have /
- # in their tag names. So we get this *valid* URL:
- # https://github.com/zfsonlinux/zfs-auto-snapshot/archive/upstream/1.2.4/zfs-auto-snapshot-upstream-1.2.4.tar.gz
- if(defined $extra) {
- #log_error "$file: github URL is non-canonical (too many components)";
- log_note "$file: github URL has extra components, don't (yet) know how to check it.";
- return;
+ if(!defined $ext) {
+ log_warning("$file: github URL should end in .tar.gz or .zip");
+ $ext = "";
}
- # TODO: commit hashes have to be complete in the filename, and may be
- # truncated in the dir name after arvhive/
-
- my $ver = $tag;
- $ver =~ s,^v(\d),$1,;
-
- if(defined $filename) {
- $filename =~ /(\.tar\.gz|\.zip)$/;
- $ext = $1;
+ if(@parts == 0) {
+ # filename looks like: tag.tar.gz
+ ($tag = $filename) =~ s,(\.tar\.gz|\.zip)$,,;
+ log_error "$file: github URL is non-canonical (not enough components)";
+ $dir = $tag;
+ } elsif(@parts == 1) {
+ $tag = $dir = $parts[0];
+ if($tag =~ /^[0-9a-f]{6,}$/ && $tag !~ /^20\d{6,}/) {
+ # commit hash. the /^20\d{6,}/ is to exclude ISO dates like 20240402
+ if(length($tag) < 40) {
+ # shortened, maybe the full hash is in the filename?
+ if($filename =~ /([0-9a-f]{40})\./) {
+ $tag = $1; # leave $dir alone! it's allowed to be shortened.
+ } else {
+ # if not, we give up.
+ log_error("$file: github commit URL needs full 40-digit commit hash in filename");
+ return;
+ }
+ }
+ }
+ } else {
+ $tag = join("-", @parts);
+ $dir = join("/", @parts);
}
- if(!defined $ext) {
- log_warning "$file: github URL not .tar.gz or .zip";
- $ext = "";
- }
+ ($ver = $tag) =~ s,^v(\d),$1,i;
+
+ # TODO: commit hashes have to be complete in the filename, and may be
+ # truncated in the dir name after archive/
if(defined $filename && $filename ne "$proj-$ver$ext") {
log_error "$file: github URL has wrong filename $filename";
}
- $new_url = "https://github.com/$user/$proj/archive/$tag/$proj-$ver$ext";
+ $new_url = "https://github.com/$user/$proj/archive/$dir/$proj-$ver$ext";
if($old_url ne $new_url) {
log_warning("$file: canonical github URL should be: $new_url");