Instantly share code, notes, and snippets.
Last active
October 25, 2016 04:42
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save mackyle/9ea081513f6b90bb4470b7b2bc6e4bce to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# export-fixed-tags -- produce fast-import stream to fix broken tags | |
# Copyright (C) 2016 Kyle J. McKay. All rights reserved. | |
# License AGPLv3+ https://www.gnu.org/licenses/agpl.html | |
# | |
## Usage | |
## | |
## Some software used to convert repositories to Git format creates invalid | |
## tags during the conversion. In the case where these tags lack a signature, | |
## email and timestamp but DO have an author name and that name matches the | |
## name on the commit the tag refers to, the tag can be corrected by adding | |
## the missing information by taking it from the commit. | |
## | |
## Some repositories resulting from such bad conversions can be found at: | |
## | |
## http://git.savannah.gnu.org/ | |
## | |
## In particular, if a repository has bad tags a clone with fsckObjects=true | |
## will fail. For example: | |
## | |
## git -c transfer.fsckobjects=true clone http://git.savannah.gnu.org/r/automake.git | |
## | |
## will fail (unless the invalid tags have since been corrected). | |
## | |
## Running this script in a repository with any of these kinds of bad tags will
## produce a git fast-import stream (on standard output) that can be passed to
## git fast-import to correct the tags (progress is reported to standard error).
## | |
## So, for example, the aforementioned automake repository's bad tags can be | |
## corrected using this script like so: | |
## | |
## git -c transfer.fsckobjects=false clone --mirror http://git.savannah.gnu.org/r/automake.git | |
## cd automake.git | |
## export-fixed-tags | git fast-import | |
## | |
## followed by a push to publish the corrected tags. | |
# | |
use strict; | |
use warnings; | |
use Encode; | |
# Fallback decoder for byte strings that are not valid UTF-8.  Windows-1252
# is tried first and ISO-8859-1 second; the script cannot run without one.
my $encoder;
BEGIN {
    for my $charset ('Windows-1252', 'ISO-8859-1') {
        $encoder = Encode::find_encoding($charset) and last;
    }
    $encoder or die "failed to load ISO-8859-1 encoder\n";
}

# to_utf8($str)
#
# Returns $str as a UTF-8 encoded byte string, or undef if $str is undef.
# A string that already carries Perl's UTF-8 flag, or whose bytes decode
# cleanly as UTF-8, is used as is; anything else is decoded with the
# fallback $encoder before being encoded to UTF-8.  The optional second
# argument permitted by the prototype is not used.
sub to_utf8($;$) {
    my ($input) = @_;
    defined($input) or return undef;
    # utf8::decode works in place on our private copy and reports whether
    # the bytes were well-formed UTF-8.
    my $chars = (Encode::is_utf8($input) || utf8::decode($input))
        ? $input
        : $encoder->decode($input, Encode::FB_DEFAULT);
    utf8::encode($chars);
    return $chars;
}
# collect($count)
#
# Reads exactly $count bytes from the TAGS filehandle and returns them as a
# single string (the empty string when $count is zero, negative or undef).
# Data is pulled in chunks of at most 32768 bytes.
#
# Dies if the read fails or if the stream ends before $count bytes have been
# delivered, because a silently truncated tag message would otherwise be
# written into the generated fast-import stream.
sub collect {
    my $remaining = shift;
    my $content = '';
    while (defined($remaining) && $remaining > 0) {
        my $want = $remaining < 32768 ? $remaining : 32768;
        my $got = read(TAGS, my $chunk, $want);
        defined($got)
            or die "error reading tag data: $!\n";
        $got
            or die "unexpected end of tag data ($remaining more byte(s) expected)\n";
        $content .= $chunk;
        # read may legitimately return fewer bytes than requested, so count
        # what actually arrived rather than what was asked for.
        $remaining -= $got;
    }
    return $content;
}
# without_crud($str)
#
# Modelled on strbuf_addstr_without_crud in git's ident.c, except that NUL
# bytes are stripped as well since those are permitted in commit or tag
# headers.  Returns undef for an undef argument; otherwise returns a copy
# of $str with leading and trailing "crud" (control characters, space and
# the punctuation . , : ; < > " \ ') removed and with every remaining
# newline, NUL, '<' and '>' deleted.
sub without_crud($) {
    my ($text) = @_;
    defined($text) or return undef;
    for ($text) {
        s/^[\x00-\x1f .,:;<>\x22\\']+//s;  # leading crud
        s/[\x00-\x1f .,:;<>\x22\\']+$//s;  # trailing crud
        s/[\n<>\0]//g;                     # interior \n \0 < and >
    }
    return $text;
}
# split_tagger($tagger)
#
# Breaks the value of a "tagger" header into its parts and returns the list
# ($name, $email, $timestamp, $offset).  Returns the empty list when $tagger
# is undef.  Parsing works from the right: a trailing signed four-digit
# offset is removed first, then a trailing (optionally signed) integer
# timestamp, and whatever precedes the first '<' becomes the name.  The
# remainder, minus surrounding angle brackets, is the email.  $timestamp
# and $offset are undef when absent.
sub split_tagger($) {
    my ($ident) = @_;
    return () unless defined($ident);
    my ($name, $stamp, $zone);
    if ($ident =~ /^(.*?)\s*([-+]\d\d\d\d)$/) {
        ($ident, $zone) = ($1, $2);
    }
    if ($ident =~ /^(.*?)\s*([-+]?\d+)$/) {
        # "0 +" forces the timestamp into numeric form
        ($ident, $stamp) = ($1, 0 + $2);
    }
    if ($ident =~ /^\s*([^<]*)(.*)$/) {
        ($name, $ident) = ($1, $2);
        $name =~ s/\s+$//;
    }
    $ident =~ s/\s+$//;
    $ident =~ s/^<+//;
    $ident =~ s/>+$//;
    return ($name, $ident, $stamp, $zone);
}
# tag_is_ok($tag)
#
# $tag is a reference to a hash describing one tag object.  Returns true
# only when its 'tagger' value is already in canonical form, i.e. it splits
# into a name, email, non-negative timestamp and offset, and re-assembling
# the crud-free pieces as "name <email> timestamp offset" reproduces the
# original string exactly.  Returns undef when the tagger is missing,
# incomplete or has a negative timestamp.
sub tag_is_ok($) {
    my ($tag) = @_;
    my $tagger = $tag->{'tagger'};
    defined($tagger) or return undef;
    my ($name, $email, $when, $zone) = split_tagger($tagger);
    return undef
        unless defined($name) && defined($email)
            && defined($when) && defined($zone);
    return undef if $when < 0;
    my $clean_name  = without_crud($name);
    my $clean_email = without_crud($email);
    my $canonical   = "$clean_name <$clean_email> $when $zone";
    return $tagger eq $canonical;
}
# commit_info($commit)
#
# Runs "git log -n 1" on $commit and returns the list
# ($cn, $ce, $cd, $an, $ae, $ad): committer name, email and date followed by
# author name, email and date, where $cd and $ad are raw dates (as produced
# by --date=raw).  Returns the empty list when $commit is undef or empty,
# when git cannot be started, or when git produces no output.
#
# git is started with a list-form pipe open, so $commit is handed over as a
# single argument and is never interpreted by a shell.
sub commit_info($) {
    my ($commit) = @_;
    return () unless defined($commit) && length($commit);
    open(my $log, '-|', 'git', 'log', '-n', '1', '--date=raw',
        '--format=format:%cn%n%ce%n%cd%n%an%n%ae%n%ad', $commit, '--')
        or return ();
    binmode($log);
    my $out = do { local $/; <$log> };
    # A failing git shows up as missing output, which callers already
    # handle, so the exit status is deliberately not examined here.
    close($log);
    return () unless defined($out);
    return split(/\n/, $out);
}
# Shell pipeline that lists every ref under refs/tags pointing at a tag
# object and feeds "<hash> <refname>" to git cat-file --batch.  For each tag
# that yields a header line "<hash> <type> <size> <refname>" followed by the
# raw object contents and a newline.
my $cmd = <<'CMD';
git for-each-ref refs/tags |
awk '$2=="tag"{print $1" "$3}' |
git cat-file --batch='%(objectname) %(objecttype) %(objectsize) %(rest)'
CMD
open TAGS, '-|', $cmd or die "could not run:\n$cmd";
binmode TAGS;

# Tags whose tagger line is not already well-formed; each entry is a hash
# ref with the keys hash, refname, other, message and, when the matching
# header is present, object, type, tag and tagger.
my @tags = ();
while (<TAGS>) {
    # Lines that do not look like a batch header are ignored.
    next unless /^([0-9a-f]{40}) ([^ ]+) ([0-9]+) (refs\/[^ ]+)$/;
    my ($h, $t, $l, $r) = ($1, $2, $3, $4);
    chomp $r; # [^ ]+ also swallowed the line's trailing newline
    my %tag = ();
    $tag{'hash'} = $h;
    $tag{'refname'} = $r;
    $tag{'other'} = [];
    $t eq "tag" or die "wtf: non-tag type in input, is git mad?\n";

    # Read the tag's header lines, counting the bytes consumed (including
    # the blank separator line) so the message length can be derived from
    # the object size.
    my $count = 0;
    while (<TAGS>) {
        $count += length($_);
        chomp;
        last if /^$/;
        $tag{'object'} = $1, next if /^object ([0-9a-f]{40})$/;
        $tag{'type'} = $1, next if /^type ([^ ]+)$/;
        $tag{'tag'} = $1, next if /^tag ([^ ]+)$/;
        $tag{'tagger'} = $1, next if /^tagger (.*)$/ || /^tagger()$/;
        push(@{$tag{'other'}}, $_);
    }

    # The extra byte is the newline cat-file emits after the object
    # contents; the chomp then takes it off again.
    my $tm = collect(1 + $l - $count);
    chomp $tm;
    $tag{'message'} = $tm;
    if (tag_is_ok(\%tag)) {
        print STDERR "skipping OK tag $tag{refname} ($tag{tag})\n";
    } else {
        print STDERR "processing invalid tag $tag{refname}\n";
        push(@tags, \%tag);
    }
}

# Closing a pipe also reaps the child, so this is where a failed pipeline
# becomes visible.  Nothing has been written to standard output yet, so
# stopping here avoids emitting a fix-up stream built from incomplete data.
close(TAGS)
    or die($! ? "error closing tag listing pipeline: $!\n"
              : "tag listing pipeline failed (wait status $?)\n");
# Write one fast-import "tag" command to standard output for every invalid
# tag that can be repaired from the commit it points to.  Tags that cannot
# be repaired safely are reported on standard error and left alone.
foreach my $entry (@tags) {
    my %tag = %$entry;
    my ($r, $h) = ($tag{'refname'}, $tag{'hash'});
    $r && $h or die "programmer error: tag without refname and/or hash key(s)!";
    my ($o, $y, $t) = ($tag{'object'}, $tag{'type'}, $tag{'tag'});

    # Only plain, unsigned tags of commits with a matching ref name and no
    # unrecognized headers are rewritten.
    unless ($o && $y && $t) {
        print STDERR "refusing to process tag w/o object and type and tag fields: $r\n";
        next;
    }
    unless ($y eq 'commit') {
        print STDERR "refusing to process tag with non-commit type '$y': $r ($t)\n";
        next;
    }
    unless ($r eq "refs/tags/$t") {
        print STDERR "refusing to process tag with non-matching refname: $r ($t)\n";
        next;
    }
    unless (scalar(@{$tag{'other'}}) == 0) {
        print STDERR "refusing to process tag with unknown header fields: $r ($t)\n";
        next;
    }
    if ($tag{'message'} =~ /^-----BEGIN PGP SIGNATURE-----/m) {
        print STDERR "refusing to process signed tag: $r ($t)\n";
        next;
    }

    my ($cn, $ce, $cd, $an, $ae, $ad) = commit_info($o);
    unless ($cn && $ce && $cd && $an && $ae && $ad) {
        print STDERR "skipping because no commit $o info available: $r ($t)\n";
        next;
    }
    # $entry->{'commit'} = [$cn, $ce, $cd, $an, $ae, $ad];
    # print STDERR "$o = \"$cn\",\"$ce\",\"$cd\",\"$an\",\"$ae\",\"$ad\"\n";

    my ($n, $e, $tu, $to);
    ($n, $e, $tu, $to) = split_tagger($tag{'tagger'}) if defined($tag{'tagger'});
    $n = without_crud($n) if defined($n);
    $e = without_crud($e) if defined($e);

    # Fill in the identity: with neither name nor email use the committer;
    # with a name take the email of whichever of committer/author has that
    # name; with only an email take the name of whichever has that email.
    if (!$n && !$e) {
        $n = $cn;
        $e = $ce;
    } elsif ($n) {
        if ($cn eq $n) {
            $e = $ce;
        } elsif ($an eq $n) {
            $e = $ae;
        } else {
            print STDERR "skipping because no email available for \"$n\": $r ($t)\n";
            next;
        }
    } else {
        if ($ce eq $e) {
            $n = $cn;
        } elsif ($ae eq $e) {
            $n = $an;
        } else {
            print STDERR "skipping because no name available for <$e>: $r ($t)\n";
            next;
        }
    }

    # Fill in the date: keep the tag's own timestamp when it has one
    # (assuming +0000 if the offset is missing), else use the commit date.
    my $d;
    if ($tu && !$to) {
        $d = "$tu +0000";
    } elsif (!$tu) {
        $d = $cd;
    } else {
        $d = "$tu $to";
    }

    $t = to_utf8($t);
    $n = to_utf8($n);
    $e = to_utf8($e);
    my $m = to_utf8($tag{'message'});
    {
        # The "data" length must be a byte count.
        use bytes;
        print "tag $t\nfrom $o\ntagger $n <$e> $d\n";
        print "data ", length($m), "\n", $m, "\n";
    }
}
print "done\n";
#use Data::Dumper;
#print STDERR Data::Dumper->Dump([\@tags], ['*tags']);
exit 0;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment