Created
January 17, 2022 09:18
-
-
Save gpiffault/f8d95d6873054cdf72dab40868b48f5d to your computer and use it in GitHub Desktop.
Compare two tar archives, compare file contents with md5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import hashlib | |
import tarfile | |
def hash_dict(tar_path):
    """Return a mapping of member name -> MD5 hex digest for a tar archive.

    Members whose name ends with ``.pyc`` are skipped. Members for which
    ``extractfile`` returns ``None`` (nothing to read) are skipped as well.
    If extracting a member raises, a message is printed and the member is
    left out of the result, so one bad member does not abort the whole scan.

    Args:
        tar_path: Path of the tar archive to read (passed to ``tarfile.open``).

    Returns:
        dict mapping each hashed member's name to the hexadecimal MD5 digest
        of its content.
    """
    result = {}
    # The context manager guarantees the archive is closed even when reading
    # a member fails part-way through.
    with tarfile.open(tar_path) as tar:
        for member in tar:
            if member.name.endswith(".pyc"):
                continue
            try:
                f = tar.extractfile(member)
            except Exception:
                # Best effort: report and move on. ``Exception`` (rather than
                # a bare ``except``) lets KeyboardInterrupt/SystemExit through.
                print("Error reading", tar_path, member.name)
                continue
            if f is None:
                continue
            h = hashlib.md5()
            with f:
                # Read in fixed-size chunks so large members are never held
                # in memory as a whole.
                while chunk := f.read(102400):
                    h.update(chunk)
            result[member.name] = h.hexdigest()
    return result
def diff(path1, path2):
    """Print the differences between two tar archives.

    Both archives are hashed with ``hash_dict`` and up to three sections are
    printed, each only when it is non-empty:

    * names present in both archives whose digests differ,
    * names present only in the second archive ("missing in tar 1"),
    * names present only in the first archive ("missing in tar 2").

    Nothing is printed when the two hash mappings are identical.

    Args:
        path1: Path of the first tar archive.
        path2: Path of the second tar archive.
    """
    hd1 = hash_dict(path1)
    hd2 = hash_dict(path2)

    # Every list is sorted so the report is reproducible: iteration order of a
    # set of strings changes between runs because of hash randomisation.
    content_mismatch = sorted(
        m for m in hd1.keys() & hd2.keys() if hd1[m] != hd2[m]
    )
    if content_mismatch:
        print("# File content mismatch")
        print(*content_mismatch, sep="\n")

    missing1 = sorted(hd2.keys() - hd1.keys())
    if missing1:
        print("# Missing files in tar 1")
        print(*missing1, sep="\n")

    missing2 = sorted(hd1.keys() - hd2.keys())
    if missing2:
        print("# Missing files in tar 2")
        print(*missing2, sep="\n")
if __name__ == "__main__":
    # Command-line entry point: takes two archive paths and prints the report.
    # argparse is imported here so importing this file as a module stays light.
    import argparse

    parser = argparse.ArgumentParser(
        description="Compare two tar archives, comparing file contents with md5.",
    )
    parser.add_argument("tar1", help="path to the first tar archive")
    parser.add_argument("tar2", help="path to the second tar archive")
    args = parser.parse_args()
    diff(args.tar1, args.tar2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment