Last active
August 21, 2017 04:38
-
-
Save nanvel/7d1babe6cda45c60c939bea932f61c52 to your computer and use it in GitHub Desktop.
AWS S3 file ETag
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import binascii | |
import hashlib | |
import os | |
# Files larger than this many bytes are treated as multipart uploads
# when computing the ETag (20 MiB).
AWS_UPLOAD_MAX_SIZE = 20 * 1024 ** 2

# Size in bytes of each part of a multipart upload (8 MiB).
# This must be the same part size your upload client uses, otherwise the
# computed ETag will not match.
# Be careful: botocore's put_object can't do multipart uploads, see
# https://stackoverflow.com/questions/38442512/difference-between-upload-and-putobject-for-uploading-a-file-to-s3
AWS_UPLOAD_PART_SIZE = 8 * 1024 ** 2
def _iter_parts(f, part_size):
    """Yield successive chunks of at most ``part_size`` bytes until EOF."""
    return iter(lambda: f.read(part_size), b'')


def s3_etag(f, max_size=None, part_size=None):
    """
    Compute the ETag S3 is expected to report for the contents of a file.

    Source: https://stackoverflow.com/questions/6591047/etag-definition-changed-in-amazon-s3/28877788#28877788

    Usage::

        with open('./myfile.txt', 'rb') as f:
            etag = s3_etag(f)

    The result only matches the ETag in S3 when ``max_size`` and
    ``part_size`` agree with the settings the uploading client used.

    :param f: seekable file object opened in binary mode. It is always
        hashed from the beginning, whatever its current position, and is
        left positioned at end of file.
    :param max_size: size in bytes above which the file is treated as a
        multipart upload. Defaults to ``AWS_UPLOAD_MAX_SIZE``.
    :param part_size: size in bytes of each part. Defaults to
        ``AWS_UPLOAD_PART_SIZE``.
    :return: for files of at most ``max_size`` bytes, the hex md5 of the
        whole content; for larger files, the hex md5 of the concatenated
        binary md5 digests of every part, followed by ``-<part count>``.
    :raises ValueError: if ``part_size`` is not a positive number.
    """
    # Resolve defaults lazily so the module constants are only consulted
    # when the caller did not supply a value.
    if max_size is None:
        max_size = AWS_UPLOAD_MAX_SIZE
    if part_size is None:
        part_size = AWS_UPLOAD_PART_SIZE
    if part_size <= 0:
        # read(0) would end the loop immediately and read(-1) would slurp
        # the whole file as one part; both produce a bogus ETag.
        raise ValueError("part_size must be a positive number of bytes")

    # Determine the total size, then rewind to hash from the start.
    f.seek(0, os.SEEK_END)
    filesize = f.tell()
    f.seek(0)

    if filesize > max_size:
        # Multipart: md5 over the raw (binary) md5 digest of each part.
        part_digests = [
            hashlib.md5(part).digest() for part in _iter_parts(f, part_size)
        ]
        combined = hashlib.md5(b''.join(part_digests))
        return combined.hexdigest() + "-" + str(len(part_digests))

    # Single upload: plain md5 of the content, read in chunks to bound
    # memory use.
    whole = hashlib.md5()
    for part in _iter_parts(f, part_size):
        whole.update(part)
    return whole.hexdigest()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment