Skip to content

Instantly share code, notes, and snippets.

@robertofd1995
Last active November 5, 2019 11:43
Show Gist options
  • Save robertofd1995/47897ae4eb869fa4e94e4f2f601826c3 to your computer and use it in GitHub Desktop.
Save robertofd1995/47897ae4eb869fa4e94e4f2f601826c3 to your computer and use it in GitHub Desktop.
Copy S3 bucket objects across AWS accounts with confidence using aws cli
import csv
import subprocess
def from_summary_to_csv(file_path):
    """Convert an ``aws s3 ls --recursive --summarize`` listing into a CSV file.

    Every object line of the listing has the form
    ``<date> <time> <size> <key>``.  The CSV is written next to the input
    file (same name, ``.csv`` extension) and contains a ``size,key`` header
    followed by one row per listed object.

    Args:
        file_path: Path of the text file holding the listing.

    Returns:
        The path of the CSV file that was written.

    Raises:
        TypeError: If ``file_path`` is not a string.
    """
    import os  # local import: only this function needs it

    if not isinstance(file_path, str):
        raise TypeError(
            "file_path must be a str, got {}".format(type(file_path).__name__)
        )

    # Read everything up front so the input is closed before the CSV is opened.
    with open(file_path, 'r') as in_file:
        lines = in_file.readlines()

    # splitext only strips the final extension, so dots elsewhere in the
    # path (e.g. "./listing.txt") do not truncate the name.
    csv_path = os.path.splitext(file_path)[0] + ".csv"

    # newline='' lets the csv module control line endings itself.
    with open(csv_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(('size', 'key'))
        for line in lines:
            # maxsplit=3 keeps keys that contain spaces in one piece:
            # [date, time, size, key]
            fields = line.rstrip("\r\n").split(None, 3)
            # Blank lines and the trailing "Total Objects"/"Total Size"
            # summary never have a plain integer size in the third field,
            # so they are filtered out here instead of by counting lines.
            if len(fields) < 4 or not fields[2].isdigit():
                continue
            writer.writerow((fields[2], fields[3]))
    return csv_path
def _run_aws(arguments, stdout=subprocess.PIPE):
    """Run ``aws`` with the given argument list; raise if it exits non-zero."""
    command = ["aws"] + list(arguments)
    # An argument list (no shell) means bucket names are never interpreted
    # by a shell, whatever characters they contain.
    process = subprocess.Popen(command, stdout=stdout)
    process.communicate()
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)


def _dump_listing(bucket, listing_path):
    """Write the recursive, summarized listing of *bucket* to *listing_path*."""
    with open(listing_path, 'wb') as listing_file:
        _run_aws(
            ["s3", "ls", "--recursive", "s3://{}".format(bucket), "--summarize"],
            stdout=listing_file,
        )


def sync(source_bucket, destination_bucket):
    """Copy one S3 bucket into another and record what is still missing.

    Runs ``aws s3 sync`` from the source to the destination, lists both
    buckets into ``bucket-contents-source.txt`` and
    ``bucket-contents-new.txt``, converts both listings to CSV and writes
    every ``size,key`` row of the source that is absent from the destination
    to ``diff.csv``.  An empty ``diff.csv`` means every source object was
    found in the destination with the same size.

    Args:
        source_bucket: Name of the bucket to copy from (without ``s3://``).
        destination_bucket: Name of the bucket to copy to (without ``s3://``).

    Raises:
        subprocess.CalledProcessError: If any ``aws`` invocation fails.
            Without this check a failed listing would yield an empty file
            and therefore a misleadingly empty diff.
    """
    _run_aws([
        "s3", "sync",
        "s3://{source}".format(source=source_bucket),
        "s3://{destination}".format(destination=destination_bucket),
    ])

    _dump_listing(source_bucket, "bucket-contents-source.txt")
    _dump_listing(destination_bucket, "bucket-contents-new.txt")

    csv_source_path = from_summary_to_csv("bucket-contents-source.txt")
    csv_new_path = from_summary_to_csv("bucket-contents-new.txt")

    # A set gives constant-time membership tests for the comparison below.
    with open(csv_new_path, 'r') as new_file:
        copied_rows = set(new_file)

    with open(csv_source_path, 'r') as source_file, \
            open('diff.csv', 'w') as out_file:
        out_file.writelines(
            row for row in source_file if row not in copied_rows
        )
if __name__ == '__main__':
    # sync() requires both bucket names, so take them from the command line
    # instead of calling it with no arguments.
    import sys

    if len(sys.argv) != 3:
        sys.exit(
            "usage: {} SOURCE_BUCKET DESTINATION_BUCKET".format(sys.argv[0])
        )
    sync(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment