You can see the tutorial in which I explain how to do the whole process here:
Last active
November 5, 2019 11:43
-
-
Save robertofd1995/47897ae4eb869fa4e94e4f2f601826c3 to your computer and use it in GitHub Desktop.
Copy S3 bucket objects across AWS accounts with confidence using the AWS CLI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import subprocess | |
def from_summary_to_csv(file_path):
    """Convert a saved ``aws s3 ls --recursive --summarize`` listing to CSV.

    Every line that looks like an object entry (``date time size key``) is
    written to a CSV file with the columns ``size`` and ``key``. Lines that
    are not object entries (blank lines and the trailing ``Total Objects`` /
    ``Total Size`` summary) are skipped, so no fixed number of trailing lines
    has to be assumed.

    Args:
        file_path: Path of the text file holding the listing.

    Returns:
        The path of the CSV file that was written: ``file_path`` with its
        extension replaced by ``.csv``.
    """
    # Local import: the module's import block only provides csv/subprocess.
    import os

    assert isinstance(file_path, str)

    # splitext only strips the final extension, so dots in directory names
    # (e.g. "./out/listing.txt") no longer truncate the path.
    csv_path = os.path.splitext(file_path)[0] + ".csv"

    # newline='' is what the csv module requires for files it writes to.
    with open(file_path, 'r') as listing, \
            open(csv_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(('size', 'key'))
        for line in listing:
            # maxsplit=3 keeps keys that contain spaces in one piece.
            fields = line.rstrip("\n").split(None, 3)  # [date, time, size, key]
            is_object_row = (
                len(fields) == 4
                and fields[0][:1].isdigit()  # summary lines start with "Total"
                and fields[2].isdigit()
            )
            if not is_object_row:
                continue
            # writerow takes ONE iterable holding the whole row.
            writer.writerow((fields[2], fields[3]))
    return csv_path
def _dump_bucket_listing(bucket, listing_path):
    """Write the recursive, summarized ``aws s3 ls`` listing of *bucket* to *listing_path*."""
    with open(listing_path, 'w') as listing:
        # Argument list + stdout redirection in Python: no shell is involved,
        # so the bucket name cannot inject shell syntax.
        subprocess.run(
            ["aws", "s3", "ls", "--recursive",
             "s3://{}".format(bucket), "--summarize"],
            stdout=listing,
            check=True,
        )


def sync(source_bucket, destination_bucket):
    """Copy one S3 bucket into another and report objects that are missing.

    Runs ``aws s3 sync`` from the source to the destination bucket, saves a
    recursive listing of both buckets (``bucket-contents-source.txt`` and
    ``bucket-contents-new.txt``), converts both listings to CSV with
    ``from_summary_to_csv`` and writes to ``diff.csv`` every ``size,key`` row
    of the source listing that has no identical row in the destination.

    Args:
        source_bucket: Name of the bucket to copy from (without ``s3://``).
        destination_bucket: Name of the bucket to copy to (without ``s3://``).

    Raises:
        subprocess.CalledProcessError: If any ``aws`` command exits with a
            non-zero status; continuing would produce a misleading diff.
    """
    subprocess.run(
        ["aws", "s3", "sync",
         "s3://{}".format(source_bucket),
         "s3://{}".format(destination_bucket)],
        stdout=subprocess.DEVNULL,  # the command's output is not used
        check=True,
    )

    _dump_bucket_listing(source_bucket, "bucket-contents-source.txt")
    _dump_bucket_listing(destination_bucket, "bucket-contents-new.txt")

    csv_source_path = from_summary_to_csv("bucket-contents-source.txt")
    csv_new_path = from_summary_to_csv("bucket-contents-new.txt")

    # A set gives O(1) membership checks instead of scanning a list per row.
    with open(csv_new_path, 'r') as new_file:
        copied_rows = set(new_file)

    with open(csv_source_path, 'r') as source_file, \
            open('diff.csv', 'w') as out_file:
        out_file.writelines(
            row for row in source_file if row not in copied_rows
        )
if __name__ == '__main__':
    # Local import: the module's import block only provides csv/subprocess.
    import sys

    # sync() needs both bucket names; take them from the command line
    # instead of calling it without arguments (which raises TypeError).
    if len(sys.argv) != 3:
        sys.exit(
            "usage: {} SOURCE_BUCKET DESTINATION_BUCKET".format(sys.argv[0])
        )
    sync(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment