Last active
May 19, 2020 11:43
-
-
Save MatteoLacki/28a75544e31b8185f4bc42ebe0fe4b45 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""All this is meant to work only on Windows.""" | |
import os | |
from pathlib import Path | |
import subprocess | |
import sys | |
import time | |
import hashlib | |
import argparse | |
import logging | |
def age(file_path, unit='h'):
    """Return how long ago `file_path` was created, per `os.path.getctime`.

    Args:
        file_path: Path of the file to inspect.
        unit: 's' for seconds or 'h' for hours.

    Returns:
        float: Time elapsed between the file's ctime and now, in `unit`.
    """
    seconds_per_unit = {'s': 1, 'h': 3600}
    assert unit in seconds_per_unit
    elapsed_seconds = time.time() - os.path.getctime(file_path)
    return elapsed_seconds / seconds_per_unit[unit]
def get_size_in_kilobytes(file_path):
    """Return the size of `file_path` in kilobytes (1 kB = 1024 bytes).

    Args:
        file_path: Path of the file to measure.

    Returns:
        float: File size in kilobytes; fractional for sizes that are not a
        multiple of 1024 bytes.
    """
    # os.path.getsize reports bytes; convert so the result matches the name.
    # True division keeps sub-kilobyte differences visible, so comparing two
    # results for equality is still an exact byte-size comparison.
    return os.path.getsize(file_path) / 1024
def copy(source, target, *file_names):
    """Copy the named files from `source` to `target` with Robocopy.exe.

    The `/is` switch tells Robocopy to copy files even if they do not differ
    between source and target.

    Args:
        source: Folder that contains the files.
        target: Folder to copy the files into.
        *file_names: Names (or patterns) of the files to copy; at least one
            is required.

    Returns:
        int: Return code of the Robocopy process.
    """
    assert len(file_names) > 0, "Specify file names to copy."
    # Hand subprocess an argument list instead of splitting a command string
    # on whitespace: paths or file names that contain spaces must reach
    # Robocopy as single arguments.
    cmd = ["robocopy", str(source), str(target), *map(str, file_names), "/is"]
    return subprocess.run(cmd).returncode
def check_sum(file_path, algo=hashlib.blake2b, chunksize=8192):
    """Return the hex digest of the contents of `file_path`.

    Args:
        file_path: Path of the file to hash.
        algo: hashlib constructor, e.g. hashlib.blake2b or hashlib.md5.
        chunksize: Number of bytes read from the file per iteration.

    Returns:
        str: Hexadecimal digest of the file contents.
    """
    with open(file_path, "rb") as stream:
        digest = algo()
        # Read block by block until an empty read signals end of file, so
        # the whole file never has to sit in memory at once.
        for block in iter(lambda: stream.read(chunksize), b""):
            digest.update(block)
    return digest.hexdigest()
def check_sums_aggree(file_name_0, file_name_1, **kwds):
    """Tell whether both files have the same check sum.

    Args:
        file_name_0: Path of the first file.
        file_name_1: Path of the second file.
        **kwds: Forwarded unchanged to `check_sum` for both files.

    Returns:
        bool: True if the two digests are equal.
    """
    digest_0 = check_sum(file_name_0, **kwds)
    digest_1 = check_sum(file_name_1, **kwds)
    return digest_0 == digest_1
def sizes_aggree(file_name_0, file_name_1):
    """Tell whether both files have the same size.

    Args:
        file_name_0: Path of the first file.
        file_name_1: Path of the second file.

    Returns:
        bool: True if `get_size_in_kilobytes` reports equal values for both.
    """
    size_0 = get_size_in_kilobytes(file_name_0)
    size_1 = get_size_in_kilobytes(file_name_1)
    return size_0 == size_1
#################################################################################### | |
# Command line interface: a source glob pattern, a target folder, and
# optional settings for the minimal file age and the log file location.
parser = argparse.ArgumentParser(
    description='Sync files between folders.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    epilog=r"Example: python syncFiles.py C:\test\V*.raw V:\RAW_test")
parser.add_argument('source_pattern',
                    type=Path,
                    help='Pattern of the files to sync.')
parser.add_argument('target_folder',
                    type=Path,
                    help='Path to the folder that the files are copied to.')
parser.add_argument('--min_age_hours',
                    type=float,
                    help='Minimal age in hours for the files to be copied.',
                    default=24)
# The default is a string; argparse passes string defaults through `type`,
# so `logs_path` is a Path either way.
parser.add_argument('--logs_path',
                    type=Path,
                    help='Where to save logs.',
                    default=r"C:\Logs\sync.log")
# The rest of the script reads the parsed arguments through the name `ap`.
ap = parser.parse_args()
#################################################################################### | |
# Make sure the folder for the log file exists before logging is configured.
ap.logs_path.parent.mkdir(parents=True, exist_ok=True)
logging.basicConfig(filename=ap.logs_path,
                    level=logging.INFO,
                    format='%(asctime)s:%(name)s:%(levelname)s:%(message)s:')
log = logging.getLogger('syncFiles.py')

# Record the parameters of this run.
log.info("copying files")
log.info("FROM: " + str(ap.source_pattern))
log.info("TO: " + str(ap.target_folder))
# Report the age threshold here; the target folder is already logged above.
log.info("How old are files in hours?: " + str(ap.min_age_hours))
#################################################################################### | |
target_folder = ap.target_folder
source_folder = ap.source_pattern.parent
pattern = ap.source_pattern.name

# Only files at least `min_age_hours` old are copied and later deleted.
old_files = [f for f in source_folder.glob(pattern) if age(f, 'h') >= ap.min_age_hours]
file_names = [f.name for f in old_files]

if not file_names:
    err = f"no files matching pattern {ap.source_pattern}"
    log.error(err)
    print(err)
    # `break` is only valid inside a loop; stop the script explicitly and
    # signal through the exit status that nothing was synced.
    sys.exit(1)

# Build the listing outside the f-string: reusing the same quote character
# inside an f-string expression is a syntax error before Python 3.12.
old_files_listing = " ".join(str(f) for f in old_files)
log.info(f"files older than {ap.min_age_hours} hours: {old_files_listing}")

return_code = copy(source_folder, target_folder, *file_names)
# Robocopy documents return codes of 8 and above as copy failures. The
# per-file verification below still decides whether a source file is deleted.
if return_code >= 8:
    log.error(f"robocopy reported failures, return code: {return_code}")
else:
    log.info(f"robocopy return code: {return_code}")

log.info("checking files and deleting wann alles stimmt.")
for sf in old_files:
    tf = target_folder/sf.name
    try:
        # Delete a source file only after both its size and its check sum
        # match those of the copy in the target folder.
        if not sizes_aggree(sf, tf):
            log.error(f"Files sizes differ: {sf} {tf}")
            continue
        log.info(f"File sizes aggree: {sf} {tf}")
        if not check_sums_aggree(sf, tf):
            log.error(f"Check sums differ: {sf} {tf}")
            continue
        log.info(f"Check sums aggree: {sf} {tf}")
        log.info(f"Deleting {sf}")
        sf.unlink()
    except FileNotFoundError:
        log.error(f"Target file missing: {tf}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment