Last active
June 29, 2020 20:09
-
-
Save andrejcremoznik/424777eebadaee258562bfcb424ee20c to your computer and use it in GitHub Desktop.
Diff files between 2 directories, copy new files from src to dest, then check if existing files in dest have bigger files in src and copy those over too.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Copy files from one directory to another:
# - if the file doesn't already exist in the destination directory, copy it
# - if the file already exists, compare sizes
# - - if the source file is over 1 GB bigger, overwrite the existing file
# - - if the source file is bigger by 1 GB or less, write a log for manual review
#
# I need this to copy video files from multiple different folders to one large archive.
# All source folders are flat and contain files with the same naming convention. Sometimes
# a file in the source folder will be a bigger (and better) version of the file in the
# destination folder. Depending on the file size difference, that file should be replaced.
#
# Using checksums is out of the question because summing a few TB of data is not viable.
# What I needed was a simple script to work on file names and sizes.
#
# Usage:
# ./diffdirs.sh "Source Dir/" "Archive Dir/"     # Will only output a summary of changes (dry run)
# ./diffdirs.sh "Source Dir/" "Archive Dir/" off # Will copy the files (any third argument other than "on" disables the dry run)
#
# --- Argument handling and configuration -----------------------------------
# $1 - source directory, $2 - destination directory, $3 - dry-run switch
# (defaults to "on"; any other value makes the script really copy files).
if [ "$#" -lt 2 ]; then
  echo "Usage: ${0} <src_dir> <dest_dir> [dry_run=on|off]" >&2
  exit 1
fi

# Strip a single trailing slash so paths can be joined as "${dir}/${file}".
src_dir=${1%/}
dest_dir=${2%/}
dry_run=${3:-on}

# Name of the listing written into each directory, and of the manual-review
# log written into the current working directory.
file_list="files.txt"
review_list="review.txt"

# Fail early on a mistyped path. The trailing slash keeps the test valid when
# the argument was "/" (which the stripping above turned into an empty string).
for checked_dir in "${src_dir}" "${dest_dir}"; do
  if [ ! -d "${checked_dir}/" ]; then
    echo "❌ ${checked_dir:-/} is not a directory." >&2
    exit 1
  fi
done
# Write a listing of the entries of a flat directory into
# "<dir>/${file_list}", one name per line.
# Globals:   file_list (read); input_dir, input_files (written)
# Arguments: $1 - directory to list
# Outputs:   diagnostics on stderr
# Exits:     with status 1 if the directory cannot be listed or contains
#            sub-directories
build_stats()
{
  input_dir=${1}

  # Drop a stale listing first so it does not show up in the new listing.
  if [ -f "${input_dir}/${file_list}" ]; then
    rm -- "${input_dir}/${file_list}"
  fi

  # -p marks directories with a trailing "/" and leaves every other name
  # untouched. (-F would also append "*", "@", "|" or "=" to executables,
  # symlinks, FIFOs and sockets, so the listed names would no longer match
  # the real file names.)
  if ! input_files=$(ls -Ap -- "${input_dir}"); then
    echo "❌ ${input_dir} could not be listed." >&2
    exit 1
  fi

  if printf '%s\n' "${input_files}" | grep -q '/$'; then
    echo "❌ ${input_dir} contains sub-directories." >&2
    exit 1
  fi

  # An empty directory gets an empty listing rather than one blank entry.
  if [ -n "${input_files}" ]; then
    printf '%s\n' "${input_files}" > "${input_dir}/${file_list}"
  else
    : > "${input_dir}/${file_list}"
  fi
}
# Copy one file from the source directory into the destination directory,
# or only announce the copy when the dry run is enabled.
# Globals:   dry_run, src_dir, dest_dir (read); file (written)
# Arguments: $1 - file name relative to src_dir
# Outputs:   one progress line on stdout; an error line on stderr if cp fails
# Returns:   0 on a dry run or successful copy, otherwise the status of cp
copy_file()
{
  file=${1}

  if [ "${dry_run}" = "on" ]; then
    echo "Copy: ${file}"
    return 0
  fi

  echo "Copying: ${file}..."
  # "--" keeps cp from treating an unusual path as an option.
  cp -f -- "${src_dir}/${file}" "${dest_dir}/" || {
    copy_status=$?
    echo "❌ Failed to copy ${file}" >&2
    return "${copy_status}"
  }
}
# Create the file lists for both directories.
build_stats "${src_dir}"
build_stats "${dest_dir}"

# The complete SRC list is walked and each name is tested against DEST,
# instead of parsing `diff -u` output: a unified diff only prints 3 lines of
# context around each change (and nothing at all when the lists are equal),
# so most files present in both directories would never be size-compared.

# Copy files that don't exist in DEST
# (IFS= keeps leading/trailing whitespace in file names intact.)
while IFS= read -r name; do
  [ -n "${name}" ] || continue
  [ -e "${dest_dir}/${name}" ] || copy_file "${name}"
done < "${src_dir}/${file_list}"

# Maybe copy files if bigger than in DEST
if [ -f "${review_list}" ]; then
  rm -- "${review_list}"
fi
while IFS= read -r name; do
  [ -n "${name}" ] || continue
  # Files missing in DEST were handled by the loop above.
  [ -e "${dest_dir}/${name}" ] || continue

  # stat -c%s prints the size in bytes; skip the entry if either stat fails.
  src_file_size=$(stat -c%s -- "${src_dir}/${name}") || continue
  dest_file_size=$(stat -c%s -- "${dest_dir}/${name}") || continue
  file_size_diff=$(( src_file_size - dest_file_size ))

  if [ "${file_size_diff}" -gt 1000000000 ]; then # File size diff over 1 GB
    copy_file "${name}"
  elif [ "${file_size_diff}" -gt 0 ]; then
    # Bigger, but not by enough to replace automatically: log for review.
    printf '%s\n' "Review: ${name} is bigger in ${src_dir}"
    printf '%s\n' "${name}" >> "${review_list}"
  fi
done < "${src_dir}/${file_list}"

if [ -f "${review_list}" ]; then
  echo "See ${review_list} for a list of files that are bigger in ${src_dir} but were not automatically replaced."
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment