Last active
September 8, 2020 14:07
-
-
Save s5unty/e636a1ca698c6817330825eba67941e7 to your computer and use it in GitHub Desktop.
Pack a huge directory (billions of files) into multiple tar files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Pack a huge directory into many small tar archives.
#
# Usage: [PACK_*=...] script [-C root] [-n name] [-s size] [-j jobs] TARGET
#
# Defaults below may be pre-set through the environment and are overridden
# by the command-line options parsed further down:
#   PACK_ROOT  directory handed to `tar -C`                      (default: /)
#   PACK_NAME  prefix of the index, chunk lists and archives     (default: 5 random letters)
#   PACK_SIZE  number of index lines per chunk (`split -l`)      (default: 10000)
#   PACK_JOBS  number of concurrent jobs handed to `sem -j`      (default: 16)
export PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin"
export PACK_ROOT="${PACK_ROOT:-/}"
# shuf prints one letter per line; strip the newlines explicitly.  (NUL
# separators via `shuf -z` would be dropped by the command substitution with
# an "ignored null byte" warning on recent bash.)
export PACK_NAME="${PACK_NAME:-$(shuf -er -n5 {a..z} | tr -d '\n')}"
export PACK_SIZE="${PACK_SIZE:-10000}"
export PACK_JOBS="${PACK_JOBS:-16}"
#######################################
# Parse leading "option value" pairs into the PACK_* settings.
# The last argument is never treated as an option: it is left in place as
# the target to pack.
# Globals:   PACK_ROOT, PACK_NAME, PACK_SIZE, PACK_JOBS (written)
#            pack_args_consumed (written) - number of arguments parsed
# Arguments: the script's command line
# Outputs:   diagnostic on stderr for an unknown option
# Returns:   exits the script with status 1 on an unknown option
#######################################
pack_parse_args() {
  pack_args_consumed=0
  while [[ "$#" -gt 1 ]]; do
    case "$1" in
      -C|--root) PACK_ROOT="$2" ;;
      -n|--name) PACK_NAME="$2" ;;
      -s|--size) PACK_SIZE="$2" ;;
      -j|--jobs) PACK_JOBS="$2" ;;
      *) echo "Unknown parameter passed: $1" >&2; exit 1 ;;
    esac
    # every recognised option takes exactly one value
    shift 2
    pack_args_consumed=$(( pack_args_consumed + 2 ))
  done
}

pack_parse_args "$@"
# drop the parsed pairs so that "$1" is the target from here on
shift "$pack_args_consumed"
# Build the member list and cut it into chunk lists.
#
# tar writes the archive to /dev/null, so only its verbose member listing is
# used.  The listing is saved whole as "${PACK_NAME}.index" and split into
# files "${PACK_NAME}.NNNNN.tmp" of PACK_SIZE lines each (5-digit numeric
# suffix).
#
# --exclude-tag,,, https://stackoverflow.com/a/13296077/1355228
if [[ -z "${1:-}" ]]; then
  # without a target tar would be handed an empty member name
  echo "Usage: ${0##*/} [-C root] [-n name] [-s size] [-j jobs] TARGET" >&2
  exit 1
fi
# "--" keeps a target that starts with "-" from being parsed as a tar option
tar -c -v -P -f /dev/null --exclude-tag ".void" -C "$PACK_ROOT" -- "$1" \
  | tee "${PACK_NAME}.index" \
  | split -a 5 -d -l "$PACK_SIZE" --additional-suffix=".tmp" - "${PACK_NAME}."
#######################################
# Print the two-digit bucket directory name for a chunk number.
# Thousands of archives are spread over sub-folders named after the last two
# digits: 00034,00134,01234,01034,10134,...,99934 -> 34
# Arguments: $1 - chunk number, digits only (leading zeros allowed)
# Outputs:   bucket name on stdout, no trailing newline
#######################################
pack_bucket() {
  # 10# forces base 10; a leading zero would otherwise mean octal
  printf '%02d' "$(( 10#$1 % 100 ))"
}

#######################################
# Print the shell command that packs one chunk list and, on success, removes
# that list.  Every path is shell-quoted with %q because sem receives the
# command as one string and hands it to a shell.
# NOTE(review): %q emits bash quoting; assumes sem runs the command with a
# bash-compatible shell - confirm.
# Arguments: $1 - root directory (used for both -V and -C)
#            $2 - chunk list file (tar -T)
#            $3 - log file (tar --index-file)
#            $4 - archive file (tar -f)
# Outputs:   command line on stdout, no trailing newline
#######################################
pack_chunk_cmd() {
  local root=$1 list=$2 log=$3 archive=$4
  printf 'tar -c --zstd --numeric-owner --no-recursion -P -V %q -C %q -vv --index-file=%q -T %q -f %q && rm -f -- %q' \
    "$root" "$root" "$log" "$list" "$archive" "$list"
}

# Pack every chunk list "${PACK_NAME}.NNNNN.tmp" into NN/<name>.NNNNN.tar.zst
# (plus a .log next to it), running up to PACK_JOBS tar processes through sem.
for one in "${PACK_NAME}".*.tmp; do
  stem="${one%.tmp}"    # some.thing.01234.tmp -> some.thing.01234
  num="${stem##*.}"     # some.thing.01234 -> 01234
  # also skips the literal pattern left behind when nothing matched
  [[ $num =~ ^[0-9]+$ ]] || continue
  dir="$(pack_bucket "$num")"
  # keep the bucket beside the chunk list when PACK_NAME has a directory part
  if [[ $one == */* ]]; then
    dir="${one%/*}/$dir"
  fi
  mkdir -p -- "$dir" || continue
  log="$dir/${stem##*/}.log"
  tar="$dir/${stem##*/}.tar.zst"
  [[ -s $one ]] || continue   # nothing to pack
  [[ -s $tar ]] && continue   # a non-empty archive already exists: leave it
  sem -j "$PACK_JOBS" "$(pack_chunk_cmd "$PACK_ROOT" "$one" "$log" "$tar")"
  # sem -j $PACK_JOBS "dar -Q -zlzop-3 -R $1 --include-from-file $one -c $PACK_NAME"
done
# block until all queued jobs have finished
sem --wait
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment