Created
October 15, 2020 12:17
-
-
Save vk496/fd2a7ecd305a88bea10ad6a8a567266b to your computer and use it in GitHub Desktop.
Script to compress NetCDF files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Author: Valentin Kivachuk Burda
#
# Script to compress NetCDF files.
#
# External commands this script invokes. They are verified up front so a
# missing tool is reported before any file is touched.
DEPENDS=(nccopy ncdump awk wc grep find cut du)
# Adapted from https://stackoverflow.com/a/12436838/2757192
#
# Block until the number of running background jobs of the current shell
# (counted with `jobs -pr | wc -l`) drops below a limit, polling every 0.1 s.
#
# Arguments: $1 - NUM_PROCS; evaluated arithmetically, must be >= 1
# Outputs:   usage text on stdout when called without arguments;
#            an error on stderr when NUM_PROCS evaluates below 1
# Returns:   0 once a slot is free (or after printing the usage text),
#            1 when NUM_PROCS is below 1
function max_bg_procs {
  if [[ $# -eq 0 ]]; then
    echo "Usage: max_bg_procs NUM_PROCS. Will wait until the number of background (&)"
    echo " bash processes (as determined by 'jobs -pr') falls below NUM_PROCS"
    return
  fi

  local max_number current_number
  max_number=$((0 + ${1:-0}))

  # The job count is never negative, so "count < limit" cannot become true
  # for a limit below 1; refuse it instead of polling forever.
  if ((max_number < 1)); then
    echo "max_bg_procs: NUM_PROCS must be >= 1 (got '$1')" >&2
    return 1
  fi

  while true; do
    current_number=$(jobs -pr | wc -l)
    if [[ $current_number -lt $max_number ]]; then
      break
    fi
    sleep 0.1
  done
}
# Recompress one NetCDF file in place unless it already looks fully deflated.
#
# The header printed by `ncdump -sh` is scanned for two line counts: lines
# containing ") ;" (taken as declarations of dimensioned variables) and lines
# containing ":_DeflateLevel". When the counts differ the file is rewritten
# with `nccopy -d 7` into "<file>.npartial", which then replaces the original.
#
# Arguments: $1 - path of the NetCDF file
#            $2 - index of this file, used only in the progress prefix
#            $3 - total number of files, used only in the progress prefix
# Outputs:   one progress line on stdout; error messages on stderr
# Returns:   0 when the file was compressed or needed no work,
#            1 when ncdump could not read it or the recompression failed
function process_file {
  local data num_vars num_cvars
  local partial="$1.npartial"

  # Assigned separately from `local` so that ncdump's exit status is visible;
  # otherwise an unreadable file gives 0 == 0 below and is reported as "nop".
  if ! data=$(ncdump -sh "$1"); then
    echo "[$2/$3] $1 ERROR (ncdump could not read the header)" >&2
    return 1
  fi

  # grep -c prints 0 but exits non-zero when nothing matches; only the
  # printed count is of interest here.
  num_vars=$(grep -c -- ') ;' <<<"$data") || true
  num_cvars=$(grep -c -- ':_DeflateLevel' <<<"$data") || true

  if [[ $num_vars -ne $num_cvars ]]; then
    # Compress
    echo "[$2/$3] $1 COMPRESSING ($num_vars vars != $num_cvars compressed vars)...."
    if nccopy -d 7 "$1" "$partial" && mv "$partial" "$1"; then
      return 0
    fi
    # Do not leave a half-written copy next to the original.
    rm -f -- "$partial"
    echo "[$2/$3] $1 ERROR (compression failed)" >&2
    return 1
  else
    echo "[$2/$3] $1 nop"
  fi
}
# ---------------------------------------------------------------------------
# Entry point.
#
# Usage: <script> FILE|DIR
#   FILE  process that single file and exit with process_file's status
#   DIR   process every '*.nc' (case-insensitive) entry found below DIR
# ---------------------------------------------------------------------------

# Refuse to start when a required external command is missing.
for prog in "${DEPENDS[@]}"; do
  if ! hash "$prog" 2>/dev/null; then
    echo "Missing $prog dependency" >&2
    exit 1
  fi
done

if [[ $# -lt 1 ]]; then
  echo "Usage: ${0##*/} FILE|DIR" >&2
  exit 1
fi

if [[ -f "$1" ]]; then
  # Single-file mode: report it as item 1 of 1 and propagate the result.
  process_file "$1" 1 1
  exit $?
elif [[ ! -d "$1" ]]; then
  echo "$1 not a dir" >&2
  exit 1
fi

# Walk the tree once, NUL-delimited, so the total and the loop always agree
# (a newline-based count miscounts paths that contain newlines).
files=()
while IFS= read -r -d '' line; do
  files+=("$line")
done < <(find "$1" -iname '*.nc' -print0)
total_files=${#files[@]}

# The job limit does not change during the run: query it once and fall back
# to a single job when nproc is unavailable or prints something unexpected.
max_jobs=$(nproc 2>/dev/null) || max_jobs=1
if ! [[ $max_jobs =~ ^[0-9]+$ ]] || ((max_jobs < 1)); then
  max_jobs=1
fi

curf=0
for line in "${files[@]}"; do
  # Increment first so progress reads [1/N] .. [N/N].
  curf=$((curf + 1))
  max_bg_procs "$max_jobs"
  if [[ $(du -m "$line" | cut -f1) -lt 500 ]]; then
    # du -m reports less than 500: small enough to run in the background
    process_file "$line" "$curf" "$total_files" &
  else
    # Larger files are processed in the foreground, one at a time
    process_file "$line" "$curf" "$total_files"
  fi
done

# Wait for every background job that is still running.
wait
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment