Skip to content

Instantly share code, notes, and snippets.

@giannisdaras
Last active August 30, 2024 08:20
Show Gist options
  • Save giannisdaras/444805d996f274bca162c529938b81e8 to your computer and use it in GitHub Desktop.
Save giannisdaras/444805d996f274bca162c529938b81e8 to your computer and use it in GitHub Desktop.
Custom Slurm commands
# Submit a short smoke-test job through sbatch.
#
# Arguments:
#   $1  - number of nodes to request (defaults to 1 when omitted or empty)
#   $2+ - extra options, forwarded verbatim to sbatch ahead of --wrap
# Outputs:
#   Whatever sbatch prints.
# Returns:
#   The exit status of sbatch.
sbatch_test() {
  local nodes=${1:-1}

  # Drop the node count only when one was actually passed: a bare `shift`
  # with no positional parameters returns non-zero.
  if [ "$#" -gt 0 ]; then
    shift
  fi

  # The wrapped script is single-quoted here so $(hostname) is handed to
  # sbatch unexpanded, and double-quoted inside so it IS expanded when the
  # wrapped script itself runs (single quotes inside would print it literally).
  sbatch \
    --nodes="$nodes" \
    --ntasks-per-node=1 \
    --time=01:00:00 \
    --job-name="test_job" \
    --output="job_%j.out" \
    "$@" \
    --wrap='echo "Job started on $(hostname)"; sleep 60; echo "Job finished"'
}
# Report how many whole hours remain until a Slurm job is expected to start,
# using the start time printed by `squeue --start`.
#
# Arguments:
#   $1 - job ID to look up
# Outputs:
#   "<N> hours until the job starts" on success, otherwise a one-line
#   explanation of what went wrong (both on stdout).
# Returns:
#   0 on success; 1 when the job ID is missing, squeue reports no usable
#   start time, or the start time cannot be parsed by `date -d`.
job_start() {
  local job_id=${1:-}
  if [ -z "$job_id" ]; then
    echo "Please provide a job ID as an argument."
    return 1
  fi

  # Get the estimated start time
  local start_time
  start_time=$(squeue -j "$job_id" --start --noheader --format="%S")
  if [ -z "$start_time" ] || [ "$start_time" = "N/A" ]; then
    echo "No valid start time available for job $job_id. The job may not exist, may have already started, or may be waiting in the queue."
    return 1
  fi

  # Convert start time to epoch. The declaration is kept separate from the
  # assignment: `local var=$(cmd)` reports the status of `local` (always 0),
  # which would hide a failed parse.
  local start_epoch
  if ! start_epoch=$(date -d "$start_time" +%s 2>/dev/null); then
    echo "Unable to parse the start time: $start_time"
    return 1
  fi

  # Get current time in epoch
  local current_epoch
  current_epoch=$(date +%s)

  # Difference in seconds, then whole hours (integer division truncates)
  local diff_seconds=$((start_epoch - current_epoch))
  local hours=$((diff_seconds / 3600))
  echo "$hours hours until the job starts"
}
# Print how many nodes of a partition are taken by running jobs, and the
# resulting running/total ratio.
#
# Arguments:
#   $1 - partition (queue) name; defaults to "gh"
# Outputs:
#   Five lines on stdout: queue name, running node count, total node count,
#   the ratio with two decimals, and the raw "(running / total)" fraction.
#   When no nodes are found, a diagnostic on stderr instead.
# Returns:
#   0 on success; 1 when the partition reports no nodes.
node_usage() {
  local queue_name="${1:-gh}"
  local total_nodes running_nodes ratio

  # Get total nodes in the queue. "sum + 0" makes awk print 0 rather than an
  # empty line when there are no input rows.
  total_nodes=$(sinfo -h -p "$queue_name" -o "%D" \
    | awk '{sum += $1} END {print sum + 0}')

  # Get running nodes in the queue (0 when nothing is running)
  running_nodes=$(squeue -a -h -t running -p "$queue_name" -o "%D" \
    | awk '{sum += $1} END {print sum + 0}')

  # Without any nodes the ratio is undefined; bail out instead of letting
  # awk abort on a division by zero.
  if [ "${total_nodes:-0}" -eq 0 ]; then
    echo "No nodes found for queue $queue_name; cannot compute a usage ratio." >&2
    return 1
  fi

  # Calculate the ratio; values are passed with -v rather than spliced into
  # the awk program text.
  ratio=$(awk -v running="$running_nodes" -v total="$total_nodes" \
    'BEGIN {printf "%.2f", running / total}')

  echo "Queue: $queue_name"
  echo "Running Nodes: $running_nodes"
  echo "Total Nodes: $total_nodes"
  echo "Usage Ratio: $ratio"
  echo "($running_nodes / $total_nodes)"
}
# List the five users holding the most nodes in a partition, counting only
# jobs whose squeue state code is "R".
#
# Arguments:
#   $1 - partition (queue) name; defaults to "gh"
# Outputs:
#   Up to five ranked lines of the form "<rank>. User: <name>, Nodes: <count>".
top_users() {
  local partition="${1:-gh}"

  squeue -a -h -o "%u %D %t" -p "$partition" \
    | awk '
        # Columns: user, node count, state code. Tally nodes per user.
        $3 == "R" { nodes_by_user[$1] += $2 }
        END {
          for (name in nodes_by_user) {
            print name, nodes_by_user[name]
          }
        }
      ' \
    | sort -r -n -k 2 \
    | head -n 5 \
    | awk '{ printf "%d. User: %s, Nodes: %s\n", NR, $1, $2 }'
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment