Skip to content

Instantly share code, notes, and snippets.

@jasonrig
Created August 9, 2019 03:47
Show Gist options
  • Save jasonrig/8a1f0b297734a4dc7ab2e1c1a79ee463 to your computer and use it in GitHub Desktop.
Example SLURM job script to start a Spark cluster
#!/bin/bash
# Example SLURM heterogeneous ("pack") job that provisions a standalone
# Spark cluster: pack group 0 runs the master, pack group 1 the workers,
# and pack group 2 the driver program.
#SBATCH --job-name spark-cluster
#SBATCH --account=qh82
#SBATCH --time=02:00:00
# --- Master resources ---
#SBATCH --nodes=1
#SBATCH --mem-per-cpu=1G
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=1
# --- Worker resources ---
# NOTE(review): "packjob" separates heterogeneous-job components on older
# Slurm releases; newer Slurm spells this directive "hetjob" (and the
# *_PACK_GROUP_* env vars become *_HET_GROUP_*) — confirm against the
# cluster's Slurm version.
#SBATCH packjob
#SBATCH --nodes=4
#SBATCH --mem-per-cpu=4G
#SBATCH --cpus-per-task=2
#SBATCH --ntasks-per-node=1
# --- Driver resources ---
#SBATCH packjob
#SBATCH --nodes=1
#SBATCH --mem-per-cpu=4G
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=1
# Make the user's private modulefiles visible, then load Spark.
# "$HOME" is quoted so a home path containing spaces cannot word-split.
module use "$HOME/modulefiles" --append
module load spark

# Unique per-run directory name under the submission directory.
JOB="$SLURM_JOB_NAME-$SLURM_JOB_ID"

# The batch script executes on the first node of pack group 0, which is
# where the Spark master will listen; 7077 is Spark's standalone default.
export MASTER_URL="spark://$(hostname):7077"
export SPARK_LOG_DIR="$SLURM_SUBMIT_DIR/$JOB/logs"
export SPARK_WORKER_DIR="$SLURM_SUBMIT_DIR/$JOB/worker"
export START_WORKER_SCRIPT="$SLURM_SUBMIT_DIR/$JOB/.worker.sh"
export START_MASTER_SCRIPT="$SLURM_SUBMIT_DIR/$JOB/.master.sh"
# Node-local scratch for shuffle/spill data (removed by the worker's trap).
export SPARK_LOCAL_DIRS="$TMPDIR/$JOB"

# Size each worker from pack group 1's allocation.
# NOTE(review): SLURM_MEM_PER_CPU_* is presumably in MB, hence the literal
# "M" suffix — confirm against the site's Slurm configuration.
export SPARK_WORKER_CORES="$SLURM_CPUS_PER_TASK_PACK_GROUP_1"
export SPARK_MEM="$(( SLURM_MEM_PER_CPU_PACK_GROUP_1 * SLURM_CPUS_PER_TASK_PACK_GROUP_1 ))M"
export SPARK_DAEMON_MEMORY="$SPARK_MEM"
export SPARK_WORKER_MEMORY="$SPARK_MEM"
export SPARK_EXECUTOR_MEMORY="$SPARK_MEM"
# Distinguishes this job's daemons in Spark's pid/log file names.
export SPARK_IDENT_STRING="$SLURM_JOBID"
export SPARK_TOTAL_EXECUTOR_CORES=$(( SLURM_NTASKS_PACK_GROUP_1 * SLURM_CPUS_PER_TASK_PACK_GROUP_1 ))

mkdir -p "$SPARK_LOG_DIR" "$SPARK_WORKER_DIR"
# Generate the per-node worker launcher. The quoted "EOF" delimiter keeps
# the $-expansions below literal: they are resolved when the script runs
# on each worker node, from the environment exported above.
cat << "EOF" > "$START_WORKER_SCRIPT"
#!/bin/bash
# Remove node-local scratch however the worker exits.
function finish {
rm -rf "$SPARK_LOCAL_DIRS"
}
trap finish EXIT
mkdir -p "$SPARK_LOCAL_DIRS"
# Keep the worker in the foreground so srun supervises its lifetime.
export SPARK_NO_DAEMONIZE=1
# "$@" (not unquoted $*) preserves each argument — here the master URL —
# as a single word. NOTE(review): start-slave.sh was renamed
# start-worker.sh in Spark 3.1 — confirm against the installed version.
start-slave.sh "$@"
EOF
# Generate the master-side launcher. As with the worker script, the quoted
# "EOF" defers all $-expansion to when the script runs on the master node.
cat << "EOF" > "$START_MASTER_SCRIPT"
#!/bin/bash
# DRIVER_SCRIPT is not set anywhere in this job script: the submitter must
# export it (e.g. sbatch --export=ALL,DRIVER_SCRIPT=/path/to/driver).
# Fail fast with a clear message instead of srun-ing an empty command.
: "${DRIVER_SCRIPT:?DRIVER_SCRIPT must be exported to the path of the driver program}"
# start-master.sh daemonizes, so it returns at once and the driver below
# runs while the master serves in the background.
start-master.sh
srun --pack-group=2 \
--job-name="spark-driver" \
--output="$SPARK_LOG_DIR/spark-driver.out" \
"$DRIVER_SCRIPT"
# Driver finished: tear down the entire heterogeneous job (master,
# workers, and the batch script itself).
scancel ${SLURM_JOB_ID}
EOF
chmod +x "$START_WORKER_SCRIPT" "$START_MASTER_SCRIPT"

# Launch the master (pack group 0) in the background; its script also runs
# the driver and finally scancels the whole job.
srun --pack-group=0 \
--job-name="spark-master" \
"$START_MASTER_SCRIPT" &

# Launch the workers (pack group 1) in the foreground, passing the master
# URL; --label prefixes each output line with the originating task id.
# This srun only returns once the job is being torn down.
srun --pack-group=1 \
--job-name="spark-worker" \
--output="$SPARK_LOG_DIR/spark-workers.out" \
--label \
"$START_WORKER_SCRIPT" "$MASTER_URL"

# Reap the backgrounded master srun so the batch script does not exit with
# it still unwaited-for and its exit status silently discarded.
wait
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment