Created
July 22, 2020 20:45
-
-
Save ChrisDowning/a4556a6fc34c31975f70336faaab844d to your computer and use it in GitHub Desktop.
OpenHPC PEARC20 Slurm node startup script for a dynamic cluster (ARM nodes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Environment used to launch compute instances with `aws ec2 run-instances`.
#
# Values are read from the instance metadata endpoint at 169.254.169.254 of
# the machine running this script. The @NAME@ tokens are placeholders;
# presumably they are substituted by the deployment tooling before this
# script is installed -- verify against the template that renders this file.
#
# Assignment and export are kept on separate lines so that a failing command
# substitution is not hidden behind the (always successful) `export` builtin.

# Private IPv4 address of the instance running this script.
SLURM_HEADNODE_IPADDR=$(curl -sS http://169.254.169.254/latest/meta-data/local-ipv4)
export SLURM_HEADNODE_IPADDR

# Region taken from the instance identity document (requires jq).
SLURM_HEADNODE_AWS_REGION=$(curl -sS http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r '.region')
export SLURM_HEADNODE_AWS_REGION

export COMPUTE_SG=@COMPUTESG@
export COMPUTE_SUBNET_ID=@SUBNETID@

# MAC address of this instance, used to look up its security group ids.
AWS_DEFAULT_MAC=$(curl -sS http://169.254.169.254/latest/meta-data/mac)
export AWS_DEFAULT_MAC

AWS_SECURITY=$(curl -sS "http://169.254.169.254/latest/meta-data/network/interfaces/macs/${AWS_DEFAULT_MAC}/security-group-ids")
export AWS_SECURITY

export AWS_AMI=@COMPUTEAMI@
export AWS_KEYNAME=@KEYNAME@

export SLURM_ROOT=/etc/slurm
export SLURM_POWER_LOG=$SLURM_ROOT/power_save.log

# Make sure the aws and jq binaries can be found regardless of the caller's PATH.
export PATH=$PATH:/usr/local/bin:/usr/bin
#######################################
# Launch one c6g.2xlarge EC2 instance with `aws ec2 run-instances`.
# Globals:
#   AWS_AMI, AWS_KEYNAME, COMPUTE_SG, COMPUTE_SUBNET_ID, COMPUTE_PROFILE,
#   SLURM_ROOT, SLURM_HEADNODE_AWS_REGION, SLURM_POWER_LOG (read)
#   NODE_JSON (written): path of a temp file holding the aws output for this
#     launch. Deliberately NOT local -- the caller removes it afterwards.
# Arguments:
#   $1 - value stored in the instance's SlurmLabel tag
#   $2 - private IP address to assign to the instance
# Outputs:
#   Combined stdout+stderr of the aws call is written to $NODE_JSON and
#   appended to $SLURM_POWER_LOG.
# Returns:
#   Exit status of `aws ec2 run-instances`, or 1 if the temp file could not
#   be created.
#######################################
function start_node()
{
  local label=$1
  local private_ip=$2
  local aws_status

  NODE_JSON=$(mktemp) || return 1

  # NOTE(review): COMPUTE_PROFILE is not assigned anywhere in the visible part
  # of this script; it must come from the environment -- confirm.
  aws ec2 run-instances \
    --image-id "$AWS_AMI" \
    --instance-type c6g.2xlarge \
    --key-name "$AWS_KEYNAME" \
    --security-group-ids "$COMPUTE_SG" \
    --subnet-id "$COMPUTE_SUBNET_ID" \
    --private-ip-address "$private_ip" \
    --iam-instance-profile "Name=$COMPUTE_PROFILE" \
    --user-data "file://$SLURM_ROOT/slurm-compute-userdata.sh" \
    --region "$SLURM_HEADNODE_AWS_REGION" \
    --block-device-mappings \
    '[
      {
        "DeviceName":"/dev/sda1",
        "Ebs": {
          "DeleteOnTermination": true,
          "VolumeSize": 340
        }
      }
    ]' \
    --tag-specifications \
    "ResourceType=instance,Tags=[{Key=Name,Value=SlurmCompute},{Key=SlurmLabel,Value=$label}]" \
    |& tee "$NODE_JSON" >> "$SLURM_POWER_LOG"

  # Without this the function would report tee's status and hide a failed launch.
  aws_status=${PIPESTATUS[0]}
  return "$aws_status"
}
#######################################
# Convert a node name into a dotted address string.
# Every "-" becomes "." and the first three characters are dropped,
# e.g. "ip-10-0-0-5" -> "10.0.0.5".
# Arguments:
#   $1 - node name
# Outputs:
#   The converted string on stdout, followed by a newline.
#######################################
function nametoip()
{
  local name=${1-}

  # Pure parameter expansion: no word splitting or globbing of the input and
  # no tr/cut subprocesses.
  name=${name//-/.}
  printf '%s\n' "${name:3}"
}
# Entry point: record the invocation, expand the node expression in $1 into
# individual host names, then launch an instance for each and tell Slurm
# which address the node has.
printf '%s Resume invoked %s %s\n' "$(date)" "$0" "$*" >> "$SLURM_POWER_LOG"

# $1 is quoted because a bracketed range such as "ip-10-0-0-[5-7]" would
# otherwise be subject to pathname expansion before scontrol sees it.
hosts=$(scontrol show hostnames "$1")
mapfile -t host_list <<< "$hosts"

for hostname in "${host_list[@]}"; do
  # An empty expansion yields a single blank entry; skip it.
  [[ -n "$hostname" ]] || continue

  private_ip=$(nametoip "$hostname")
  start_node "$hostname" "$private_ip"
  scontrol update "nodename=$hostname" "nodehostname=$hostname" "nodeaddr=$private_ip"

  # start_node leaves its aws output in the temp file named by NODE_JSON.
  rm -f -- "$NODE_JSON"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment