#!/bin/bash
# Azure bench deployment script for the 600 nodes on Azure with 8.2.0
# Source: GitHub gist paraita/4a48c78cc9e6afeaa6eb6640c36e6434
# (last active July 12, 2018 12:19).
# NOTE(review): the gist page flagged this file as containing bidirectional
# Unicode text; open it in an editor that reveals hidden characters.

# Trace every command as it runs.
# NOTE(review): the trace includes expanded values such as the SAS key and the
# node credentials read from CustomData further down — keep the log private.
set -x

# SSH account and port injected into the PAMR-over-SSH JVM properties.
SSH_USERNAME="hpcpeps"
SSH_PORT=22
#######################################
# Post a diagnostic message to the "debug" Azure Storage queue.
# Globals:   STORAGEACCOUNT (read) - storage account that hosts the queue
#            SASKEY (read)         - SAS query string appended to the URL
# Arguments: $1 - message text
# Outputs:   whatever curl prints for the request
# Returns:   exit status of curl
#######################################
debug() {
  local content message
  # "$1" is quoted so runs of whitespace and glob characters in the message
  # are sent verbatim; the trailing newline matches what `echo` produced.
  content=$(printf '%s\n' "$1" | base64 -w 0)
  message="<QueueMessage><MessageText>${content}</MessageText></QueueMessage>"
  curl -X POST -d "$message" \
    "https://${STORAGEACCOUNT}.queue.core.windows.net/debug/messages?${SASKEY}"
}
# Downloads and installs the ProActive Node as a systemd service.
# Everything is staged under /opt/proactive; later steps write relative paths
# (node.jar, the JRE tarball), so do not continue from the wrong directory.
mkdir -p /opt/proactive
if ! cd /opt/proactive; then
  >&2 echo "Fatal error: Unable to enter /opt/proactive"
  halt
fi

# Command-line tools installed before the rest of the script runs.
prereq_packages=(wget curl jq vim tree)

# Pick the package manager by probing for apt-get; `command -v` answers the
# same question as running `apt-get --help` without dumping help into the log.
if command -v apt-get >/dev/null 2>&1; then
  apt-get update
  if ! apt-get install -y "${prereq_packages[@]}"; then
    # One retry after a short pause, refreshing the package index again.
    sleep 10
    apt-get update
    if ! apt-get install -y "${prereq_packages[@]}"; then
      >&2 echo "Fatal error: Unable to run apt-get"
      halt
    fi
  fi
else
  # No apt-get: use yum, enabling the EPEL repository first.
  yum install epel-release -y
  if ! yum -y install "${prereq_packages[@]}"; then
    # One retry after a short pause.
    sleep 10
    if ! yum -y install "${prereq_packages[@]}"; then
      >&2 echo "Fatal error: Unable to run yum"
      halt
    fi
  fi
fi
# Fetch the JRE 8u131 tarball (skipped when the file is already present),
# unpack it into the current directory and expose it as /opt/proactive/java,
# the path the generated service unit uses for its java binary.
wget --no-clobber https://s3.amazonaws.com/ci-materials/Latest_jre/jre-8u131-linux-x64.tar.gz
tar zxf jre-8u131-linux-x64.tar.gz
# -f/-n replace an existing link in place, so re-running the script neither
# fails nor creates a nested link inside the previously linked directory.
ln -sfn /opt/proactive/jre1.8.0_131 /opt/proactive/java
# Decode the base64-encoded CustomData document (JSON) and export the
# settings it carries for the rest of the script.
JSONCONFIG=$(base64 -d /var/lib/waagent/CustomData)
printf '%s\n' "$JSONCONFIG"

# Print the raw (unquoted) result of jq filter $1 applied to $JSONCONFIG.
# Note: jq -r prints the literal string "null" when the key is absent.
config_value() {
  printf '%s\n' "$JSONCONFIG" | jq -r "$1"
}

# Filter stdin, deleting every "http://" / "https://" occurrence
# (the pattern also tolerates a missing colon).
strip_url_scheme() {
  sed 's/http[s]\?:\?\/\{2\}//g'
}

# HOST and RMURL are both the resource-manager URL without its scheme.
HOST=$(config_value '.rmurl' | strip_url_scheme)
RMURL=$HOST
CREDVALUE=$(config_value '.credentials')
NODESOURCENAME=$(config_value '.nodesourcename')
STORAGEACCOUNT=$(config_value '.storageaccount')
SASKEY=$(config_value '.saskey')
USERCUSTOMSCRIPTURL=$(config_value '.usercustomscripturl')
# NOTE(review): the key is spelled "exernalstorageaccount" (no "t"). It is kept
# as-is because it must match whatever produces CustomData — confirm there
# before renaming.
EXT_STORACC_RAW=$(config_value '.exernalstorageaccount')
# Account name: everything before the first ':'. The value is quoted because
# it may contain '?', which an unquoted expansion would treat as a glob.
EXT_STOR_ACC=$(printf '%s\n' "$EXT_STORACC_RAW" | sed 's/\:.*$//')
# SAS part: drop everything up to and including the last ":?" (in a basic
# regular expression the '?' is a literal question mark, not "optional").
EXT_STOR_ACC_SAS=$(printf '%s\n' "$EXT_STORACC_RAW" | sed 's/^.*\:?//')
export HOST RMURL CREDVALUE NODESOURCENAME STORAGEACCOUNT SASKEY
export USERCUSTOMSCRIPTURL EXT_STORACC_RAW EXT_STOR_ACC EXT_STOR_ACC_SAS

debug "Azure scaleset script $HOSTNAME INFO: CustomData read properly (as I can write this message)"
#######################################
# Download and run the optional user-supplied customization script.
# Globals:   USERCUSTOMSCRIPTURL (read) - script URL; empty, or the literal
#            "null" that `jq -r` prints for an absent key, means "no script"
# Outputs:   writes ./user_custom_script.sh in the current directory
# On a non-zero exit of the user script: reports the real exit status to the
# debug queue, then halts the machine.
#######################################
run_user_custom_script() {
  local rc
  if [ -z "$USERCUSTOMSCRIPTURL" ] || [ "$USERCUSTOMSCRIPTURL" = "null" ]; then
    return 0
  fi
  curl -X GET "$USERCUSTOMSCRIPTURL" > user_custom_script.sh
  chmod +x user_custom_script.sh
  ./user_custom_script.sh
  # Save the status immediately: any later command, including the `[` test
  # below, overwrites $? (the message used to report 0 for that reason).
  rc=$?
  if [ "$rc" -ne 0 ]; then
    debug "Azure scaleset script $HOSTNAME FATAL: User custom script exited with error: $rc"
    halt #sleep 9999 #exit -1
  fi
}

run_user_custom_script
# PNP: JVM properties for the PNP protocol (defined as an alternative; the
# selection below uses PAMR).
PNP_PROPERTIES="-Dproactive.net.nolocal=true -Dproactive.communication.protocol=pnp"

# PAMR: JVM properties for PAMR tunnelled over SSH.
# Host part of $HOST, i.e. the text before the first ':'.
PAMRHOST=$(printf '%s\n' "$HOST" | cut -d: -f1)
# One option per element; joined below with single spaces (default IFS), which
# yields the same one-line string the backslash-continued literal produced.
pamr_opts=(
  "-Dproactive.net.nolocal=false"
  "-Dproactive.communication.protocol=pamr"
  "-Dproactive.pamr.router.address=$RMURL"
  "-Dproactive.pamrssh.address=localhost"
  "-Dproactive.pamrssh.connect_timeout=300000"
  "-Dproactive.pamr.socketfactory=ssh"
  "-Dproactive.pamrssh.port=$SSH_PORT"
  "-Dproactive.pamrssh.username=$SSH_USERNAME"
  "-Dproactive.pamrssh.key_directory=/opt/proactive/.ssh"
  "-Dproactive.process.builder.cleanup.time.seconds=120"
  "-Dpa.rm.node.source.ping.frequency=75000"
)
PAMR_PROPERTIES="${pamr_opts[*]}"

# Properties actually passed to the node JVM in the generated service unit.
PROPERTIES=$PAMR_PROPERTIES
debug "Azure scaleset script $HOSTNAME INFO: $PROPERTIES"
# Base URL of the blob container that holds node.jar and the SSH key pair.
nodefiles_url="https://${EXT_STOR_ACC}.blob.core.windows.net/nodefiles"

#######################################
# Download one blob from the "nodefiles" container.
# Globals:   nodefiles_url, EXT_STOR_ACC_SAS (read)
# Arguments: $1 - blob name
#            $2 - destination file
# Returns:   curl's exit status; --fail turns HTTP errors (status >= 400) into
#            a non-zero status instead of saving the error body as the blob
#######################################
fetch_nodefile() {
  curl --fail -X GET "${nodefiles_url}/$1?${EXT_STOR_ACC_SAS}" > "$2"
}

# Getting node.jar
#wget --timestamping --tries=10 --timeout=10 --waitretry=10 http://$HOST/rest/node.jar
if ! fetch_nodefile node820.jar node.jar; then
  debug "Azure scaleset script $HOSTNAME FATAL: Unable to download node.jar"
  halt #sleep 9999 #exit -1
fi

# Getting the key
mkdir -p /opt/proactive/.ssh
# The subshell's umask makes the key files owner-only from the moment they are
# created, rather than only after a later chmod.
if ! (umask 077 && fetch_nodefile azure_rsa /opt/proactive/.ssh/id_rsa); then
  debug "Azure scaleset script $HOSTNAME FATAL: Unable to download the SSH private key"
  halt #sleep 9999 #exit -1
fi
# A missing public key is only reported, not treated as fatal.
if ! (umask 077 && fetch_nodefile azure_rsa.pub /opt/proactive/.ssh/id_rsa.pub); then
  debug "Azure scaleset script $HOSTNAME WARN: Unable to download the SSH public key"
fi
# Account and group that own /opt/proactive; the generated service unit runs
# the node as this user.
typeset -r pepsuser=hpcpeps
typeset -i pepsuid=8298
typeset -r pepsgroup=peps
typeset -i pepsgid=16000

# Create the group and user only when they are missing, so a re-run does not
# print "already exists" errors.
getent group "$pepsgroup" >/dev/null || groupadd -g "$pepsgid" "$pepsgroup"
getent passwd "$pepsuser" >/dev/null || useradd -u "$pepsuid" -g "$pepsgid" "$pepsuser"

#chown -R activeeon /opt/proactive
# Use the names declared above instead of repeating them as literals.
chown -R "${pepsuser}:${pepsgroup}" /opt/proactive
# Restrict the downloaded key pair to its owner.
chmod 600 /opt/proactive/.ssh/id_rsa
chmod 600 /opt/proactive/.ssh/id_rsa.pub
# Read the Azure "nodeconfig" queue to get the node name. The message is
# requested with a 300-second visibility timeout.
# NOTE(review): the original comment said deletion is performed on the NS side
# once nodes are registered, yet this script also issues a DELETE for the
# message before starting the service — confirm which side owns deletion.
msg=$(curl "https://${STORAGEACCOUNT}.queue.core.windows.net/nodeconfig/messages?visibilitytimeout=300&${SASKEY}")

# Print the text content of every <$1>…</$1> element found in $msg.
xml_field() {
  printf '%s\n' "$msg" | grep -oP "<$1>\K[^<]+"
}

# MessageText is base64-encoded JSON; MessageId and PopReceipt are needed
# later to delete the message.
JSONMSG=$(xml_field MessageText | base64 -d)
msgId=$(xml_field MessageId)
popReceipt=$(xml_field PopReceipt)
NODEBASENAME=$(printf '%s\n' "$JSONMSG" | jq -r '.nodebasename')
NODEINSTANCES=$(printf '%s\n' "$JSONMSG" | jq -r '.nodeinstances')

# jq -r prints the literal "null" for an absent key, so treat it like an
# empty result rather than using "null" as the node name.
if [ -z "$NODEBASENAME" ] || [ "$NODEBASENAME" = "null" ]; then
  debug "Azure scaleset script $HOSTNAME FATAL: Unable to retrieve node configuration from 'nodeconfig' queue"
  halt #sleep 9999 #exit -1
fi
# Record this host in the 'nodesperhost' table: one entity keyed by host name
# and node base name, carrying the number of node instances.
NOW=$(date)
debug "$HOSTNAME INFO: Start filling the Table on $NOW"
# NOTE(review): the payload uses single-quoted strings, which is not strict
# JSON; left unchanged — confirm the Table service accepts it as sent.
ENTITY="{'PartitionKey':'$HOSTNAME','RowKey':'$NODEBASENAME', 'NodesCount':'$NODEINSTANCES'}"
# Without --fail, curl's exit status reflects transport errors only; an HTTP
# error response still counts as success here.
if ! curl -H "Content-Type: application/json" -d "$ENTITY" -X POST \
  "https://${STORAGEACCOUNT}.table.core.windows.net/nodesperhost?${SASKEY}"; then
  debug "$HOSTNAME FATAL: Unable to register the host into 'nodesperhost' table"
  halt #sleep 9999 #exit -1
fi
NOW=$(date)
debug "$HOSTNAME INFO: Terminated to fill the Table on $NOW"
# Generate proactive-node service description.
# The here-document delimiter is unquoted, so the ${...} references are
# expanded now and the unit file contains their current values.
cat > /etc/systemd/system/proactive-node.service <<EOL
[Unit]
After=sshd.service
[Service]
WorkingDirectory=/opt/proactive
ExecStart=/opt/proactive/java/bin/java -jar /opt/proactive/node.jar ${PROPERTIES} -v ${CREDVALUE} -w ${NODEINSTANCES} -r pamr://0/ -n ${NODEBASENAME} -s ${NODESOURCENAME}
User=hpcpeps
[Install]
WantedBy=default.target
EOL
chmod 664 /etc/systemd/system/proactive-node.service
# Install ProActive Node Service
systemctl daemon-reload
systemctl enable proactive-node.service
sysctl fs.inotify.max_user_watches=524288 # Support for large number of nodes

# Until here, if script fails, another host could reuse this nodeconfig message.
# Once the service is started, if something goes wrong, this node configuration will not be reusable from another host
# Percent-encode the pop receipt before placing it in the query string, so
# characters such as '+' or '/' are not reinterpreted by the URL parser.
# Falls back to the raw value if jq cannot encode it.
encoded_receipt=$(jq -rn --arg v "$popReceipt" '$v | @uri') || encoded_receipt=$popReceipt
# curl's status reflects transport errors only (no --fail): an HTTP error
# response does not trigger the halt below.
if ! curl -X DELETE "https://${STORAGEACCOUNT}.queue.core.windows.net/nodeconfig/messages/${msgId}?popreceipt=${encoded_receipt}&${SASKEY}"; then
  debug "$HOSTNAME FATAL: Unable to delete nodeconfig from the queue"
  halt #sleep 9999 #exit -1
fi

# Debug message posted on debug queue
IP=$(hostname -i)
debug "Azure scaleset script $HOSTNAME INFO: Service is ready to start: $IP , $NODEBASENAME , $NODEINSTANCES"

# Let's start the service
systemctl start proactive-node.service
# (Gist page footer, not part of the script: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.")