Last active
January 18, 2023 18:42
-
-
Save jackfrancis/911139fe51bdd6c7b8121e9aa41e536e to your computer and use it in GitHub Desktop.
Build large AKS cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Required inputs. Each of these environment variables must be non-empty;
# the script stops at the first one that is missing.
for required_var in RESOURCE_GROUP REGION SUBSCRIPTION_ID NAME; do
  # ${!required_var} is indirect expansion: the value of the variable whose
  # name is stored in required_var.
  if [ -z "${!required_var}" ]; then
    echo "must provide a ${required_var} env var"
    exit 1
  fi
done
unset required_var
# Optional tunables. Every value can be overridden from the environment;
# the fallback after ":-" applies when the variable is unset or empty.
export KUBERNETES_VERSION="${KUBERNETES_VERSION:-1.21.7}"
# The system pool SKU has its own override. It must not be derived from
# USER_POOL_SKU, otherwise a user pool override silently changes the system pool.
export SYSTEM_POOL_SKU="${SYSTEM_POOL_SKU:-Standard_D16s_v3}"
export USER_POOL_SKU="${USER_POOL_SKU:-Standard_D2s_v3}"
export NUM_USER_NODEPOOLS="${NUM_USER_NODEPOOLS:-5}"
export NODES_PER_SYSTEM_POOL="${NODES_PER_SYSTEM_POOL:-10}"
export NODES_PER_USER_POOL="${NODES_PER_USER_POOL:-998}"
# Resource group that the VMSS health check lists scale sets from.
export AKS_IAAS_RESOURCE_GROUP="MC_${RESOURCE_GROUP}_${NAME}_${REGION}"
# No default: an empty UNDERLAY means no ControlPlaneUnderlay header is sent.
export UNDERLAY="${UNDERLAY}"
export CLUSTER_VNET="${CLUSTER_VNET:-${NAME}vnet}"
export MAX_PODS="${MAX_PODS:-12}"
export ENABLE_SWIFT="${ENABLE_SWIFT:-false}"
export ENABLE_CALICO="${ENABLE_CALICO:-false}"
export ENABLE_NAT_GATEWAY="${ENABLE_NAT_GATEWAY:-true}"
export OUTBOUND_NAT_GATEWAY_IPS="${OUTBOUND_NAT_GATEWAY_IPS:-8}"
# TAGS_ARG is expanded unquoted by the az commands further down, so it is
# kept as a single string that word-splits into "--tags" plus the tag list.
if [ "$TAGS" != "" ]; then
  TAGS_ARG="--tags ${TAGS}"
fi
# Deletes the named VMSS instances, one `az vmss delete-instances` call each.
# Arguments:
#   $1   - VMSS name
#   $2   - extra flag appended to the delete call ("--no-wait"), or "" for none
#   $3.. - instance names; the text after the last "_" is passed as the instance ID
# Side effects (on the caller's variables, through bash dynamic scoping):
#   HAS_FAILED_STATE_INSTANCE is set to "true" for every instance processed;
#   NUM_DELETED_INSTANCES is incremented for every delete call that succeeds.
_vmssDeleteInstances() {
  local vmss="$1" extra_flag="$2" instance
  shift 2
  for instance in "$@"; do
    HAS_FAILED_STATE_INSTANCE="true"
    echo "$(date) Deleting VMSS $vmss instance $instance"
    if az vmss delete-instances -n "$vmss" -g "$AKS_IAAS_RESOURCE_GROUP" --instance-id "${instance##*_}" ${extra_flag:+"$extra_flag"}; then
      sleep 1
      NUM_DELETED_INSTANCES=$((NUM_DELETED_INSTANCES + 1))
    else
      # Back off after a failed delete call before moving to the next instance.
      sleep 30
    fi
  done
}

# Continually look for non-Succeeded VMSS instances and delete them.
# Runs forever (intended to be started in the background). Every pass walks
# each VMSS in $AKS_IAAS_RESOURCE_GROUP and, when the VMSS itself is in a
# terminal state (Succeeded/Failed), deletes:
#   1. instances whose provisioningState is "Failed",
#   2. instances whose "vmssCSE" resource is "Failed",
#   3. instances with a Microsoft.AKS-published resource that is not
#      Succeeded/Creating/Deleting.
# If anything was deleted it waits for the VMSS to reach a terminal state again
# and re-applies the capacity observed at the start of the pass.
# Globals: AKS_IAAS_RESOURCE_GROUP (read)
vmssHealthCheck() {
  local VMSS VMSS_JSON VMSS_PROVISIONING_STATE VMSS_CAPACITY
  local NUM_DELETED_INSTANCES HAS_FAILED_STATE_INSTANCE
  local failed_instance_filter='.[] | select(.provisioningState == "Failed") | .name'
  local failed_cse_filter='.[].resources[] | select(.name == "vmssCSE" and .provisioningState == "Failed") | .id'
  local unhealthy_aks_ext_filter='.[].resources[] | select(.publisher == "Microsoft.AKS" and .provisioningState != "Succeeded" and .provisioningState != "Creating" and .provisioningState != "Deleting") | .id'
  local terminal_state_filter='. | select(.provisioningState == "Succeeded" or .provisioningState == "Failed") | .name'

  while true; do
    echo "$(date) Starting VMSS Health Remediation loop"
    # Word splitting of the name list is intentional here and below.
    for VMSS in $(az vmss list -g "$AKS_IAAS_RESOURCE_GROUP" | jq -r '.[] | .name'); do
      NUM_DELETED_INSTANCES=0
      # One `az vmss show` call supplies both the state and the capacity, so no
      # second list call is spent per VMSS.
      VMSS_JSON=$(az vmss show -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS")
      VMSS_PROVISIONING_STATE=$(jq -r '.provisioningState' <<<"$VMSS_JSON")
      echo "$(date) VMSS $VMSS has a ProvisioningState of $VMSS_PROVISIONING_STATE"
      VMSS_CAPACITY=$(jq -r '.sku.capacity' <<<"$VMSS_JSON")
      echo "$(date) VMSS $VMSS has a current capacity of $VMSS_CAPACITY"

      if [[ "$VMSS_PROVISIONING_STATE" == "Succeeded" || "$VMSS_PROVISIONING_STATE" == "Failed" ]]; then
        HAS_FAILED_STATE_INSTANCE="false"
        # Only the first pass issues its deletes with --no-wait; the other two
        # block on each delete, as the original loops did.
        # shellcheck disable=SC2046
        _vmssDeleteInstances "$VMSS" "--no-wait" \
          $(az vmss list-instances -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r "$failed_instance_filter")
        # shellcheck disable=SC2046
        _vmssDeleteInstances "$VMSS" "" \
          $(az vmss list-instances -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r "$failed_cse_filter" | awk -F'/' '{print $9}')
        # shellcheck disable=SC2046
        _vmssDeleteInstances "$VMSS" "" \
          $(az vmss list-instances -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r "$unhealthy_aks_ext_filter" | awk -F'/' '{print $9}')

        if [[ "$HAS_FAILED_STATE_INSTANCE" == "true" ]]; then
          echo "$(date) Waiting for $VMSS to reach a terminal ProvisioningState after failed instances were deleted..."
          sleep 30
          until [[ $(az vmss show -g "$AKS_IAAS_RESOURCE_GROUP" -n "$VMSS" | jq -r "$terminal_state_filter") ]]; do
            echo "$(date) Waiting for $VMSS to reach a terminal ProvisioningState after failed instances were deleted..."
            sleep 30
          done
          echo "$(date) VMSS $VMSS is in a terminal state after failed instances were deleted!"
        fi
      fi

      if [ "$NUM_DELETED_INSTANCES" -gt 0 ]; then
        # Only re-apply a capacity that was actually read; an empty or "null"
        # value would produce a malformed scale command.
        if [[ "$VMSS_CAPACITY" =~ ^[0-9]+$ ]]; then
          echo "$(date) Instances were deleted from VMSS $VMSS, ensuring that capacity is set to $VMSS_CAPACITY"
          az vmss scale --new-capacity "$VMSS_CAPACITY" -n "$VMSS" -g "$AKS_IAAS_RESOURCE_GROUP" --no-wait
        else
          echo "$(date) Skipping capacity reset for VMSS $VMSS: capacity could not be determined" >&2
        fi
      fi
    done
    sleep 150
  done
}
# Create the resource group, the optional vnet/subnets, and the AKS cluster,
# then block until the cluster reports a Succeeded provisioning state.
az group create -n "$RESOURCE_GROUP" -l "$REGION"

# With ENABLE_SWIFT the cluster gets a pre-created vnet with separate subnets
# for nodes ("vms") and pods ("pods").
if [ "$ENABLE_SWIFT" == "true" ]; then
  az network vnet create -g "$RESOURCE_GROUP" --name "$CLUSTER_VNET" --address-prefixes 10.0.0.0/8 -o none
  az network vnet subnet create -g "$RESOURCE_GROUP" --vnet-name "$CLUSTER_VNET" --name vms --address-prefixes 10.240.0.0/16 -o none
  az network vnet subnet create -g "$RESOURCE_GROUP" --vnet-name "$CLUSTER_VNET" --name pods --address-prefixes 10.241.0.0/16 -o none
fi

# A caller-supplied AKS_OPTIONS is used verbatim; otherwise it is derived from
# the feature toggles.
if [ "$AKS_OPTIONS" == "" ]; then
  if [ "$ENABLE_SWIFT" == "true" ]; then
    AKS_OPTIONS+=" --vnet-subnet-id /subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets/vms --pod-subnet-id /subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets/pods"
  fi
  # --aks-custom-headers takes one comma-separated list of key=value pairs.
  # Collect all headers first and pass the flag once, so enabling both
  # UNDERLAY and ENABLE_CALICO does not drop one of them.
  aks_custom_headers=""
  if [ "$UNDERLAY" != "" ]; then
    aks_custom_headers+="${aks_custom_headers:+,}ControlPlaneUnderlay=${UNDERLAY}"
  fi
  if [ "$ENABLE_CALICO" == "true" ]; then
    aks_custom_headers+="${aks_custom_headers:+,}EnableNetworkPluginNone=true"
  fi
  if [ -n "$aks_custom_headers" ]; then
    AKS_OPTIONS+=" --aks-custom-headers ${aks_custom_headers}"
  fi
  unset aks_custom_headers
  if [ "$ENABLE_NAT_GATEWAY" == "true" ]; then
    AKS_OPTIONS+=" --outbound-type managedNATGateway --nat-gateway-managed-outbound-ip-count ${OUTBOUND_NAT_GATEWAY_IPS}"
  fi
fi

# TAGS_ARG and AKS_OPTIONS are deliberately unquoted: each holds several
# space-separated arguments that must be word-split.
# shellcheck disable=SC2086
az aks create -g "$RESOURCE_GROUP" -n "$NAME" $TAGS_ARG --kubernetes-version "$KUBERNETES_VERSION" -l "$REGION" -c "$NODES_PER_SYSTEM_POOL" -s "$SYSTEM_POOL_SKU" --max-pods "$MAX_PODS" --network-plugin azure --node-osdisk-type ephemeral --uptime-sla $AKS_OPTIONS || exit 1

# Poll once a minute until the cluster is fully provisioned.
until [[ $(az aks show -g "$RESOURCE_GROUP" -n "$NAME" | jq -r '.provisioningState') == "Succeeded" ]]; do
  sleep 60
done
# Without the cluster autoscaler, issue a scale of the system pool
# ("nodepool1") to its configured size and, when requested through
# VMSS_HEALTH_CHECK, start the VMSS remediation loop as a background job.
if [[ "$ENABLE_CLUSTER_AUTOSCALER" != "true" ]]; then
  az aks scale -g "$RESOURCE_GROUP" -n "$NAME" -c "$NODES_PER_SYSTEM_POOL" --nodepool-name nodepool1 --no-wait
  if [[ "$VMSS_HEALTH_CHECK" == "true" ]]; then
    # Run VMSS Health Check in the background; the PID is kept so the
    # cleanup handler can stop it.
    vmssHealthCheck &
    VMSS_HEALTH_CHECK_PID=$!
  fi
fi

# Poll once a minute until the cluster is back in the Succeeded state.
while :; do
  cluster_state=$(az aks show -g "$RESOURCE_GROUP" -n "$NAME" | jq -r '.provisioningState')
  if [[ "$cluster_state" == "Succeeded" ]]; then
    break
  fi
  sleep 60
done
unset cluster_state
# Add the user node pools. Pools are named nodepool2 .. nodepool<N+1> because
# nodepool1 is the system pool, hence the upper bound is bumped by one.
NUM_USER_NODEPOOLS=$((NUM_USER_NODEPOOLS + 1))

# A caller-supplied NODEPOOL_OPTIONS is used verbatim; otherwise it is derived
# once from the feature toggles (the result is the same for every pool).
if [[ -z "$NODEPOOL_OPTIONS" ]]; then
  if [[ "$ENABLE_CLUSTER_AUTOSCALER" == "true" ]]; then
    # NOTE(review): the default NODES_PER_USER_POOL (998) is above this
    # --max-count of 900 - confirm the CLI accepts that combination.
    NODEPOOL_OPTIONS+=" --enable-cluster-autoscaler --min-count 2 --max-count 900"
  fi
  if [[ "$ENABLE_SWIFT" == "true" ]]; then
    NODEPOOL_OPTIONS+=" --vnet-subnet-id /subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets/vms --pod-subnet-id /subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${CLUSTER_VNET}/subnets/pods"
  fi
fi

i=2
while ((i <= NUM_USER_NODEPOOLS)); do
  # Wait 5 mins to spread out Azure API calls
  sleep 300
  # TAGS_ARG and NODEPOOL_OPTIONS each hold several arguments and must be
  # word-split, so they stay unquoted.
  # shellcheck disable=SC2086
  az aks nodepool add -g "$RESOURCE_GROUP" --cluster-name "$NAME" -n "nodepool$i" -c "$NODES_PER_USER_POOL" $TAGS_ARG --max-pods "$MAX_PODS" -s "$USER_POOL_SKU" --node-osdisk-type ephemeral $NODEPOOL_OPTIONS --no-wait
  i=$((i + 1))
done
cleanup() { | |
kill $VMSS_HEALTH_CHECK_PID >/dev/null 2>&1 | |
exit 0 | |
} | |
trap cleanup SIGINT | |
# Idle forever so the script stays alive until it is interrupted by a signal.
while :; do
  sleep 30
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment