Skip to content

Instantly share code, notes, and snippets.

@arafsheikh
Last active January 31, 2021 17:56
Show Gist options
  • Save arafsheikh/50b8c8ad17c5429ae0f2cfbefaee484b to your computer and use it in GitHub Desktop.
Save arafsheikh/50b8c8ad17c5429ae0f2cfbefaee484b to your computer and use it in GitHub Desktop.
Rolling restart an AWS MSK Kafka cluster
#!/bin/bash
# Usage:
# ./aws-msk-rolling-restart.sh <msk cluster arn>
# ./aws-msk-rolling-restart.sh <msk cluster arn> <region>
# ./aws-msk-rolling-restart.sh <msk cluster arn> <region> <profile name>
# Positional arguments, in order: MSK cluster ARN, AWS region, awscli profile
# name. Arguments that were not supplied end up as empty strings.
CLUSTER_ARN="${1}"
REGION="${2}"
PROFILE="${3}"
#######################################
# Ask MSK to reboot one broker of the cluster.
# Globals:   CLUSTER_ARN, REGION (read)
#            PROFILE (read) - empty, or the two words "--profile <name>"
# Arguments: $1 - id of the broker to reboot
# Outputs:   the awscli response, rendered as a table, on stdout
# Returns:   exit status of the aws command
#######################################
function reboot_broker_with_id() {
  # PROFILE is intentionally left unquoted: it carries zero or two words
  # ("--profile <name>") that must reach aws as separate arguments.
  # shellcheck disable=SC2086
  aws kafka reboot-broker --cluster-arn "$CLUSTER_ARN" --broker-ids "$1" \
    --region "$REGION" --output table $PROFILE
}
function get_cluster_state() {
aws kafka describe-cluster --cluster-arn $CLUSTER_ARN --region $REGION --output json $PROFILE | grep State | sed 's/^.*"State": "\([A-Z_]*\)".*$/\1/'
}
#######################################
# Print the number of broker nodes in the cluster.
# Globals:   CLUSTER_ARN, REGION (read)
#            PROFILE (read) - empty, or the two words "--profile <name>"
# Arguments: none
# Outputs:   the node count on stdout; nothing when it could not be determined
# Returns:   non-zero when the aws command fails, 0 otherwise
#######################################
function get_num_nodes() {
  local count
  # Let the CLI extract the field rather than scraping pretty-printed JSON
  # with grep/sed, which depends on the exact output layout.
  # PROFILE is intentionally unquoted so "--profile <name>" splits in two.
  # shellcheck disable=SC2086
  count=$(aws kafka describe-cluster --cluster-arn "$CLUSTER_ARN" \
    --region "$REGION" --query 'ClusterInfo.NumberOfBrokerNodes' \
    --output text $PROFILE) || return
  # Text output renders a missing (null) field as the literal "None".
  if [[ -n "$count" && "$count" != "None" ]]; then
    printf '%s\n' "$count"
  fi
}
#######################################
# Block until the cluster reports the ACTIVE state, polling every 10 seconds.
# Globals:   none directly (state is obtained via get_cluster_state)
# Arguments: none
# Outputs:   a progress line on stdout for every non-ACTIVE poll
# Returns:   0 once the cluster is ACTIVE; exits the script with status 1 if
#            any poll yields an empty state
#######################################
function wait_until_cluster_is_active() {
  local state
  while true; do
    state=$(get_cluster_state)
    # Validate every poll, not just the first one: a lookup that starts
    # failing mid-wait would otherwise never compare equal to ACTIVE and the
    # loop would spin forever.
    if [[ -z "$state" ]]; then
      echo "Got empty state from awscli response. Unable to proceed."
      exit 1
    fi
    if [[ "$state" == "ACTIVE" ]]; then
      return 0
    fi
    echo "Cluster is in $state state. Waiting for 10 seconds..."
    sleep 10
  done
}
# The cluster ARN is mandatory.
[[ -n $CLUSTER_ARN ]] || {
  echo "Cluster ARN argument not specified"
  exit 1
}

# Region: an explicit argument wins, otherwise fall back to AWS_DEFAULT_REGION.
REGION=${REGION:-$AWS_DEFAULT_REGION}
[[ -n $REGION ]] || {
  echo "Neither REGION argument nor AWS_DEFAULT_REGION env var are set"
  exit 1
}

# Profile is optional. When given, PROFILE is rewritten into the ready-to-use
# option string that the aws invocations append to their command line.
case $PROFILE in
  "") ;;
  *)
    echo "Running script with profile: $PROFILE"
    PROFILE="--profile $PROFILE"
    ;;
esac
# Rolling restart: make sure the cluster is ACTIVE, then reboot the brokers
# one at a time, waiting for the cluster to return to ACTIVE after each reboot.
# Exits with 1 when the broker count cannot be determined and with 2 when a
# reboot-broker call fails.
wait_until_cluster_is_active
echo "Initiating rolling restart for cluster: $CLUSTER_ARN in region: $REGION"
num_nodes=$(get_num_nodes)
# Refuse to continue without a positive integer count. With an empty value,
# `seq 1 $num_nodes` collapses to `seq 1`, which still yields "1" and would
# reboot broker 1 on the back of a failed lookup.
if ! [[ $num_nodes =~ ^[1-9][0-9]*$ ]]; then
  echo "Unable to determine the number of broker nodes (got: '$num_nodes')"
  exit 1
fi
echo "Cluster has $num_nodes nodes"
# NOTE(review): assumes broker ids are numbered 1..num_nodes - confirm.
for ((i = 1; i <= num_nodes; i++)); do
  echo "Rebooting broker $i"
  if ! reboot_broker_with_id "$i"; then
    echo "Error running reboot-broker command"
    exit 2
  fi
  wait_until_cluster_is_active
  echo "Cluster is back to ACTIVE state"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment