Last active
January 31, 2021 17:56
-
-
Save arafsheikh/50b8c8ad17c5429ae0f2cfbefaee484b to your computer and use it in GitHub Desktop.
Rolling restart an AWS MSK Kafka cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Usage:
# ./aws-msk-rolling-restart.sh <msk cluster arn>
# ./aws-msk-rolling-restart.sh <msk cluster arn> <region>
# ./aws-msk-rolling-restart.sh <msk cluster arn> <region> <profile name>
# Positional arguments. Each one defaults to the empty string when omitted so
# the variables are always defined; the emptiness checks later in the script
# decide whether a missing value is fatal.
CLUSTER_ARN=${1:-}   # ARN of the MSK cluster to restart (required)
REGION=${2:-}        # AWS region (optional)
PROFILE=${3:-}       # AWS CLI profile name (optional)
#######################################
# Reboot a single broker of the MSK cluster.
# Globals:   CLUSTER_ARN, REGION, PROFILE (read)
# Arguments: $1 - ID of the broker to reboot
# Outputs:   the aws CLI response, rendered as a table, on stdout
# Returns:   exit status of `aws kafka reboot-broker`
#######################################
reboot_broker_with_id() {
  local broker_id=$1
  # PROFILE is either empty or the two words "--profile <name>", so it is left
  # unquoted on purpose: the shell must split it into separate arguments (and
  # drop it entirely when empty). Everything else is quoted.
  # shellcheck disable=SC2086
  aws kafka reboot-broker \
    --cluster-arn "$CLUSTER_ARN" \
    --broker-ids "$broker_id" \
    --region "$REGION" \
    --output table \
    $PROFILE
}
#######################################
# Print the current state of the MSK cluster (e.g. ACTIVE).
# The value is selected with the CLI's own --query filter rather than by
# grepping the JSON, so unrelated keys that merely contain "State" cannot
# pollute the result.
# Globals:   CLUSTER_ARN, REGION, PROFILE (read)
# Arguments: none
# Outputs:   the state on stdout; nothing when it could not be determined
# Returns:   non-zero when the aws CLI call fails, 0 otherwise
#######################################
get_cluster_state() {
  local state
  # PROFILE is either empty or "--profile <name>"; it is intentionally
  # unquoted so that it splits into two arguments or disappears.
  # shellcheck disable=SC2086
  state=$(aws kafka describe-cluster \
    --cluster-arn "$CLUSTER_ARN" \
    --region "$REGION" \
    --query 'ClusterInfo.State' \
    --output text \
    $PROFILE) || return
  # With --output text a missing/null field is rendered as the word "None";
  # report that as "no state" so callers' emptiness checks keep working.
  if [[ -n "$state" && "$state" != "None" ]]; then
    printf '%s\n' "$state"
  fi
}
#######################################
# Print the number of broker nodes in the MSK cluster.
# The value is selected with the CLI's --query filter instead of grep/sed
# over the JSON document.
# Globals:   CLUSTER_ARN, REGION, PROFILE (read)
# Arguments: none
# Outputs:   the broker count on stdout; nothing when it is unavailable
# Returns:   non-zero when the aws CLI call fails or the value is not a
#            non-negative integer, 0 otherwise
#######################################
get_num_nodes() {
  local count
  # PROFILE is either empty or "--profile <name>"; it is intentionally
  # unquoted so that it splits into two arguments or disappears.
  # shellcheck disable=SC2086
  count=$(aws kafka describe-cluster \
    --cluster-arn "$CLUSTER_ARN" \
    --region "$REGION" \
    --query 'ClusterInfo.NumberOfBrokerNodes' \
    --output text \
    $PROFILE) || return
  # Only ever emit a plain number; "None" (null field) or any other
  # unexpected text yields empty output so callers can detect the problem.
  [[ "$count" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$count"
}
#######################################
# Block until the cluster reports the ACTIVE state, polling every 10 seconds.
# Globals:   none directly (state is obtained through get_cluster_state)
# Arguments: none
# Outputs:   one progress line per poll on stdout; fatal error on stderr
# Returns:   0 once the cluster is ACTIVE; terminates the whole script with
#            exit status 1 if the very first state lookup yields nothing
#######################################
wait_until_cluster_is_active() {
  # Keep the polled value private so it does not leak into the caller's scope.
  local state
  state=$(get_cluster_state)
  if [[ -z "$state" ]]; then
    echo "Got empty state from awscli response. Unable to proceed." >&2
    exit 1
  fi
  # Only the first lookup is treated as fatal when empty; later empty results
  # simply do not equal ACTIVE and are therefore polled again.
  while [[ "$state" != "ACTIVE" ]]; do
    echo "Cluster is in $state state. Waiting for 10 seconds..."
    sleep 10
    state=$(get_cluster_state)
  done
}
# ---- Argument validation ---------------------------------------------------
if [[ -z "$CLUSTER_ARN" ]]; then
  echo "Cluster ARN argument not specified" >&2
  exit 1
fi

# Fall back to AWS_DEFAULT_REGION when no region argument was given.
if [[ -z "$REGION" ]]; then
  if [[ -z "${AWS_DEFAULT_REGION:-}" ]]; then
    echo "Neither REGION argument nor AWS_DEFAULT_REGION env var are set" >&2
    exit 1
  fi
  REGION=$AWS_DEFAULT_REGION
fi

# From here on PROFILE holds the ready-made CLI option ("--profile <name>")
# or stays empty; the helper functions expand it unquoted.
if [[ -n "$PROFILE" ]]; then
  echo "Running script with profile: $PROFILE"
  PROFILE="--profile $PROFILE"
fi

# ---- Rolling restart -------------------------------------------------------
# Never start rebooting brokers while the cluster is busy with something else.
wait_until_cluster_is_active
echo "Initiating rolling restart for cluster: $CLUSTER_ARN in region: $REGION"

num_nodes=$(get_num_nodes)
# Refuse to continue without a positive integer: an empty count would
# otherwise degrade into "reboot broker 1" with no idea of the cluster size.
if [[ ! "$num_nodes" =~ ^[1-9][0-9]*$ ]]; then
  echo "Unable to determine the number of broker nodes (got: '$num_nodes')" >&2
  exit 1
fi
echo "Cluster has $num_nodes nodes"

# Broker IDs are taken to be 1..num_nodes. Reboot one at a time and wait for
# the cluster to return to ACTIVE before touching the next broker.
for ((i = 1; i <= num_nodes; i++)); do
  echo "Rebooting broker $i"
  if ! reboot_broker_with_id "$i"; then
    echo "Error running reboot-broker command" >&2
    exit 2
  fi
  wait_until_cluster_is_active
  echo "Cluster is back to ACTIVE state"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment