Last active
October 20, 2021 08:19
-
-
Save ZimbiX/8482514298e8eca419f48fb2a52e7ae4 to your computer and use it in GitHub Desktop.
A helper script for running a one-shot pod, typically as part of a deployment pipeline - e.g. to carry out a database migration. Developed in collaboration with @ceralena
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
##################################################
# kube-run-pod
#
# This is a helper script for running a one-shot pod, typically as part of a
# deployment pipeline - e.g. to carry out a database migration.
#
# All arguments are required.
#
# This script creates the pod specified by the caller and waits until the
# specified container has exited, or until a timeout elapses. Once the
# container has exited, the pod is deleted. This means the pod can have sidecar
# containers that don't terminate automatically, such as cloud-sql-proxy.
#
# If the timeout elapses first, the pod is NOT deleted automatically: it may
# still be running and has to be checked and cleaned up by hand.
#
# If we time out, the exit status of this script is 1.
# Otherwise, the exit status is the exit status of the container from the pod.
##################################################

# -E: ERR traps are inherited by functions and subshells
# -e: exit on an unhandled non-zero status
# -u: expanding an unset variable is an error
# -o pipefail: a pipeline fails if any of its stages fails
set -Eeuo pipefail

echo '--- Validating input'
#######################################
# Exit handler: announces the cleanup and terminates any background jobs of
# this shell that are still running.
# Outputs: writes "Cleaning up..." to stdout
# Returns: always succeeds, so it does not alter the script's exit status
#######################################
cleanup() {
  echo "Cleaning up..."
  # Best effort: a failing kill (e.g. nothing left to kill) is deliberately
  # ignored and its output discarded.
  jobs -p | xargs kill &>/dev/null || true
}

# Run the handler on every exit path of the script.
trap cleanup EXIT
# Usage text. It contains backslash escapes (\n) that usage() expands when
# printing.
USAGE="USAGE:\n\nkube-run-pod --pod-name=db-migration --container-name=db-migration --timeout-seconds=60 the-thing.yaml"

#######################################
# Print the usage text and terminate the script.
# Globals: USAGE (read)
# Outputs: the usage text, with backslash escapes expanded, on stderr
# Returns: does not return; exits with status 1
#######################################
usage() {
  printf '%b\n' "${USAGE}" >&2
  exit 1
}
#######################################
# Report a fatal error and terminate the script.
# Arguments: $1 - error message; printed verbatim (backslash escapes are not
#                 expanded)
# Outputs: "ERROR: <message>" on stderr
# Returns: does not return; exits with status 1
#######################################
abort() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}
# Parse the long options with getopt. The assignment is tested directly:
# under `set -e` a failing standalone `OPTS=$(getopt ...)` statement would
# terminate the script before a separate `$?` check could ever run.
if ! OPTS=$(getopt -o '' -l pod-name:: -l container-name:: -l timeout-seconds:: -n 'parse-options' -- "$@"); then
  echo "ERROR: Failed parsing options" >&2
  echo >&2
  usage
fi

# Replace the positional parameters with getopt's normalised output.
eval set -- "$OPTS"

POD_NAME=""
CONTAINER_NAME=""
TIMEOUT_SECONDS=""
# How long to wait for the pod's containers to become ready after creation.
INIT_TIMEOUT_SECONDS=60

# The options are declared with optional arguments (`::`), so values must be
# given in the --option=value form; getopt emits a value word after every
# option (empty if none was supplied), hence the two shifts per option.
while true; do
  case "$1" in
    --pod-name)
      POD_NAME="$2"
      shift 2
      ;;
    --container-name)
      CONTAINER_NAME="$2"
      shift 2
      ;;
    --timeout-seconds)
      TIMEOUT_SECONDS="$2"
      shift 2
      ;;
    --)
      # End of options; what follows are the positional arguments.
      shift
      break
      ;;
    *)
      break
      ;;
  esac
done
# After all the shifts above, exactly one positional argument (the pod spec
# file) should be left.
if [ $# -ne 1 ]; then
  usage
fi
POD_SPEC=$1

# Every option is mandatory.
if [[ -z "$POD_NAME" || -z "$POD_SPEC" || -z "$CONTAINER_NAME" || -z "$TIMEOUT_SECONDS" ]]; then
  usage
fi

# The timeout is later compared numerically against $SECONDS. With a
# non-numeric value that comparison errors out (and so counts as "not timed
# out") on every iteration, meaning the script would never time out.
# Reject such values up front.
if ! [[ "$TIMEOUT_SECONDS" =~ ^[0-9]+$ ]]; then
  abort "--timeout-seconds must be a non-negative integer, got: ${TIMEOUT_SECONDS}"
fi

# Make sure the file exists
if ! [[ -f "$POD_SPEC" ]]; then
  abort "pod spec file $POD_SPEC does not exist."
fi
# Print the effective settings, one per line.
echo '+++ Summary'
printf '%s\n' \
  "Pod name: ${POD_NAME}" \
  "Spec file: ${POD_SPEC}" \
  "Container name: ${CONTAINER_NAME}" \
  "Timeout seconds: ${TIMEOUT_SECONDS}"
echo '--- Validating environment'

# This script has cleanup at the bottom, so we expect the pod not to exist.
# If it does still exist, it may be unsafe to proceed - perhaps this script has
# a bug whereby it's not cleaning it up properly, or concurrency groups aren't
# configured and a different instance of the deploy agent is currently running it.
#
# Regardless, we bail out here and force the developer to intervene rather than
# risk interrupting a pod that could be running a database migration.
if kubectl get pod "${POD_NAME}" &>/dev/null; then
  # Show the existing pod before bailing out.
  kubectl get pod "${POD_NAME}" -o wide
  abort "Found an existing pod named ${POD_NAME}; this shouldn't happen. Check the status of the pod and delete it manually if it's safe."
fi
echo '--- Running pod'

# Trace the kubectl commands from here on.
set -x

# Run the pod
kubectl apply -f "${POD_SPEC}"

# Wait until the pod is ready for us to tail its logs.
# If it's not ready after INIT_TIMEOUT_SECONDS, it's likely that it has
# ErrImagePull or CreateContainerConfigError state. In this case, we'll delete
# the pod after printing its status.
if ! kubectl wait \
  --for=condition=ContainersReady \
  --timeout="${INIT_TIMEOUT_SECONDS}"s \
  pod "${POD_NAME}"; then
  set +x

  # Output the pod status (best effort).
  kubectl get pod -o yaml "${POD_NAME}" || true

  # Delete the pod - this should be safe if we haven't even successfully started it.
  kubectl delete pod "${POD_NAME}" || true

  abort "The pod was not ready after ${INIT_TIMEOUT_SECONDS} seconds"
fi

echo '+++ Pod started'

# Show the state of the pod.
kubectl get pod "${POD_NAME}" -o wide

# Start showing the logs. This runs in the background so the script can keep
# going while the container's output is streamed.
kubectl logs --follow "${POD_NAME}" -c "${CONTAINER_NAME}" &
# Now, we will start a loop where we keep checking for a container status code,
# while keeping an eye on the clock. If we get a status code, we break out. If
# we've hit the timeout, we abort. Otherwise, we sleep for 1 second.
set +x

# SECONDS is bash's built-in timer; assigning 0 restarts it.
SECONDS=0
while true; do
  # A failed query (e.g. a transient API error) is treated as "no exit code
  # yet" and retried until the timeout, rather than letting `set -e` end the
  # script with kubectl's status, which the caller would mistake for the
  # container's exit status.
  containerExitCode=$(kubectl get pod "${POD_NAME}" -o "jsonpath={..status.containerStatuses[?(@.name==\"${CONTAINER_NAME}\")].state.terminated.exitCode}") \
    || containerExitCode=""

  # The above JSON path query will mostly return an empty string until the
  # container has a status code, but there's a short window of time between
  # the pod being marked Completed and the containerStatus block being fully
  # populated where we'll start getting a newline instead of an empty string.
  #
  # To handle this, we go ahead and check for something that actually looks
  # like an exit code.
  if [[ "$containerExitCode" =~ ^[0-9]+$ ]]; then
    break
  elif [ "$SECONDS" -gt "$TIMEOUT_SECONDS" ]; then
    echo -e '\nPod status:\n'
    kubectl get pod "${POD_NAME}" -o wide || true
    echo -e '\n=== BEWARE ===\n' >&2
    echo 'kube-run-pod does NOT automatically kill or cleanup the migration pod when we time out.' >&2
    echo 'It may still be running.' >&2
    abort "Timed out after ${TIMEOUT_SECONDS} seconds."
  else
    sleep 1
  fi
done

echo "Container exited with status code: ${containerExitCode}"

# Wait for the logs command to exit to make sure we see all the logs.
wait

echo '--- cleaning up'
set -x
kubectl delete pod "${POD_NAME}"

# Propagate the container's exit status to the caller.
exit "$containerExitCode"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment