Created
February 25, 2021 10:50
-
-
Save abij/95b47edf8b6d176fba9ec796da96b715 to your computer and use it in GitHub Desktop.
Databricks init script to install Datadog agent and configure for Structured Streaming metrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Databricks cluster init script: install the Datadog agent on the driver node
# and configure the Datadog Spark integration so Structured Streaming metrics
# are collected.
#
# Inspiration based on: https://docs.databricks.com/_static/notebooks/datadog-init-script.html
# Improvements:
# - Use 'spark_url' in config (instead of resourcemanager_uri)
# - Use Databricks Secrets to store and retrieve datadog-api-key
# - Works with SingleNode-clusters
# - Update datadog-spark integration (when needed), to support Structured Streaming metrics.
#
# Required cluster Spark environment variable:
#   DATADOG_API_KEY={{secrets/scope-name/secret-name}}

# Write the start script onto each node's own filesystem, but only execute it
# on the driver. The quoted 'EOF' delimiter suppresses all expansion at write
# time (so no '\$' escaping is needed), and '>' instead of '>>' keeps the file
# idempotent if the init script runs more than once (e.g. cluster restart).
cat <<'EOF' > /tmp/start_datadog.sh
#!/bin/bash
set -e  # Exit on errors

# DB_IS_DRIVER is only set (to TRUE) on the driver node.
if [ -n "$DB_IS_DRIVER" ]; then
  echo "Running Datadog Agent installation script: $0"

  echo 'Step 0: Check presence of variable DATADOG_API_KEY'
  [ -z "$DATADOG_API_KEY" ] && echo 'Missing required Spark Environment setting: DATADOG_API_KEY={{secrets/scope-name/secret-name}}' && exit 1

  echo 'Step 1: On the driver, installing Datadog agent...'
  DD_API_KEY="$DATADOG_API_KEY" bash -c "$(curl -L https://raw.githubusercontent.com/DataDog/datadog-agent/master/cmd/agent/install_script.sh)"

  echo 'Step 2: Finished installation, detect cluster profile...'
  if grep -q 'spark.databricks.cluster.profile singleNode' /tmp/custom-spark.conf; then
    # Single-node cluster: the Spark UI port comes from the driver env file.
    echo ' > SingleNodeCluster, using "spark_driver_mode"'
    DB_DRIVER_PORT=$(grep 'CONF_UI_PORT' /tmp/driver-env.sh | cut -d'=' -f2)
    SPARK_CLUSTER_MODE='spark_driver_mode'
  else
    # Standard cluster: /tmp/master-params appears once the Spark master is
    # up; poll until it exists, then read the master port from it.
    echo ' > Normal cluster, using "spark_standalone_mode", waiting for master-params...'
    while [ -z "$is_available" ]; do
      if [ -e "/tmp/master-params" ]; then
        DB_DRIVER_PORT=$(cut -d' ' -f2 /tmp/master-params)
        SPARK_CLUSTER_MODE='spark_standalone_mode'
        is_available=TRUE
      fi
      sleep 2
    done
  fi

  echo 'Step 3: Create Datadog Spark config file (/etc/datadog-agent/conf.d/spark.yaml)...'
  echo "init_config:
instances:
  - spark_url: http://$DB_DRIVER_IP:$DB_DRIVER_PORT
    spark_cluster_mode: $SPARK_CLUSTER_MODE
    cluster_name: $DB_CLUSTER_NAME" > /etc/datadog-agent/conf.d/spark.yaml

  echo 'Step 4: Check version of "datadog-spark" to support Structured Streaming metrics.'
  # See: https://github.com/DataDog/integrations-core/blob/master/spark/CHANGELOG.md
  installed_version=$(datadog-agent integration freeze | grep datadog-spark | cut -d'=' -f3)
  minimal_version='1.19.1'
  # Version-sort both versions descending: if the installed one sorts first,
  # it is >= the minimum and no update is needed.
  if [ "$installed_version" = "$(printf '%s\n' "$installed_version" "$minimal_version" | sort -V -r | head -n1)" ]; then
    echo " > datadog-spark version: $installed_version (no need to update)"
  else
    echo " > update datadog-spark to version: $minimal_version (current: $installed_version)"
    sudo -u dd-agent -- datadog-agent integration install "datadog-spark==$minimal_version"
  fi

  echo 'Step 5: Finally, restarting Datadog agent with new config.'
  sudo service datadog-agent restart
fi
EOF

if [ -n "$DB_IS_DRIVER" ]; then
  chmod a+x /tmp/start_datadog.sh
  # Run detached so the init script returns promptly; all output is logged.
  /tmp/start_datadog.sh >> /tmp/start_datadog.log 2>&1 & disown
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment