Last active
August 1, 2024 23:14
-
-
Save onefoursix/24b8295776ea2ac4c4b5d20d04060975 to your computer and use it in GitHub Desktop.
A bash script that captures Java heap and CPU usage for a StreamSets Data Collector and writes the output in CSV format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#=================================================================================== | |
# | |
# FILE: get-streamsets-metrics.sh | |
# | |
# DESCRIPTION: A bash script that captures Java heap and CPU usage for a StreamSets | |
# Data Collector and writes the output in CSV format | |
# | |
# CONFIGURATION: Set the following variables at the head of the script: | |
# | |
# SDC_URL=https://<sdc hostname or IP>:<port> (or use http if appropriate) | |
# SDC_USER=<user> | |
# SDC_PASSWORD=<password> | |
# REFRESH_FREQUENCY_SECONDS=<num_seconds> # recommended to set this no lower than 15 seconds | |
# | |
# For example, on a test instance of SDC running on localhost with HTTP | |
# and using default login/password and capturing metrics twice per minute: | |
# | |
# SDC_URL=http://localhost:18630 | |
# SDC_USER=admin | |
# SDC_PASSWORD=admin | |
# REFRESH_FREQUENCY_SECONDS=30 | |
# | |
# TEST RUN: | |
# To test the script as a foreground process with output written to the | |
# console, run the script like this: | |
# | |
# $ ./get-streamsets-metrics.sh | |
# | |
# Stop the test run using ctrl-C | |
# | |
# USAGE: | |
# To run the script as a background process with the metrics redirected to a file, | |
# use a command like this: | |
# | |
# $ nohup ./get-streamsets-metrics.sh > /tmp/streamsets-metrics.log 2>&1 & | |
# | |
# To stop the script running as a background process, find its PID: | |
# | |
# $ ps -ef | grep streamsets-metrics | |
# 501 33140 5678 0 11:01AM ttys000 0:00.02 /bin/bash ./get-streamsets-metrics.sh | |
# | |
# and then kill the process: | |
# | |
# $ kill 33140 | |
# | |
# EXAMPLE OUTPUT: | |
# | |
# Here is example output with a REFRESH_FREQUENCY_SECONDS set to 15 seconds that shows | |
# Java heap memory hovering around 1000MB with a high of 1297MB, and CPU around 95%: | |
# | |
# $ ./get-streamsets-metrics.sh | |
# timestamp,jvm.memory.heap.used(Mb),sdc_cpu_load_percentage | |
# 2024-08-01T10:55:02,1297,92.42 | |
# 2024-08-01T10:55:18,1036,76.00 | |
# 2024-08-01T10:55:33,1141,91.06 | |
# 2024-08-01T10:55:48,919,97.39 | |
# 2024-08-01T10:56:04,953,95.92 | |
# 2024-08-01T10:56:19,1266,84.23 | |
# 2024-08-01T10:56:34,953,96.59 | |
# 2024-08-01T10:56:50,1068,90.19 | |
# | |
# | |
# DISK SPACE NEEDED FOR THE METRICS FILE: | |
# The script will write ~30bytes each time it gathers metrics. So for example, | |
# if your refresh frequency is 15 seconds, it will write 120bytes per minute, | |
# which is about 170KB per day | |
# | |
#=================================================================================== | |
# Set these variables
#=================================
SDC_URL=
SDC_USER=
SDC_PASSWORD=
REFRESH_FREQUENCY_SECONDS= # recommended to set this no lower than 15 seconds
#=================================

# The JMX endpoint
SDC_JMX_ENDPOINT="${SDC_URL}/rest/v1/system/jmx"

# Divisor used to convert the heap gauge from bytes to Mb
readonly BYTES_PER_MB=1048576

#######################################
# Print a diagnostic message on stderr, keeping it apart from the CSV rows
# that are written to stdout.
# Arguments: $* - message text
# Outputs:   the message, prefixed with the script name, on stderr
#######################################
warn() {
  printf 'get-streamsets-metrics: %s\n' "$*" >&2
}

#######################################
# Verify that the configuration variables at the head of the script are set
# and that curl is available, so a misconfigured script fails immediately
# instead of looping and printing meaningless rows.
# Globals:   SDC_URL, SDC_USER, SDC_PASSWORD, REFRESH_FREQUENCY_SECONDS (read)
# Outputs:   a diagnostic on stderr for the first problem found
# Returns:   0 if the configuration is usable, 1 otherwise
#######################################
check_config() {
  local name
  local -r number_re='^[0-9]+([.][0-9]+)?$'
  local -r all_zero_re='^[0.]+$'

  for name in SDC_URL SDC_USER SDC_PASSWORD REFRESH_FREQUENCY_SECONDS; do
    # ${!name} is indirect expansion: the value of the variable named by $name
    if [[ -z "${!name}" ]]; then
      warn "${name} is not set; edit the variables at the head of this script"
      return 1
    fi
  done

  # A zero or non-numeric interval would turn the loop into a busy spin
  if ! [[ "${REFRESH_FREQUENCY_SECONDS}" =~ ${number_re} ]] \
    || [[ "${REFRESH_FREQUENCY_SECONDS}" =~ ${all_zero_re} ]]; then
    warn "REFRESH_FREQUENCY_SECONDS must be a positive number of seconds"
    return 1
  fi

  if ! command -v curl > /dev/null; then
    warn "curl is required but was not found on the PATH"
    return 1
  fi
}

#######################################
# Fetch SDC's JMX metrics as JSON.
# The response is written to stdout and captured by the caller in a variable,
# so no temporary file with a predictable name is needed.
# NOTE(review): the credentials are passed on curl's command line and are
# therefore visible to other local users via ps; consider a curl config file
# or ~/.netrc if that matters in your environment.
# Globals:   SDC_USER, SDC_PASSWORD, SDC_JMX_ENDPOINT (read)
# Outputs:   the JSON document on stdout; curl's error message on stderr
# Returns:   curl's exit status (non-zero on connection or HTTP errors)
#######################################
fetch_jmx() {
  # -s  no progress meter        -S  still report errors
  # -f  treat HTTP errors (401, 500, ...) as failures instead of parsing
  #     the error page as if it were metrics
  curl -sS -f -u "${SDC_USER}:${SDC_PASSWORD}" -X GET \
    -H "X-Requested-By:sdc" "${SDC_JMX_ENDPOINT}"
}

#######################################
# Extract the jvm.memory.heap.used gauge and convert it to whole Mb.
# Relies on the "Value" line appearing within two lines after the
# "metrics:name=jvm.memory.heap.used" line in the JMX output.
# Arguments: $1 - JMX JSON document
# Outputs:   heap used in Mb (integer, truncated) on stdout
# Returns:   0 on success, 1 if no integer value could be extracted
#######################################
heap_used_mb() {
  local json=$1
  local bytes
  local -r integer_re='^[0-9]+$'

  bytes=$(grep -F -A 2 'metrics:name=jvm.memory.heap.used' <<< "${json}" \
    | grep -m 1 '"Value"' \
    | sed 's/.*"Value"[[:space:]]*:[[:space:]]*//' \
    | tr -d ' ,\r')

  [[ "${bytes}" =~ ${integer_re} ]] || return 1

  # 10# forces base 10 so that a leading zero is never read as octal
  printf '%s' "$(( 10#${bytes} / BYTES_PER_MB ))"
}

#######################################
# Extract ProcessCpuLoad (a fraction) and convert it to a percentage with
# two decimal places.
# Arguments: $1 - JMX JSON document
# Outputs:   CPU load percentage on stdout, e.g. 92.42
# Returns:   0 on success, 1 if no numeric value could be extracted
#######################################
cpu_load_percentage() {
  local json=$1
  local load
  # Accepts plain decimals as well as exponent notation such as 1.2E-4
  local -r float_re='^-?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'

  load=$(grep -m 1 '"ProcessCpuLoad"' <<< "${json}" \
    | sed 's/.*"ProcessCpuLoad"[[:space:]]*:[[:space:]]*//' \
    | tr -d ' ,\r')

  [[ "${load}" =~ ${float_re} ]] || return 1

  # LC_ALL=C pins "." as the decimal separator regardless of the user's
  # locale, for both parsing the input and formatting the output
  LC_ALL=C awk -v load="${load}" 'BEGIN { printf "%.2f", load * 100 }'
}

#######################################
# Write the CSV header, then emit one CSV row per refresh interval until the
# process is terminated. When metrics cannot be fetched or parsed, the
# affected fields are left empty (rather than reported as 0) and a diagnostic
# is written to stderr.
# Globals:   SDC_JMX_ENDPOINT, REFRESH_FREQUENCY_SECONDS (read)
# Outputs:   CSV on stdout, diagnostics on stderr
#######################################
main() {
  local metrics_json heap_mb cpu_pct

  check_config || exit 1

  # Write a header line
  printf '%s\n' "timestamp,jvm.memory.heap.used(Mb),sdc_cpu_load_percentage"

  # Loop until the process is terminated
  while true; do
    heap_mb=''
    cpu_pct=''

    if metrics_json=$(fetch_jmx); then
      heap_mb=$(heap_used_mb "${metrics_json}") \
        || warn "could not find jvm.memory.heap.used in the JMX response"
      cpu_pct=$(cpu_load_percentage "${metrics_json}") \
        || warn "could not find ProcessCpuLoad in the JMX response"
    else
      warn "failed to fetch metrics from ${SDC_JMX_ENDPOINT}"
    fi

    # Write the metrics
    printf '%s,%s,%s\n' "$(date '+%Y-%m-%dT%H:%M:%S')" "${heap_mb}" "${cpu_pct}"

    sleep "${REFRESH_FREQUENCY_SECONDS}"
  done
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment