Created
August 2, 2023 16:26
-
-
Save philpennock/660d01ae7cadc4331494add667375d3a to your computer and use it in GitHub Desktop.
An amalgamation of NATS-related content from multiple Icinga config files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ~~~~~~~~~~~~~~~~~~~~~~~~8< Your Commands File >8~~~~~~~~~~~~~~~~~~~~~~~~
# CheckCommand that runs `nats server check connection` from the plugin directory.
# Connection settings are handed over as environment variables; the thresholds
# are handed over as command-line flags.
object CheckCommand "nats-server" {
  import "plugin-check-command"

  command = [ PluginDir + "/nats", "server", "check", "connection" ]

  env += {
    # If the context has been defined, outside Icinga alas, then that's the
    # best way to just check for some context.
    NATS_CONTEXT = "$nats_context$"
    NATS_URL = "$nats_url$"
    NATS_CREDS = "$nats_creds_file$"
    NATS_USER = "$nats_user$"
    NATS_PASSWORD = "$nats_password$"
    NATS_NKEY = "$nats_nkey_file$"
    NATS_CERT = "$nats_tls_cert_file$"
    NATS_KEY = "$nats_tls_key_file$"
    NATS_CA = "$nats_tls_ca_file$"
    # Go duration, default 5s; this is NOT the NAGIOS warn/critical threshold.
    NATS_TIMEOUT = "$nats_timeout$"
  }

  arguments = {
    # default (2021-06): 500ms
    "--connect-warn" = {
      value = "$nats_connect_warn$"
      description = "Warning threshold to allow for establishing connections"
    }
    # default (2021-06): 1s
    "--connect-critical" = {
      value = "$nats_connect_critical$"
      description = "Critical threshold to allow for establishing connections"
    }
    # default (2021-06): 500ms
    "--rtt-warn" = {
      value = "$nats_rtt_warn$"
      description = "Warning threshold to allow for server RTT"
    }
    # default (2021-06): 1s
    "--rtt-critical" = {
      value = "$nats_rtt_critical$"
      description = "Critical threshold to allow for server RTT"
    }
    # default (2021-06): 500ms
    "--req-warn" = {
      value = "$nats_req_warn$"
      description = "Warning threshold to allow for full round trip test"
    }
    # default (2021-06): 1s
    "--req-critical" = {
      value = "$nats_req_critical$"
      description = "Critical threshold to allow for full round trip test"
    }
  }
}
# CheckCommand that runs `nats server check js` from the plugin directory.
# Connection settings are handed over as environment variables.
object CheckCommand "nats-jetstream" {
  import "plugin-check-command"

  command = [ PluginDir + "/nats", "server", "check", "js" ]

  env += {
    # If the context has been defined, outside Icinga alas, then that's the
    # best way to just check for some context.
    NATS_CONTEXT = "$nats_context$"
    NATS_URL = "$nats_url$"
    NATS_CREDS = "$nats_creds_file$"
    NATS_USER = "$nats_user$"
    NATS_PASSWORD = "$nats_password$"
    NATS_NKEY = "$nats_nkey_file$"
    NATS_CERT = "$nats_tls_cert_file$"
    NATS_KEY = "$nats_tls_key_file$"
    NATS_CA = "$nats_tls_ca_file$"
    # Go duration, default 5s.
    NATS_TIMEOUT = "$nats_timeout$"
  }

  arguments = {
    "--js-domain" = {
      value = "$nats_js_domain$"
      description = "JetStream Domain for targetting clusters on the NATS network"
    }
  }
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~8< Your rules file >8~~~~~~~~~~~~~~~~~~~~~~~~~~
# HTTPS probe of the /healthz URI, applied to every host that sets
# vars.nats_healthz_https; that variable's value is also used as the port.
apply Service "nats-healthz" {
  import "generic-service"
  assign where host.vars.nats_healthz_https
  check_command = "http"
  vars += {
    http_uri = "/healthz"
    http_port = host.vars.nats_healthz_https
    http_ssl = true
    http_sni = true
    # http_certificate value is days the cert has to still be valid for to avoid "warning"; when comma-separated, second is "critical"
    http_certificate = "10,2"
    # With a certificate check requested, check_http does not check the URL by
    # default, so the body regex below would never be evaluated. This asks it
    # to carry on with the HTTP request after the certificate check.
    # NOTE(review): needs a check_http that supports --continue-after-certificate
    # and an ITL "http" command that knows http_certificate_continue -- confirm
    # the installed versions.
    http_certificate_continue = true
    # (nothing checks that the cert hostname matches)
    http_expect_body_regex = "\"status\"\\s*:\\s*\"ok\""
  }
}
# Service template: generic-service defaults, checked with the "nats-server"
# CheckCommand.
template Service "nats-server" {
  import "generic-service"

  check_command = "nats-server"
}
# Service template: generic-service defaults, checked with the "nats-jetstream"
# CheckCommand.
template Service "nats-jetstream" {
  import "generic-service"

  check_command = "nats-jetstream"
}
# Adds a "nats" service (from the "nats-server" template) to every host with
# vars.nats_server set to true.
apply Service "nats" {
  import "nats-server"
  assign where host.vars.nats_server == true

  # Use the host's own URL when it has one, else build one from the host name.
  if (host.vars.nats_url) {
    vars.nats_url = host.vars.nats_url
  } else {
    vars.nats_url = "nats://" + host.name
  }
  vars.slack_notifications = "enabled"
}
# Adds a "nats-js" service (from the "nats-jetstream" template) to every host
# with vars.nats_js_enabled set to true.
apply Service "nats-js" {
  import "nats-jetstream"
  assign where host.vars.nats_js_enabled == true

  # Use the host's own URL when it has one, else build one from the host name.
  if (host.vars.nats_url) {
    vars.nats_url = host.vars.nats_url
  } else {
    vars.nats_url = "nats://" + host.name
  }
  vars.slack_notifications = "enabled"
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~8< Your checks file >8~~~~~~~~~~~~~~~~~~~~~~~~~
# Example host whose custom variables match the "nats", "nats-js" and
# "nats-healthz" apply rules.
object Host "nats.lan" {
  import "home-host"

  address = "192.0.2.1"

  vars += {
    nats_server = true
    nats_js_enabled = true
    nats_user = "icinga"
    # freshly generated for this gist, could use a canary :)
    # NOTE(review): this is a credential stored inline in the config file.
    nats_password = "SSWs!HP07M1V/=u_Aeef7_.V60leHn"
    nats_healthz_https = 8222
    slack_notifications = "enabled"
  }
}
# Host group with no assign rule; hosts join by listing it in `groups`.
object HostGroup "nats-clusters" {
  display_name = "NATS Clusters"
}
# Collects every service whose check command is "nats-server".
object ServiceGroup "nats" {
  display_name = "NATS"

  assign where service.check_command == "nats-server"
}
# Collects every service whose check command is "nats-jetstream".
object ServiceGroup "nats-js" {
  display_name = "NATS JetStream"

  assign where service.check_command == "nats-jetstream"
}
# It's good to auto-gen hosts here, so that you build from lists of known servers.
#
# Host in the "nats-clusters" group. Its own host check is the "dummy" command
# with a fixed state and text.
object Host "prod-foo" {
  display_name = "Foo: Production"
  groups = [ "nats-clusters" ]
  check_command = "dummy"

  vars += {
    dummy_state = 0 //Up
    dummy_text = "Everything OK."
    pt_remote = true
    nats_url = "tls://connect.example.org"
    # this would be managed outside of Icinga, however you manage contexts
    nats_context = "foo-client1"
    pt_nats_supercluster = true
  }
}
# Host group with no assign rule; hosts join by listing it in `groups`.
object HostGroup "foo-clusters-prod" {
  display_name = "Foo NATS Production Clusters"
}
# Host in the "foo-clusters-prod" group. Its own host check is the "dummy"
# command with a fixed state and text.
object Host "foo-prod-us-east-2.aws.cloud.foo.example.org" {
  display_name = "Foo-Geo: Production: us-east-2.aws.cloud.foo.example.org"
  groups = [ "foo-clusters-prod" ]

  check_command = "dummy"
  check_interval = 5m
  retry_interval = 30s

  vars += {
    dummy_state = 0 //Up
    dummy_text = "Everything OK."
    nats_url = "tls://us-east-2.aws.cloud.foo.example.org"
    nats_context = "foo-check1"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment