wosc · July 8, 2021 07:49 · elbaldfun · Mar 13, 2024
diff --git a/README.md b/README.md
diff --git a/fastly.vcl b/fastly.vcl
 # For completeness, this is our head-based sampling implementation, which runs on the Fastly CDN.

 # Creates a new W3C trace context for the request: a random trace id and span
 # id, a head-based sampling decision encoded in the trace flags, and a
 # `tracestate` entry that tells downstream services which sample rate was used.
 sub set_traceparent {
    declare local var.trace_id STRING;
    declare local var.span_id STRING;
    declare local var.trace_flags STRING;
    set var.trace_id = randomstr(32, "1234567890abcdef");
    set var.span_id = randomstr(16, "1234567890abcdef");
    # Default is "not sampled"; switched to "01" below for the sampled share.
    set var.trace_flags = "00";

    # Head-based sampling.
    # 16**32 is too big for ints, but assuming each character is equally likely
    # we can just as well only look at the e.g. last two characters to apply the samplerate.
    # Note: two hex characters give 256 values, 13 of which are multiples of 20,
    # so the effective rate is 13/256 (about 5.1%) rather than exactly 1 in 20.
    declare local var.samplerate INTEGER;
    set var.samplerate = 20;
    declare local var.trace_hash INTEGER;
    set var.trace_hash = std.strtol(substr(var.trace_id, -2), 16);
    set var.trace_hash %= var.samplerate;
    if (var.trace_hash == 0) {
        set var.trace_flags = "01";
    }

    set req.http.traceparent = "00-" var.trace_id "-" var.span_id "-" var.trace_flags;
    # VCL string literals are not interpolated, so the rate has to be appended
    # by concatenation; a literal "${samplerate}" would be sent as-is and the
    # numeric pattern used by the HAProxy tracestate parser would never match.
    set req.http.tracestate = "zon=SampleRate:" var.samplerate;
 }

 # Start a trace only once per client request: skip restarts and skip passes
 # where this service has already been visited.
 sub vcl_recv {
   if (req.restarts == 0 && fastly.ff.visits_this_service == 0) {
       call set_traceparent;
   }
 }
diff --git a/fluentbit.conf b/fluentbit.conf
 # fluentbit insists on splitting its configuration into different files;
 # for brevity, I've inlined the snippets here as comments instead.

 # Settings for the fluentbit service itself.
 [SERVICE]
    daemon Off
    flush 5
    log_level info
    # Parser definitions live in a separate file; its content is shown in the
    # commented [PARSER] snippet below.
    parsers_file /etc/td-agent-bit/zon-parsers.conf
 # Content of zon-parsers.conf: parse each line as JSON and take the record
 # time from its "fluentbit" key (filled by haproxy with %tr).
 # [PARSER]
 #     Name        zon-haproxy-json
 #     Format      json
 #     Time_Key    fluentbit
 #     Time_Format %d/%b/%Y:%H:%M:%S.%L

 # Follow the haproxy logfile, parse each line with the zon-haproxy-json parser
 # and tag the records so the filter and output sections below can match them.
 [INPUT]
    Name tail
    Path /var/log/haproxy.log
    Parser zon-haproxy-json
    Tag zon-haproxy
    # Presumably the state database in which the tail input remembers its
    # position in the logfile - confirm against the tail input documentation.
    DB /var/lib/fluent-bit-haproxy.sqlite
    # Sync is expensive, and we don't care that much about the logfile location
    DB.Sync Off
    Mem_Buf_Limit 200MB

 # Rate limiting, applied to every tag (Match *).
 # NOTE(review): presumably this allows an average of 800 records per 1s
 # interval, averaged over a window of 300 intervals - confirm against the
 # throttle filter documentation.
 [FILTER]
    Name     throttle
    Match    *
    Rate     800
    Window   300
    Interval 1s

 # Run the Lua function add_microseconds on every zon-haproxy record.
 [FILTER]
    Name lua
    Match zon-haproxy
    Script /etc/td-agent-bit/haproxy.lua
    Call add_microseconds
 # Content of haproxy.lua: haproxy logs "timestamp" as seconds followed by
 # milliseconds (%Ts%ms); multiplying by 1000 turns that into microseconds.
 # function add_microseconds(tag, timestamp, record)
 #   record["timestamp"] = record["timestamp"] * 1000
 #   return 1, timestamp, record
 # end

 # POST the zon-haproxy records as JSON to localhost:9411/api/v2/spans, which is
 # the port of the zipkin receiver in the otel configuration.
 [OUTPUT]
    Name http
    Match zon-haproxy
    host localhost
    port 9411
    uri /api/v2/spans
    format json
    # NOTE(review): presumably stops fluentbit from adding its own date key to
    # each record - confirm against the http output documentation.
    json_date_key false
diff --git a/haproxy.cfg b/haproxy.cfg
 global
  # Send logs via syslog to 127.0.0.1:514 with facility local0 and level info;
  # the maximum line length is raised to 8192, presumably because the JSON
  # log-format line below is long.
  log 127.0.0.1:514 len 8192 local0 info
 # Note: For the simplest setup, point haproxy directly to a fluentbit syslog input <https://docs.fluentbit.io/manual/v/1.7/pipeline/inputs/syslog>
 # If you have a more classic setup that uses e.g. rsyslogd to write logfiles (which fluentbit then tails),
 # you have to remove the typical syslog prefix `Jul  8 06:25:01 myhostname haproxy[1477]:`, and only write JSON lines to the file
 # On Ubuntu, this can be done via an /etc/rsyslog.d/49-haproxy.conf like this:
 # module(load="imudp")
 # input(type="imudp" port="514")
 # template(name="onlymsg" type="string" string="%msg%\n")
 # local0.*       action(type="omfile" file="/var/log/haproxy.log" template="onlymsg")
 # & ~

 defaults
  mode http
  option httplog
  # Each request is logged as one JSON object in the shape of a Zipkin v2 span
  # (traceId, parentId, id, kind, localEndpoint, tags, timestamp, duration).
  # - "timestamp" is seconds followed by milliseconds (%Ts%ms); the fluentbit
  #   Lua filter multiplies it by 1000.
  # - "fluentbit" carries %tr and is the Time_Key of the fluentbit parser.
  # - The txn.* variables are set by the http-request rules further down.
  # NOTE(review): "duration" is written as `%Ta.e3`, presumably to scale
  # milliseconds to microseconds; a value such as `12.e3` has no digit after
  # the decimal point, which strict JSON does not allow - confirm the consumer
  # accepts it.
  log-format '{"timestamp":%Ts%ms,"fluentbit":"%tr","duration":%Ta.e3,"tags":{"SampleRate":"%[var(txn.samplerate)]","sampling.priority":"%[var(txn.trace_flags)]","haproxy.backend_server":"%s","haproxy.tq":"%TR","haproxy.tw":"%Tw","haproxy.tc":"%Tc","haproxy.tr":"%Tr","haproxy.tt":"%Ta","haproxy.t_state":"%tsc","haproxy.actconn":"%ac","haproxy.feconn":"%fc","haproxy.beconn":"%bc","haproxy.srv_conn":"%sc","haproxy.retries":"%rc","haproxy.srv_queue":"%sq","haproxy.backend_queue":"%bq","http.host":"%[capture.req.hdr(0),json(utf8s)]","http.user_agent":"%[capture.req.hdr(1),json(utf8s)]","http.referrer":"%[capture.req.hdr(2),json(utf8s)]","http.client_ip":"%ci","http.url":"%HU","http.method":"%HM","http.status_code":"%ST","http.request_content_length":"%U","http.response_content_length":"%B","service.instance.id":"myhostname.example.com"},"localEndpoint":{"serviceName":"haproxy"},"traceId":"%[var(txn.trace_id)]","parentId":"%[var(txn.parent_id)]","id":"%[var(txn.span_id)]","kind":"SERVER"}'

  # The ordering of these is relevant
  # The ids are the indexes used as capture.req.hdr(N) in log-format and in the
  # http-request set-var rules, so adding or reordering a capture shifts them.
  # NOTE(review): the HAProxy manual lists `capture request header` as valid in
  # frontend/listen sections only - confirm it is accepted in `defaults`, or
  # move these lines into the frontend.
  capture request header Host len 30              # id 0
  capture request header User-Agent len 255       # id 1
  capture request header Referer len 255          # id 2
  capture request header traceparent len 255      # id 3
  capture request header tracestate  len 255      # id 4

  # Parse w3c traceparent
  # Captured header 3 is `00-<trace id>-<span id>-<flags>`; each rule keeps one
  # capture group. The incoming span id becomes this span's parent id.
  # NOTE(review): the flags group only accepts decimal digits - confirm that
  # hexadecimal flag values never need to be handled here.
  http-request set-var(txn.trace_id) capture.req.hdr(3),regsub(\"^00-([^-]+)-([^-]+)-([0-9]+)$\",\"\1\")
  http-request set-var(txn.parent_id) capture.req.hdr(3),regsub(\"^00-([^-]+)-([^-]+)-([0-9]+)$\",\"\2\")
  http-request set-var(txn.trace_flags) capture.req.hdr(3),regsub(\"^00-([^-]+)-([^-]+)-([0-9]+)$\",\"\3\")
  # Parse custom w3c tracestate (see README)
  # The leading and trailing `.*` allow other tracestate entries around the
  # `zon=SampleRate:<number>` entry.
  http-request set-var(txn.samplerate) capture.req.hdr(4),regsub(\"^.*zon=SampleRate:([0-9.]+).*$\",\"\1\")
  # Propagate traceparent
  # Kludgy way of using only 16 chars of a 32-char uuid
  # (the first three dash-separated groups of the uuid are joined)
  http-request set-var(txn.span_id) uuid(4),regsub(\"^([^-]+)-([^-]+)-([^-]+)-([^-]+)-([^-]+)$\",\"\1\2\3\")
  # Forward the same trace id and flags, with the new span id as parent for the backend.
  http-request set-header traceparent 00-%[var(txn.trace_id)]-%[var(txn.span_id)]-%[var(txn.trace_flags)]
  
diff --git a/otel.yaml b/otel.yaml
 # One traces pipeline: spans come in through the zipkin receiver, pass the
 # probabilistic_sampler processor and leave through the otlp exporter.
 service:
  pipelines:
    traces:
      receivers: [zipkin]
      processors: [probabilistic_sampler]
      exporters: [otlp]

 receivers:
  # Takes the spans that the fluentbit http output posts to localhost:9411/api/v2/spans.
  zipkin:  # port 9411

 processors:
  probabilistic_sampler:
    # Only evaluate the `sampling.priority` span attribute here; we rely on traceparent flags for sampling (see README)
    # haproxy writes the traceparent flags ("00"/"01") into that attribute.
    # NOTE(review): presumably 100 means the processor drops nothing on its
    # own, and the string values are read as 0/1 - confirm against the
    # processor documentation.
    sampling_percentage: 100

 exporters:
  # Ships the spans via OTLP to Honeycomb.
  otlp:
    endpoint: "api.honeycomb.io:443"
    headers:
      # Placeholders: replace with the real API key and dataset name.
      "x-honeycomb-team": "YOUR-API-KEY"
      "x-honeycomb-dataset": "YOUR-DATASET-NAME"
	# For completeness, this is our head-based sampling implementation, which runs on the Fastly CDN.

	# Creates a new W3C trace context for the request: a random trace id and span
	# id, a head-based sampling decision encoded in the trace flags, and a
	# `tracestate` entry that tells downstream services which sample rate was used.
	sub set_traceparent {
	    declare local var.trace_id STRING;
	    declare local var.span_id STRING;
	    declare local var.trace_flags STRING;
	    set var.trace_id = randomstr(32, "1234567890abcdef");
	    set var.span_id = randomstr(16, "1234567890abcdef");
	    # Default is "not sampled"; switched to "01" below for the sampled share.
	    set var.trace_flags = "00";

	    # Head-based sampling.
	    # 16**32 is too big for ints, but assuming each character is equally likely
	    # we can just as well only look at the e.g. last two characters to apply the samplerate.
	    # Note: two hex characters give 256 values, 13 of which are multiples of 20,
	    # so the effective rate is 13/256 (about 5.1%) rather than exactly 1 in 20.
	    declare local var.samplerate INTEGER;
	    set var.samplerate = 20;
	    declare local var.trace_hash INTEGER;
	    set var.trace_hash = std.strtol(substr(var.trace_id, -2), 16);
	    set var.trace_hash %= var.samplerate;
	    if (var.trace_hash == 0) {
	        set var.trace_flags = "01";
	    }

	    set req.http.traceparent = "00-" var.trace_id "-" var.span_id "-" var.trace_flags;
	    # VCL string literals are not interpolated, so the rate has to be appended
	    # by concatenation; a literal "${samplerate}" would be sent as-is and the
	    # numeric pattern used by the HAProxy tracestate parser would never match.
	    set req.http.tracestate = "zon=SampleRate:" var.samplerate;
	}

	# Start a trace only once per client request: skip restarts and skip passes
	# where this service has already been visited.
	sub vcl_recv {
	    if (req.restarts == 0 && fastly.ff.visits_this_service == 0) {
	        call set_traceparent;
	    }
	}
	# fluentbit insists on splitting its configuration into different files;
	# for brevity, I've inlined the snippets here as comments instead.

	# Settings for the fluentbit service itself.
	[SERVICE]
	    daemon       Off
	    flush        5
	    log_level    info
	    parsers_file /etc/td-agent-bit/zon-parsers.conf
	# Content of zon-parsers.conf: parse each line as JSON and take the record
	# time from its "fluentbit" key.
	# [PARSER]
	#     Name        zon-haproxy-json
	#     Format      json
	#     Time_Key    fluentbit
	#     Time_Format %d/%b/%Y:%H:%M:%S.%L

	# Follow the haproxy logfile, parse each line with the zon-haproxy-json parser
	# and tag the records as zon-haproxy.
	[INPUT]
	    Name          tail
	    Path          /var/log/haproxy.log
	    Parser        zon-haproxy-json
	    Tag           zon-haproxy
	    DB            /var/lib/fluent-bit-haproxy.sqlite
	    # Syncing is costly, and losing the exact logfile position is acceptable
	    DB.Sync       Off
	    Mem_Buf_Limit 200MB

	# Rate limiting, applied to every tag.
	[FILTER]
	    Name     throttle
	    Match    *
	    Rate     800
	    Window   300
	    Interval 1s

	# Run the Lua function add_microseconds on every zon-haproxy record.
	[FILTER]
	    Name   lua
	    Match  zon-haproxy
	    Script /etc/td-agent-bit/haproxy.lua
	    Call   add_microseconds
	# Content of haproxy.lua: scale the logged "timestamp" by 1000.
	# function add_microseconds(tag, timestamp, record)
	#   record["timestamp"] = record["timestamp"] * 1000
	#   return 1, timestamp, record
	# end

	# POST the zon-haproxy records as JSON to localhost:9411/api/v2/spans.
	[OUTPUT]
	    Name          http
	    Match         zon-haproxy
	    host          localhost
	    port          9411
	    uri           /api/v2/spans
	    format        json
	    json_date_key false
	global
	# Send logs via syslog to 127.0.0.1:514 with facility local0 and level info;
	# the maximum line length is raised to 8192, presumably because the JSON
	# log-format line below is long.
	log 127.0.0.1:514 len 8192 local0 info
	# Note: For the simplest setup, point haproxy directly to a fluentbit syslog input <https://docs.fluentbit.io/manual/v/1.7/pipeline/inputs/syslog>
	# If you have a more classic setup that uses e.g. rsyslogd to write logfiles (which fluentbit then tails),
	# you have to remove the typical syslog prefix `Jul 8 06:25:01 myhostname haproxy[1477]:`, and only write JSON lines to the file
	# On Ubuntu, this can be done via an /etc/rsyslog.d/49-haproxy.conf like this:
	# module(load="imudp")
	# input(type="imudp" port="514")
	# template(name="onlymsg" type="string" string="%msg%\n")
	# local0.* action(type="omfile" file="/var/log/haproxy.log" template="onlymsg")
	# & ~

	defaults
	mode http
	option httplog
	# Each request is logged as one JSON object in the shape of a Zipkin v2 span
	# (traceId, parentId, id, kind, localEndpoint, tags, timestamp, duration).
	# - "timestamp" is seconds followed by milliseconds (%Ts%ms); the fluentbit
	#   Lua filter multiplies it by 1000.
	# - "fluentbit" carries %tr and is the Time_Key of the fluentbit parser.
	# - The txn.* variables are set by the http-request rules further down.
	# NOTE(review): "duration" is written as `%Ta.e3`, presumably to scale
	# milliseconds to microseconds; a value such as `12.e3` has no digit after
	# the decimal point, which strict JSON does not allow - confirm the consumer
	# accepts it.
	log-format '{"timestamp":%Ts%ms,"fluentbit":"%tr","duration":%Ta.e3,"tags":{"SampleRate":"%[var(txn.samplerate)]","sampling.priority":"%[var(txn.trace_flags)]","haproxy.backend_server":"%s","haproxy.tq":"%TR","haproxy.tw":"%Tw","haproxy.tc":"%Tc","haproxy.tr":"%Tr","haproxy.tt":"%Ta","haproxy.t_state":"%tsc","haproxy.actconn":"%ac","haproxy.feconn":"%fc","haproxy.beconn":"%bc","haproxy.srv_conn":"%sc","haproxy.retries":"%rc","haproxy.srv_queue":"%sq","haproxy.backend_queue":"%bq","http.host":"%[capture.req.hdr(0),json(utf8s)]","http.user_agent":"%[capture.req.hdr(1),json(utf8s)]","http.referrer":"%[capture.req.hdr(2),json(utf8s)]","http.client_ip":"%ci","http.url":"%HU","http.method":"%HM","http.status_code":"%ST","http.request_content_length":"%U","http.response_content_length":"%B","service.instance.id":"myhostname.example.com"},"localEndpoint":{"serviceName":"haproxy"},"traceId":"%[var(txn.trace_id)]","parentId":"%[var(txn.parent_id)]","id":"%[var(txn.span_id)]","kind":"SERVER"}'

	# The ordering of these is relevant
	# The ids are the indexes used as capture.req.hdr(N) in log-format and in the
	# http-request set-var rules, so adding or reordering a capture shifts them.
	# NOTE(review): the HAProxy manual lists `capture request header` as valid in
	# frontend/listen sections only - confirm it is accepted in `defaults`, or
	# move these lines into the frontend.
	capture request header Host len 30 # id 0
	capture request header User-Agent len 255 # id 1
	capture request header Referer len 255 # id 2
	capture request header traceparent len 255 # id 3
	capture request header tracestate len 255 # id 4

	# Parse w3c traceparent
	# Captured header 3 is `00-<trace id>-<span id>-<flags>`; each rule keeps one
	# capture group. The incoming span id becomes this span's parent id.
	http-request set-var(txn.trace_id) capture.req.hdr(3),regsub(\"^00-([^-]+)-([^-]+)-([0-9]+)$\",\"\1\")
	http-request set-var(txn.parent_id) capture.req.hdr(3),regsub(\"^00-([^-]+)-([^-]+)-([0-9]+)$\",\"\2\")
	http-request set-var(txn.trace_flags) capture.req.hdr(3),regsub(\"^00-([^-]+)-([^-]+)-([0-9]+)$\",\"\3\")
	# Parse custom w3c tracestate (see README)
	# The `.*` on both sides is required: without the quantifiers the pattern
	# only matches when exactly one character surrounds the entry, so a plain
	# `zon=SampleRate:20` header would not be parsed.
	http-request set-var(txn.samplerate) capture.req.hdr(4),regsub(\"^.*zon=SampleRate:([0-9.]+).*$\",\"\1\")
	# Propagate traceparent
	# Kludgy way of using only 16 chars of a 32-char uuid
	# (the first three dash-separated groups of the uuid are joined)
	http-request set-var(txn.span_id) uuid(4),regsub(\"^([^-]+)-([^-]+)-([^-]+)-([^-]+)-([^-]+)$\",\"\1\2\3\")
	# Forward the same trace id and flags, with the new span id as parent for the backend.
	http-request set-header traceparent 00-%[var(txn.trace_id)]-%[var(txn.span_id)]-%[var(txn.trace_flags)]
	# One traces pipeline: spans come in through the zipkin receiver, pass the
	# probabilistic_sampler processor and leave through the otlp exporter.
	service:
	  pipelines:
	    traces:
	      receivers: [zipkin]
	      processors: [probabilistic_sampler]
	      exporters: [otlp]

	receivers:
	  zipkin: # port 9411

	processors:
	  probabilistic_sampler:
	    # Only evaluate the `sampling.priority` span attribute here; we rely on traceparent flags for sampling (see README)
	    sampling_percentage: 100

	exporters:
	  otlp:
	    endpoint: "api.honeycomb.io:443"
	    headers:
	      # Placeholders: replace with the real API key and dataset name.
	      "x-honeycomb-team": "YOUR-API-KEY"
	      "x-honeycomb-dataset": "YOUR-DATASET-NAME"