From 285e6811f8ff3483865a6f7a4f167037ea9ecb30 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 27 Nov 2024 10:40:45 +0100 Subject: [PATCH 1/2] Use custom OTEL sampler --- packages/sync-service/config/runtime.exs | 5 ++- .../lib/electric/telemetry/sampler.ex | 45 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 packages/sync-service/lib/electric/telemetry/sampler.ex diff --git a/packages/sync-service/config/runtime.exs b/packages/sync-service/config/runtime.exs index 2203fbe3d8..8ed8d00200 100644 --- a/packages/sync-service/config/runtime.exs +++ b/packages/sync-service/config/runtime.exs @@ -82,8 +82,11 @@ otel_simple_processor = {:otel_simple_processor, %{exporter: {:otel_exporter_stdout, []}}} end +otel_sampling_ratio = env!("ELECTRIC_OTEL_SAMPLING_RATIO", :float, 0.01) + config :opentelemetry, - processors: [otel_batch_processor, otel_simple_processor] |> Enum.reject(&is_nil/1) + processors: [otel_batch_processor, otel_simple_processor] |> Enum.reject(&is_nil/1), + sampler: {Electric.Telemetry.Sampler, %{ratio: otel_sampling_ratio}} database_url = env!("DATABASE_URL", :string!) diff --git a/packages/sync-service/lib/electric/telemetry/sampler.ex b/packages/sync-service/lib/electric/telemetry/sampler.ex new file mode 100644 index 0000000000..6d8ef7d1b6 --- /dev/null +++ b/packages/sync-service/lib/electric/telemetry/sampler.ex @@ -0,0 +1,45 @@ +defmodule Electric.Telemetry.Sampler do + @moduledoc """ + Custom sampler that samples all spans except for specifically configured spans for which a given ratio is sampled. + """ + + require OpenTelemetry.Tracer, as: Tracer + + @behaviour :otel_sampler + + # Span names that are sampled probabilistically + @ratio_span_names [ + "pg_txn.replication_client.decode_message", + "pg_txn.replication_client.process_x_log_data", + "pg_txn.replication_client.transaction_received", + "shape_write.log_collector.handle_txn" + ] + + @impl :otel_sampler + def setup(%{ratio: ratio}) do + %{sampling_probability: ratio} + end + + @impl :otel_sampler + def description(%{sampling_probability: sampling_probability}) do + "Custom sampler that samples all spans except for specifically configured spans for which #{sampling_probability * 100}% are sampled." + end + + @impl true + def should_sample(ctx, _trace_id, _links, span_name, _span_kind, _attributes, %{ + sampling_probability: sampling_probability + }) do + tracestate = Tracer.current_span_ctx(ctx) |> OpenTelemetry.Span.tracestate() + + if span_name in @ratio_span_names do + if :rand.uniform() <= sampling_probability do + {:record_and_sample, [], tracestate} + else + {:drop, [], tracestate} + end + else + # Always sample other spans + {:record_and_sample, [], tracestate} + end + end +end From 34c00ddc5492279599d1e7d62e625f7596a182ed Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 27 Nov 2024 11:43:43 +0100 Subject: [PATCH 2/2] Probabilistic sampling of TX spans. --- packages/sync-service/config/runtime.exs | 13 ++++++++++++- .../sync-service/lib/electric/telemetry/sampler.ex | 9 +++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/packages/sync-service/config/runtime.exs b/packages/sync-service/config/runtime.exs index 8ed8d00200..b1b6b79f2b 100644 --- a/packages/sync-service/config/runtime.exs +++ b/packages/sync-service/config/runtime.exs @@ -86,7 +86,18 @@ otel_sampling_ratio = env!("ELECTRIC_OTEL_SAMPLING_RATIO", :float, 0.01) config :opentelemetry, processors: [otel_batch_processor, otel_simple_processor] |> Enum.reject(&is_nil/1), - sampler: {Electric.Telemetry.Sampler, %{ratio: otel_sampling_ratio}} + # sampler: {Electric.Telemetry.Sampler, %{ratio: otel_sampling_ratio}} + # Sample root spans based on our custom sampler + # and inherit sampling decision from remote parents + sampler: + {:parent_based, + %{ + root: {Electric.Telemetry.Sampler, %{ratio: otel_sampling_ratio}}, + remote_parent_sampled: :always_on, + remote_parent_not_sampled: :always_off, + local_parent_sampled: :always_on, + local_parent_not_sampled: :always_off + }} database_url = env!("DATABASE_URL", :string!) diff --git a/packages/sync-service/lib/electric/telemetry/sampler.ex b/packages/sync-service/lib/electric/telemetry/sampler.ex index 6d8ef7d1b6..eb27e1b79c 100644 --- a/packages/sync-service/lib/electric/telemetry/sampler.ex +++ b/packages/sync-service/lib/electric/telemetry/sampler.ex @@ -8,11 +8,8 @@ defmodule Electric.Telemetry.Sampler do @behaviour :otel_sampler # Span names that are sampled probabilistically - @ratio_span_names [ - "pg_txn.replication_client.decode_message", - "pg_txn.replication_client.process_x_log_data", - "pg_txn.replication_client.transaction_received", - "shape_write.log_collector.handle_txn" + @probabilistic_span_names [ + "pg_txn.replication_client.process_x_log_data" ] @impl :otel_sampler @@ -31,7 +28,7 @@ defmodule Electric.Telemetry.Sampler do }) do tracestate = Tracer.current_span_ctx(ctx) |> OpenTelemetry.Span.tracestate() - if span_name in @ratio_span_names do + if span_name in @probabilistic_span_names do if :rand.uniform() <= sampling_probability do {:record_and_sample, [], tracestate} else