diff --git a/api/envoy/config/route/v3/route_components.proto b/api/envoy/config/route/v3/route_components.proto index a3d4d009f0eb..c3cf19e150aa 100644 --- a/api/envoy/config/route/v3/route_components.proto +++ b/api/envoy/config/route/v3/route_components.proto @@ -815,7 +815,10 @@ message RouteAction { // value, the request will be mirrored. core.v3.RuntimeFractionalPercent runtime_fraction = 3; - // Determines if the trace span should be sampled. Defaults to true. + // Specifies whether the trace span for the shadow request should be sampled. If this field is not explicitly set, + // the shadow request will inherit the sampling decision of its parent span. This ensures consistency with the trace + // sampling policy of the original request and prevents oversampling, especially in scenarios where runtime sampling + // is disabled. google.protobuf.BoolValue trace_sampled = 4; // Disables appending the ``-shadow`` suffix to the shadowed ``Host`` header. Defaults to ``false``. diff --git a/changelogs/current.yaml b/changelogs/current.yaml index e788764300a6..60be44bb49ca 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -117,6 +117,13 @@ minor_behavior_changes: config because existing provider already has up to date routes config. This behavioral change can be temporarily reverted by setting runtime guard ``envoy.reloadable_features.normalize_rds_provider_config`` to false. +- area: router + change: | + Changed the behavior of shadow request sampling so that if trace sampling is not explicitly configured in the shadow + policy, the shadow request will inherit the parent's sampling decision. This means sampling will follow the trace + sampling policy of the original request, which prevents oversampling when runtime sampling is disabled. This + behavior can be temporarily reverted by setting the runtime guard + ``envoy.reloadable_features.shadow_policy_inherit_trace_sampling`` to ``false``. bug_fixes: # *Changes expected to improve the state of the world and are unlikely to have negative effects* diff --git a/envoy/router/router.h b/envoy/router/router.h index 2efc01474346..af94e4dbd5d7 100644 --- a/envoy/router/router.h +++ b/envoy/router/router.h @@ -538,7 +538,7 @@ class ShadowPolicy { /** * @return true if the trace span should be sampled. */ - virtual bool traceSampled() const PURE; + virtual absl::optional traceSampled() const PURE; /** * @return true if host name should be suffixed with "-shadow". diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index f158bca8b07c..af14c75da675 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -489,7 +489,18 @@ ShadowPolicyImpl::ShadowPolicyImpl(const RequestMirrorPolicy& config, absl::Stat default_value_.set_numerator(100); default_value_.set_denominator(envoy::type::v3::FractionalPercent::HUNDRED); } - trace_sampled_ = PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, trace_sampled, true); + // If trace sampling is not explicitly configured in shadow_policy, we pass null optional to + // inherit the parent's sampling decision. This prevents oversampling when runtime sampling is + // disabled. + if (config.has_trace_sampled()) { + trace_sampled_ = config.trace_sampled().value(); + } else { + // If the shadow policy does not specify trace_sampled, we will inherit the parent's sampling + // decision. + const bool user_parent_sampling_decision = Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.shadow_policy_inherit_trace_sampling"); + trace_sampled_ = user_parent_sampling_decision ? absl::nullopt : absl::make_optional(true); + } } DecoratorImpl::DecoratorImpl(const envoy::config::route::v3::Decorator& decorator) diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h index 344a31326ad5..4fded280a1c8 100644 --- a/source/common/router/config_impl.h +++ b/source/common/router/config_impl.h @@ -509,7 +509,7 @@ class ShadowPolicyImpl : public ShadowPolicy { const Http::LowerCaseString& clusterHeader() const override { return cluster_header_; } const std::string& runtimeKey() const override { return runtime_key_; } const envoy::type::v3::FractionalPercent& defaultValue() const override { return default_value_; } - bool traceSampled() const override { return trace_sampled_; } + absl::optional traceSampled() const override { return trace_sampled_; } bool disableShadowHostSuffixAppend() const override { return disable_shadow_host_suffix_append_; } private: @@ -519,7 +519,7 @@ class ShadowPolicyImpl : public ShadowPolicy { const Http::LowerCaseString cluster_header_; std::string runtime_key_; envoy::type::v3::FractionalPercent default_value_; - bool trace_sampled_; + absl::optional trace_sampled_; const bool disable_shadow_host_suffix_append_; }; diff --git a/source/common/router/router.cc b/source/common/router/router.cc index 197087dcfbd9..b37eb8b3159b 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -1064,7 +1064,6 @@ void Filter::maybeDoShadowing() { if (shadow_trailers_) { request->trailers(Http::createHeaderMap(*shadow_trailers_)); } - auto options = Http::AsyncClient::RequestOptions() .setTimeout(timeout_.global_timeout_) .setParentSpan(callbacks_->activeSpan()) diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index c1d277f38000..1c61f2fc22de 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -87,6 +87,7 @@ RUNTIME_GUARD(envoy_reloadable_features_reject_invalid_yaml); RUNTIME_GUARD(envoy_reloadable_features_report_stream_reset_error_code); RUNTIME_GUARD(envoy_reloadable_features_sanitize_http2_headers_without_nghttp2); RUNTIME_GUARD(envoy_reloadable_features_sanitize_sni_in_access_log); +RUNTIME_GUARD(envoy_reloadable_features_shadow_policy_inherit_trace_sampling); RUNTIME_GUARD(envoy_reloadable_features_skip_dns_lookup_for_proxied_requests); RUNTIME_GUARD(envoy_reloadable_features_streaming_shadow); RUNTIME_GUARD(envoy_reloadable_features_strict_duration_validation); diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index 8a25c063734f..8d3ba44ee7cf 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -11542,6 +11542,80 @@ TEST_F(CommonConfigImplTest, TestCommonConfig) { shared_config.ignorePathParametersInPathMatching()); } +TEST_F(RouteMatcherTest, RequestMirrorPoliciesWithTraceSampled) { + const std::string yaml = R"EOF( +virtual_hosts: +- name: www2 + domains: ["*"] + routes: + - match: + prefix: "/foo" + route: + request_mirror_policies: + # Policy with explicit trace_sampled set to false + - cluster: some_cluster + trace_sampled: false + # Policy with explicit trace_sampled set to true + - cluster: some_cluster2 + trace_sampled: true + # Policy with no trace_sampled specified + - cluster: some_cluster3 + cluster: www2 + )EOF"; + + factory_context_.cluster_manager_.initializeClusters( + {"www2", "some_cluster", "some_cluster2", "some_cluster3"}, {}); + + // Test with runtime flag disabled (old behavior) + { + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.shadow_policy_inherit_trace_sampling", "false"}}); + + TestConfigImpl config(parseRouteConfigurationFromYaml(yaml), factory_context_, true, + creation_status_); + + Http::TestRequestHeaderMapImpl headers = genHeaders("www.databricks.com", "/foo", "GET"); + const auto& shadow_policies = config.route(headers, 0)->routeEntry()->shadowPolicies(); + + // First policy should have trace sampled = false + EXPECT_TRUE(shadow_policies[0]->traceSampled().has_value()); + EXPECT_FALSE(shadow_policies[0]->traceSampled().value()); + + // Second policy should have trace sampled = true + EXPECT_TRUE(shadow_policies[1]->traceSampled().has_value()); + EXPECT_TRUE(shadow_policies[1]->traceSampled().value()); + + // With flag disabled, unspecified should default to true + EXPECT_TRUE(shadow_policies[2]->traceSampled().has_value()); + EXPECT_TRUE(shadow_policies[2]->traceSampled().value()); + } + + // Test with runtime flag enabled (new behavior) + { + TestScopedRuntime scoped_runtime; + scoped_runtime.mergeValues( + {{"envoy.reloadable_features.shadow_policy_inherit_trace_sampling", "true"}}); + + TestConfigImpl config(parseRouteConfigurationFromYaml(yaml), factory_context_, true, + creation_status_); + + Http::TestRequestHeaderMapImpl headers = genHeaders("www.databricks.com", "/foo", "GET"); + const auto& shadow_policies = config.route(headers, 0)->routeEntry()->shadowPolicies(); + + // First policy should have trace sampled = false + EXPECT_TRUE(shadow_policies[0]->traceSampled().has_value()); + EXPECT_FALSE(shadow_policies[0]->traceSampled().value()); + + // Second policy should have trace sampled = true + EXPECT_TRUE(shadow_policies[1]->traceSampled().has_value()); + EXPECT_TRUE(shadow_policies[1]->traceSampled().value()); + + // With flag enabled, unspecified should be nullopt + EXPECT_FALSE(shadow_policies[2]->traceSampled().has_value()); + } +} + } // namespace } // namespace Router } // namespace Envoy