From dc874aa04d8af43e201d6694af14da2ece19cd6c Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Mon, 25 Nov 2024 16:54:30 +1100 Subject: [PATCH 01/43] Add feature toggle # Conflicts: # cmd/mimir/config-descriptor.json # cmd/mimir/help-all.txt.tmpl # docs/sources/mimir/configure/configuration-parameters/index.md # pkg/streamingpromql/config.go # pkg/streamingpromql/engine_test.go --- cmd/mimir/config-descriptor.json | 11 ++++++++++ cmd/mimir/help-all.txt.tmpl | 2 ++ .../configuration-parameters/index.md | 6 ++++++ pkg/streamingpromql/config.go | 3 +++ pkg/streamingpromql/engine_test.go | 20 ++++++++++++------- pkg/streamingpromql/query.go | 2 +- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 2254842528b..bbf81bbf5a6 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -2043,6 +2043,17 @@ "fieldFlag": "querier.mimir-query-engine.enable-histogram-quantile-function", "fieldType": "boolean", "fieldCategory": "experimental" + }, + { + "kind": "field", + "name": "enable_one_to_many_and_many_to_one_binary_operations", + "required": false, + "desc": "Enable support for one-to-many and many-to-one binary operations (group_left/group_right) in the Mimir query engine. Only applies if the MQE is in use.", + "fieldValue": null, + "fieldDefaultValue": true, + "fieldFlag": "querier.mimir-query-engine.enable-one-to-many-and-many-to-one-binary-operations", + "fieldType": "boolean", + "fieldCategory": "experimental" } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 6c4fd47ff04..b7650b75e06 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -2105,6 +2105,8 @@ Usage of ./cmd/mimir/mimir: [experimental] Enable support for binary logical operations in the Mimir query engine. Only applies if the MQE is in use. 
(default true) -querier.mimir-query-engine.enable-histogram-quantile-function [experimental] Enable support for the histogram_quantile function in the Mimir query engine. Only applies if the MQE is in use. (default true) + -querier.mimir-query-engine.enable-one-to-many-and-many-to-one-binary-operations + [experimental] Enable support for one-to-many and many-to-one binary operations (group_left/group_right) in the Mimir query engine. Only applies if the MQE is in use. (default true) -querier.mimir-query-engine.enable-scalar-scalar-binary-comparison-operations [experimental] Enable support for binary comparison operations between two scalars in the Mimir query engine. Only applies if the MQE is in use. (default true) -querier.mimir-query-engine.enable-scalars diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index b21c2bd6317..5acb36a8415 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -1536,6 +1536,12 @@ mimir_query_engine: # Mimir query engine. Only applies if the MQE is in use. # CLI flag: -querier.mimir-query-engine.enable-histogram-quantile-function [enable_histogram_quantile_function: | default = true] + + # (experimental) Enable support for one-to-many and many-to-one binary + # operations (group_left/group_right) in the Mimir query engine. Only applies + # if the MQE is in use. 
+ # CLI flag: -querier.mimir-query-engine.enable-one-to-many-and-many-to-one-binary-operations + [enable_one_to_many_and_many_to_one_binary_operations: | default = true] ``` ### frontend diff --git a/pkg/streamingpromql/config.go b/pkg/streamingpromql/config.go index 279ed62c96f..3f241e7ecd4 100644 --- a/pkg/streamingpromql/config.go +++ b/pkg/streamingpromql/config.go @@ -26,6 +26,7 @@ type FeatureToggles struct { EnableScalars bool `yaml:"enable_scalars" category:"experimental"` EnableSubqueries bool `yaml:"enable_subqueries" category:"experimental"` EnableHistogramQuantileFunction bool `yaml:"enable_histogram_quantile_function" category:"experimental"` + EnableOneToManyAndManyToOneBinaryOperations bool `yaml:"enable_one_to_many_and_many_to_one_binary_operations" category:"experimental"` } // EnableAllFeatures enables all features supported by MQE, including experimental or incomplete features. @@ -39,6 +40,7 @@ var EnableAllFeatures = FeatureToggles{ true, true, true, + true, } func (t *FeatureToggles) RegisterFlags(f *flag.FlagSet) { @@ -50,4 +52,5 @@ func (t *FeatureToggles) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&t.EnableScalars, "querier.mimir-query-engine.enable-scalars", true, "Enable support for scalars in the Mimir query engine. Only applies if the MQE is in use.") f.BoolVar(&t.EnableSubqueries, "querier.mimir-query-engine.enable-subqueries", true, "Enable support for subqueries in the Mimir query engine. Only applies if the MQE is in use.") f.BoolVar(&t.EnableHistogramQuantileFunction, "querier.mimir-query-engine.enable-histogram-quantile-function", true, "Enable support for the histogram_quantile function in the Mimir query engine. Only applies if the MQE is in use.") + f.BoolVar(&t.EnableOneToManyAndManyToOneBinaryOperations, "querier.mimir-query-engine.enable-one-to-many-and-many-to-one-binary-operations", true, "Enable support for one-to-many and many-to-one binary operations (group_left/group_right) in the Mimir query engine. 
Only applies if the MQE is in use.") } diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index d3e4ff2dfc8..3074eaab0b6 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -50,12 +50,10 @@ func TestUnsupportedPromQLFeatures(t *testing.T) { // The goal of this is not to list every conceivable expression that is unsupported, but to cover all the // different cases and make sure we produce a reasonable error message when these cases are encountered. unsupportedExpressions := map[string]string{ - "metric{} + on() group_left() other_metric{}": "binary expression with many-to-one matching", - "metric{} + on() group_right() other_metric{}": "binary expression with one-to-many matching", - "topk(5, metric{})": "'topk' aggregation with parameter", - `count_values("foo", metric{})`: "'count_values' aggregation with parameter", - "quantile_over_time(0.4, metric{}[5m])": "'quantile_over_time' function", - "quantile(0.95, metric{})": "'quantile' aggregation with parameter", + "topk(5, metric{})": "'topk' aggregation with parameter", + `count_values("foo", metric{})`: "'count_values' aggregation with parameter", + "quantile_over_time(0.4, metric{}[5m])": "'quantile_over_time' function", + "quantile(0.95, metric{})": "'quantile' aggregation with parameter", } for expression, expectedError := range unsupportedExpressions { @@ -157,12 +155,20 @@ func TestUnsupportedPromQLFeaturesWithFeatureToggles(t *testing.T) { requireQueryIsUnsupported(t, featureToggles, "sum_over_time(metric[1m:10s])", "subquery") }) - t.Run("classic histograms", func(t *testing.T) { + t.Run("histogram_quantile function", func(t *testing.T) { featureToggles := EnableAllFeatures featureToggles.EnableHistogramQuantileFunction = false requireQueryIsUnsupported(t, featureToggles, "histogram_quantile(0.5, metric)", "'histogram_quantile' function") }) + + t.Run("one-to-many and many-to-one binary operations", func(t *testing.T) { + 
featureToggles := EnableAllFeatures + featureToggles.EnableOneToManyAndManyToOneBinaryOperations = false + + requireQueryIsUnsupported(t, featureToggles, "metric{} + on() group_left() other_metric{}", "binary expression with many-to-one matching") + requireQueryIsUnsupported(t, featureToggles, "metric{} + on() group_right() other_metric{}", "binary expression with one-to-many matching") + }) } func requireQueryIsUnsupported(t *testing.T, toggles FeatureToggles, expression string, expectedError string) { diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 2f840f2aff6..8a85ccbfd99 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -249,7 +249,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr, timeRange types return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%v'", e.Op)) } - if !e.Op.IsSetOperator() && e.VectorMatching.Card != parser.CardOneToOne { + if !e.Op.IsSetOperator() && e.VectorMatching.Card != parser.CardOneToOne && !q.engine.featureToggles.EnableOneToManyAndManyToOneBinaryOperations { return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with %v matching", e.VectorMatching.Card)) } From d9a467eb4a4a6f0831100087fa978aa8d0be8d79 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 26 Nov 2024 15:27:32 +1100 Subject: [PATCH 02/43] Enable upstream test cases --- .../testdata/upstream/collision.test | 9 +- .../testdata/upstream/operators.test | 134 ++++++++---------- 2 files changed, 63 insertions(+), 80 deletions(-) diff --git a/pkg/streamingpromql/testdata/upstream/collision.test b/pkg/streamingpromql/testdata/upstream/collision.test index c5484d19adc..11401326159 100644 --- a/pkg/streamingpromql/testdata/upstream/collision.test +++ b/pkg/streamingpromql/testdata/upstream/collision.test @@ -10,11 +10,10 @@ load 1s 
node_cpu_seconds_total{cpu="35",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 node_cpu_seconds_total{cpu="89",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 -# Unsupported by streaming engine. -# eval instant at 4s count by(namespace, pod, cpu) (node_cpu_seconds_total{cpu=~".*",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v"}) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{namespace="observability",pod="node-exporter-l454v"} -# {cpu="10",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 -# {cpu="35",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 -# {cpu="89",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 +eval instant at 4s count by(namespace, pod, cpu) (node_cpu_seconds_total{cpu=~".*",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v"}) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{namespace="observability",pod="node-exporter-l454v"} + {cpu="10",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + {cpu="35",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + {cpu="89",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 clear diff --git a/pkg/streamingpromql/testdata/upstream/operators.test b/pkg/streamingpromql/testdata/upstream/operators.test index 97aac16c642..1b8da810caa 100644 --- a/pkg/streamingpromql/testdata/upstream/operators.test +++ 
b/pkg/streamingpromql/testdata/upstream/operators.test @@ -334,98 +334,82 @@ load 5m threshold{instance="abc",job="node",target="a@b.com"} 0 # Copy machine role to node variable. -# Unsupported by streaming engine. -# eval instant at 1m node_role * on (instance) group_right (role) node_var -# {instance="abc",job="node",role="prometheus"} 2 +eval instant at 1m node_role * on (instance) group_right (role) node_var + {instance="abc",job="node",role="prometheus"} 2 -# Unsupported by streaming engine. -# eval instant at 1m node_var * on (instance) group_left (role) node_role -# {instance="abc",job="node",role="prometheus"} 2 +eval instant at 1m node_var * on (instance) group_left (role) node_role + {instance="abc",job="node",role="prometheus"} 2 -# Unsupported by streaming engine. -# eval instant at 1m node_var * ignoring (role) group_left (role) node_role -# {instance="abc",job="node",role="prometheus"} 2 +eval instant at 1m node_var * ignoring (role) group_left (role) node_role + {instance="abc",job="node",role="prometheus"} 2 -# Unsupported by streaming engine. -# eval instant at 1m node_role * ignoring (role) group_right (role) node_var -# {instance="abc",job="node",role="prometheus"} 2 +eval instant at 1m node_role * ignoring (role) group_right (role) node_var + {instance="abc",job="node",role="prometheus"} 2 # Copy machine role to node variable with instrumentation labels. -# Unsupported by streaming engine. -# eval instant at 1m node_cpu * ignoring (role, mode) group_left (role) node_role -# {instance="abc",job="node",mode="idle",role="prometheus"} 3 -# {instance="abc",job="node",mode="user",role="prometheus"} 1 +eval instant at 1m node_cpu * ignoring (role, mode) group_left (role) node_role + {instance="abc",job="node",mode="idle",role="prometheus"} 3 + {instance="abc",job="node",mode="user",role="prometheus"} 1 -# Unsupported by streaming engine. 
-# eval instant at 1m node_cpu * on (instance) group_left (role) node_role -# {instance="abc",job="node",mode="idle",role="prometheus"} 3 -# {instance="abc",job="node",mode="user",role="prometheus"} 1 +eval instant at 1m node_cpu * on (instance) group_left (role) node_role + {instance="abc",job="node",mode="idle",role="prometheus"} 3 + {instance="abc",job="node",mode="user",role="prometheus"} 1 # Ratio of total. -# Unsupported by streaming engine. -# eval instant at 1m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) -# {instance="abc",job="node",mode="idle"} .75 -# {instance="abc",job="node",mode="user"} .25 -# {instance="def",job="node",mode="idle"} .80 -# {instance="def",job="node",mode="user"} .20 - -# Unsupported by streaming engine. -# eval instant at 1m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) -# {job="node",mode="idle"} 0.7857142857142857 -# {job="node",mode="user"} 0.21428571428571427 - -# Unsupported by streaming engine. -# eval instant at 1m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) -# {} 1.0 - - -# Unsupported by streaming engine. -# eval instant at 1m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) -# {instance="abc",job="node",mode="idle"} .75 -# {instance="abc",job="node",mode="user"} .25 -# {instance="def",job="node",mode="idle"} .80 -# {instance="def",job="node",mode="user"} .20 - -# Unsupported by streaming engine. -# eval instant at 1m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) -# {instance="abc",job="node",mode="idle"} .75 -# {instance="abc",job="node",mode="user"} .25 -# {instance="def",job="node",mode="idle"} .80 -# {instance="def",job="node",mode="user"} .20 - -# Unsupported by streaming engine. 
-# eval instant at 1m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) -# {job="node",mode="idle"} 0.7857142857142857 -# {job="node",mode="user"} 0.21428571428571427 - -# Unsupported by streaming engine. -# eval instant at 1m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) -# {} 1.0 +eval instant at 1m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) + {instance="abc",job="node",mode="idle"} .75 + {instance="abc",job="node",mode="user"} .25 + {instance="def",job="node",mode="idle"} .80 + {instance="def",job="node",mode="user"} .20 + +eval instant at 1m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) + {job="node",mode="idle"} 0.7857142857142857 + {job="node",mode="user"} 0.21428571428571427 + +eval instant at 1m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) + {} 1.0 + + +eval instant at 1m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) + {instance="abc",job="node",mode="idle"} .75 + {instance="abc",job="node",mode="user"} .25 + {instance="def",job="node",mode="idle"} .80 + {instance="def",job="node",mode="user"} .20 + +eval instant at 1m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) + {instance="abc",job="node",mode="idle"} .75 + {instance="abc",job="node",mode="user"} .25 + {instance="def",job="node",mode="idle"} .80 + {instance="def",job="node",mode="user"} .20 + +eval instant at 1m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) + {job="node",mode="idle"} 0.7857142857142857 + {job="node",mode="user"} 0.21428571428571427 + +eval instant at 1m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) + {} 1.0 # Copy over label from metric with no matching labels, without having to list cross-job target labels ('job' here). 
-# Unsupported by streaming engine. -# eval instant at 1m node_cpu + on(dummy) group_left(foo) random*0 -# {instance="abc",job="node",mode="idle",foo="bar"} 3 -# {instance="abc",job="node",mode="user",foo="bar"} 1 -# {instance="def",job="node",mode="idle",foo="bar"} 8 -# {instance="def",job="node",mode="user",foo="bar"} 2 +eval instant at 1m node_cpu + on(dummy) group_left(foo) random*0 + {instance="abc",job="node",mode="idle",foo="bar"} 3 + {instance="abc",job="node",mode="user",foo="bar"} 1 + {instance="def",job="node",mode="idle",foo="bar"} 8 + {instance="def",job="node",mode="user",foo="bar"} 2 # Use threshold from metric, and copy over target. -# Unsupported by streaming engine. -# eval instant at 1m node_cpu > on(job, instance) group_left(target) threshold -# node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 -# node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 +eval instant at 1m node_cpu > on(job, instance) group_left(target) threshold + node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 + node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 # Use threshold from metric, and a default (1) if it's not present. -# Unsupported by streaming engine. 
-# eval instant at 1m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) -# node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 -# node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 -# node_cpu{instance="def",job="node",mode="idle"} 8 -# node_cpu{instance="def",job="node",mode="user"} 2 +eval instant at 1m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) + node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 + node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 + node_cpu{instance="def",job="node",mode="idle"} 8 + node_cpu{instance="def",job="node",mode="user"} 2 # Check that binops drop the metric name. From aad0179aab5843c38de6dd7891cefa621ab701b7 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 26 Nov 2024 15:48:30 +1100 Subject: [PATCH 03/43] Remove comments about and/or/unless. --- .../operators/binops/vector_vector_binary_operation.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go index 78fdc5d4cd8..72d569c0b64 100644 --- a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go @@ -167,7 +167,6 @@ func (b *VectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Context) (b } if len(b.leftMetadata) == 0 { - // FIXME: this is incorrect for 'or' // No series on left-hand side, we'll never have any output series. return false, nil } @@ -178,7 +177,6 @@ func (b *VectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Context) (b } if len(b.rightMetadata) == 0 { - // FIXME: this is incorrect for 'or' and 'unless' // No series on right-hand side, we'll never have any output series. 
return false, nil } @@ -202,7 +200,6 @@ func (b *VectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetad // Use the smaller side to populate the map of possible output series first. // This should ensure we don't unnecessarily populate the output series map with series that will never match in most cases. // (It's possible that all the series on the larger side all belong to the same group, but this is expected to be rare.) - // FIXME: this doesn't work as-is for 'unless'. smallerSide := b.leftMetadata largerSide := b.rightMetadata smallerSideIsLeftSide := len(b.leftMetadata) < len(b.rightMetadata) @@ -240,14 +237,11 @@ func (b *VectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetad series.leftSeriesIndices = append(series.leftSeriesIndices, idx) } } - - // FIXME: if this is an 'or' operation, then we need to create the right side even if the left doesn't exist (or vice-versa) } // Remove series that cannot produce samples. for seriesLabels, outputSeries := range outputSeriesMap { if len(outputSeries.leftSeriesIndices) == 0 || len(outputSeries.rightSeriesIndices) == 0 { - // FIXME: this is incorrect for 'or' and 'unless' // No matching series on at least one side for this output series, so output series will have no samples. Remove it. delete(outputSeriesMap, seriesLabels) } From 37434ade8154b9f7182c50755b7d166045d18e89 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 26 Nov 2024 17:14:13 +1100 Subject: [PATCH 04/43] Add some test cases --- .../testdata/ours/binary_operators.test | 242 ++++++++++++++++++ 1 file changed, 242 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 72f3f130d1a..a31961bd003 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -977,3 +977,245 @@ load 6m # Test the case where both sides of 'or' contain series with the same labels. 
eval range from 0 to 48m step 6m min(series_1) or min(series_2) {} 9 1 2 9 4 5 9 7 9 + +clear + +# Many-to-one / one-to-many matching. +load 6m + method_code:http_errors:rate5m{method="get", code="500"} 24 240 240 _ + method_code:http_errors:rate5m{method="get", code="404"} 30 300 _ _ + method_code:http_errors:rate5m{method="put", code="501"} 3 30 _ _ + method_code:http_errors:rate5m{method="post", code="500"} 6 60 _ 60 + method_code:http_errors:rate5m{method="post", code="404"} 21 210 _ 210 + method:http_requests:rate5m{method="get", foo="bar"} 600 _ 2400 2400 + method:http_requests:rate5m{method="get", foo="bar2"} _ 1200 1200 1200 + method:http_requests:rate5m{method="del", foo="baz"} 34 80 _ _ + method:http_requests:rate5m{method="post", foo="blah"} 120 100 _ 100 + +eval instant at 0 method_code:http_errors:rate5m / ignoring(code, foo) group_left() method:http_requests:rate5m + {method="get", code="500"} 0.04 + {method="get", code="404"} 0.05 + {method="post", code="500"} 0.05 + {method="post", code="404"} 0.175 + +eval instant at 0 method_code:http_errors:rate5m / on(method) group_left() method:http_requests:rate5m + {method="get", code="500"} 0.04 + {method="get", code="404"} 0.05 + {method="post", code="500"} 0.05 + {method="post", code="404"} 0.175 + +eval instant at 0 method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m + {method="get", code="500", foo="bar"} 0.04 + {method="get", code="404", foo="bar"} 0.05 + {method="post", code="500", foo="blah"} 0.05 + {method="post", code="404", foo="blah"} 0.175 + +eval instant at 0 method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m + {method="get", code="500", foo="bar"} 0.04 + {method="get", code="404", foo="bar"} 0.05 + {method="post", code="500", foo="blah"} 0.05 + {method="post", code="404", foo="blah"} 0.175 + +eval instant at 6m method_code:http_errors:rate5m / ignoring(code, foo) group_left() method:http_requests:rate5m + 
{method="get", code="500"} 0.2 + {method="get", code="404"} 0.25 + {method="post", code="500"} 0.6 + {method="post", code="404"} 2.1 + +eval instant at 6m method_code:http_errors:rate5m / on(method) group_left() method:http_requests:rate5m + {method="get", code="500"} 0.2 + {method="get", code="404"} 0.25 + {method="post", code="500"} 0.6 + {method="post", code="404"} 2.1 + +eval instant at 6m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m + {method="get", code="500", foo="bar2"} 0.2 + {method="get", code="404", foo="bar2"} 0.25 + {method="post", code="500", foo="blah"} 0.6 + {method="post", code="404", foo="blah"} 2.1 + +eval instant at 6m method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m + {method="get", code="500", foo="bar2"} 0.2 + {method="get", code="404", foo="bar2"} 0.25 + {method="post", code="500", foo="blah"} 0.6 + {method="post", code="404", foo="blah"} 2.1 + +eval range from 0 to 6m step 6m method_code:http_errors:rate5m / ignoring(code, foo) group_left() method:http_requests:rate5m + {method="get", code="500"} 0.04 0.2 + {method="get", code="404"} 0.05 0.25 + {method="post", code="500"} 0.05 0.6 + {method="post", code="404"} 0.175 2.1 + +eval range from 0 to 6m step 6m method_code:http_errors:rate5m / on(method) group_left() method:http_requests:rate5m + {method="get", code="500"} 0.04 0.2 + {method="get", code="404"} 0.05 0.25 + {method="post", code="500"} 0.05 0.6 + {method="post", code="404"} 0.175 2.1 + +eval range from 0 to 6m step 6m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m + {method="get", code="500", foo="bar"} 0.04 _ + {method="get", code="404", foo="bar"} 0.05 _ + {method="get", code="500", foo="bar2"} _ 0.2 + {method="get", code="404", foo="bar2"} _ 0.25 + {method="post", code="500", foo="blah"} 0.05 0.6 + {method="post", code="404", foo="blah"} 0.175 2.1 + +eval range from 0 to 6m step 6m 
method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m + {method="get", code="500", foo="bar"} 0.04 _ + {method="get", code="404", foo="bar"} 0.05 _ + {method="get", code="500", foo="bar2"} _ 0.2 + {method="get", code="404", foo="bar2"} _ 0.25 + {method="post", code="500", foo="blah"} 0.05 0.6 + {method="post", code="404", foo="blah"} 0.175 2.1 + +# Fail if multiple series on "one" side, even if they differ on the additional labels +eval_fail instant at 12m method_code:http_errors:rate5m / ignoring(code, foo) group_left() method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 12m method_code:http_errors:rate5m / on(method) group_left() method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 12m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 12m method_code:http_errors:rate5m / on(method) group_left(foo) 
method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +# Fail if multiple series on "one" side, even if there is no matching point on the "many" side +eval_fail instant at 18m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 18m method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +# Same cases as above, but with group_right and expressions swapped. 
+eval instant at 0 method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m + {method="get", code="500"} 25 + {method="get", code="404"} 20 + {method="post", code="500"} 20 + {method="post", code="404"} 5.7142857143 + +eval instant at 0 method:http_requests:rate5m / on(method) group_right() method_code:http_errors:rate5m + {method="get", code="500"} 25 + {method="get", code="404"} 20 + {method="post", code="500"} 20 + {method="post", code="404"} 5.7142857143 + +eval instant at 0 method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m + {method="get", code="500", foo="bar"} 25 + {method="get", code="404", foo="bar"} 20 + {method="post", code="500", foo="blah"} 20 + {method="post", code="404", foo="blah"} 5.7142857143 + +eval instant at 0 method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m + {method="get", code="500", foo="bar"} 25 + {method="get", code="404", foo="bar"} 20 + {method="post", code="500", foo="blah"} 20 + {method="post", code="404", foo="blah"} 5.7142857143 + +eval instant at 6m method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m + {method="get", code="500"} 5 + {method="get", code="404"} 4 + {method="post", code="500"} 1.6666666667 + {method="post", code="404"} 0.4761904762 + +eval instant at 6m method:http_requests:rate5m / on(method) group_right() method_code:http_errors:rate5m + {method="get", code="500"} 5 + {method="get", code="404"} 4 + {method="post", code="500"} 1.6666666667 + {method="post", code="404"} 0.4761904762 + +eval instant at 6m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m + {method="get", code="500", foo="bar2"} 5 + {method="get", code="404", foo="bar2"} 4 + {method="post", code="500", foo="blah"} 1.6666666667 + {method="post", code="404", foo="blah"} 0.4761904762 + +eval instant at 6m method:http_requests:rate5m / 
on(method) group_right(foo) method_code:http_errors:rate5m + {method="get", code="500", foo="bar2"} 5 + {method="get", code="404", foo="bar2"} 4 + {method="post", code="500", foo="blah"} 1.6666666667 + {method="post", code="404", foo="blah"} 0.4761904762 + +eval range from 0 to 6m step 6m method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m + {method="get", code="500"} 25 5 + {method="get", code="404"} 20 4 + {method="post", code="500"} 20 1.6666666667 + {method="post", code="404"} 5.7142857143 0.4761904762 + +eval range from 0 to 6m step 6m method:http_requests:rate5m / on(method) group_right() method_code:http_errors:rate5m + {method="get", code="500"} 25 5 + {method="get", code="404"} 20 4 + {method="post", code="500"} 20 1.6666666667 + {method="post", code="404"} 5.7142857143 0.4761904762 + +eval range from 0 to 6m step 6m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m + {method="get", code="500", foo="bar"} 25 _ + {method="get", code="404", foo="bar"} 20 _ + {method="get", code="500", foo="bar2"} _ 5 + {method="get", code="404", foo="bar2"} _ 4 + {method="post", code="500", foo="blah"} 20 1.6666666667 + {method="post", code="404", foo="blah"} 5.7142857143 0.4761904762 + +eval range from 0 to 6m step 6m method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m + {method="get", code="500", foo="bar"} 25 _ + {method="get", code="404", foo="bar"} 20 _ + {method="get", code="500", foo="bar2"} _ 5 + {method="get", code="404", foo="bar2"} _ 4 + {method="post", code="500", foo="blah"} 20 1.6666666667 + {method="post", code="404", foo="blah"} 5.7142857143 0.4761904762 + +# Fail if multiple series on "one" side, even if they differ on the additional labels +eval_fail instant at 12m method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group 
\{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 12m method:http_requests:rate5m / on(method) group_right() method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 12m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 12m method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +# Fail if multiple series on "one" side, even if there is no matching point on the "many" side +eval_fail instant at 18m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match 
group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +eval_fail instant at 18m method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side + +clear + +# Test group_left / group_right where the additional labels from "one" side replace labels with the same name from the "many" side +load 6m + left{method="get", code="500", foo="left-1"} 1 + left{method="get", code="404", foo="left-2"} 2 + right{method="get", foo="right-1"} 4 + +eval instant at 0 left / on(method) group_left(foo) right + {method="get", code="500", foo="right-1"} 0.25 + {method="get", code="404", foo="right-1"} 0.5 + +eval instant at 0 right / on(method) group_right(foo) left + {method="get", code="500", foo="right-1"} 4 + {method="get", code="404", foo="right-1"} 2 + +clear + +load 6m + left{method="get", code="500", foo="left-1"} 1 _ 10 + left{method="get", code="404", foo="left-2"} _ 4 20 + right{method="get", code="999", foo="right-1"} 4 8 40 + +eval range from 0 to 6m step 6m left / on(method) group_left(foo, code) right + {method="get", code="999", foo="right-1"} 0.25 0.5 + +eval range from 0 to 6m step 6m right / on(method) group_right(foo, code) left + {method="get", code="999", foo="right-1"} 4 2 + +eval_fail instant at 12m left / on(method) group_left(foo, code) right + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + 
+eval_fail instant at 12m right / on(method) group_right(foo, code) left + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches From 31c886f730726d22adab08432f031cc6d635eb77 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 12:03:06 +1100 Subject: [PATCH 05/43] Make condition clearer --- .../operators/binops/vector_vector_binary_operation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go index 72d569c0b64..138472bbdc1 100644 --- a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go @@ -502,7 +502,7 @@ func (b *VectorVectorBinaryOperation) computeResult(left types.InstantVectorSeri // at the end of series2 (also index 3). // It should be pretty uncommon that metric contains both histograms and floats, so we will // accept the cost of a new slice. 
- mixedPoints := len(left.Floats) > 0 && len(left.Histograms) > 0 || len(right.Floats) > 0 && len(right.Histograms) > 0 + mixedPoints := (len(left.Floats) > 0 && len(left.Histograms) > 0) || (len(right.Floats) > 0 && len(right.Histograms) > 0) prepareFSlice := func() error { if !mixedPoints && maxPoints <= cap(left.Floats) && cap(left.Floats) < cap(right.Floats) { From 6ec6e0e5f0a2854a9a4cd934e799f02e1f36e493 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 12:22:01 +1100 Subject: [PATCH 06/43] Add comparison operation edge cases --- .../testdata/ours/binary_operators.test | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index a31961bd003..671b2d39e1e 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1219,3 +1219,99 @@ eval_fail instant at 12m left / on(method) group_left(foo, code) right eval_fail instant at 12m right / on(method) group_right(foo, code) left expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +clear + +# Test comparison operator edge cases. 
+load 6m + left_side_a{env="test", pod="a"} 1 2 3 4 + left_side_b{env="test", pod="a"} 5 6 7 8 + right_side{env="test", pod="a"} 2 2 7 7 + +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side + left_side_a{pod="a"} _ 2 _ _ + left_side_b{pod="a"} _ _ 7 _ + +eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) + +eval_fail range from 0 to 18m step 6m right_side == ignoring(env) {__name__=~"left_side.*"} + expected_fail_regexp found duplicate series for the match group .* on the right (hand-)?side of the operation + +eval_fail range from 0 to 18m step 6m right_side == bool ignoring(env) {__name__=~"left_side.*"} + expected_fail_regexp found duplicate series for the match group .* on the right (hand-)?side of the operation + +# This should return: +# left_side_a{pod="a"} _ 2 _ _ +# left_side_b{pod="a"} _ _ 7 _ +# but instead both engines drop the metric names in the output. 
+# This is accepted behaviour: https://github.com/prometheus/prometheus/issues/5326 +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side + {pod="a"} _ 2 7 _ + +eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) + +eval_fail range from 0 to 18m step 6m right_side == on(pod) {__name__=~"left_side.*"} + expected_fail_regexp found duplicate series for the match group .* on the right (hand-)?side of the operation + +eval_fail range from 0 to 18m step 6m right_side == bool on(pod) {__name__=~"left_side.*"} + expected_fail_regexp found duplicate series for the match group .* on the right (hand-)?side of the operation + +clear + +# If we change the data slightly... (note the second point for left_side_b is now 2) + +load 6m + left_side_a{env="test", pod="a"} 1 2 3 4 + left_side_b{env="test", pod="a"} 5 2 7 8 + right_side{env="test", pod="a"} 2 2 7 7 + +eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + +eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + +eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + +eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side + 
expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + +clear + +# Same thing as above, but with the same metric name for all series on left side. +load 6m + left{pod="a"} 1 2 3 4 + left{pod="b"} 5 6 7 8 + right 2 2 7 7 + +eval range from 0 to 18m step 6m left == ignoring(pod) right + left _ 2 7 _ + +clear + +# Same thing as above, but with no overlapping samples on left side. +load 6m + left_side_a{env="test", pod="a"} 1 2 _ _ + left_side_b{env="test", pod="a"} _ _ 7 8 + right_side{env="test", pod="a"} 2 2 7 7 + +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side + left_side_a{pod="a"} _ 2 _ _ + left_side_b{pod="a"} _ _ 7 _ + +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side + {pod="a"} 0 1 1 0 + +# This should return: +# left_side_a{pod="a"} _ 2 _ _ +# left_side_b{pod="a"} _ _ 7 _ +# but instead both engines drop the metric names in the output. 
+# This is accepted behaviour: https://github.com/prometheus/prometheus/issues/5326 +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side + {pod="a"} _ 2 7 _ + +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side + {pod="a"} 0 1 1 0 From fa12925bfe85e25d1ad2a9c87fa1bdf0aca43b8b Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 13:46:59 +1100 Subject: [PATCH 07/43] Add tests for comparison operators with group_left / group_right --- .../testdata/ours/binary_operators.test | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 671b2d39e1e..1fd95656011 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1315,3 +1315,64 @@ eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side {pod="a"} 0 1 1 0 + +clear + +# Comparison operations with group_left / group_right. 
+ +load 6m + left_side{env="test", pod="a", region="au"} 1 2 3 9 10 + left_side{env="test", pod="a", region="us"} 6 7 8 4 5 + right_side{env="test", dc="1"} 2 _ _ 5 _ + right_side{env="test", dc="2"} _ 3 _ _ 6 + +eval range from 0 to 24m step 6m left_side < on(env) group_left(dc) right_side + left_side{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ + left_side{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ + left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ + left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 + +eval range from 0 to 24m step 6m left_side < bool on(env) group_left(dc) right_side + {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ + {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 + {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ + {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 + +eval range from 0 to 24m step 6m left_side < ignoring(pod, region, dc) group_left(dc) right_side + left_side{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ + left_side{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ + left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ + left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 + +eval range from 0 to 24m step 6m left_side < bool ignoring(pod, region, dc) group_left(dc) right_side + {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ + {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 + {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ + {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 + +# FIXME: shouldn't this return series with name right_side? 
+eval range from 0 to 24m step 6m right_side > on(env) group_right(dc) left_side + left_side{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ + left_side{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ + left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ + left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 + +eval range from 0 to 24m step 6m right_side > bool on(env) group_right(dc) left_side + {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ + {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 + {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ + {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 + +# FIXME: shouldn't this return series with name right_side? +eval range from 0 to 24m step 6m right_side > ignoring(pod, region, dc) group_right(dc) left_side + left_side{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ + left_side{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ + left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ + left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 + +eval range from 0 to 24m step 6m right_side > bool ignoring(pod, region, dc) group_right(dc) left_side + {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ + {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 + {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ + {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 + From a09c1ac4f00abeceb98248141a682872db5ad547 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 14:03:48 +1100 Subject: [PATCH 08/43] Update tests to reflect https://github.com/prometheus/prometheus/issues/15471 --- .../testdata/ours/binary_operators.test | 99 ++++++++++++------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 1fd95656011..6c08823326a 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ 
b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1321,58 +1321,87 @@ clear # Comparison operations with group_left / group_right. load 6m - left_side{env="test", pod="a", region="au"} 1 2 3 9 10 - left_side{env="test", pod="a", region="us"} 6 7 8 4 5 - right_side{env="test", dc="1"} 2 _ _ 5 _ - right_side{env="test", dc="2"} _ 3 _ _ 6 - -eval range from 0 to 24m step 6m left_side < on(env) group_left(dc) right_side - left_side{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ - left_side{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ - left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ - left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 - -eval range from 0 to 24m step 6m left_side < bool on(env) group_left(dc) right_side + side_a{env="test", pod="a", region="au"} 1 2 3 9 10 + side_a{env="test", pod="a", region="us"} 6 7 8 4 5 + side_b{env="test", dc="1"} 2 _ _ 5 _ + side_b{env="test", dc="2"} _ 3 _ _ 6 + +eval range from 0 to 24m step 6m side_a < on(env) group_left(dc) side_b + side_a{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ + side_a{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ + side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ + side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 + +eval range from 0 to 24m step 6m sum without () (side_a) < on(env) group_left(dc) side_b + {env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ + {env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ + {env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ + {env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 + +eval range from 0 to 24m step 6m side_a < bool on(env) group_left(dc) side_b {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 -eval range from 0 to 24m step 6m left_side < ignoring(pod, region, dc) group_left(dc) right_side - 
left_side{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ - left_side{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ - left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ - left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 +eval range from 0 to 24m step 6m side_a < ignoring(pod, region, dc) group_left(dc) side_b + side_a{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ + side_a{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ + side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ + side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 -eval range from 0 to 24m step 6m left_side < bool ignoring(pod, region, dc) group_left(dc) right_side +eval range from 0 to 24m step 6m sum without () (side_a) < ignoring(pod, region, dc) group_left(dc) side_b + {env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ + {env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ + {env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ + {env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 + +eval range from 0 to 24m step 6m side_a < bool ignoring(pod, region, dc) group_left(dc) side_b {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 -# FIXME: shouldn't this return series with name right_side? 
-eval range from 0 to 24m step 6m right_side > on(env) group_right(dc) left_side - left_side{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ - left_side{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ - left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ - left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 - -eval range from 0 to 24m step 6m right_side > bool on(env) group_right(dc) left_side +# The docs say this should return series with name "side_b" from the left, but it is accepted that this will return +# "side_a" from the right: see https://github.com/prometheus/prometheus/issues/15471. +eval range from 0 to 24m step 6m side_b > on(env) group_right(dc) side_a + side_a{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ + side_a{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ + side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ + side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 + +# The docs say this should return series with no name, but it is accepted that this will return +# "side_a" from the right: see https://github.com/prometheus/prometheus/issues/15471. +eval range from 0 to 24m step 6m sum without () (side_b) > on(env) group_right(dc) side_a + side_a{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ + side_a{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ + side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ + side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 + +eval range from 0 to 24m step 6m side_b > bool on(env) group_right(dc) side_a {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 -# FIXME: shouldn't this return series with name right_side? 
-eval range from 0 to 24m step 6m right_side > ignoring(pod, region, dc) group_right(dc) left_side - left_side{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ - left_side{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ - left_side{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ - left_side{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 - -eval range from 0 to 24m step 6m right_side > bool ignoring(pod, region, dc) group_right(dc) left_side +# The docs say this should return series with name "side_b" from the left, but it is accepted that this will return +# "side_a" from the right: see https://github.com/prometheus/prometheus/issues/15471. +eval range from 0 to 24m step 6m side_b > ignoring(pod, region, dc, ignored) group_right(dc) side_a + side_a{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ + side_a{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ + side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ + side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 + +# The docs say this should return series with no name (ie. the metric name from the left), but it is accepted that this will return +# "side_a" from the right: see https://github.com/prometheus/prometheus/issues/15471. 
+eval range from 0 to 24m step 6m sum without () (side_b) > ignoring(pod, region, dc, ignored) group_right(dc) side_a + side_a{env="test", pod="a", region="au", dc="1"} 2 _ _ _ _ + side_a{env="test", pod="a", region="au", dc="2"} _ 3 _ _ _ + side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ + side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 + +eval range from 0 to 24m step 6m side_b > bool ignoring(pod, region, dc) group_right(dc) side_a {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 - From 4965398fa13b44390593959e4888bc1a35f8a861 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 15:54:53 +1100 Subject: [PATCH 09/43] Expand test to confirm label handling behaviour --- .../testdata/ours/binary_operators.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 6c08823326a..d980ab6116a 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1323,8 +1323,8 @@ clear load 6m side_a{env="test", pod="a", region="au"} 1 2 3 9 10 side_a{env="test", pod="a", region="us"} 6 7 8 4 5 - side_b{env="test", dc="1"} 2 _ _ 5 _ - side_b{env="test", dc="2"} _ 3 _ _ 6 + side_b{env="test", dc="1", ignored="1"} 2 _ _ 5 _ + side_b{env="test", dc="2", ignored="1"} _ 3 _ _ 6 eval range from 0 to 24m step 6m side_a < on(env) group_left(dc) side_b side_a{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ @@ -1344,19 +1344,19 @@ eval range from 0 to 24m step 6m side_a < bool on(env) group_left(dc) side_b {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ {env="test", pod="a", region="us", dc="2"} _ 0 _ _ 1 -eval range from 0 to 24m step 6m side_a < ignoring(pod, region, 
dc) group_left(dc) side_b +eval range from 0 to 24m step 6m side_a < ignoring(pod, region, dc, ignored) group_left(dc) side_b side_a{env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ side_a{env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 -eval range from 0 to 24m step 6m sum without () (side_a) < ignoring(pod, region, dc) group_left(dc) side_b +eval range from 0 to 24m step 6m sum without () (side_a) < ignoring(pod, region, dc, ignored) group_left(dc) side_b {env="test", pod="a", region="au", dc="1"} 1 _ _ _ _ {env="test", pod="a", region="au", dc="2"} _ 2 _ _ _ {env="test", pod="a", region="us", dc="1"} _ _ _ 4 _ {env="test", pod="a", region="us", dc="2"} _ _ _ _ 5 -eval range from 0 to 24m step 6m side_a < bool ignoring(pod, region, dc) group_left(dc) side_b +eval range from 0 to 24m step 6m side_a < bool ignoring(pod, region, dc, ignored) group_left(dc) side_b {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ @@ -1400,7 +1400,7 @@ eval range from 0 to 24m step 6m sum without () (side_b) > ignoring(pod, region, side_a{env="test", pod="a", region="us", dc="1"} _ _ _ 5 _ side_a{env="test", pod="a", region="us", dc="2"} _ _ _ _ 6 -eval range from 0 to 24m step 6m side_b > bool ignoring(pod, region, dc) group_right(dc) side_a +eval range from 0 to 24m step 6m side_b > bool ignoring(pod, region, dc, ignored) group_right(dc) side_a {env="test", pod="a", region="au", dc="1"} 1 _ _ 0 _ {env="test", pod="a", region="au", dc="2"} _ 1 _ _ 0 {env="test", pod="a", region="us", dc="1"} 0 _ _ 1 _ From 64114f712a96dd09fa8ac94f02929f04b50bc527 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 16:47:31 +1100 Subject: [PATCH 10/43] Rename existing operator --- ..._to_one_vector_vector_binary_operation.go} | 57 ++++++++++--------- 
...ne_vector_vector_binary_operation_test.go} | 32 +++++------ pkg/streamingpromql/query.go | 2 +- 3 files changed, 46 insertions(+), 45 deletions(-) rename pkg/streamingpromql/operators/binops/{vector_vector_binary_operation.go => one_to_one_vector_vector_binary_operation.go} (91%) rename pkg/streamingpromql/operators/binops/{vector_vector_binary_operation_test.go => one_to_one_vector_vector_binary_operation_test.go} (93%) diff --git a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go similarity index 91% rename from pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go rename to pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index 138472bbdc1..41137e1904b 100644 --- a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -27,8 +27,9 @@ import ( "github.com/grafana/mimir/pkg/streamingpromql/types" ) -// VectorVectorBinaryOperation represents a binary operation between instant vectors such as " + " or " - ". -type VectorVectorBinaryOperation struct { +// OneToOneVectorVectorBinaryOperation represents a one-to-one binary operation between instant vectors such as " + " or " - ". +// One-to-many and many-to-one binary operations between instant vectors are not supported. 
+type OneToOneVectorVectorBinaryOperation struct { Left types.InstantVectorOperator Right types.InstantVectorOperator Op parser.ItemType @@ -44,7 +45,7 @@ type VectorVectorBinaryOperation struct { leftMetadata []types.SeriesMetadata rightMetadata []types.SeriesMetadata - remainingSeries []*binaryOperationOutputSeries + remainingSeries []*oneToOneBinaryOperationOutputSeries leftBuffer *operators.InstantVectorOperatorBuffer rightBuffer *operators.InstantVectorOperatorBuffer leftIterator types.InstantVectorSeriesDataIterator @@ -55,9 +56,9 @@ type VectorVectorBinaryOperation struct { annotations *annotations.Annotations } -var _ types.InstantVectorOperator = &VectorVectorBinaryOperation{} +var _ types.InstantVectorOperator = &OneToOneVectorVectorBinaryOperation{} -type binaryOperationOutputSeries struct { +type oneToOneBinaryOperationOutputSeries struct { leftSeriesIndices []int rightSeriesIndices []int } @@ -65,18 +66,18 @@ type binaryOperationOutputSeries struct { // latestLeftSeries returns the index of the last series from the left source needed for this output series. // // It assumes that leftSeriesIndices is sorted in ascending order. -func (s binaryOperationOutputSeries) latestLeftSeries() int { +func (s oneToOneBinaryOperationOutputSeries) latestLeftSeries() int { return s.leftSeriesIndices[len(s.leftSeriesIndices)-1] } // latestRightSeries returns the index of the last series from the right source needed for this output series. // // It assumes that rightSeriesIndices is sorted in ascending order. 
-func (s binaryOperationOutputSeries) latestRightSeries() int { +func (s oneToOneBinaryOperationOutputSeries) latestRightSeries() int { return s.rightSeriesIndices[len(s.rightSeriesIndices)-1] } -func NewVectorVectorBinaryOperation( +func NewOneToOneVectorVectorBinaryOperation( left types.InstantVectorOperator, right types.InstantVectorOperator, vectorMatching parser.VectorMatching, @@ -85,8 +86,8 @@ func NewVectorVectorBinaryOperation( memoryConsumptionTracker *limiting.MemoryConsumptionTracker, annotations *annotations.Annotations, expressionPosition posrange.PositionRange, -) (*VectorVectorBinaryOperation, error) { - b := &VectorVectorBinaryOperation{ +) (*OneToOneVectorVectorBinaryOperation, error) { + b := &OneToOneVectorVectorBinaryOperation{ Left: left, Right: right, leftIterator: types.InstantVectorSeriesDataIterator{}, @@ -113,7 +114,7 @@ func NewVectorVectorBinaryOperation( return b, nil } -func (b *VectorVectorBinaryOperation) ExpressionPosition() posrange.PositionRange { +func (b *OneToOneVectorVectorBinaryOperation) ExpressionPosition() posrange.PositionRange { return b.expressionPosition } @@ -132,7 +133,7 @@ func (b *VectorVectorBinaryOperation) ExpressionPosition() posrange.PositionRang // (The alternative would be to compute the entire result here in SeriesMetadata and only return the series that // contain points, but that would mean we'd need to hold the entire result in memory at once, which we want to // avoid.) -func (b *VectorVectorBinaryOperation) SeriesMetadata(ctx context.Context) ([]types.SeriesMetadata, error) { +func (b *OneToOneVectorVectorBinaryOperation) SeriesMetadata(ctx context.Context) ([]types.SeriesMetadata, error) { if canProduceAnySeries, err := b.loadSeriesMetadata(ctx); err != nil { return nil, err } else if !canProduceAnySeries { @@ -156,7 +157,7 @@ func (b *VectorVectorBinaryOperation) SeriesMetadata(ctx context.Context) ([]typ // loadSeriesMetadata loads series metadata from both sides of this operation. 
// It returns false if one side returned no series and that means there is no way for this operation to return any series. // (eg. if doing A + B and either A or B have no series, then there is no way for this operation to produce any series) -func (b *VectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) { +func (b *OneToOneVectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) { // We retain the series labels for later so we can use them to generate error messages. // We'll return them to the pool in Close(). @@ -192,10 +193,10 @@ func (b *VectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Context) (b // - a corresponding list of the source series for each output series // - a list indicating which series from the left side are needed to compute the output // - a list indicating which series from the right side are needed to compute the output -func (b *VectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetadata, []*binaryOperationOutputSeries, []bool, []bool, error) { +func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetadata, []*oneToOneBinaryOperationOutputSeries, []bool, []bool, error) { labelsFunc := b.groupLabelsFunc() groupKeyFunc := vectorMatchingGroupKeyFunc(b.VectorMatching) - outputSeriesMap := map[string]*binaryOperationOutputSeries{} + outputSeriesMap := map[string]*oneToOneBinaryOperationOutputSeries{} // Use the smaller side to populate the map of possible output series first. // This should ensure we don't unnecessarily populate the output series map with series that will never match in most cases. @@ -214,7 +215,7 @@ func (b *VectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetad series, exists := outputSeriesMap[string(groupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. 
if !exists { - series = &binaryOperationOutputSeries{} + series = &oneToOneBinaryOperationOutputSeries{} outputSeriesMap[string(groupKey)] = series } @@ -248,7 +249,7 @@ func (b *VectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetad } allMetadata := types.GetSeriesMetadataSlice(len(outputSeriesMap)) - allSeries := make([]*binaryOperationOutputSeries, 0, len(outputSeriesMap)) + allSeries := make([]*oneToOneBinaryOperationOutputSeries, 0, len(outputSeriesMap)) leftSeriesUsed, err := types.BoolSlicePool.Get(len(b.leftMetadata), b.MemoryConsumptionTracker) if err != nil { @@ -287,7 +288,7 @@ func (b *VectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetad // // At present, sortSeries uses a very basic heuristic to guess the best way to sort the output series, but we could make // this more sophisticated in the future. -func (b *VectorVectorBinaryOperation) sortSeries(metadata []types.SeriesMetadata, series []*binaryOperationOutputSeries) { +func (b *OneToOneVectorVectorBinaryOperation) sortSeries(metadata []types.SeriesMetadata, series []*oneToOneBinaryOperationOutputSeries) { // For one-to-one matching, we assume that each output series takes one series from each side of the operator. // If this is true, then the best order is the one in which we read from the highest cardinality side in order. // If we do this, then in the worst case, we'll have to buffer the whole of the lower cardinality side. 
@@ -311,14 +312,14 @@ func (b *VectorVectorBinaryOperation) sortSeries(metadata []types.SeriesMetadata type binaryOperationOutputSorter struct { metadata []types.SeriesMetadata - series []*binaryOperationOutputSeries + series []*oneToOneBinaryOperationOutputSeries } type favourLeftSideSorter struct { binaryOperationOutputSorter } -func newFavourLeftSideSorter(metadata []types.SeriesMetadata, series []*binaryOperationOutputSeries) favourLeftSideSorter { +func newFavourLeftSideSorter(metadata []types.SeriesMetadata, series []*oneToOneBinaryOperationOutputSeries) favourLeftSideSorter { return favourLeftSideSorter{binaryOperationOutputSorter{metadata, series}} } @@ -326,7 +327,7 @@ type favourRightSideSorter struct { binaryOperationOutputSorter } -func newFavourRightSideSorter(metadata []types.SeriesMetadata, series []*binaryOperationOutputSeries) favourRightSideSorter { +func newFavourRightSideSorter(metadata []types.SeriesMetadata, series []*oneToOneBinaryOperationOutputSeries) favourRightSideSorter { return favourRightSideSorter{binaryOperationOutputSorter{metadata, series}} } @@ -360,7 +361,7 @@ func (g favourRightSideSorter) Less(i, j int) bool { } // groupLabelsFunc returns a function that computes the labels of the output group this series belongs to. 
-func (b *VectorVectorBinaryOperation) groupLabelsFunc() func(labels.Labels) labels.Labels { +func (b *OneToOneVectorVectorBinaryOperation) groupLabelsFunc() func(labels.Labels) labels.Labels { lb := labels.NewBuilder(labels.EmptyLabels()) if b.VectorMatching.On { @@ -388,7 +389,7 @@ func (b *VectorVectorBinaryOperation) groupLabelsFunc() func(labels.Labels) labe } } -func (b *VectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { +func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { if len(b.remainingSeries) == 0 { return types.InstantVectorSeriesData{}, types.EOS } @@ -434,7 +435,7 @@ func (b *VectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.Ins // NOTE: mergeOneSide has the side effect of re-ordering both data and sourceSeriesIndices. // // FIXME: for many-to-one / one-to-many matching, we could avoid re-merging each time for the side used multiple times -func (b *VectorVectorBinaryOperation) mergeOneSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []types.SeriesMetadata, side string) (types.InstantVectorSeriesData, error) { +func (b *OneToOneVectorVectorBinaryOperation) mergeOneSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []types.SeriesMetadata, side string) (types.InstantVectorSeriesData, error) { merged, conflict, err := operators.MergeSeries(data, sourceSeriesIndices, b.MemoryConsumptionTracker) if err != nil { @@ -448,7 +449,7 @@ func (b *VectorVectorBinaryOperation) mergeOneSide(data []types.InstantVectorSer return merged, nil } -func (b *VectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { +func (b *OneToOneVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side 
string) error { firstConflictingSeriesLabels := sourceSeriesMetadata[conflict.FirstConflictingSeriesIndex].Labels groupLabels := b.groupLabelsFunc()(firstConflictingSeriesLabels) @@ -475,7 +476,7 @@ func (b *VectorVectorBinaryOperation) mergeConflictToError(conflict *operators.M ) } -func (b *VectorVectorBinaryOperation) computeResult(left types.InstantVectorSeriesData, right types.InstantVectorSeriesData) (types.InstantVectorSeriesData, error) { +func (b *OneToOneVectorVectorBinaryOperation) computeResult(left types.InstantVectorSeriesData, right types.InstantVectorSeriesData) (types.InstantVectorSeriesData, error) { var fPoints []promql.FPoint var hPoints []promql.HPoint @@ -629,7 +630,7 @@ func (b *VectorVectorBinaryOperation) computeResult(left types.InstantVectorSeri }, nil } -func (b *VectorVectorBinaryOperation) Close() { +func (b *OneToOneVectorVectorBinaryOperation) Close() { b.Left.Close() b.Right.Close() @@ -650,7 +651,7 @@ func (b *VectorVectorBinaryOperation) Close() { } } -func (b *VectorVectorBinaryOperation) emitAnnotation(generator types.AnnotationGenerator) { +func (b *OneToOneVectorVectorBinaryOperation) emitAnnotation(generator types.AnnotationGenerator) { b.annotations.Add(generator("", b.expressionPosition)) } diff --git a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation_test.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go similarity index 93% rename from pkg/streamingpromql/operators/binops/vector_vector_binary_operation_test.go rename to pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go index 75b8c45d2d1..40c79e515fb 100644 --- a/pkg/streamingpromql/operators/binops/vector_vector_binary_operation_test.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go @@ -24,7 +24,7 @@ import ( // The merging behaviour has many edge cases, so it's easier to test it directly from Go. 
// // Most of the edge cases are already covered by TestMergeSeries, so we focus on the logic -// unique to VectorVectorBinaryOperation: converting conflicts to user-friendly error messages. +// unique to OneToOneVectorVectorBinaryOperation: converting conflicts to user-friendly error messages. func TestVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { testCases := map[string]struct { input []types.InstantVectorSeriesData @@ -188,7 +188,7 @@ func TestVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { for name, testCase := range testCases { t.Run(name, func(t *testing.T) { memoryConsumptionTracker := limiting.NewMemoryConsumptionTracker(0, nil) - o := &VectorVectorBinaryOperation{ + o := &OneToOneVectorVectorBinaryOperation{ // Simulate an expression with "on (env)". // This is used to generate error messages. VectorMatching: parser.VectorMatching{ @@ -216,19 +216,19 @@ func TestVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { testCases := map[string]struct { - series []*binaryOperationOutputSeries + series []*oneToOneBinaryOperationOutputSeries expectedOrderFavouringLeftSide []int expectedOrderFavouringRightSide []int }{ "no output series": { - series: []*binaryOperationOutputSeries{}, + series: []*oneToOneBinaryOperationOutputSeries{}, expectedOrderFavouringLeftSide: []int{}, expectedOrderFavouringRightSide: []int{}, }, "single output series": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{4}, rightSeriesIndices: []int{1}, @@ -239,7 +239,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{0}, }, "two output series, both with one input series, read from both sides in same order and already sorted correctly": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{1}, rightSeriesIndices: 
[]int{1}, @@ -254,7 +254,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{0, 1}, }, "two output series, both with one input series, read from both sides in same order but sorted incorrectly": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{2}, rightSeriesIndices: []int{2}, @@ -269,7 +269,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{1, 0}, }, "two output series, both with one input series, read from both sides in different order": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{1}, rightSeriesIndices: []int{2}, @@ -284,7 +284,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{1, 0}, }, "two output series, both with multiple input series": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{1, 2}, rightSeriesIndices: []int{0, 3}, @@ -299,7 +299,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{1, 0}, }, "multiple output series, both with one input series, read from both sides in same order and already sorted correctly": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{1}, rightSeriesIndices: []int{1}, @@ -318,7 +318,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{0, 1, 2}, }, "multiple output series, both with one input series, read from both sides in same order but sorted incorrectly": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{2}, rightSeriesIndices: []int{2}, @@ -337,7 +337,7 @@ func TestVectorVectorBinaryOperation_Sorting(t 
*testing.T) { expectedOrderFavouringRightSide: []int{2, 0, 1}, }, "multiple output series, both with one input series, read from both sides in different order": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{1}, rightSeriesIndices: []int{2}, @@ -356,7 +356,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{2, 0, 1}, }, "multiple output series, with multiple input series each": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{4, 5, 10}, rightSeriesIndices: []int{2, 20}, @@ -375,7 +375,7 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { expectedOrderFavouringRightSide: []int{0, 2, 1}, }, "multiple output series which depend on the same input series": { - series: []*binaryOperationOutputSeries{ + series: []*oneToOneBinaryOperationOutputSeries{ { leftSeriesIndices: []int{1}, rightSeriesIndices: []int{2}, @@ -409,8 +409,8 @@ func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { metadata[i] = types.SeriesMetadata{Labels: labels.FromStrings("series", strconv.Itoa(i))} } - test := func(t *testing.T, series []*binaryOperationOutputSeries, metadata []types.SeriesMetadata, sorter sort.Interface, expectedOrder []int) { - expectedSeriesOrder := make([]*binaryOperationOutputSeries, len(series)) + test := func(t *testing.T, series []*oneToOneBinaryOperationOutputSeries, metadata []types.SeriesMetadata, sorter sort.Interface, expectedOrder []int) { + expectedSeriesOrder := make([]*oneToOneBinaryOperationOutputSeries, len(series)) expectedMetadataOrder := make([]types.SeriesMetadata, len(metadata)) for outputIndex, inputIndex := range expectedOrder { diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 8a85ccbfd99..001f1cf0b62 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -269,7 +269,7 @@ func (q *Query) 
convertToInstantVectorOperator(expr parser.Expr, timeRange types case parser.LOR: return binops.NewOrBinaryOperation(lhs, rhs, *e.VectorMatching, q.memoryConsumptionTracker, timeRange, e.PositionRange()), nil default: - return binops.NewVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) + return binops.NewOneToOneVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) } case *parser.UnaryExpr: From 749789ba4a1c331fbea767173d7d1104c05e53dc Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 27 Nov 2024 17:07:04 +1100 Subject: [PATCH 11/43] Add structure for new operator --- .../grouped_vector_vector_binary_operation.go | 103 ++++++++++++++++++ pkg/streamingpromql/query.go | 9 +- 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go new file mode 100644 index 00000000000..9102cbad834 --- /dev/null +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package binops + +import ( + "context" + "fmt" + + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/parser/posrange" + "github.com/prometheus/prometheus/util/annotations" + + "github.com/grafana/mimir/pkg/streamingpromql/compat" + "github.com/grafana/mimir/pkg/streamingpromql/limiting" + "github.com/grafana/mimir/pkg/streamingpromql/types" +) + +// GroupedVectorVectorBinaryOperation represents a one-to-many or many-to-one binary operation between instant vectors such as " + group_left " or " - group_right ". 
+// One-to-one binary operations between instant vectors are not supported. +type GroupedVectorVectorBinaryOperation struct { + Left types.InstantVectorOperator + Right types.InstantVectorOperator + Op parser.ItemType + ReturnBool bool + MemoryConsumptionTracker *limiting.MemoryConsumptionTracker + + VectorMatching parser.VectorMatching + + // We need to retain these so that NextSeries() can return an error message with the series labels when + // multiple points match on a single side. + // Note that we don't retain the output series metadata: if we need to return an error message, we can compute + // the output series labels from these again. + leftMetadata []types.SeriesMetadata + rightMetadata []types.SeriesMetadata + + opFunc binaryOperationFunc + + expressionPosition posrange.PositionRange + annotations *annotations.Annotations +} + +var _ types.InstantVectorOperator = &GroupedVectorVectorBinaryOperation{} + +func NewGroupedVectorVectorBinaryOperation( + left types.InstantVectorOperator, + right types.InstantVectorOperator, + vectorMatching parser.VectorMatching, + op parser.ItemType, + returnBool bool, + memoryConsumptionTracker *limiting.MemoryConsumptionTracker, + annotations *annotations.Annotations, + expressionPosition posrange.PositionRange, +) (*GroupedVectorVectorBinaryOperation, error) { + b := &GroupedVectorVectorBinaryOperation{ + Left: left, + Right: right, + VectorMatching: vectorMatching, + Op: op, + ReturnBool: returnBool, + MemoryConsumptionTracker: memoryConsumptionTracker, + + expressionPosition: expressionPosition, + annotations: annotations, + } + + if returnBool { + b.opFunc = boolComparisonOperationFuncs[op] + } else { + b.opFunc = arithmeticAndComparisonOperationFuncs[op] + } + + if b.opFunc == nil { + return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", op)) + } + + return b, nil +} + +func (g *GroupedVectorVectorBinaryOperation) SeriesMetadata(ctx context.Context) ([]types.SeriesMetadata, error) { + 
return nil, nil +} + +func (g *GroupedVectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { + return types.InstantVectorSeriesData{}, nil +} + +func (g *GroupedVectorVectorBinaryOperation) ExpressionPosition() posrange.PositionRange { + return g.expressionPosition +} + +func (g *GroupedVectorVectorBinaryOperation) Close() { + g.Left.Close() + g.Right.Close() + + if g.leftMetadata != nil { + types.PutSeriesMetadataSlice(g.leftMetadata) + } + + if g.rightMetadata != nil { + types.PutSeriesMetadataSlice(g.rightMetadata) + } + +} diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 001f1cf0b62..8b90c309187 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -269,7 +269,14 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr, timeRange types case parser.LOR: return binops.NewOrBinaryOperation(lhs, rhs, *e.VectorMatching, q.memoryConsumptionTracker, timeRange, e.PositionRange()), nil default: - return binops.NewOneToOneVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) + switch e.VectorMatching.Card { + case parser.CardOneToMany, parser.CardManyToOne: + return binops.NewGroupedVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) + case parser.CardOneToOne: + return binops.NewOneToOneVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) + default: + return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with %v matching for '%v'", e.VectorMatching.Card, e.Op)) + } } case *parser.UnaryExpr: From 9ce74073e1897392814ca3ec1cd358835f5cfb28 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Mon, 2 Dec 2024 15:26:57 +1100 Subject: [PATCH 12/43] Initial implementation of SeriesMetadata --- 
.../grouped_vector_vector_binary_operation.go | 389 +++++++++++++++++- 1 file changed, 374 insertions(+), 15 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 9102cbad834..c4eef07c6d9 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -5,13 +5,16 @@ package binops import ( "context" "fmt" + "sort" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" "github.com/grafana/mimir/pkg/streamingpromql/compat" "github.com/grafana/mimir/pkg/streamingpromql/limiting" + "github.com/grafana/mimir/pkg/streamingpromql/operators" "github.com/grafana/mimir/pkg/streamingpromql/types" ) @@ -26,20 +29,67 @@ type GroupedVectorVectorBinaryOperation struct { VectorMatching parser.VectorMatching + opFunc binaryOperationFunc + + expressionPosition posrange.PositionRange + annotations *annotations.Annotations + + remainingSeries []*groupedBinaryOperationOutputSeries + oneSide types.InstantVectorOperator // Either Left or Right + manySide types.InstantVectorOperator + oneSideBuffer *operators.InstantVectorOperatorBuffer + manySideBuffer *operators.InstantVectorOperatorBuffer + // We need to retain these so that NextSeries() can return an error message with the series labels when // multiple points match on a single side. // Note that we don't retain the output series metadata: if we need to return an error message, we can compute // the output series labels from these again. 
- leftMetadata []types.SeriesMetadata - rightMetadata []types.SeriesMetadata + oneSideMetadata []types.SeriesMetadata + manySideMetadata []types.SeriesMetadata +} - opFunc binaryOperationFunc +var _ types.InstantVectorOperator = &GroupedVectorVectorBinaryOperation{} - expressionPosition posrange.PositionRange - annotations *annotations.Annotations +type groupedBinaryOperationOutputSeries struct { + manySideSeriesIndices []int + oneSide *oneSide } -var _ types.InstantVectorOperator = &GroupedVectorVectorBinaryOperation{} +// latestManySeries returns the index of the last series from the "many" side needed for this output series. +// +// It assumes that manySideSeriesIndices is sorted in ascending order. +func (s groupedBinaryOperationOutputSeries) latestManySeries() int { + return s.manySideSeriesIndices[len(s.manySideSeriesIndices)-1] +} + +// latestOneSeries returns the index of the last series from the "one" side needed for this output series. +// +// It assumes that oneSide.outputSeries is sorted in ascending order. +func (s groupedBinaryOperationOutputSeries) latestOneSeries() int { + return s.oneSide.seriesIndices[len(s.oneSide.seriesIndices)-1] +} + +type groupedBinaryOperationOutputSeriesWithLabels struct { + labels labels.Labels + outputSeries *groupedBinaryOperationOutputSeries +} + +type oneSide struct { + // If this side has not been populated, seriesIndices will not be nil and mergedData will be empty. + // If this side has been populated, seriesIndices will be nil. + seriesIndices []int + mergedData types.InstantVectorSeriesData + + outputSeriesCount int // The number of output series that refer to this side. + + matchGroup *matchGroup +} + +type matchGroup struct { + // Time steps at which we've seen samples for any "one" side in this group. + // Each value is the index of the source series of the sample, or -1 if no sample has been seen for this time step yet. 
+ presence []int +} func NewGroupedVectorVectorBinaryOperation( left types.InstantVectorOperator, @@ -51,7 +101,7 @@ func NewGroupedVectorVectorBinaryOperation( annotations *annotations.Annotations, expressionPosition posrange.PositionRange, ) (*GroupedVectorVectorBinaryOperation, error) { - b := &GroupedVectorVectorBinaryOperation{ + g := &GroupedVectorVectorBinaryOperation{ Left: left, Right: right, VectorMatching: vectorMatching, @@ -64,22 +114,323 @@ func NewGroupedVectorVectorBinaryOperation( } if returnBool { - b.opFunc = boolComparisonOperationFuncs[op] + g.opFunc = boolComparisonOperationFuncs[op] } else { - b.opFunc = arithmeticAndComparisonOperationFuncs[op] + g.opFunc = arithmeticAndComparisonOperationFuncs[op] + } + + switch g.VectorMatching.Card { + case parser.CardOneToMany: + g.oneSide, g.manySide = g.Left, g.Right + case parser.CardManyToOne: + g.manySide, g.oneSide = g.Left, g.Right + default: + return nil, fmt.Errorf("unsupported cardinality '%v'", g.VectorMatching.Card) } - if b.opFunc == nil { + if g.opFunc == nil { return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", op)) } - return b, nil + return g, nil } +// SeriesMetadata returns the series expected to be produced by this operator. +// +// Note that it is possible that this method returns a series which will not have any points, as the +// list of possible output series is generated based solely on the series labels, not their data. +// +// For example, if this operator is for a range query with the expression "left_metric + right_metric", but +// left_metric has points at T=0 and T=1 in the query range, and right_metric has points at T=2 and T=3 in the +// query range, then SeriesMetadata will return a series, but NextSeries will return no points for that series. +// +// If this affects many series in the query, this may cause consuming operators to be less efficient, but in +// practice this rarely happens. 
+// +// (The alternative would be to compute the entire result here in SeriesMetadata and only return the series that +// contain points, but that would mean we'd need to hold the entire result in memory at once, which we want to +// avoid.) func (g *GroupedVectorVectorBinaryOperation) SeriesMetadata(ctx context.Context) ([]types.SeriesMetadata, error) { + if canProduceAnySeries, err := g.loadSeriesMetadata(ctx); err != nil { + return nil, err + } else if !canProduceAnySeries { + return nil, nil + } + + allMetadata, allSeries, oneSideSeriesUsed, manySideSeriesUsed, err := g.computeOutputSeries() + if err != nil { + return nil, err + } + + g.sortSeries(allMetadata, allSeries) + g.remainingSeries = allSeries + + g.oneSideBuffer = operators.NewInstantVectorOperatorBuffer(g.oneSide, oneSideSeriesUsed, g.MemoryConsumptionTracker) + g.manySideBuffer = operators.NewInstantVectorOperatorBuffer(g.manySide, manySideSeriesUsed, g.MemoryConsumptionTracker) + return nil, nil } +// loadSeriesMetadata loads series metadata from both sides of this operation. +// It returns false if one side returned no series and that means there is no way for this operation to return any series. +// (eg. if doing A + B and either A or B have no series, then there is no way for this operation to produce any series) +func (g *GroupedVectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Context) (bool, error) { + // We retain the series labels for later so we can use them to generate error messages. + // We'll return them to the pool in Close(). + + var err error + g.oneSideMetadata, err = g.oneSide.SeriesMetadata(ctx) + if err != nil { + return false, err + } + + if len(g.oneSideMetadata) == 0 { + // No series on left-hand side, we'll never have any output series. 
+ return false, nil + } + + g.manySideMetadata, err = g.manySide.SeriesMetadata(ctx) + if err != nil { + return false, err + } + + if len(g.manySideMetadata) == 0 { + // No series on right-hand side, we'll never have any output series. + return false, nil + } + + return true, nil +} + +// computeOutputSeries determines the possible output series from this operator. +// It assumes oneSideMetadata and manySideMetadata have already been populated. +// +// It returns: +// - a list of all possible series this operator could return +// - a corresponding list of the source series for each output series +// - a list indicating which series from the "one" side are needed to compute the output +// - a list indicating which series from the "many" side are needed to compute the output +func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetadata, []*groupedBinaryOperationOutputSeries, []bool, []bool, error) { + groupKeyFunc := vectorMatchingGroupKeyFunc(g.VectorMatching) + + // First, iterate through all the series on the "one" side and determine all the possible groups. + // For example, if we are matching on the "env" label and "region" is an additional label, + // oneSideMap would look something like this once we're done: + // [env=test][region=au]: {...} + // [env=test][region=eu]: {...} + // [env=test][region=us]: {...} + // [env=prod][region=au]: {...} + // [env=prod][region=eu]: {...} + // [env=prod][region=us]: {...} + additionalLabelsKeyFunc := g.additionalLabelsKeyFunc() + oneSideMap := map[string]map[string]*oneSide{} + + for idx, s := range g.oneSideMetadata { + groupKey := groupKeyFunc(s.Labels) + oneSideGroup, exists := oneSideMap[string(groupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. 
+ + if !exists { + oneSideGroup = map[string]*oneSide{} + oneSideMap[string(groupKey)] = oneSideGroup + } + + additionalLabelsKey := additionalLabelsKeyFunc(s.Labels) + side, exists := oneSideGroup[string(additionalLabelsKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. + + if !exists { + side = &oneSide{} + oneSideGroup[string(additionalLabelsKey)] = side + } + + side.seriesIndices = append(side.seriesIndices, idx) + } + + // Now iterate through all series on the "many" side and determine all the possible output series, as + // well as which series from the "many" side we'll actually need. + outputSeriesMap := map[string]groupedBinaryOperationOutputSeriesWithLabels{} + manySideSeriesUsed, err := types.BoolSlicePool.Get(len(g.manySideMetadata), g.MemoryConsumptionTracker) + if err != nil { + return nil, nil, nil, nil, err + } + + manySideSeriesUsed = manySideSeriesUsed[:len(g.manySideMetadata)] + seriesLabelsFunc := g.seriesLabelsFunc() + buf := make([]byte, 0, 1024) + + for idx, s := range g.manySideMetadata { + groupKey := groupKeyFunc(s.Labels) + oneSideGroup, exists := oneSideMap[string(groupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. + + if !exists { + // There are no series on the "one" side that match this series, so we'll produce no output series for this series. + continue + } + + manySideSeriesUsed[idx] = true + + for _, oneSide := range oneSideGroup { + // Most of the time, the output series won't already exist (unless we have input series with different metric names), + // so just create the series labels directly rather than trying to avoid their creation until we know for sure we'll + // need them. 
+ l := seriesLabelsFunc(g.oneSideMetadata[oneSide.seriesIndices[0]].Labels, s.Labels) + outputSeries, exists := outputSeriesMap[string(l.Bytes(buf))] + + if exists { + outputSeries.outputSeries.manySideSeriesIndices = append(outputSeries.outputSeries.manySideSeriesIndices, idx) + } else { + oneSide.outputSeriesCount++ + + outputSeries = groupedBinaryOperationOutputSeriesWithLabels{ + labels: l, + outputSeries: &groupedBinaryOperationOutputSeries{ + manySideSeriesIndices: []int{idx}, + oneSide: oneSide, + }, + } + outputSeriesMap[string(l.Bytes(buf))] = outputSeries + } + } + } + + // Next, go through all the "one" side groups again, and determine which of the "one" side series + // we'll actually need. + oneSideSeriesUsed, err := types.BoolSlicePool.Get(len(g.oneSideMetadata), g.MemoryConsumptionTracker) + if err != nil { + return nil, nil, nil, nil, err + } + + oneSideSeriesUsed = oneSideSeriesUsed[:len(g.oneSideMetadata)] + + for _, oneSideGroup := range oneSideMap { + var thisMatchGroup *matchGroup + + for _, oneSide := range oneSideGroup { + if oneSide.outputSeriesCount == 0 { + // If any part of a group has no output series, then no parts of that group will have output series. + break + } else if thisMatchGroup == nil { + thisMatchGroup = &matchGroup{} + } + + oneSide.matchGroup = thisMatchGroup + + for _, idx := range oneSide.seriesIndices { + oneSideSeriesUsed[idx] = true + } + } + } + + // Finally, construct the list of series that this operator will return. 
+ outputMetadata := types.GetSeriesMetadataSlice(len(outputSeriesMap)) + outputSeries := make([]*groupedBinaryOperationOutputSeries, 0, len(outputSeriesMap)) + + for _, o := range outputSeriesMap { + outputMetadata = append(outputMetadata, types.SeriesMetadata{Labels: o.labels}) + outputSeries = append(outputSeries, o.outputSeries) + } + + return outputMetadata, outputSeries, oneSideSeriesUsed, manySideSeriesUsed, nil +} + +func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneSideLabels labels.Labels) []byte { + if len(g.VectorMatching.Include) == 0 { + return func(oneSideLabels labels.Labels) []byte { + return nil + } + } + + buf := make([]byte, 0, 1024) + + return func(oneSideLabels labels.Labels) []byte { + return oneSideLabels.BytesWithLabels(buf, g.VectorMatching.MatchingLabels...) + } +} + +func (g *GroupedVectorVectorBinaryOperation) seriesLabelsFunc() func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { + shouldRemoveMetricName := !g.Op.IsComparisonOperator() + + if len(g.VectorMatching.Include) == 0 { + if shouldRemoveMetricName { + return func(_ labels.Labels, manySideLabels labels.Labels) labels.Labels { + return manySideLabels.DropMetricName() + } + } + + return func(_ labels.Labels, manySideLabels labels.Labels) labels.Labels { + return manySideLabels + } + } + + lb := labels.NewBuilder(labels.EmptyLabels()) + + if shouldRemoveMetricName { + return func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { + lb.Reset(manySideLabels) + lb.Del(labels.MetricName) + + for _, l := range g.VectorMatching.Include { + lb.Set(l, oneSideLabels.Get(l)) + } + + return lb.Labels() + } + } + + return func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { + lb.Reset(manySideLabels) + + for _, l := range g.VectorMatching.Include { + lb.Set(l, oneSideLabels.Get(l)) + } + + return lb.Labels() + } +} + +// sortSeries sorts metadata and series in place to try to minimise the 
number of input series we'll need to buffer in memory. +// +// This is critical for minimising the memory consumption of this operator: if we choose a poor ordering of series, +// we'll need to buffer many input series in memory. +// +// At present, sortSeries uses a very basic heuristic to guess the best way to sort the output series, but we could make +// this more sophisticated in the future. +func (g *GroupedVectorVectorBinaryOperation) sortSeries(metadata []types.SeriesMetadata, series []*groupedBinaryOperationOutputSeries) { + // Each series from the "many" side is used for at most one output series, so sort the output series so that we buffer as little of the + // "many" side series as possible. + // + // This isn't necessarily perfect: it may be that this still requires us to buffer many series from the "many" side if many + // series from the "many" side map to one output series, but this is expected to be rare. + sort.Sort(newFavourManySideSorter(metadata, series)) +} + +type favourManySideSorter struct { + metadata []types.SeriesMetadata + series []*groupedBinaryOperationOutputSeries +} + +func newFavourManySideSorter(metadata []types.SeriesMetadata, series []*groupedBinaryOperationOutputSeries) sort.Interface { + return favourManySideSorter{metadata, series} +} + +func (s favourManySideSorter) Len() int { + return len(s.metadata) +} + +func (s favourManySideSorter) Less(i, j int) bool { + iMany := s.series[i].latestManySeries() + jMany := s.series[j].latestManySeries() + if iMany != jMany { + return iMany < jMany + } + + return s.series[i].latestOneSeries() < s.series[j].latestOneSeries() +} + +func (s favourManySideSorter) Swap(i, j int) { + s.metadata[i], s.metadata[j] = s.metadata[j], s.metadata[i] + s.series[i], s.series[j] = s.series[j], s.series[i] +} + func (g *GroupedVectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { return types.InstantVectorSeriesData{}, nil } @@ -91,13 +442,21 @@ func (g 
*GroupedVectorVectorBinaryOperation) ExpressionPosition() posrange.Posit func (g *GroupedVectorVectorBinaryOperation) Close() { g.Left.Close() g.Right.Close() + // We don't need to close g.oneSide or g.manySide, as these are either g.Left or g.Right and so have been closed above. - if g.leftMetadata != nil { - types.PutSeriesMetadataSlice(g.leftMetadata) + if g.oneSideMetadata != nil { + types.PutSeriesMetadataSlice(g.oneSideMetadata) } - if g.rightMetadata != nil { - types.PutSeriesMetadataSlice(g.rightMetadata) + if g.manySideMetadata != nil { + types.PutSeriesMetadataSlice(g.manySideMetadata) } + if g.oneSideBuffer != nil { + g.oneSideBuffer.Close() + } + + if g.manySideBuffer != nil { + g.manySideBuffer.Close() + } } From 882b2a5f0ec5b1c60ad6972dcdae247368a1f4dd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Mon, 2 Dec 2024 16:29:09 +1100 Subject: [PATCH 13/43] Initial implementation --- .../operators/binops/binary_operation.go | 228 +++++++++++++++++ .../grouped_vector_vector_binary_operation.go | 154 ++++++++++-- ...e_to_one_vector_vector_binary_operation.go | 235 ++---------------- ...one_vector_vector_binary_operation_test.go | 2 +- 4 files changed, 384 insertions(+), 235 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index 7b603a5d3ae..de5726ee9ae 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -3,15 +3,19 @@ package binops import ( + "fmt" "slices" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" + "github.com/grafana/mimir/pkg/streamingpromql/compat" "github.com/grafana/mimir/pkg/streamingpromql/limiting" + 
"github.com/grafana/mimir/pkg/streamingpromql/operators/functions" "github.com/grafana/mimir/pkg/streamingpromql/types" ) @@ -46,6 +50,35 @@ func vectorMatchingGroupKeyFunc(vectorMatching parser.VectorMatching) func(label } } +// vectorMatchingGroupLabelsFunc returns a function that computes the labels of the output group a series belongs to. +func groupLabelsFunc(vectorMatching parser.VectorMatching, returnBool bool) func(labels.Labels) labels.Labels { + lb := labels.NewBuilder(labels.EmptyLabels()) + + if vectorMatching.On { + return func(l labels.Labels) labels.Labels { + lb.Reset(l) + lb.Keep(vectorMatching.MatchingLabels...) + return lb.Labels() + } + } + + if returnBool { + // If this is a comparison operator, we want to retain the metric name, as the comparison acts like a filter. + return func(l labels.Labels) labels.Labels { + lb.Reset(l) + lb.Del(vectorMatching.MatchingLabels...) + return lb.Labels() + } + } + + return func(l labels.Labels) labels.Labels { + lb.Reset(l) + lb.Del(labels.MetricName) + lb.Del(vectorMatching.MatchingLabels...) + return lb.Labels() + } +} + // filterSeries returns data filtered based on the mask provided. // // mask is expected to contain one value for each time step in the query time range. 
@@ -115,3 +148,198 @@ func sampleTypeDescription(h *histogram.FloatHistogram) string { return "histogram" } + +type vectorVectorBinaryOperationEvaluator struct { + op parser.ItemType + opFunc binaryOperationFunc + leftIterator types.InstantVectorSeriesDataIterator + rightIterator types.InstantVectorSeriesDataIterator + memoryConsumptionTracker *limiting.MemoryConsumptionTracker + annotations *annotations.Annotations + expressionPosition posrange.PositionRange +} + +func newVectorVectorBinaryOperationEvaluator( + op parser.ItemType, + returnBool bool, + memoryConsumptionTracker *limiting.MemoryConsumptionTracker, + annotations *annotations.Annotations, + expressionPosition posrange.PositionRange, +) (vectorVectorBinaryOperationEvaluator, error) { + e := vectorVectorBinaryOperationEvaluator{ + op: op, + opFunc: nil, + memoryConsumptionTracker: memoryConsumptionTracker, + annotations: annotations, + expressionPosition: expressionPosition, + } + + if returnBool { + e.opFunc = boolComparisonOperationFuncs[op] + } else { + e.opFunc = arithmeticAndComparisonOperationFuncs[op] + } + + if e.opFunc == nil { + return vectorVectorBinaryOperationEvaluator{}, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", op)) + } + + return e, nil + +} + +func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantVectorSeriesData, right types.InstantVectorSeriesData, takeOwnershipOfLeft bool, takeOwnershipOfRight bool) (types.InstantVectorSeriesData, error) { + var fPoints []promql.FPoint + var hPoints []promql.HPoint + + // For one-to-one matching for arithmetic operators, we'll never produce more points than the smaller input side. + // Because floats and histograms can be multiplied together, we use the sum of both the float and histogram points. + // We also don't know if the output will be exclusively floats or histograms, so we'll use the same size slice for both. 
+ // We only assign the slices once we see the associated point type so it shouldn't be common that we allocate both. + canReturnLeftFPointSlice, canReturnLeftHPointSlice, canReturnRightFPointSlice, canReturnRightHPointSlice := takeOwnershipOfLeft, takeOwnershipOfLeft, takeOwnershipOfRight, takeOwnershipOfRight + leftPoints := len(left.Floats) + len(left.Histograms) + rightPoints := len(right.Floats) + len(right.Histograms) + maxPoints := max(leftPoints, rightPoints) + + // We cannot re-use any slices when the series contain a mix of floats and histograms. + // Consider the following, where f is a float at a particular step, and h is a histogram. + // load 5m + // series1 f f f h h + // series2 h h f f h + // eval range from 0 to 25m step 5m series1 * series2 + // {} h h f h f + // We can fit the resulting 3 histograms into series2 existing slice. However, the second + // last step (index 3) produces a histogram which would be stored over the existing histogram + // at the end of series2 (also index 3). + // It should be pretty uncommon that metric contains both histograms and floats, so we will + // accept the cost of a new slice. 
+ mixedPoints := (len(left.Floats) > 0 && len(left.Histograms) > 0) || (len(right.Floats) > 0 && len(right.Histograms) > 0) + + prepareFSlice := func() error { + if !mixedPoints && maxPoints <= cap(left.Floats) && cap(left.Floats) < cap(right.Floats) && takeOwnershipOfLeft { + // Can fit output in left side, the left side is smaller than the right, and we're allowed to modify it + canReturnLeftFPointSlice = false + fPoints = left.Floats[:0] + return nil + } + if !mixedPoints && maxPoints <= cap(right.Floats) && takeOwnershipOfRight { + // Can otherwise fit in the right side and we're allowed to modify it + canReturnRightFPointSlice = false + fPoints = right.Floats[:0] + return nil + } + // Either we have mixed points or we can't fit in either left or right side, so create a new slice + var err error + if fPoints, err = types.FPointSlicePool.Get(maxPoints, e.memoryConsumptionTracker); err != nil { + return err + } + return nil + } + + prepareHSlice := func() error { + if !mixedPoints && maxPoints <= cap(left.Histograms) && cap(left.Histograms) < cap(right.Histograms) && takeOwnershipOfLeft { + // Can fit output in left side, the left side is smaller than the right, and we're allowed to modify it + canReturnLeftHPointSlice = false + hPoints = left.Histograms[:0] + return nil + } + if !mixedPoints && maxPoints <= cap(right.Histograms) && takeOwnershipOfRight { + // Can otherwise fit in the right side and we're allowed to modify it + canReturnRightHPointSlice = false + hPoints = right.Histograms[:0] + return nil + } + // Either we have mixed points or we can't fit in either left or right side, so create a new slice + var err error + if hPoints, err = types.HPointSlicePool.Get(maxPoints, e.memoryConsumptionTracker); err != nil { + return err + } + return nil + } + + e.leftIterator.Reset(left) + e.rightIterator.Reset(right) + + // Get first sample from left and right + lT, lF, lH, lOk := e.leftIterator.Next() + rT, rF, rH, rOk := e.rightIterator.Next() + // Continue 
iterating until we exhaust either the LHS or RHS + // denoted by lOk or rOk being false. + for lOk && rOk { + if lT == rT { + // We have samples on both sides at this timestep. + resultFloat, resultHist, keep, valid, err := e.opFunc(lF, rF, lH, rH) + + if err != nil { + err = functions.NativeHistogramErrorToAnnotation(err, e.emitAnnotation) + if err != nil { + return types.InstantVectorSeriesData{}, err + } + + // Else: error was converted to an annotation, continue without emitting a sample here. + keep = false + } + + if !valid { + emitIncompatibleTypesAnnotation(e.annotations, e.op, lH, rH, e.expressionPosition) + } + + if keep { + if resultHist != nil { + if hPoints == nil { + if err = prepareHSlice(); err != nil { + return types.InstantVectorSeriesData{}, err + } + } + hPoints = append(hPoints, promql.HPoint{ + H: resultHist, + T: lT, + }) + } else { + if fPoints == nil { + if err = prepareFSlice(); err != nil { + return types.InstantVectorSeriesData{}, err + } + } + fPoints = append(fPoints, promql.FPoint{ + F: resultFloat, + T: lT, + }) + } + } + } + + // Advance the iterator with the lower timestamp, or both if equal + if lT == rT { + lT, lF, lH, lOk = e.leftIterator.Next() + rT, rF, rH, rOk = e.rightIterator.Next() + } else if lT < rT { + lT, lF, lH, lOk = e.leftIterator.Next() + } else { + rT, rF, rH, rOk = e.rightIterator.Next() + } + } + + // Cleanup the unused slices. 
+ if canReturnLeftFPointSlice { + types.FPointSlicePool.Put(left.Floats, e.memoryConsumptionTracker) + } + if canReturnLeftHPointSlice { + types.HPointSlicePool.Put(left.Histograms, e.memoryConsumptionTracker) + } + if canReturnRightFPointSlice { + types.FPointSlicePool.Put(right.Floats, e.memoryConsumptionTracker) + } + if canReturnRightHPointSlice { + types.HPointSlicePool.Put(right.Histograms, e.memoryConsumptionTracker) + } + + return types.InstantVectorSeriesData{ + Floats: fPoints, + Histograms: hPoints, + }, nil +} + +func (e *vectorVectorBinaryOperationEvaluator) emitAnnotation(generator types.AnnotationGenerator) { + e.annotations.Add(generator("", e.expressionPosition)) +} diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index c4eef07c6d9..b89192e1eaa 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -6,13 +6,14 @@ import ( "context" "fmt" "sort" + "time" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" - "github.com/grafana/mimir/pkg/streamingpromql/compat" "github.com/grafana/mimir/pkg/streamingpromql/limiting" "github.com/grafana/mimir/pkg/streamingpromql/operators" "github.com/grafana/mimir/pkg/streamingpromql/types" @@ -29,11 +30,10 @@ type GroupedVectorVectorBinaryOperation struct { VectorMatching parser.VectorMatching - opFunc binaryOperationFunc - expressionPosition posrange.PositionRange annotations *annotations.Annotations + evaluator vectorVectorBinaryOperationEvaluator remainingSeries []*groupedBinaryOperationOutputSeries oneSide types.InstantVectorOperator // Either Left or Right 
manySide types.InstantVectorOperator @@ -101,6 +101,11 @@ func NewGroupedVectorVectorBinaryOperation( annotations *annotations.Annotations, expressionPosition posrange.PositionRange, ) (*GroupedVectorVectorBinaryOperation, error) { + e, err := newVectorVectorBinaryOperationEvaluator(op, returnBool, memoryConsumptionTracker, annotations, expressionPosition) + if err != nil { + return nil, err + } + g := &GroupedVectorVectorBinaryOperation{ Left: left, Right: right, @@ -109,16 +114,11 @@ func NewGroupedVectorVectorBinaryOperation( ReturnBool: returnBool, MemoryConsumptionTracker: memoryConsumptionTracker, + evaluator: e, expressionPosition: expressionPosition, annotations: annotations, } - if returnBool { - g.opFunc = boolComparisonOperationFuncs[op] - } else { - g.opFunc = arithmeticAndComparisonOperationFuncs[op] - } - switch g.VectorMatching.Card { case parser.CardOneToMany: g.oneSide, g.manySide = g.Left, g.Right @@ -128,10 +128,6 @@ func NewGroupedVectorVectorBinaryOperation( return nil, fmt.Errorf("unsupported cardinality '%v'", g.VectorMatching.Card) } - if g.opFunc == nil { - return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", op)) - } - return g, nil } @@ -168,7 +164,7 @@ func (g *GroupedVectorVectorBinaryOperation) SeriesMetadata(ctx context.Context) g.oneSideBuffer = operators.NewInstantVectorOperatorBuffer(g.oneSide, oneSideSeriesUsed, g.MemoryConsumptionTracker) g.manySideBuffer = operators.NewInstantVectorOperatorBuffer(g.manySide, manySideSeriesUsed, g.MemoryConsumptionTracker) - return nil, nil + return allMetadata, nil } // loadSeriesMetadata loads series metadata from both sides of this operation. 
@@ -347,7 +343,7 @@ func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneS } func (g *GroupedVectorVectorBinaryOperation) seriesLabelsFunc() func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { - shouldRemoveMetricName := !g.Op.IsComparisonOperator() + shouldRemoveMetricName := !g.Op.IsComparisonOperator() || g.ReturnBool if len(g.VectorMatching.Include) == 0 { if shouldRemoveMetricName { @@ -432,7 +428,133 @@ func (s favourManySideSorter) Swap(i, j int) { } func (g *GroupedVectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { - return types.InstantVectorSeriesData{}, nil + if len(g.remainingSeries) == 0 { + return types.InstantVectorSeriesData{}, types.EOS + } + + thisSeries := g.remainingSeries[0] + g.remainingSeries = g.remainingSeries[1:] + + if err := g.ensureOneSidePopulated(ctx, thisSeries.oneSide); err != nil { + return types.InstantVectorSeriesData{}, err + } + + manySideSeries, err := g.manySideBuffer.GetSeries(ctx, thisSeries.manySideSeriesIndices) + if err != nil { + return types.InstantVectorSeriesData{}, err + } + + mergedManySide, err := g.mergeSingleSide(manySideSeries, thisSeries.manySideSeriesIndices, g.manySideMetadata, g.manySideHandedness()) + if err != nil { + return types.InstantVectorSeriesData{}, err + } + + thisSeries.oneSide.outputSeriesCount-- + isLastOutputSeriesForOneSide := thisSeries.oneSide.outputSeriesCount == 0 + var result types.InstantVectorSeriesData + + switch g.VectorMatching.Card { + case parser.CardOneToMany: + result, err = g.evaluator.computeResult(thisSeries.oneSide.mergedData, mergedManySide, isLastOutputSeriesForOneSide, true) + case parser.CardManyToOne: + result, err = g.evaluator.computeResult(mergedManySide, thisSeries.oneSide.mergedData, true, isLastOutputSeriesForOneSide) + default: + panic(fmt.Sprintf("unsupported cardinality '%v'", g.VectorMatching.Card)) + } + + if err != nil { + return 
types.InstantVectorSeriesData{}, err + } + + return result, nil +} + +func (g *GroupedVectorVectorBinaryOperation) ensureOneSidePopulated(ctx context.Context, oneSide *oneSide) error { + if oneSide.seriesIndices == nil { + // Already populated. + return nil + } + + // First time we've used this "one" side, populate it. + data, err := g.oneSideBuffer.GetSeries(ctx, oneSide.seriesIndices) + if err != nil { + return err + } + + oneSide.mergedData, err = g.mergeSingleSide(data, oneSide.seriesIndices, g.oneSideMetadata, g.oneSideHandedness()) + if err != nil { + return err + } + + // TODO: update oneSide.matchGroup.presence, bail if conflict + + // Clear seriesIndices to indicate that we've populated it. + oneSide.seriesIndices = nil + + return nil +} + +func (g *GroupedVectorVectorBinaryOperation) mergeSingleSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []types.SeriesMetadata, side string) (types.InstantVectorSeriesData, error) { + merged, conflict, err := operators.MergeSeries(data, sourceSeriesIndices, g.MemoryConsumptionTracker) + + if err != nil { + return types.InstantVectorSeriesData{}, err + } + + if conflict != nil { + return types.InstantVectorSeriesData{}, g.mergeConflictToError(conflict, sourceSeriesMetadata, side) + } + + return merged, nil +} + +func (g *GroupedVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { + firstConflictingSeriesLabels := sourceSeriesMetadata[conflict.FirstConflictingSeriesIndex].Labels + groupLabels := groupLabelsFunc(g.VectorMatching, g.ReturnBool)(firstConflictingSeriesLabels) + + if conflict.SecondConflictingSeriesIndex == -1 { + return fmt.Errorf( + "found %s for the match group %s on the %s side of the operation at timestamp %s", + conflict.Description, + groupLabels, + side, + timestamp.Time(conflict.Timestamp).Format(time.RFC3339Nano), + ) + } + + secondConflictingSeriesLabels := 
sourceSeriesMetadata[conflict.SecondConflictingSeriesIndex].Labels + + return fmt.Errorf( + "found %s for the match group %s on the %s side of the operation at timestamp %s: %s and %s", + conflict.Description, + groupLabels, + side, + timestamp.Time(conflict.Timestamp).Format(time.RFC3339Nano), + firstConflictingSeriesLabels, + secondConflictingSeriesLabels, + ) +} + +func (g *GroupedVectorVectorBinaryOperation) oneSideHandedness() string { + switch g.VectorMatching.Card { + case parser.CardOneToMany: + return "left" + case parser.CardManyToOne: + return "right" + default: + panic(fmt.Sprintf("unsupported cardinality '%v'", g.VectorMatching.Card)) + } +} + +func (g *GroupedVectorVectorBinaryOperation) manySideHandedness() string { + switch g.VectorMatching.Card { + case parser.CardOneToMany: + return "right" + case parser.CardManyToOne: + return "left" + default: + panic(fmt.Sprintf("unsupported cardinality '%v'", g.VectorMatching.Card)) + } } func (g *GroupedVectorVectorBinaryOperation) ExpressionPosition() posrange.PositionRange { diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index 41137e1904b..337e5f33f83 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -13,17 +13,13 @@ import ( "time" "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" - "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" - "github.com/grafana/mimir/pkg/streamingpromql/compat" "github.com/grafana/mimir/pkg/streamingpromql/limiting" 
"github.com/grafana/mimir/pkg/streamingpromql/operators" - "github.com/grafana/mimir/pkg/streamingpromql/operators/functions" "github.com/grafana/mimir/pkg/streamingpromql/types" ) @@ -48,9 +44,7 @@ type OneToOneVectorVectorBinaryOperation struct { remainingSeries []*oneToOneBinaryOperationOutputSeries leftBuffer *operators.InstantVectorOperatorBuffer rightBuffer *operators.InstantVectorOperatorBuffer - leftIterator types.InstantVectorSeriesDataIterator - rightIterator types.InstantVectorSeriesDataIterator - opFunc binaryOperationFunc + evaluator vectorVectorBinaryOperationEvaluator expressionPosition posrange.PositionRange annotations *annotations.Annotations @@ -87,30 +81,24 @@ func NewOneToOneVectorVectorBinaryOperation( annotations *annotations.Annotations, expressionPosition posrange.PositionRange, ) (*OneToOneVectorVectorBinaryOperation, error) { + e, err := newVectorVectorBinaryOperationEvaluator(op, returnBool, memoryConsumptionTracker, annotations, expressionPosition) + if err != nil { + return nil, err + } + b := &OneToOneVectorVectorBinaryOperation{ Left: left, Right: right, - leftIterator: types.InstantVectorSeriesDataIterator{}, - rightIterator: types.InstantVectorSeriesDataIterator{}, VectorMatching: vectorMatching, Op: op, ReturnBool: returnBool, MemoryConsumptionTracker: memoryConsumptionTracker, + evaluator: e, expressionPosition: expressionPosition, annotations: annotations, } - if returnBool { - b.opFunc = boolComparisonOperationFuncs[op] - } else { - b.opFunc = arithmeticAndComparisonOperationFuncs[op] - } - - if b.opFunc == nil { - return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with '%s'", op)) - } - return b, nil } @@ -194,7 +182,7 @@ func (b *OneToOneVectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Con // - a list indicating which series from the left side are needed to compute the output // - a list indicating which series from the right side are needed to compute the output func (b 
*OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetadata, []*oneToOneBinaryOperationOutputSeries, []bool, []bool, error) { - labelsFunc := b.groupLabelsFunc() + labelsFunc := groupLabelsFunc(b.VectorMatching, b.ReturnBool) groupKeyFunc := vectorMatchingGroupKeyFunc(b.VectorMatching) outputSeriesMap := map[string]*oneToOneBinaryOperationOutputSeries{} @@ -360,35 +348,6 @@ func (g favourRightSideSorter) Less(i, j int) bool { return g.series[i].latestLeftSeries() < g.series[j].latestLeftSeries() } -// groupLabelsFunc returns a function that computes the labels of the output group this series belongs to. -func (b *OneToOneVectorVectorBinaryOperation) groupLabelsFunc() func(labels.Labels) labels.Labels { - lb := labels.NewBuilder(labels.EmptyLabels()) - - if b.VectorMatching.On { - return func(l labels.Labels) labels.Labels { - lb.Reset(l) - lb.Keep(b.VectorMatching.MatchingLabels...) - return lb.Labels() - } - } - - if b.Op.IsComparisonOperator() && !b.ReturnBool { - // If this is a comparison operator, we want to retain the metric name, as the comparison acts like a filter. - return func(l labels.Labels) labels.Labels { - lb.Reset(l) - lb.Del(b.VectorMatching.MatchingLabels...) - return lb.Labels() - } - } - - return func(l labels.Labels) labels.Labels { - lb.Reset(l) - lb.Del(labels.MetricName) - lb.Del(b.VectorMatching.MatchingLabels...) 
- return lb.Labels() - } -} - func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { if len(b.remainingSeries) == 0 { return types.InstantVectorSeriesData{}, types.EOS @@ -402,7 +361,7 @@ func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (t return types.InstantVectorSeriesData{}, err } - mergedLeftSide, err := b.mergeOneSide(allLeftSeries, thisSeries.leftSeriesIndices, b.leftMetadata, "left") + mergedLeftSide, err := b.mergeSingleSide(allLeftSeries, thisSeries.leftSeriesIndices, b.leftMetadata, "left") if err != nil { return types.InstantVectorSeriesData{}, err } @@ -412,15 +371,15 @@ func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (t return types.InstantVectorSeriesData{}, err } - mergedRightSide, err := b.mergeOneSide(allRightSeries, thisSeries.rightSeriesIndices, b.rightMetadata, "right") + mergedRightSide, err := b.mergeSingleSide(allRightSeries, thisSeries.rightSeriesIndices, b.rightMetadata, "right") if err != nil { return types.InstantVectorSeriesData{}, err } - return b.computeResult(mergedLeftSide, mergedRightSide) + return b.evaluator.computeResult(mergedLeftSide, mergedRightSide, true, true) } -// mergeOneSide exists to handle the case where one side of an output series has different source series at different time steps. +// mergeSingleSide exists to handle the case where one side of an output series has different source series at different time steps. // // For example, consider the query "left_side + on (env) right_side" with the following source data: // @@ -428,14 +387,12 @@ func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (t // left_side{env="test", pod="b"} _ _ 3 // right_side{env="test"} 100 200 300 // -// mergeOneSide will take in both series for left_side and return a single series with the points [1, 2, 3]. 
+// mergeSingleSide will take in both series for left_side and return a single series with the points [1, 2, 3]. // -// mergeOneSide is optimised for the case where there is only one source series, or the source series do not overlap, as in the example above. +// mergeSingleSide is optimised for the case where there is only one source series, or the source series do not overlap, as in the example above. // -// NOTE: mergeOneSide has the side effect of re-ordering both data and sourceSeriesIndices. -// -// FIXME: for many-to-one / one-to-many matching, we could avoid re-merging each time for the side used multiple times -func (b *OneToOneVectorVectorBinaryOperation) mergeOneSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []types.SeriesMetadata, side string) (types.InstantVectorSeriesData, error) { +// mergeSingleSide has the side effect of re-ordering both data and sourceSeriesIndices. +func (b *OneToOneVectorVectorBinaryOperation) mergeSingleSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []types.SeriesMetadata, side string) (types.InstantVectorSeriesData, error) { merged, conflict, err := operators.MergeSeries(data, sourceSeriesIndices, b.MemoryConsumptionTracker) if err != nil { @@ -451,7 +408,7 @@ func (b *OneToOneVectorVectorBinaryOperation) mergeOneSide(data []types.InstantV func (b *OneToOneVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { firstConflictingSeriesLabels := sourceSeriesMetadata[conflict.FirstConflictingSeriesIndex].Labels - groupLabels := b.groupLabelsFunc()(firstConflictingSeriesLabels) + groupLabels := groupLabelsFunc(b.VectorMatching, b.ReturnBool)(firstConflictingSeriesLabels) if conflict.SecondConflictingSeriesIndex == -1 { return fmt.Errorf( @@ -476,160 +433,6 @@ func (b *OneToOneVectorVectorBinaryOperation) mergeConflictToError(conflict *ope ) } -func (b 
*OneToOneVectorVectorBinaryOperation) computeResult(left types.InstantVectorSeriesData, right types.InstantVectorSeriesData) (types.InstantVectorSeriesData, error) { - var fPoints []promql.FPoint - var hPoints []promql.HPoint - - // For one-to-one matching for arithmetic operators, we'll never produce more points than the smaller input side. - // Because floats and histograms can be multiplied together, we use the sum of both the float and histogram points. - // We also don't know if the output will be exclusively floats or histograms, so we'll use the same size slice for both. - // We only assign the slices once we see the associated point type so it shouldn't be common that we allocate both. - // - // FIXME: this is not safe to do for one-to-many or many-to-one matching, as we may need the input series for later output series. - canReturnLeftFPointSlice, canReturnLeftHPointSlice, canReturnRightFPointSlice, canReturnRightHPointSlice := true, true, true, true - leftPoints := len(left.Floats) + len(left.Histograms) - rightPoints := len(right.Floats) + len(right.Histograms) - maxPoints := max(leftPoints, rightPoints) - - // We cannot re-use any slices when the series contain a mix of floats and histograms. - // Consider the following, where f is a float at a particular step, and h is a histogram. - // load 5m - // series1 f f f h h - // series2 h h f f h - // eval range from 0 to 25m step 5m series1 * series2 - // {} h h f h f - // We can fit the resulting 3 histograms into series2 existing slice. However, the second - // last step (index 3) produces a histogram which would be stored over the existing histogram - // at the end of series2 (also index 3). - // It should be pretty uncommon that metric contains both histograms and floats, so we will - // accept the cost of a new slice. 
- mixedPoints := (len(left.Floats) > 0 && len(left.Histograms) > 0) || (len(right.Floats) > 0 && len(right.Histograms) > 0) - - prepareFSlice := func() error { - if !mixedPoints && maxPoints <= cap(left.Floats) && cap(left.Floats) < cap(right.Floats) { - // Can fit output in left side, and the left side is smaller than the right - canReturnLeftFPointSlice = false - fPoints = left.Floats[:0] - return nil - } - if !mixedPoints && maxPoints <= cap(right.Floats) { - // Can otherwise fit in the right side - canReturnRightFPointSlice = false - fPoints = right.Floats[:0] - return nil - } - // Either we have mixed points or we can't fit in either left or right side, so create a new slice - var err error - if fPoints, err = types.FPointSlicePool.Get(maxPoints, b.MemoryConsumptionTracker); err != nil { - return err - } - return nil - } - - prepareHSlice := func() error { - if !mixedPoints && maxPoints <= cap(left.Histograms) && cap(left.Histograms) < cap(right.Histograms) { - // Can fit output in left side, and the left side is smaller than the right - canReturnLeftHPointSlice = false - hPoints = left.Histograms[:0] - return nil - } - if !mixedPoints && maxPoints <= cap(right.Histograms) { - // Can otherwise fit in the right side - canReturnRightHPointSlice = false - hPoints = right.Histograms[:0] - return nil - } - // Either we have mixed points or we can't fit in either left or right side, so create a new slice - var err error - if hPoints, err = types.HPointSlicePool.Get(maxPoints, b.MemoryConsumptionTracker); err != nil { - return err - } - return nil - } - - b.leftIterator.Reset(left) - b.rightIterator.Reset(right) - - // Get first sample from left and right - lT, lF, lH, lOk := b.leftIterator.Next() - rT, rF, rH, rOk := b.rightIterator.Next() - // Continue iterating until we exhaust either the LHS or RHS - // denoted by lOk or rOk being false. - for lOk && rOk { - if lT == rT { - // We have samples on both sides at this timestep. 
- resultFloat, resultHist, keep, valid, err := b.opFunc(lF, rF, lH, rH) - - if err != nil { - err = functions.NativeHistogramErrorToAnnotation(err, b.emitAnnotation) - if err != nil { - return types.InstantVectorSeriesData{}, err - } - - // Else: error was converted to an annotation, continue without emitting a sample here. - keep = false - } - - if !valid { - emitIncompatibleTypesAnnotation(b.annotations, b.Op, lH, rH, b.expressionPosition) - } - - if keep { - if resultHist != nil { - if hPoints == nil { - if err = prepareHSlice(); err != nil { - return types.InstantVectorSeriesData{}, err - } - } - hPoints = append(hPoints, promql.HPoint{ - H: resultHist, - T: lT, - }) - } else { - if fPoints == nil { - if err = prepareFSlice(); err != nil { - return types.InstantVectorSeriesData{}, err - } - } - fPoints = append(fPoints, promql.FPoint{ - F: resultFloat, - T: lT, - }) - } - } - } - - // Advance the iterator with the lower timestamp, or both if equal - if lT == rT { - lT, lF, lH, lOk = b.leftIterator.Next() - rT, rF, rH, rOk = b.rightIterator.Next() - } else if lT < rT { - lT, lF, lH, lOk = b.leftIterator.Next() - } else { - rT, rF, rH, rOk = b.rightIterator.Next() - } - } - - // Cleanup the unused slices. 
- if canReturnLeftFPointSlice { - types.FPointSlicePool.Put(left.Floats, b.MemoryConsumptionTracker) - } - if canReturnLeftHPointSlice { - types.HPointSlicePool.Put(left.Histograms, b.MemoryConsumptionTracker) - } - if canReturnRightFPointSlice { - types.FPointSlicePool.Put(right.Floats, b.MemoryConsumptionTracker) - } - if canReturnRightHPointSlice { - types.HPointSlicePool.Put(right.Histograms, b.MemoryConsumptionTracker) - } - - return types.InstantVectorSeriesData{ - Floats: fPoints, - Histograms: hPoints, - }, nil -} - func (b *OneToOneVectorVectorBinaryOperation) Close() { b.Left.Close() b.Right.Close() @@ -651,10 +454,6 @@ func (b *OneToOneVectorVectorBinaryOperation) Close() { } } -func (b *OneToOneVectorVectorBinaryOperation) emitAnnotation(generator types.AnnotationGenerator) { - b.annotations.Add(generator("", b.expressionPosition)) -} - type binaryOperationFunc func(lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram) (f float64, h *histogram.FloatHistogram, keep bool, valid bool, err error) // FIXME(jhesketh): Investigate avoiding copying histograms for binary ops. 
diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go index 40c79e515fb..7faa513b11b 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go @@ -202,7 +202,7 @@ func TestVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { require.NoError(t, memoryConsumptionTracker.IncreaseMemoryConsumption(types.FPointSize*uint64(len(s.Floats))+types.HPointSize*uint64(len(s.Histograms)))) } - result, err := o.mergeOneSide(testCase.input, testCase.sourceSeriesIndices, testCase.sourceSeriesMetadata, "right") + result, err := o.mergeSingleSide(testCase.input, testCase.sourceSeriesIndices, testCase.sourceSeriesMetadata, "right") if testCase.expectedError == "" { require.NoError(t, err) From 0ea868733d2ff9460833ab46b420064e0477cc0a Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 3 Dec 2024 13:07:53 +1100 Subject: [PATCH 14/43] Handle case where one set of many-side series many to many output series --- .../grouped_vector_vector_binary_operation.go | 152 +++++++++++++----- 1 file changed, 108 insertions(+), 44 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index b89192e1eaa..07d062a74f2 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -51,27 +51,29 @@ type GroupedVectorVectorBinaryOperation struct { var _ types.InstantVectorOperator = &GroupedVectorVectorBinaryOperation{} type groupedBinaryOperationOutputSeries struct { - manySideSeriesIndices []int - oneSide *oneSide + manySide *manySide + oneSide *oneSide } -// latestManySeries returns the 
index of the last series from the "many" side needed for this output series. -// -// It assumes that manySideSeriesIndices is sorted in ascending order. -func (s groupedBinaryOperationOutputSeries) latestManySeries() int { - return s.manySideSeriesIndices[len(s.manySideSeriesIndices)-1] +type groupedBinaryOperationOutputSeriesWithLabels struct { + labels labels.Labels + outputSeries *groupedBinaryOperationOutputSeries } -// latestOneSeries returns the index of the last series from the "one" side needed for this output series. -// -// It assumes that oneSide.outputSeries is sorted in ascending order. -func (s groupedBinaryOperationOutputSeries) latestOneSeries() int { - return s.oneSide.seriesIndices[len(s.oneSide.seriesIndices)-1] +type manySide struct { + // If this side has not been populated, seriesIndices will not be nil and mergedData will be empty. + // If this side has been populated, seriesIndices will be nil. + seriesIndices []int + mergedData types.InstantVectorSeriesData + + outputSeriesCount int } -type groupedBinaryOperationOutputSeriesWithLabels struct { - labels labels.Labels - outputSeries *groupedBinaryOperationOutputSeries +// latestSeries returns the index of the last series from this side. +// +// It assumes that outputSeries is sorted in ascending order. 
+func (s manySide) latestSeries() int { + return s.seriesIndices[len(s.seriesIndices)-1] } type oneSide struct { @@ -249,8 +251,10 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri return nil, nil, nil, nil, err } + manySideMap := map[string]*manySide{} manySideSeriesUsed = manySideSeriesUsed[:len(g.manySideMetadata)] - seriesLabelsFunc := g.seriesLabelsFunc() + manySideGroupKeyFunc := g.manySideGroupKeyFunc() + outputSeriesLabelsFunc := g.outputSeriesLabelsFunc() buf := make([]byte, 0, 1024) for idx, s := range g.manySideMetadata { @@ -263,26 +267,39 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri } manySideSeriesUsed[idx] = true + manySideGroupKey := manySideGroupKeyFunc(s.Labels) + thisManySide, exists := manySideMap[string(manySideGroupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. + + if exists { + thisManySide.seriesIndices = append(thisManySide.seriesIndices, idx) + continue + } + + thisManySide = &manySide{ + seriesIndices: []int{idx}, + } + + manySideMap[string(manySideGroupKey)] = thisManySide for _, oneSide := range oneSideGroup { // Most of the time, the output series won't already exist (unless we have input series with different metric names), // so just create the series labels directly rather than trying to avoid their creation until we know for sure we'll // need them. 
- l := seriesLabelsFunc(g.oneSideMetadata[oneSide.seriesIndices[0]].Labels, s.Labels) + l := outputSeriesLabelsFunc(g.oneSideMetadata[oneSide.seriesIndices[0]].Labels, s.Labels) outputSeries, exists := outputSeriesMap[string(l.Bytes(buf))] - if exists { - outputSeries.outputSeries.manySideSeriesIndices = append(outputSeries.outputSeries.manySideSeriesIndices, idx) - } else { + if !exists { oneSide.outputSeriesCount++ + thisManySide.outputSeriesCount++ outputSeries = groupedBinaryOperationOutputSeriesWithLabels{ labels: l, outputSeries: &groupedBinaryOperationOutputSeries{ - manySideSeriesIndices: []int{idx}, - oneSide: oneSide, + manySide: thisManySide, + oneSide: oneSide, }, } + outputSeriesMap[string(l.Bytes(buf))] = outputSeries } } @@ -328,6 +345,8 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri return outputMetadata, outputSeries, oneSideSeriesUsed, manySideSeriesUsed, nil } +// additionalLabelsKeyFunc returns a function that extracts a key representing the additional labels from a "one" side series that will +// be included in the final output series labels. func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneSideLabels labels.Labels) []byte { if len(g.VectorMatching.Include) == 0 { return func(oneSideLabels labels.Labels) []byte { @@ -342,11 +361,26 @@ func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneS } } -func (g *GroupedVectorVectorBinaryOperation) seriesLabelsFunc() func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { - shouldRemoveMetricName := !g.Op.IsComparisonOperator() || g.ReturnBool +// manySideGroupKeyFunc returns a function that extracts a key representing the set of series from the "many" side that will contribute +// to the same set of output series. 
+func (g *GroupedVectorVectorBinaryOperation) manySideGroupKeyFunc() func(manySideLabels labels.Labels) []byte { + buf := make([]byte, 0, 1024) + + if g.shouldRemoveMetricNameFromManySide() { + return func(manySideLabels labels.Labels) []byte { + return manySideLabels.BytesWithoutLabels(buf, labels.MetricName) + } + } + return func(manySideLabels labels.Labels) []byte { + return manySideLabels.Bytes(buf) // FIXME: it'd be nice if we could avoid copying the bytes here + } +} + +// outputSeriesLabelsFunc returns a function that determines the final output series labels for given series on both sides. +func (g *GroupedVectorVectorBinaryOperation) outputSeriesLabelsFunc() func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { if len(g.VectorMatching.Include) == 0 { - if shouldRemoveMetricName { + if g.shouldRemoveMetricNameFromManySide() { return func(_ labels.Labels, manySideLabels labels.Labels) labels.Labels { return manySideLabels.DropMetricName() } @@ -359,7 +393,7 @@ func (g *GroupedVectorVectorBinaryOperation) seriesLabelsFunc() func(oneSideLabe lb := labels.NewBuilder(labels.EmptyLabels()) - if shouldRemoveMetricName { + if g.shouldRemoveMetricNameFromManySide() { return func(oneSideLabels labels.Labels, manySideLabels labels.Labels) labels.Labels { lb.Reset(manySideLabels) lb.Del(labels.MetricName) @@ -383,6 +417,14 @@ func (g *GroupedVectorVectorBinaryOperation) seriesLabelsFunc() func(oneSideLabe } } +func (g *GroupedVectorVectorBinaryOperation) shouldRemoveMetricNameFromManySide() bool { + if g.Op.IsComparisonOperator() { + return g.ReturnBool + } + + return true +} + // sortSeries sorts metadata and series in place to try to minimise the number of input series we'll need to buffer in memory. 
// // This is critical for minimising the memory consumption of this operator: if we choose a poor ordering of series, @@ -413,13 +455,13 @@ func (s favourManySideSorter) Len() int { } func (s favourManySideSorter) Less(i, j int) bool { - iMany := s.series[i].latestManySeries() - jMany := s.series[j].latestManySeries() + iMany := s.series[i].manySide.latestSeries() + jMany := s.series[j].manySide.latestSeries() if iMany != jMany { return iMany < jMany } - return s.series[i].latestOneSeries() < s.series[j].latestOneSeries() + return s.series[i].oneSide.latestSeries() < s.series[j].oneSide.latestSeries() } func (s favourManySideSorter) Swap(i, j int) { @@ -439,25 +481,24 @@ func (g *GroupedVectorVectorBinaryOperation) NextSeries(ctx context.Context) (ty return types.InstantVectorSeriesData{}, err } - manySideSeries, err := g.manySideBuffer.GetSeries(ctx, thisSeries.manySideSeriesIndices) - if err != nil { - return types.InstantVectorSeriesData{}, err - } - - mergedManySide, err := g.mergeSingleSide(manySideSeries, thisSeries.manySideSeriesIndices, g.manySideMetadata, g.manySideHandedness()) - if err != nil { + if err := g.ensureManySidePopulated(ctx, thisSeries.manySide); err != nil { return types.InstantVectorSeriesData{}, err } thisSeries.oneSide.outputSeriesCount-- isLastOutputSeriesForOneSide := thisSeries.oneSide.outputSeriesCount == 0 + + thisSeries.manySide.outputSeriesCount-- + isLastOutputSeriesForManySide := thisSeries.manySide.outputSeriesCount == 0 + var result types.InstantVectorSeriesData + var err error switch g.VectorMatching.Card { case parser.CardOneToMany: - result, err = g.evaluator.computeResult(thisSeries.oneSide.mergedData, mergedManySide, isLastOutputSeriesForOneSide, true) + result, err = g.evaluator.computeResult(thisSeries.oneSide.mergedData, thisSeries.manySide.mergedData, isLastOutputSeriesForOneSide, isLastOutputSeriesForManySide) case parser.CardManyToOne: - result, err = g.evaluator.computeResult(mergedManySide, 
thisSeries.oneSide.mergedData, true, isLastOutputSeriesForOneSide) + result, err = g.evaluator.computeResult(thisSeries.manySide.mergedData, thisSeries.oneSide.mergedData, isLastOutputSeriesForManySide, isLastOutputSeriesForOneSide) default: panic(fmt.Sprintf("unsupported cardinality '%v'", g.VectorMatching.Card)) } @@ -469,27 +510,50 @@ func (g *GroupedVectorVectorBinaryOperation) NextSeries(ctx context.Context) (ty return result, nil } -func (g *GroupedVectorVectorBinaryOperation) ensureOneSidePopulated(ctx context.Context, oneSide *oneSide) error { - if oneSide.seriesIndices == nil { +func (g *GroupedVectorVectorBinaryOperation) ensureOneSidePopulated(ctx context.Context, side *oneSide) error { + if side.seriesIndices == nil { // Already populated. return nil } // First time we've used this "one" side, populate it. - data, err := g.oneSideBuffer.GetSeries(ctx, oneSide.seriesIndices) + data, err := g.oneSideBuffer.GetSeries(ctx, side.seriesIndices) if err != nil { return err } - oneSide.mergedData, err = g.mergeSingleSide(data, oneSide.seriesIndices, g.oneSideMetadata, g.oneSideHandedness()) + side.mergedData, err = g.mergeSingleSide(data, side.seriesIndices, g.oneSideMetadata, g.oneSideHandedness()) if err != nil { return err } - // TODO: update oneSide.matchGroup.presence, bail if conflict + // TODO: update side.matchGroup.presence, bail if conflict + + // Clear seriesIndices to indicate that we've populated it. + side.seriesIndices = nil + + return nil +} + +func (g *GroupedVectorVectorBinaryOperation) ensureManySidePopulated(ctx context.Context, side *manySide) error { + if side.seriesIndices == nil { + // Already populated. + return nil + } + + // First time we've used this "one" side, populate it. 
+ data, err := g.manySideBuffer.GetSeries(ctx, side.seriesIndices) + if err != nil { + return err + } + + side.mergedData, err = g.mergeSingleSide(data, side.seriesIndices, g.manySideMetadata, g.manySideHandedness()) + if err != nil { + return err + } // Clear seriesIndices to indicate that we've populated it. - oneSide.seriesIndices = nil + side.seriesIndices = nil return nil } From b29c99e36ba23052a46e46845104fa477bcde283 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 3 Dec 2024 13:15:53 +1100 Subject: [PATCH 15/43] Use correct labels when grouping one side series --- .../operators/binops/grouped_vector_vector_binary_operation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 07d062a74f2..7d7524fb3fd 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -357,7 +357,7 @@ func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneS buf := make([]byte, 0, 1024) return func(oneSideLabels labels.Labels) []byte { - return oneSideLabels.BytesWithLabels(buf, g.VectorMatching.MatchingLabels...) + return oneSideLabels.BytesWithLabels(buf, g.VectorMatching.Include...) 
} } From 11bf6b0aedd84da545ca9a7496cd624f9bea1ea5 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 3 Dec 2024 15:35:06 +1100 Subject: [PATCH 16/43] Return a conflict error message if there are multiple samples at the same timestamp on the same "one" side for the same group with different additional labels --- .../grouped_vector_vector_binary_operation.go | 110 +++++++++++++++--- pkg/streamingpromql/query.go | 2 +- .../testdata/ours/binary_operators.test | 50 +++----- pkg/streamingpromql/types/limiting_pool.go | 10 ++ 4 files changed, 127 insertions(+), 45 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 7d7524fb3fd..3b604cd7a57 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -32,6 +32,7 @@ type GroupedVectorVectorBinaryOperation struct { expressionPosition posrange.PositionRange annotations *annotations.Annotations + timeRange types.QueryTimeRange evaluator vectorVectorBinaryOperationEvaluator remainingSeries []*groupedBinaryOperationOutputSeries @@ -71,7 +72,7 @@ type manySide struct { // latestSeries returns the index of the last series from this side. // -// It assumes that outputSeries is sorted in ascending order. +// It assumes that seriesIndices is sorted in ascending order. func (s manySide) latestSeries() int { return s.seriesIndices[len(s.seriesIndices)-1] } @@ -84,13 +85,31 @@ type oneSide struct { outputSeriesCount int // The number of output series that refer to this side. - matchGroup *matchGroup + matchGroup *matchGroup // nil if this is the only "one" side in this group. +} + +// latestSeries returns the index of the last series from this side. +// +// It assumes that seriesIndices is sorted in ascending order. 
+func (s oneSide) latestSeries() int { + return s.seriesIndices[len(s.seriesIndices)-1] } type matchGroup struct { // Time steps at which we've seen samples for any "one" side in this group. // Each value is the index of the source series of the sample, or -1 if no sample has been seen for this time step yet. presence []int + + oneSideCount int +} + +func (g *matchGroup) updatePresence(timestampIdx int64, seriesIdx int) int { + if existing := g.presence[timestampIdx]; existing != -1 { + return existing + } + + g.presence[timestampIdx] = seriesIdx + return -1 } func NewGroupedVectorVectorBinaryOperation( @@ -102,6 +121,7 @@ func NewGroupedVectorVectorBinaryOperation( memoryConsumptionTracker *limiting.MemoryConsumptionTracker, annotations *annotations.Annotations, expressionPosition posrange.PositionRange, + timeRange types.QueryTimeRange, ) (*GroupedVectorVectorBinaryOperation, error) { e, err := newVectorVectorBinaryOperationEvaluator(op, returnBool, memoryConsumptionTracker, annotations, expressionPosition) if err != nil { @@ -119,6 +139,7 @@ func NewGroupedVectorVectorBinaryOperation( evaluator: e, expressionPosition: expressionPosition, annotations: annotations, + timeRange: timeRange, } switch g.VectorMatching.Card { @@ -321,8 +342,8 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri if oneSide.outputSeriesCount == 0 { // If any part of a group has no output series, then no parts of that group will have output series. break - } else if thisMatchGroup == nil { - thisMatchGroup = &matchGroup{} + } else if thisMatchGroup == nil && len(oneSideGroup) > 1 { + thisMatchGroup = &matchGroup{oneSideCount: len(oneSideGroup)} } oneSide.matchGroup = thisMatchGroup @@ -522,19 +543,71 @@ func (g *GroupedVectorVectorBinaryOperation) ensureOneSidePopulated(ctx context. 
return err } + if err := g.updateOneSidePresence(side, data); err != nil { + return err + } + side.mergedData, err = g.mergeSingleSide(data, side.seriesIndices, g.oneSideMetadata, g.oneSideHandedness()) if err != nil { return err } - // TODO: update side.matchGroup.presence, bail if conflict - // Clear seriesIndices to indicate that we've populated it. side.seriesIndices = nil return nil } +func (g *GroupedVectorVectorBinaryOperation) updateOneSidePresence(side *oneSide, data []types.InstantVectorSeriesData) error { + matchGroup := side.matchGroup + if matchGroup == nil { + // If there is only one set of additional labels for this set of grouping labels, then there's nothing to do. + return nil + } + + // If there are multiple sets of additional labels for the same set of grouping labels, check that there is only one series at each + // time step for each set of grouping labels. + + if matchGroup.presence == nil { + var err error + matchGroup.presence, err = types.IntSlicePool.Get(g.timeRange.StepCount, g.MemoryConsumptionTracker) + + if err != nil { + return err + } + + matchGroup.presence = matchGroup.presence[:g.timeRange.StepCount] + + for idx := range matchGroup.presence { + matchGroup.presence[idx] = -1 + } + } + + for dataIdx, seriesData := range data { + seriesIdx := side.seriesIndices[dataIdx] + + for _, p := range seriesData.Floats { + if otherSeriesIdx := matchGroup.updatePresence(g.timeRange.PointIndex(p.T), seriesIdx); otherSeriesIdx != -1 { + return g.formatConflictError(otherSeriesIdx, seriesIdx, "duplicate series", p.T, g.oneSideMetadata, g.oneSideHandedness()) + } + } + + for _, p := range seriesData.Histograms { + if otherSeriesIdx := matchGroup.updatePresence(g.timeRange.PointIndex(p.T), seriesIdx); otherSeriesIdx != -1 { + return g.formatConflictError(otherSeriesIdx, seriesIdx, "duplicate series", p.T, g.oneSideMetadata, g.oneSideHandedness()) + } + } + } + + matchGroup.oneSideCount-- + + if matchGroup.oneSideCount == 0 { + 
types.IntSlicePool.Put(matchGroup.presence, g.MemoryConsumptionTracker) + } + + return nil +} + func (g *GroupedVectorVectorBinaryOperation) ensureManySidePopulated(ctx context.Context, side *manySide) error { if side.seriesIndices == nil { // Already populated. @@ -573,27 +646,38 @@ func (g *GroupedVectorVectorBinaryOperation) mergeSingleSide(data []types.Instan } func (g *GroupedVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { - firstConflictingSeriesLabels := sourceSeriesMetadata[conflict.FirstConflictingSeriesIndex].Labels + return g.formatConflictError(conflict.FirstConflictingSeriesIndex, conflict.SecondConflictingSeriesIndex, conflict.Description, conflict.Timestamp, sourceSeriesMetadata, side) +} + +func (g *GroupedVectorVectorBinaryOperation) formatConflictError( + firstConflictingSeriesIndex int, + secondConflictingSeriesIndex int, + description string, + ts int64, + sourceSeriesMetadata []types.SeriesMetadata, + side string, +) error { + firstConflictingSeriesLabels := sourceSeriesMetadata[firstConflictingSeriesIndex].Labels groupLabels := groupLabelsFunc(g.VectorMatching, g.ReturnBool)(firstConflictingSeriesLabels) - if conflict.SecondConflictingSeriesIndex == -1 { + if secondConflictingSeriesIndex == -1 { return fmt.Errorf( "found %s for the match group %s on the %s side of the operation at timestamp %s", - conflict.Description, + description, groupLabels, side, - timestamp.Time(conflict.Timestamp).Format(time.RFC3339Nano), + timestamp.Time(ts).Format(time.RFC3339Nano), ) } - secondConflictingSeriesLabels := sourceSeriesMetadata[conflict.SecondConflictingSeriesIndex].Labels + secondConflictingSeriesLabels := sourceSeriesMetadata[secondConflictingSeriesIndex].Labels return fmt.Errorf( "found %s for the match group %s on the %s side of the operation at timestamp %s: %s and %s", - conflict.Description, + description, groupLabels, side, - 
timestamp.Time(conflict.Timestamp).Format(time.RFC3339Nano), + timestamp.Time(ts).Format(time.RFC3339Nano), firstConflictingSeriesLabels, secondConflictingSeriesLabels, ) diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index 8b90c309187..ee3d2ccd8e2 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -271,7 +271,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr, timeRange types default: switch e.VectorMatching.Card { case parser.CardOneToMany, parser.CardManyToOne: - return binops.NewGroupedVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) + return binops.NewGroupedVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange(), timeRange) case parser.CardOneToOne: return binops.NewOneToOneVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) default: diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index d980ab6116a..c28cfdb982d 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1068,25 +1068,19 @@ eval range from 0 to 6m step 6m method_code:http_errors:rate5m / on(method) grou {method="post", code="500", foo="blah"} 0.05 0.6 {method="post", code="404", foo="blah"} 0.175 2.1 -# Fail if multiple series on "one" side, even if they differ on the additional labels -eval_fail instant at 12m method_code:http_errors:rate5m / ignoring(code, foo) group_left() method:http_requests:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, 
\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side - -eval_fail instant at 12m method_code:http_errors:rate5m / on(method) group_left() method:http_requests:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side - -eval_fail instant at 12m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +# Fail if multiple series on "one" side, even if they differ on the additional labels. +# We run these tests as range queries with a single step to avoid promqltest's instant query time shifting, which makes using an explicit error message pattern more difficult. +eval_fail range from 12m to 12m step 1m method_code:http_errors:rate5m / ignoring(code, foo) group_left() method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? 
-eval_fail instant at 12m method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +eval_fail range from 12m to 12m step 1m method_code:http_errors:rate5m / on(method) group_left() method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? 
-# Fail if multiple series on "one" side, even if there is no matching point on the "many" side -eval_fail instant at 18m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +eval_fail range from 12m to 12m step 1m method_code:http_errors:rate5m / ignoring(code, foo) group_left(foo) method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? 
-eval_fail instant at 18m method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +eval_fail range from 12m to 12m step 1m method_code:http_errors:rate5m / on(method) group_left(foo) method:http_requests:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the right (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? # Same cases as above, but with group_right and expressions swapped. 
eval instant at 0 method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m @@ -1166,24 +1160,18 @@ eval range from 0 to 6m step 6m method:http_requests:rate5m / on(method) group_r {method="post", code="404", foo="blah"} 5.7142857143 0.4761904762 # Fail if multiple series on "one" side, even if they differ on the additional labels -eval_fail instant at 12m method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side - -eval_fail instant at 12m method:http_requests:rate5m / on(method) group_right() method_code:http_errors:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side - -eval_fail instant at 12m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +# We run these tests as range queries with a single step to avoid promqltest's instant query time shifting, which makes using an explicit error message pattern more difficult. 
+eval_fail range from 12m to 12m step 1m method:http_requests:rate5m / ignoring(code, foo) group_right() method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? -eval_fail instant at 12m method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +eval_fail range from 12m to 12m step 1m method:http_requests:rate5m / on(method) group_right() method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? 
-# Fail if multiple series on "one" side, even if there is no matching point on the "many" side -eval_fail instant at 18m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +eval_fail range from 12m to 12m step 1m method:http_requests:rate5m / ignoring(code, foo) group_right(foo) method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? 
-eval_fail instant at 18m method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m - expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left hand-side of the operation: \[\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}, \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}\];many-to-many matching not allowed: matching labels must be unique on one side +eval_fail range from 12m to 12m step 1m method:http_requests:rate5m / on(method) group_right(foo) method_code:http_errors:rate5m + expected_fail_regexp found duplicate series for the match group \{method="get"\} on the left (hand-)?side of the operation( at timestamp 1970-01-01T00:12:00Z)?: \[?\{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(,| and) \{__name__="method:http_requests:rate5m", foo="(bar|bar2)", method="get"\}(\];many-to-many matching not allowed: matching labels must be unique on one side)? clear diff --git a/pkg/streamingpromql/types/limiting_pool.go b/pkg/streamingpromql/types/limiting_pool.go index 4866e258441..0afe558fad1 100644 --- a/pkg/streamingpromql/types/limiting_pool.go +++ b/pkg/streamingpromql/types/limiting_pool.go @@ -23,6 +23,7 @@ const ( HPointSize = uint64(FPointSize * nativeHistogramSampleSizeFactor) VectorSampleSize = uint64(unsafe.Sizeof(promql.Sample{})) // This assumes each sample is a float sample, not a histogram. 
Float64Size = uint64(unsafe.Sizeof(float64(0))) + IntSize = uint64(unsafe.Sizeof(int(0))) BoolSize = uint64(unsafe.Sizeof(false)) HistogramPointerSize = uint64(unsafe.Sizeof((*histogram.FloatHistogram)(nil))) StringSize = uint64(unsafe.Sizeof("")) @@ -73,6 +74,15 @@ var ( nil, ) + IntSlicePool = NewLimitingBucketedPool( + pool.NewBucketedPool(MaxExpectedPointsPerSeries, func(size int) []int { + return make([]int, 0, size) + }), + IntSize, + true, + nil, + ) + BoolSlicePool = NewLimitingBucketedPool( pool.NewBucketedPool(MaxExpectedPointsPerSeries, func(size int) []bool { return make([]bool, 0, size) From d48be6affecb29c2a5ce8c379d0ec6f73da3febe Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 3 Dec 2024 20:09:54 +1100 Subject: [PATCH 17/43] Fix handling of cases where additional labels appear on the "many" side --- .../grouped_vector_vector_binary_operation.go | 20 +++++- .../testdata/ours/binary_operators.test | 67 +++++++++++++++++-- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 3b604cd7a57..1a403fdfc93 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -5,6 +5,7 @@ package binops import ( "context" "fmt" + "slices" "sort" "time" @@ -382,19 +383,32 @@ func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneS } } -// manySideGroupKeyFunc returns a function that extracts a key representing the set of series from the "many" side that will contribute +// manySideGroupKeyFunc returns a function that extracts a key representing the set of labels from the "many" side that will contribute // to the same set of output series. 
func (g *GroupedVectorVectorBinaryOperation) manySideGroupKeyFunc() func(manySideLabels labels.Labels) []byte { buf := make([]byte, 0, 1024) - if g.shouldRemoveMetricNameFromManySide() { + if !g.shouldRemoveMetricNameFromManySide() && len(g.VectorMatching.Include) == 0 { + return func(manySideLabels labels.Labels) []byte { + return manySideLabels.Bytes(buf) // FIXME: it'd be nice if we could avoid copying the bytes here + } + } + + if len(g.VectorMatching.Include) == 0 { return func(manySideLabels labels.Labels) []byte { return manySideLabels.BytesWithoutLabels(buf, labels.MetricName) } } + labelsToRemove := g.VectorMatching.Include + + if g.shouldRemoveMetricNameFromManySide() { + labelsToRemove = append(labelsToRemove, labels.MetricName) + slices.Sort(labelsToRemove) + } + return func(manySideLabels labels.Labels) []byte { - return manySideLabels.Bytes(buf) // FIXME: it'd be nice if we could avoid copying the bytes here + return manySideLabels.BytesWithoutLabels(buf, labelsToRemove...) } } diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index c28cfdb982d..8302eda0cc7 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1191,21 +1191,74 @@ eval instant at 0 right / on(method) group_right(foo) left clear +# Test group_left / group_right where both sides contain the additional labels. 
load 6m - left{method="get", code="500", foo="left-1"} 1 _ 10 - left{method="get", code="404", foo="left-2"} _ 4 20 - right{method="get", code="999", foo="right-1"} 4 8 40 + series_a{method="get", code="500", foo="left-1"} 1 _ 10 + series_a{method="get", code="404", foo="left-2"} _ 4 20 + series_b{method="get", code="999", foo="right-1"} 4 8 40 -eval range from 0 to 6m step 6m left / on(method) group_left(foo, code) right +eval range from 0 to 6m step 6m series_a / on(method) group_left(foo, code) series_b {method="get", code="999", foo="right-1"} 0.25 0.5 -eval range from 0 to 6m step 6m right / on(method) group_right(foo, code) left +eval range from 0 to 6m step 6m series_b / on(method) group_right(foo, code) series_a {method="get", code="999", foo="right-1"} 4 2 -eval_fail instant at 12m left / on(method) group_left(foo, code) right +# Cannot have multiple matches from the "many" side. +eval_fail instant at 12m series_a / on(method) group_left(foo, code) series_b expected_fail_message multiple matches for labels: grouping labels must ensure unique matches -eval_fail instant at 12m right / on(method) group_right(foo, code) left +eval_fail instant at 12m series_b / on(method) group_right(foo, code) series_a + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +# Same thing, but with 'ignoring'. +eval range from 0 to 6m step 6m series_a / ignoring(code, foo) group_left(foo, code) series_b + {method="get", code="999", foo="right-1"} 0.25 0.5 + +eval range from 0 to 6m step 6m series_b / ignoring(code, foo) group_right(foo, code) series_a + {method="get", code="999", foo="right-1"} 4 2 + +# Cannot have multiple matches from the "many" side. 
+eval_fail instant at 12m series_a / ignoring(code, foo) group_left(foo, code) series_b + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +eval_fail instant at 12m series_b / ignoring(code, foo) group_right(foo, code) series_a + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +clear + +# Same as above, but this time where the additional labels are present on the "many" side but not the "one" side. +# (They should be taken from the "one" side.) + +load 6m + series_a{method="get", code="500", foo="left-1"} 1 _ 10 + series_a{method="get", code="404", foo="left-2"} _ 4 20 + series_b{method="get", code="999"} 4 8 40 + +eval range from 0 to 6m step 6m series_a / on(method) group_left(foo, code) series_b + {method="get", code="999"} 0.25 0.5 + +eval range from 0 to 6m step 6m series_b / on(method) group_right(foo, code) series_a + {method="get", code="999"} 4 2 + +# Cannot have multiple matches from the "many" side. +eval_fail instant at 12m series_a / on(method) group_left(foo, code) series_b + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +eval_fail instant at 12m series_b / on(method) group_right(foo, code) series_a + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +# Same thing, but with 'ignoring'. +eval range from 0 to 6m step 6m series_a / ignoring(code, foo) group_left(foo, code) series_b + {method="get", code="999"} 0.25 0.5 + +eval range from 0 to 6m step 6m series_b / ignoring(code, foo) group_right(foo, code) series_a + {method="get", code="999"} 4 2 + +# Cannot have multiple matches from the "many" side. 
+eval_fail instant at 12m series_a / ignoring(code, foo) group_left(foo, code) series_b + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +eval_fail instant at 12m series_b / ignoring(code, foo) group_right(foo, code) series_a expected_fail_message multiple matches for labels: grouping labels must ensure unique matches clear From 4e1f6fa3b872a0cbcf9a0cfe34171b2096455185 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 10:38:09 +1100 Subject: [PATCH 18/43] Return a non-misleading error message when a conflict occurs on the "many" side --- .../grouped_vector_vector_binary_operation.go | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 1a403fdfc93..b0fd041ff1f 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -4,6 +4,7 @@ package binops import ( "context" + "errors" "fmt" "slices" "sort" @@ -20,6 +21,8 @@ import ( "github.com/grafana/mimir/pkg/streamingpromql/types" ) +var errMultipleMatchesOnManySide = errors.New("multiple matches for labels: grouping labels must ensure unique matches") + // GroupedVectorVectorBinaryOperation represents a one-to-many or many-to-one binary operation between instant vectors such as " + group_left " or " - group_right ". // One-to-one binary operations between instant vectors are not supported. type GroupedVectorVectorBinaryOperation struct { @@ -561,7 +564,7 @@ func (g *GroupedVectorVectorBinaryOperation) ensureOneSidePopulated(ctx context. 
return err } - side.mergedData, err = g.mergeSingleSide(data, side.seriesIndices, g.oneSideMetadata, g.oneSideHandedness()) + side.mergedData, err = g.mergeOneSide(data, side.seriesIndices) if err != nil { return err } @@ -622,6 +625,20 @@ func (g *GroupedVectorVectorBinaryOperation) updateOneSidePresence(side *oneSide return nil } +func (g *GroupedVectorVectorBinaryOperation) mergeOneSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int) (types.InstantVectorSeriesData, error) { + merged, conflict, err := operators.MergeSeries(data, sourceSeriesIndices, g.MemoryConsumptionTracker) + + if err != nil { + return types.InstantVectorSeriesData{}, err + } + + if conflict != nil { + return types.InstantVectorSeriesData{}, g.formatConflictError(conflict.FirstConflictingSeriesIndex, conflict.SecondConflictingSeriesIndex, conflict.Description, conflict.Timestamp, g.oneSideMetadata, g.oneSideHandedness()) + } + + return merged, nil +} + func (g *GroupedVectorVectorBinaryOperation) ensureManySidePopulated(ctx context.Context, side *manySide) error { if side.seriesIndices == nil { // Already populated. 
@@ -634,7 +651,7 @@ func (g *GroupedVectorVectorBinaryOperation) ensureManySidePopulated(ctx context return err } - side.mergedData, err = g.mergeSingleSide(data, side.seriesIndices, g.manySideMetadata, g.manySideHandedness()) + side.mergedData, err = g.mergeManySide(data, side.seriesIndices) if err != nil { return err } @@ -645,7 +662,7 @@ func (g *GroupedVectorVectorBinaryOperation) ensureManySidePopulated(ctx context return nil } -func (g *GroupedVectorVectorBinaryOperation) mergeSingleSide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int, sourceSeriesMetadata []types.SeriesMetadata, side string) (types.InstantVectorSeriesData, error) { +func (g *GroupedVectorVectorBinaryOperation) mergeManySide(data []types.InstantVectorSeriesData, sourceSeriesIndices []int) (types.InstantVectorSeriesData, error) { merged, conflict, err := operators.MergeSeries(data, sourceSeriesIndices, g.MemoryConsumptionTracker) if err != nil { @@ -653,16 +670,12 @@ func (g *GroupedVectorVectorBinaryOperation) mergeSingleSide(data []types.Instan } if conflict != nil { - return types.InstantVectorSeriesData{}, g.mergeConflictToError(conflict, sourceSeriesMetadata, side) + return types.InstantVectorSeriesData{}, errMultipleMatchesOnManySide } return merged, nil } -func (g *GroupedVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { - return g.formatConflictError(conflict.FirstConflictingSeriesIndex, conflict.SecondConflictingSeriesIndex, conflict.Description, conflict.Timestamp, sourceSeriesMetadata, side) -} - func (g *GroupedVectorVectorBinaryOperation) formatConflictError( firstConflictingSeriesIndex int, secondConflictingSeriesIndex int, From 1876813921f7cd36494fa5d23b10448cbba347a2 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 10:41:39 +1100 Subject: [PATCH 19/43] Update comments --- pkg/streamingpromql/operators/binops/binary_operation.go | 2 +- 
.../binops/grouped_vector_vector_binary_operation.go | 2 +- .../binops/one_to_one_vector_vector_binary_operation.go | 5 ----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index de5726ee9ae..64a2246993f 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -192,7 +192,7 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV var fPoints []promql.FPoint var hPoints []promql.HPoint - // For one-to-one matching for arithmetic operators, we'll never produce more points than the smaller input side. + // For arithmetic and comparison operators, we'll never produce more points than the smaller input side. // Because floats and histograms can be multiplied together, we use the sum of both the float and histogram points. // We also don't know if the output will be exclusively floats or histograms, so we'll use the same size slice for both. // We only assign the slices once we see the associated point type so it shouldn't be common that we allocate both. diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index b0fd041ff1f..1fd71af91f8 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -471,7 +471,7 @@ func (g *GroupedVectorVectorBinaryOperation) shouldRemoveMetricNameFromManySide( // At present, sortSeries uses a very basic heuristic to guess the best way to sort the output series, but we could make // this more sophisticated in the future. 
func (g *GroupedVectorVectorBinaryOperation) sortSeries(metadata []types.SeriesMetadata, series []*groupedBinaryOperationOutputSeries) { - // Each series from the "many" side is used for at most one output series, so sort the output series so that we buffer as little of the + // Each series from the "many" side is usually used for at most one output series, so sort the output series so that we buffer as little of the // "many" side series as possible. // // This isn't necessarily perfect: it may be that this still requires us to buffer many series from the "many" side if many diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index 337e5f33f83..e4b68c0df59 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -282,11 +282,6 @@ func (b *OneToOneVectorVectorBinaryOperation) sortSeries(metadata []types.Series // If we do this, then in the worst case, we'll have to buffer the whole of the lower cardinality side. // (Compare this with sorting so that we read the lowest cardinality side in order: in the worst case, we'll have // to buffer the whole of the higher cardinality side.) - // - // FIXME: this is reasonable for one-to-one matching, but likely not for one-to-many / many-to-one. - // For one-to-many / many-to-one, it would likely be best to buffer the side used for multiple output series (the "one" side), - // as we'll need to retain these series for multiple output series anyway. 
- var sortInterface sort.Interface if len(b.leftMetadata) < len(b.rightMetadata) { From d35f3557f2d1463a5dfb9c71812b9517a02168aa Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 11:01:49 +1100 Subject: [PATCH 20/43] Fix regression in comparison operation output labels --- pkg/streamingpromql/operators/binops/binary_operation.go | 4 ++-- .../binops/grouped_vector_vector_binary_operation.go | 2 +- .../binops/one_to_one_vector_vector_binary_operation.go | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index 64a2246993f..ba279efeaee 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -51,7 +51,7 @@ func vectorMatchingGroupKeyFunc(vectorMatching parser.VectorMatching) func(label } // vectorMatchingGroupLabelsFunc returns a function that computes the labels of the output group a series belongs to. -func groupLabelsFunc(vectorMatching parser.VectorMatching, returnBool bool) func(labels.Labels) labels.Labels { +func groupLabelsFunc(vectorMatching parser.VectorMatching, op parser.ItemType, returnBool bool) func(labels.Labels) labels.Labels { lb := labels.NewBuilder(labels.EmptyLabels()) if vectorMatching.On { @@ -62,7 +62,7 @@ func groupLabelsFunc(vectorMatching parser.VectorMatching, returnBool bool) func } } - if returnBool { + if op.IsComparisonOperator() && !returnBool { // If this is a comparison operator, we want to retain the metric name, as the comparison acts like a filter. 
return func(l labels.Labels) labels.Labels { lb.Reset(l) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 1fd71af91f8..f617274bf3b 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -685,7 +685,7 @@ func (g *GroupedVectorVectorBinaryOperation) formatConflictError( side string, ) error { firstConflictingSeriesLabels := sourceSeriesMetadata[firstConflictingSeriesIndex].Labels - groupLabels := groupLabelsFunc(g.VectorMatching, g.ReturnBool)(firstConflictingSeriesLabels) + groupLabels := groupLabelsFunc(g.VectorMatching, g.Op, g.ReturnBool)(firstConflictingSeriesLabels) if secondConflictingSeriesIndex == -1 { return fmt.Errorf( diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index e4b68c0df59..70f1204d7bf 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -182,7 +182,7 @@ func (b *OneToOneVectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Con // - a list indicating which series from the left side are needed to compute the output // - a list indicating which series from the right side are needed to compute the output func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetadata, []*oneToOneBinaryOperationOutputSeries, []bool, []bool, error) { - labelsFunc := groupLabelsFunc(b.VectorMatching, b.ReturnBool) + labelsFunc := groupLabelsFunc(b.VectorMatching, b.Op, b.ReturnBool) groupKeyFunc := vectorMatchingGroupKeyFunc(b.VectorMatching) outputSeriesMap := map[string]*oneToOneBinaryOperationOutputSeries{} @@ -403,7 
+403,7 @@ func (b *OneToOneVectorVectorBinaryOperation) mergeSingleSide(data []types.Insta func (b *OneToOneVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { firstConflictingSeriesLabels := sourceSeriesMetadata[conflict.FirstConflictingSeriesIndex].Labels - groupLabels := groupLabelsFunc(b.VectorMatching, b.ReturnBool)(firstConflictingSeriesLabels) + groupLabels := groupLabelsFunc(b.VectorMatching, b.Op, b.ReturnBool)(firstConflictingSeriesLabels) if conflict.SecondConflictingSeriesIndex == -1 { return fmt.Errorf( From 87bbd46a301bcc47dcf35c6185d54d2b0e83663b Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 11:02:41 +1100 Subject: [PATCH 21/43] Disable one-to-one comparison operation cases that fail for known reasons --- .../testdata/ours/binary_operators.test | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 8302eda0cc7..6407462651b 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1269,9 +1269,10 @@ load 6m left_side_b{env="test", pod="a"} 5 6 7 8 right_side{env="test", pod="a"} 2 2 7 7 -eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side - left_side_a{pod="a"} _ 2 _ _ - left_side_b{pod="a"} _ _ 7 _ +# FIXME: MQE currently does not correctly handle this case because it performs filtering after merging input series, whereas we should do it in the other order. 
+#eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side +# left_side_a{pod="a"} _ 2 _ _ +# left_side_b{pod="a"} _ _ 7 _ eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) @@ -1287,8 +1288,9 @@ eval_fail range from 0 to 18m step 6m right_side == bool ignoring(env) {__name__ # left_side_b{pod="a"} _ _ 7 _ # but instead both engines drop the metric names in the output. # This is accepted behaviour: https://github.com/prometheus/prometheus/issues/5326 -eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side - {pod="a"} _ 2 7 _ +# FIXME: MQE currently does not correctly handle this case because it performs filtering after merging input series, whereas we should do it in the other order. +#eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side +# {pod="a"} _ 2 7 _ eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) @@ -1309,16 +1311,16 @@ load 6m right_side{env="test", pod="a"} 2 2 7 7 eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side - expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side - expected_fail_regexp (multiple matches for labels: 
many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side - expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side - expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the right side of the operation) + expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) clear @@ -1328,8 +1330,9 @@ load 6m left{pod="b"} 5 6 7 8 right 2 2 7 7 -eval range from 0 to 18m step 6m left == ignoring(pod) right - left _ 2 7 _ +# FIXME: MQE currently does not correctly handle this case because it performs filtering after merging input series, whereas we should do it in the other order. +# eval range from 0 to 18m step 6m left == ignoring(pod) right +# left _ 2 7 _ clear @@ -1339,9 +1342,10 @@ load 6m left_side_b{env="test", pod="a"} _ _ 7 8 right_side{env="test", pod="a"} 2 2 7 7 -eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side - left_side_a{pod="a"} _ 2 _ _ - left_side_b{pod="a"} _ _ 7 _ +# FIXME: MQE currently does not correctly handle this case. 
+#eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side +# left_side_a{pod="a"} _ 2 _ _ +# left_side_b{pod="a"} _ _ 7 _ eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side {pod="a"} 0 1 1 0 From a53580aece1e07ea5eea0b9ff293fed1d38ad12d Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 11:05:01 +1100 Subject: [PATCH 22/43] Fix linting warnings and simplify `computeOutputSeries()` --- .../grouped_vector_vector_binary_operation.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index f617274bf3b..2ede5ee8867 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -311,21 +311,19 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri // so just create the series labels directly rather than trying to avoid their creation until we know for sure we'll // need them. l := outputSeriesLabelsFunc(g.oneSideMetadata[oneSide.seriesIndices[0]].Labels, s.Labels) - outputSeries, exists := outputSeriesMap[string(l.Bytes(buf))] + _, exists := outputSeriesMap[string(l.Bytes(buf))] if !exists { oneSide.outputSeriesCount++ thisManySide.outputSeriesCount++ - outputSeries = groupedBinaryOperationOutputSeriesWithLabels{ + outputSeriesMap[string(l.Bytes(buf))] = groupedBinaryOperationOutputSeriesWithLabels{ labels: l, outputSeries: &groupedBinaryOperationOutputSeries{ manySide: thisManySide, oneSide: oneSide, }, } - - outputSeriesMap[string(l.Bytes(buf))] = outputSeries } } } @@ -374,7 +372,7 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri // be included in the final output series labels. 
func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneSideLabels labels.Labels) []byte { if len(g.VectorMatching.Include) == 0 { - return func(oneSideLabels labels.Labels) []byte { + return func(_ labels.Labels) []byte { return nil } } @@ -721,17 +719,6 @@ func (g *GroupedVectorVectorBinaryOperation) oneSideHandedness() string { } } -func (g *GroupedVectorVectorBinaryOperation) manySideHandedness() string { - switch g.VectorMatching.Card { - case parser.CardOneToMany: - return "right" - case parser.CardManyToOne: - return "left" - default: - panic(fmt.Sprintf("unsupported cardinality '%v'", g.VectorMatching.Card)) - } -} - func (g *GroupedVectorVectorBinaryOperation) ExpressionPosition() posrange.PositionRange { return g.expressionPosition } From 331cf7050233eecc22591c38418fd12150e9a770 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 11:19:03 +1100 Subject: [PATCH 23/43] Add tests for annotations --- pkg/streamingpromql/engine_test.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index 3074eaab0b6..e6be8a8684c 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -2441,6 +2441,12 @@ func TestBinaryOperationAnnotations(t *testing.T) { testCases[name] = testCase } + cardinalities := map[string]string{ + "one-to-one": "", + "many-to-one": "group_left", + "one-to-many": "group_right", + } + for op, binop := range binaryOperations { expressions := []string{op} @@ -2449,14 +2455,18 @@ func TestBinaryOperationAnnotations(t *testing.T) { } for _, expr := range expressions { - addBinopTestCase(op, fmt.Sprintf("binary %v between two floats", expr), fmt.Sprintf(`metric{type="float"} %v ignoring(type) metric{type="float"}`, expr), "float", "float", true) - addBinopTestCase(op, fmt.Sprintf("binary %v between a float on the left side and a histogram on the right", expr), 
fmt.Sprintf(`metric{type="float"} %v ignoring(type) metric{type="histogram"}`, expr), "float", "histogram", binop.floatHistogramSupported) - addBinopTestCase(op, fmt.Sprintf("binary %v between a scalar on the left side and a histogram on the right", expr), fmt.Sprintf(`2 %v metric{type="histogram"}`, expr), "float", "histogram", binop.floatHistogramSupported) - addBinopTestCase(op, fmt.Sprintf("binary %v between a histogram on the left side and a float on the right", expr), fmt.Sprintf(`metric{type="histogram"} %v ignoring(type) metric{type="float"}`, expr), "histogram", "float", binop.histogramFloatSupported) - addBinopTestCase(op, fmt.Sprintf("binary %v between a histogram on the left side and a scalar on the right", expr), fmt.Sprintf(`metric{type="histogram"} %v 2`, expr), "histogram", "float", binop.histogramFloatSupported) - addBinopTestCase(op, fmt.Sprintf("binary %v between two histograms", expr), fmt.Sprintf(`metric{type="histogram"} %v ignoring(type) metric{type="histogram"}`, expr), "histogram", "histogram", binop.histogramHistogramSupported) + addBinopTestCase(op, fmt.Sprintf("binary %v between a scalar on the left side and a histogram on the right with", expr), fmt.Sprintf(`2 %v metric{type="histogram"}`, expr), "float", "histogram", binop.floatHistogramSupported) + addBinopTestCase(op, fmt.Sprintf("binary %v between a histogram on the left side and a scalar on the right with", expr), fmt.Sprintf(`metric{type="histogram"} %v 2`, expr), "histogram", "float", binop.histogramFloatSupported) + + for cardinalityName, cardinalityModifier := range cardinalities { + addBinopTestCase(op, fmt.Sprintf("binary %v between two floats with %v matching", expr, cardinalityName), fmt.Sprintf(`metric{type="float"} %v ignoring(type) %v metric{type="float"}`, expr, cardinalityModifier), "float", "float", true) + addBinopTestCase(op, fmt.Sprintf("binary %v between a float on the left side and a histogram on the right with %v matching", expr, cardinalityName), 
fmt.Sprintf(`metric{type="float"} %v ignoring(type) %v metric{type="histogram"}`, expr, cardinalityModifier), "float", "histogram", binop.floatHistogramSupported) + addBinopTestCase(op, fmt.Sprintf("binary %v between a histogram on the left side and a float on the right with %v matching", expr, cardinalityName), fmt.Sprintf(`metric{type="histogram"} %v ignoring(type) %v metric{type="float"}`, expr, cardinalityModifier), "histogram", "float", binop.histogramFloatSupported) + addBinopTestCase(op, fmt.Sprintf("binary %v between two histograms with %v matching", expr, cardinalityName), fmt.Sprintf(`metric{type="histogram"} %v ignoring(type) %v metric{type="histogram"}`, expr, cardinalityModifier), "histogram", "histogram", binop.histogramHistogramSupported) + } } } + runAnnotationTests(t, testCases) } From 606a96d8cd1e4d03d218cc373975b61b57b338dd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 11:21:59 +1100 Subject: [PATCH 24/43] Add tests for case where additional labels are not present on series on either side --- .../testdata/ours/binary_operators.test | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 6407462651b..c40ef18c429 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1263,6 +1263,41 @@ eval_fail instant at 12m series_b / ignoring(code, foo) group_right(foo, code) s clear +# Same as above, but this time the additional labels are not present on either side. 
+load 6m + series_a{method="get", code="500"} 1 _ 10 + series_a{method="get", code="404"} _ 4 20 + series_b{method="get", code="999"} 4 8 40 + +eval range from 0 to 6m step 6m series_a / on(method) group_left(foo, code) series_b + {method="get", code="999"} 0.25 0.5 + +eval range from 0 to 6m step 6m series_b / on(method) group_right(foo, code) series_a + {method="get", code="999"} 4 2 + +# Cannot have multiple matches from the "many" side. +eval_fail instant at 12m series_a / on(method) group_left(foo, code) series_b + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +eval_fail instant at 12m series_b / on(method) group_right(foo, code) series_a + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +# Same thing, but with 'ignoring'. +eval range from 0 to 6m step 6m series_a / ignoring(code, foo) group_left(foo, code) series_b + {method="get", code="999"} 0.25 0.5 + +eval range from 0 to 6m step 6m series_b / ignoring(code, foo) group_right(foo, code) series_a + {method="get", code="999"} 4 2 + +# Cannot have multiple matches from the "many" side. +eval_fail instant at 12m series_a / ignoring(code, foo) group_left(foo, code) series_b + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +eval_fail instant at 12m series_b / ignoring(code, foo) group_right(foo, code) series_a + expected_fail_message multiple matches for labels: grouping labels must ensure unique matches + +clear + # Test comparison operator edge cases. 
load 6m left_side_a{env="test", pod="a"} 1 2 3 4 From 8c56b2d11e5949250f024d226e66101f88734fa9 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 13:00:47 +1100 Subject: [PATCH 25/43] Add series sorting test --- ...ped_vector_vector_binary_operation_test.go | 318 ++++++++++++++++++ ...one_vector_vector_binary_operation_test.go | 4 +- 2 files changed, 320 insertions(+), 2 deletions(-) create mode 100644 pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation_test.go diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation_test.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation_test.go new file mode 100644 index 00000000000..55efb38de31 --- /dev/null +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation_test.go @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package binops + +import ( + "context" + "testing" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/parser/posrange" + "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/streamingpromql/limiting" + "github.com/grafana/mimir/pkg/streamingpromql/operators" + "github.com/grafana/mimir/pkg/streamingpromql/testutils" + "github.com/grafana/mimir/pkg/streamingpromql/types" +) + +func TestGroupedVectorVectorBinaryOperation_OutputSeriesSorting(t *testing.T) { + testCases := map[string]struct { + leftSeries []labels.Labels + rightSeries []labels.Labels + + matching parser.VectorMatching + op parser.ItemType + returnBool bool + + expectedOutputSeries []labels.Labels + }{ + "no series on either side": { + leftSeries: []labels.Labels{}, + rightSeries: []labels.Labels{}, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne}, + + expectedOutputSeries: []labels.Labels{}, + }, + + "no series on left side": { + leftSeries: []labels.Labels{}, + 
rightSeries: []labels.Labels{ + labels.FromStrings("series", "a"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne}, + + expectedOutputSeries: []labels.Labels{}, + }, + + "no series on right side": { + leftSeries: []labels.Labels{ + labels.FromStrings("series", "a"), + }, + rightSeries: []labels.Labels{}, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne}, + + expectedOutputSeries: []labels.Labels{}, + }, + + "single series on each side matched and both sides' series are in the same order": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a"), + labels.FromStrings(labels.MetricName, "left", "group", "b"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "a"), + labels.FromStrings(labels.MetricName, "right", "group", "b"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne, MatchingLabels: []string{"group"}, On: true}, + + expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "a"), + labels.FromStrings("group", "b"), + }, + }, + + "single series on each side matched and both sides' series are in different order with group_left": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a"), + labels.FromStrings(labels.MetricName, "left", "group", "b"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b"), + labels.FromStrings(labels.MetricName, "right", "group", "a"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "a"), + labels.FromStrings("group", "b"), + }, + }, + + "single series on each side matched and both sides' series are in different order with group_right": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a"), + labels.FromStrings(labels.MetricName, "left", "group", "b"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b"), + labels.FromStrings(labels.MetricName, "right", "group", "a"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardOneToMany, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. + expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "b"), + labels.FromStrings("group", "a"), + }, + }, + + "multiple series on left side match to a single series on right side with group_left": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx", "2"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx", "2"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b"), + labels.FromStrings(labels.MetricName, "right", "group", "a"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "a", "idx", "1"), + labels.FromStrings("group", "a", "idx", "2"), + labels.FromStrings("group", "a", "idx", "3"), + labels.FromStrings("group", "b", "idx", "3"), + labels.FromStrings("group", "b", "idx", "1"), + labels.FromStrings("group", "b", "idx", "2"), + }, + }, + + "multiple series on left side match to a single series on right side with group_right": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx", "2"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx", "2"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b"), + labels.FromStrings(labels.MetricName, "right", "group", "a"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardOneToMany, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "b"), + labels.FromStrings("group", "a"), + }, + }, + + "single series on left side match to multiple series on right side with group_left": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a"), + labels.FromStrings(labels.MetricName, "left", "group", "b"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx", "1"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx", "2"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx", "3"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx", "3"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx", "1"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx", "2"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "a"), + labels.FromStrings("group", "b"), + }, + }, + + "single series on left side match to multiple series on right side with group_right": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a"), + labels.FromStrings(labels.MetricName, "left", "group", "b"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx", "1"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx", "2"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx", "3"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx", "3"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx", "1"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx", "2"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardOneToMany, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "b", "idx", "1"), + labels.FromStrings("group", "b", "idx", "2"), + labels.FromStrings("group", "b", "idx", "3"), + labels.FromStrings("group", "a", "idx", "3"), + labels.FromStrings("group", "a", "idx", "1"), + labels.FromStrings("group", "a", "idx", "2"), + }, + }, + + "multiple series on left side match to multiple series on right side with group_left": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx_left", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx_left", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx_left", "2"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx_left", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx_left", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx_left", "2"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx_right", "4"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx_right", "5"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx_right", "6"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx_right", "5"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx_right", "4"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx_right", "6"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardManyToOne, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "a", "idx_left", "1"), + labels.FromStrings("group", "b", "idx_left", "3"), + labels.FromStrings("group", "a", "idx_left", "2"), + labels.FromStrings("group", "a", "idx_left", "3"), + labels.FromStrings("group", "b", "idx_left", "1"), + labels.FromStrings("group", "b", "idx_left", "2"), + }, + }, + + "multiple series on left side match to multiple series on right side with group_right": { + leftSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx_left", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx_left", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx_left", "2"), + labels.FromStrings(labels.MetricName, "left", "group", "a", "idx_left", "3"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx_left", "1"), + labels.FromStrings(labels.MetricName, "left", "group", "b", "idx_left", "2"), + }, + rightSeries: []labels.Labels{ + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx_right", "4"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx_right", "5"), + labels.FromStrings(labels.MetricName, "right", "group", "b", "idx_right", "6"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx_right", "5"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx_right", "4"), + labels.FromStrings(labels.MetricName, "right", "group", "a", "idx_right", "6"), + }, + + op: parser.ADD, + matching: parser.VectorMatching{Card: parser.CardOneToMany, MatchingLabels: []string{"group"}, On: true}, + + // Should be sorted to avoid buffering "many" side. 
+ expectedOutputSeries: []labels.Labels{ + labels.FromStrings("group", "b", "idx_right", "4"), + labels.FromStrings("group", "b", "idx_right", "5"), + labels.FromStrings("group", "b", "idx_right", "6"), + labels.FromStrings("group", "a", "idx_right", "5"), + labels.FromStrings("group", "a", "idx_right", "4"), + labels.FromStrings("group", "a", "idx_right", "6"), + }, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + left := &operators.TestOperator{Series: testCase.leftSeries} + right := &operators.TestOperator{Series: testCase.rightSeries} + + o, err := NewGroupedVectorVectorBinaryOperation( + left, + right, + testCase.matching, + testCase.op, + testCase.returnBool, + limiting.NewMemoryConsumptionTracker(0, nil), + nil, + posrange.PositionRange{}, + types.QueryTimeRange{}, + ) + + require.NoError(t, err) + + outputSeries, err := o.SeriesMetadata(context.Background()) + require.NoError(t, err) + + require.Equal(t, testutils.LabelsToSeriesMetadata(testCase.expectedOutputSeries), outputSeries) + }) + } +} diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go index 7faa513b11b..bd697398b3a 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go @@ -25,7 +25,7 @@ import ( // // Most of the edge cases are already covered by TestMergeSeries, so we focus on the logic // unique to OneToOneVectorVectorBinaryOperation: converting conflicts to user-friendly error messages. 
-func TestVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { +func TestOneToOneVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { testCases := map[string]struct { input []types.InstantVectorSeriesData sourceSeriesIndices []int @@ -214,7 +214,7 @@ func TestVectorVectorBinaryOperation_SeriesMerging(t *testing.T) { } } -func TestVectorVectorBinaryOperation_Sorting(t *testing.T) { +func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { testCases := map[string]struct { series []*oneToOneBinaryOperationOutputSeries From 25be40fe811d38d980ce5f955f5959ed493c4685 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 13:00:56 +1100 Subject: [PATCH 26/43] Add provenance comment --- .../operators/binops/grouped_vector_vector_binary_operation.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 2ede5ee8867..ea34dce371d 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -1,4 +1,7 @@ // SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors package binops From 502245fdba859db73dbd6928a9bdf8264a75b3b3 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 13:13:28 +1100 Subject: [PATCH 27/43] Add benchmark --- pkg/streamingpromql/benchmarks/benchmarks.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index fc018f4a2e4..597c2704b37 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ -164,6 +164,9 @@ 
func TestCases(metricSizes []int) []BenchCase { { Expr: "nh_X / 2", }, + { + Expr: "h_X * on(l) group_left() a_X", + }, // Test the case where one side of a binary operation has many more series than the other. { Expr: `a_100{l=~"[13579]."} - b_100`, From 0b8fa34efbb2af39e092bd456cd71764b692b65e Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 13:36:18 +1100 Subject: [PATCH 28/43] Expand comments --- .../grouped_vector_vector_binary_operation.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index ea34dce371d..2fb7c47c511 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -273,17 +273,17 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri // Now iterate through all series on the "many" side and determine all the possible output series, as // well as which series from the "many" side we'll actually need. - outputSeriesMap := map[string]groupedBinaryOperationOutputSeriesWithLabels{} + outputSeriesMap := map[string]groupedBinaryOperationOutputSeriesWithLabels{} // All output series, keyed by their labels. + manySideMap := map[string]*manySide{} // Series from the "many" side, grouped by which output series they'll contribute to. 
+ manySideGroupKeyFunc := g.manySideGroupKeyFunc() + outputSeriesLabelsFunc := g.outputSeriesLabelsFunc() + buf := make([]byte, 0, 1024) + manySideSeriesUsed, err := types.BoolSlicePool.Get(len(g.manySideMetadata), g.MemoryConsumptionTracker) if err != nil { return nil, nil, nil, nil, err } - - manySideMap := map[string]*manySide{} manySideSeriesUsed = manySideSeriesUsed[:len(g.manySideMetadata)] - manySideGroupKeyFunc := g.manySideGroupKeyFunc() - outputSeriesLabelsFunc := g.outputSeriesLabelsFunc() - buf := make([]byte, 0, 1024) for idx, s := range g.manySideMetadata { groupKey := groupKeyFunc(s.Labels) @@ -299,6 +299,7 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri thisManySide, exists := manySideMap[string(manySideGroupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. if exists { + // There is already at least one other "many" side series that contributes to the same set of output series, so just append this series to the same output series. thisManySide.seriesIndices = append(thisManySide.seriesIndices, idx) continue } @@ -331,8 +332,7 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri } } - // Next, go through all the "one" side groups again, and determine which of the "one" side series - // we'll actually need. + // Next, go through all the "one" side groups again, and determine which of the "one" side series we'll actually need. oneSideSeriesUsed, err := types.BoolSlicePool.Get(len(g.oneSideMetadata), g.MemoryConsumptionTracker) if err != nil { return nil, nil, nil, nil, err @@ -348,6 +348,8 @@ func (g *GroupedVectorVectorBinaryOperation) computeOutputSeries() ([]types.Seri // If any part of a group has no output series, then no parts of that group will have output series. 
break } else if thisMatchGroup == nil && len(oneSideGroup) > 1 { + // We only need a matchGroup to detect conflicts between series on the "one" side that have the same grouping labels. + // So if there is only one "one" side, we don't need to bother with this and can skip creating the matchGroup. thisMatchGroup = &matchGroup{oneSideCount: len(oneSideGroup)} } From 7e4113af1a83886a1b60b8b8a488737a63f561cd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 13:36:27 +1100 Subject: [PATCH 29/43] Fix typo in test names --- pkg/streamingpromql/engine_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index e6be8a8684c..acc8885749e 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -2455,8 +2455,8 @@ func TestBinaryOperationAnnotations(t *testing.T) { } for _, expr := range expressions { - addBinopTestCase(op, fmt.Sprintf("binary %v between a scalar on the left side and a histogram on the right with", expr), fmt.Sprintf(`2 %v metric{type="histogram"}`, expr), "float", "histogram", binop.floatHistogramSupported) - addBinopTestCase(op, fmt.Sprintf("binary %v between a histogram on the left side and a scalar on the right with", expr), fmt.Sprintf(`metric{type="histogram"} %v 2`, expr), "histogram", "float", binop.histogramFloatSupported) + addBinopTestCase(op, fmt.Sprintf("binary %v between a scalar on the left side and a histogram on the right", expr), fmt.Sprintf(`2 %v metric{type="histogram"}`, expr), "float", "histogram", binop.floatHistogramSupported) + addBinopTestCase(op, fmt.Sprintf("binary %v between a histogram on the left side and a scalar on the right", expr), fmt.Sprintf(`metric{type="histogram"} %v 2`, expr), "histogram", "float", binop.histogramFloatSupported) for cardinalityName, cardinalityModifier := range cardinalities { addBinopTestCase(op, fmt.Sprintf("binary %v between two floats with %v matching", 
expr, cardinalityName), fmt.Sprintf(`metric{type="float"} %v ignoring(type) %v metric{type="float"}`, expr, cardinalityModifier), "float", "float", true) From c41b575a806dd928b95aa6f47bca56d6f2876613 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 14:22:07 +1100 Subject: [PATCH 30/43] Add test cases with label names in different orders --- .../grouped_vector_vector_binary_operation.go | 2 + .../testdata/ours/binary_operators.test | 60 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 2fb7c47c511..cc2cfd27fd7 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -158,6 +158,8 @@ func NewGroupedVectorVectorBinaryOperation( return nil, fmt.Errorf("unsupported cardinality '%v'", g.VectorMatching.Card) } + slices.Sort(g.VectorMatching.Include) + return g, nil } diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index c40ef18c429..ef4426d0b5a 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1298,6 +1298,66 @@ eval_fail instant at 12m series_b / ignoring(code, foo) group_right(foo, code) s clear +# Test group_left and group_right with label names in different orders in 'on' and 'ignoring'. 
+load 6m + left_side{env="test", pod="a", group="foo"} 1 2 3 + left_side{env="test", pod="b", group="bar"} 4 5 6 + left_side{env="prod", pod="a", group="baz"} 7 8 9 + right_side{env="test", pod="a", group="bar"} 10 20 30 + right_side{env="test", pod="b", group="baz"} 40 50 60 + right_side{env="prod", pod="a", group="foo"} 70 80 90 + +eval range from 0 to 18m step 6m left_side - on(env, pod) group_left() right_side + {env="prod", pod="a", group="baz"} -63 -72 -81 + {env="test", pod="a", group="foo"} -9 -18 -27 + {env="test", pod="b", group="bar"} -36 -45 -54 + +# Test the same thing again with the grouping labels in a different order. +# (The implementation of binary operations relies on grouping labels being sorted in some places, +# so this test exists to ensure this is done correctly.) +eval range from 0 to 18m step 6m left_side - on(pod, env) group_left() right_side + {env="prod", pod="a", group="baz"} -63 -72 -81 + {env="test", pod="a", group="foo"} -9 -18 -27 + {env="test", pod="b", group="bar"} -36 -45 -54 + +eval range from 0 to 18m step 6m left_side - ignoring(env, pod) group_left() right_side + {env="prod", pod="a", group="baz"} -33 -42 -51 + {env="test", pod="b", group="bar"} -6 -15 -24 + {env="test", pod="a", group="foo"} -69 -78 -87 + +# Test the same thing again with the grouping labels in a different order. +# (The implementation of binary operations relies on grouping labels being sorted in some places, +# so this test exists to ensure this is done correctly.) +eval range from 0 to 18m step 6m left_side - ignoring(pod, env) group_left() right_side + {env="prod", pod="a", group="baz"} -33 -42 -51 + {env="test", pod="b", group="bar"} -6 -15 -24 + {env="test", pod="a", group="foo"} -69 -78 -87 + +# Same thing, but with the additional labels given in group_left / group_right in different orders. 
+load 6m + many_side{group="a", idx="x"} 1 2 3 + many_side{group="b", idx="y"} 4 5 6 + one_side{group="a", env="test", pod="1"} 10 20 30 + one_side{group="b", env="prod", pod="2"} 100 110 120 + +eval range from 0 to 18m step 6m many_side - on(group) group_left(env, pod) one_side + {group="a", env="test", pod="1", idx="x"} -9 -18 -27 + {group="b", env="prod", pod="2", idx="y"} -96 -105 -114 + +eval range from 0 to 18m step 6m many_side - on(group) group_left(pod, env) one_side + {group="a", env="test", pod="1", idx="x"} -9 -18 -27 + {group="b", env="prod", pod="2", idx="y"} -96 -105 -114 + +eval range from 0 to 18m step 6m one_side - on(group) group_right(env, pod) many_side + {group="a", env="test", pod="1", idx="x"} 9 18 27 + {group="b", env="prod", pod="2", idx="y"} 96 105 114 + +eval range from 0 to 18m step 6m one_side - on(group) group_right(pod, env) many_side + {group="a", env="test", pod="1", idx="x"} 9 18 27 + {group="b", env="prod", pod="2", idx="y"} 96 105 114 + +clear + # Test comparison operator edge cases. load 6m left_side_a{env="test", pod="a"} 1 2 3 4 From 230adfe9bd5ea1377e9758124e9321946809cf20 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 14:33:00 +1100 Subject: [PATCH 31/43] Add some test cases with native histograms --- .../testdata/ours/binary_operators.test | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index ef4426d0b5a..cb33558432e 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1358,6 +1358,39 @@ eval range from 0 to 18m step 6m one_side - on(group) group_right(pod, env) many clear +# Binary operations on native histograms with group_left. +# We don't bother testing all the combinations of label matching, group_right etc. given that's covered by floats above. 
+load 5m + first_histogram{job="test"} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} + second_histogram{job="test"} {{schema:0 sum:10 count:6 buckets:[1 2 1]}} + metric{job="test"} 2 + +eval instant at 0 first_histogram + on(job) group_left second_histogram + {job="test"} {{schema:0 sum:15 count:10 buckets:[2 4 2]}} + +eval instant at 0 second_histogram - on(job) group_left first_histogram + {job="test"} {{schema:0 sum:5 count:2 buckets:[0 0 0]}} + +# Cannot multiply two histograms +eval_info instant at 0 first_histogram * on(job) group_left second_histogram + +# Cannot divide a histogram by a histogram +eval_info instant at 0 first_histogram / on(job) group_left second_histogram + +# Histogram multiplied by float +eval instant at 0 first_histogram * on(job) group_left metric + {job="test"} {{schema:0 count:8 sum:10 buckets:[2 4 2]}} + +# Works in either order +eval instant at 0 metric * on(job) group_left first_histogram + {job="test"} {{schema:0 count:8 sum:10 buckets:[2 4 2]}} + +# Histogram divide by float +eval instant at 0 first_histogram / on(job) group_left metric + {job="test"} {{schema:0 count:2 sum:2.5 buckets:[0.5 1 0.5]}} + +clear + # Test comparison operator edge cases. 
load 6m left_side_a{env="test", pod="a"} 1 2 3 4 From a6d876336c7a449c4c0de52c2ba71e932cfe211b Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Dec 2024 14:44:44 +1100 Subject: [PATCH 32/43] Ensure buffers passed to labels.Labels.Bytes(), BytesWithLabels() and BytesWithoutLabels() are reused if resized --- .../operators/aggregations/aggregation.go | 6 ++++-- .../operators/binops/binary_operation.go | 9 ++++++--- .../binops/grouped_vector_vector_binary_operation.go | 12 ++++++++---- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pkg/streamingpromql/operators/aggregations/aggregation.go b/pkg/streamingpromql/operators/aggregations/aggregation.go index 6575aba9629..407cdf134cb 100644 --- a/pkg/streamingpromql/operators/aggregations/aggregation.go +++ b/pkg/streamingpromql/operators/aggregations/aggregation.go @@ -212,7 +212,8 @@ func (a *Aggregation) groupingWithoutLabelsSeriesToGroupFuncs() (seriesToGroupLa // Why 1024 bytes? It's what labels.Labels.String() uses as a buffer size, so we use that as a sensible starting point too. b := make([]byte, 0, 1024) bytesFunc := func(l labels.Labels) []byte { - return l.BytesWithoutLabels(b, a.Grouping...) // NewAggregation will add __name__ to Grouping for 'without' aggregations, so no need to add it here. + b = l.BytesWithoutLabels(b, a.Grouping...) // NewAggregation will add __name__ to Grouping for 'without' aggregations, so no need to add it here. + return b } lb := labels.NewBuilder(labels.EmptyLabels()) @@ -231,7 +232,8 @@ func (a *Aggregation) groupingByLabelsSeriesToGroupFuncs() (seriesToGroupLabelsB // Why 1024 bytes? It's what labels.Labels.String() uses as a buffer size, so we use that as a sensible starting point too. b := make([]byte, 0, 1024) bytesFunc := func(l labels.Labels) []byte { - return l.BytesWithLabels(b, a.Grouping...) + b = l.BytesWithLabels(b, a.Grouping...) 
+ return b } lb := labels.NewBuilder(labels.EmptyLabels()) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index ba279efeaee..78cb27bc69e 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -29,14 +29,16 @@ func vectorMatchingGroupKeyFunc(vectorMatching parser.VectorMatching) func(label slices.Sort(vectorMatching.MatchingLabels) return func(l labels.Labels) []byte { - return l.BytesWithLabels(buf, vectorMatching.MatchingLabels...) + buf = l.BytesWithLabels(buf, vectorMatching.MatchingLabels...) + return buf } } if len(vectorMatching.MatchingLabels) == 0 { // Fast path for common case for expressions like "a + b" with no 'on' or 'without' labels. return func(l labels.Labels) []byte { - return l.BytesWithoutLabels(buf, labels.MetricName) + buf = l.BytesWithoutLabels(buf, labels.MetricName) + return buf } } @@ -46,7 +48,8 @@ func vectorMatchingGroupKeyFunc(vectorMatching parser.VectorMatching) func(label slices.Sort(lbls) return func(l labels.Labels) []byte { - return l.BytesWithoutLabels(buf, lbls...) + buf = l.BytesWithoutLabels(buf, lbls...) + return buf } } diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index cc2cfd27fd7..15a56d8d708 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -387,7 +387,8 @@ func (g *GroupedVectorVectorBinaryOperation) additionalLabelsKeyFunc() func(oneS buf := make([]byte, 0, 1024) return func(oneSideLabels labels.Labels) []byte { - return oneSideLabels.BytesWithLabels(buf, g.VectorMatching.Include...) + buf = oneSideLabels.BytesWithLabels(buf, g.VectorMatching.Include...) 
+ return buf } } @@ -398,13 +399,15 @@ func (g *GroupedVectorVectorBinaryOperation) manySideGroupKeyFunc() func(manySid if !g.shouldRemoveMetricNameFromManySide() && len(g.VectorMatching.Include) == 0 { return func(manySideLabels labels.Labels) []byte { - return manySideLabels.Bytes(buf) // FIXME: it'd be nice if we could avoid copying the bytes here + buf = manySideLabels.Bytes(buf) // FIXME: it'd be nice if we could avoid Bytes() copying the slice here + return buf } } if len(g.VectorMatching.Include) == 0 { return func(manySideLabels labels.Labels) []byte { - return manySideLabels.BytesWithoutLabels(buf, labels.MetricName) + buf = manySideLabels.BytesWithoutLabels(buf, labels.MetricName) + return buf } } @@ -416,7 +419,8 @@ func (g *GroupedVectorVectorBinaryOperation) manySideGroupKeyFunc() func(manySid } return func(manySideLabels labels.Labels) []byte { - return manySideLabels.BytesWithoutLabels(buf, labelsToRemove...) + buf = manySideLabels.BytesWithoutLabels(buf, labelsToRemove...) 
+ return buf } } From e46702346f7e9ca46db1d299b4020a23c6e435f2 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:08:33 +1100 Subject: [PATCH 33/43] Address PR feedback: use minimal number of points for binary operation slice --- .../operators/binops/binary_operation.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index 78cb27bc69e..cf8518031ce 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -202,7 +202,7 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV canReturnLeftFPointSlice, canReturnLeftHPointSlice, canReturnRightFPointSlice, canReturnRightHPointSlice := takeOwnershipOfLeft, takeOwnershipOfLeft, takeOwnershipOfRight, takeOwnershipOfRight leftPoints := len(left.Floats) + len(left.Histograms) rightPoints := len(right.Floats) + len(right.Histograms) - maxPoints := max(leftPoints, rightPoints) + minPoints := min(leftPoints, rightPoints) // We cannot re-use any slices when the series contain a mix of floats and histograms. // Consider the following, where f is a float at a particular step, and h is a histogram. 
@@ -219,13 +219,13 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV mixedPoints := (len(left.Floats) > 0 && len(left.Histograms) > 0) || (len(right.Floats) > 0 && len(right.Histograms) > 0) prepareFSlice := func() error { - if !mixedPoints && maxPoints <= cap(left.Floats) && cap(left.Floats) < cap(right.Floats) && takeOwnershipOfLeft { + if !mixedPoints && minPoints <= cap(left.Floats) && cap(left.Floats) < cap(right.Floats) && takeOwnershipOfLeft { // Can fit output in left side, the left side is smaller than the right, and we're allowed to modify it canReturnLeftFPointSlice = false fPoints = left.Floats[:0] return nil } - if !mixedPoints && maxPoints <= cap(right.Floats) && takeOwnershipOfRight { + if !mixedPoints && minPoints <= cap(right.Floats) && takeOwnershipOfRight { // Can otherwise fit in the right side and we're allowed to modify it canReturnRightFPointSlice = false fPoints = right.Floats[:0] @@ -233,20 +233,20 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV } // Either we have mixed points or we can't fit in either left or right side, so create a new slice var err error - if fPoints, err = types.FPointSlicePool.Get(maxPoints, e.memoryConsumptionTracker); err != nil { + if fPoints, err = types.FPointSlicePool.Get(minPoints, e.memoryConsumptionTracker); err != nil { return err } return nil } prepareHSlice := func() error { - if !mixedPoints && maxPoints <= cap(left.Histograms) && cap(left.Histograms) < cap(right.Histograms) && takeOwnershipOfLeft { + if !mixedPoints && minPoints <= cap(left.Histograms) && cap(left.Histograms) < cap(right.Histograms) && takeOwnershipOfLeft { // Can fit output in left side, the left side is smaller than the right, and we're allowed to modify it canReturnLeftHPointSlice = false hPoints = left.Histograms[:0] return nil } - if !mixedPoints && maxPoints <= cap(right.Histograms) && takeOwnershipOfRight { + if !mixedPoints && minPoints <= cap(right.Histograms) 
&& takeOwnershipOfRight { // Can otherwise fit in the right side and we're allowed to modify it canReturnRightHPointSlice = false hPoints = right.Histograms[:0] @@ -254,7 +254,7 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV } // Either we have mixed points or we can't fit in either left or right side, so create a new slice var err error - if hPoints, err = types.HPointSlicePool.Get(maxPoints, e.memoryConsumptionTracker); err != nil { + if hPoints, err = types.HPointSlicePool.Get(minPoints, e.memoryConsumptionTracker); err != nil { return err } return nil From aabd0935c30abe963a91620b6a5a081ef08450ef Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:17:39 +1100 Subject: [PATCH 34/43] Address PR feedback: rename `latestSeries` to `latestSeriesIndex` --- .../grouped_vector_vector_binary_operation.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 15a56d8d708..ee74750e49a 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -77,10 +77,10 @@ type manySide struct { outputSeriesCount int } -// latestSeries returns the index of the last series from this side. +// latestSeriesIndex returns the index of the last series from this side. // // It assumes that seriesIndices is sorted in ascending order. -func (s manySide) latestSeries() int { +func (s manySide) latestSeriesIndex() int { return s.seriesIndices[len(s.seriesIndices)-1] } @@ -95,10 +95,10 @@ type oneSide struct { matchGroup *matchGroup // nil if this is the only "one" side in this group. } -// latestSeries returns the index of the last series from this side. +// latestSeriesIndex returns the index of the last series from this side. 
// // It assumes that seriesIndices is sorted in ascending order. -func (s oneSide) latestSeries() int { +func (s oneSide) latestSeriesIndex() int { return s.seriesIndices[len(s.seriesIndices)-1] } @@ -502,13 +502,13 @@ func (s favourManySideSorter) Len() int { } func (s favourManySideSorter) Less(i, j int) bool { - iMany := s.series[i].manySide.latestSeries() - jMany := s.series[j].manySide.latestSeries() + iMany := s.series[i].manySide.latestSeriesIndex() + jMany := s.series[j].manySide.latestSeriesIndex() if iMany != jMany { return iMany < jMany } - return s.series[i].oneSide.latestSeries() < s.series[j].oneSide.latestSeries() + return s.series[i].oneSide.latestSeriesIndex() < s.series[j].oneSide.latestSeriesIndex() } func (s favourManySideSorter) Swap(i, j int) { From 8c8f782fb76543dcdcb80e3126ca266da53bf419 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:19:36 +1100 Subject: [PATCH 35/43] Address PR feedback: add docstring for `updatePresence` --- .../binops/grouped_vector_vector_binary_operation.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index ee74750e49a..206044c3051 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -110,6 +110,10 @@ type matchGroup struct { oneSideCount int } +// updatePresence records the presence of a sample from the series with index seriesIdx at the timestamp with index timestampIdx. +// +// If there is already a sample present from another series at the same timestamp, updatePresence returns that series' index, or +// -1 if there was no sample present at the same timestamp from another series. 
func (g *matchGroup) updatePresence(timestampIdx int64, seriesIdx int) int { if existing := g.presence[timestampIdx]; existing != -1 { return existing From 7aa29ee8293dd29bf1c36f1a1038a0a9ed91e466 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:28:45 +1100 Subject: [PATCH 36/43] Address PR feedback: try to reuse slices in more cases --- .../operators/binops/binary_operation.go | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index cf8518031ce..dc748f539cc 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -219,19 +219,25 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV mixedPoints := (len(left.Floats) > 0 && len(left.Histograms) > 0) || (len(right.Floats) > 0 && len(right.Histograms) > 0) prepareFSlice := func() error { - if !mixedPoints && minPoints <= cap(left.Floats) && cap(left.Floats) < cap(right.Floats) && takeOwnershipOfLeft { - // Can fit output in left side, the left side is smaller than the right, and we're allowed to modify it + canFitInLeftSide := minPoints <= cap(left.Floats) + leftSideIsSmaller := cap(left.Floats) < cap(right.Floats) + safeToReuseLeftSide := !mixedPoints && canFitInLeftSide && takeOwnershipOfLeft + canFitInRightSide := minPoints <= cap(right.Floats) + safeToReuseRightSide := !mixedPoints && canFitInRightSide && takeOwnershipOfRight + + if safeToReuseLeftSide && (leftSideIsSmaller || !safeToReuseRightSide) { canReturnLeftFPointSlice = false fPoints = left.Floats[:0] return nil } - if !mixedPoints && minPoints <= cap(right.Floats) && takeOwnershipOfRight { - // Can otherwise fit in the right side and we're allowed to modify it + + if safeToReuseRightSide { canReturnRightFPointSlice = false fPoints = right.Floats[:0] return nil } - // Either we 
have mixed points or we can't fit in either left or right side, so create a new slice + + // We can't reuse either existing slice, so create a new one. var err error if fPoints, err = types.FPointSlicePool.Get(minPoints, e.memoryConsumptionTracker); err != nil { return err @@ -240,19 +246,25 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV } prepareHSlice := func() error { - if !mixedPoints && minPoints <= cap(left.Histograms) && cap(left.Histograms) < cap(right.Histograms) && takeOwnershipOfLeft { - // Can fit output in left side, the left side is smaller than the right, and we're allowed to modify it + canFitInLeftSide := minPoints <= cap(left.Histograms) + leftSideIsSmaller := cap(left.Histograms) < cap(right.Histograms) + safeToReuseLeftSide := !mixedPoints && canFitInLeftSide && takeOwnershipOfLeft + canFitInRightSide := minPoints <= cap(right.Histograms) + safeToReuseRightSide := !mixedPoints && canFitInRightSide && takeOwnershipOfRight + + if safeToReuseLeftSide && (leftSideIsSmaller || !safeToReuseRightSide) { canReturnLeftHPointSlice = false hPoints = left.Histograms[:0] return nil } - if !mixedPoints && minPoints <= cap(right.Histograms) && takeOwnershipOfRight { - // Can otherwise fit in the right side and we're allowed to modify it + + if safeToReuseRightSide { canReturnRightHPointSlice = false hPoints = right.Histograms[:0] return nil } - // Either we have mixed points or we can't fit in either left or right side, so create a new slice + + // We can't reuse either existing slice, so create a new one. 
var err error if hPoints, err = types.HPointSlicePool.Get(minPoints, e.memoryConsumptionTracker); err != nil { return err From 153f66f32742512c14b01bbef137d0659e292b29 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:33:33 +1100 Subject: [PATCH 37/43] Run mixed metrics tests in parallel --- pkg/streamingpromql/engine_test.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index acc8885749e..c3711c0983e 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -2665,6 +2665,8 @@ func runMixedMetricsTests(t *testing.T, expressions []string, pointsPerSeries in } func TestCompareVariousMixedMetricsFunctions(t *testing.T) { + t.Parallel() + labelsToUse, pointsPerSeries, seriesData := getMixedMetricsForTests(true) // Test each label individually to catch edge cases in with single series @@ -2697,6 +2699,8 @@ func TestCompareVariousMixedMetricsFunctions(t *testing.T) { } func TestCompareVariousMixedMetricsBinaryOperations(t *testing.T) { + t.Parallel() + labelsToUse, pointsPerSeries, seriesData := getMixedMetricsForTests(false) // Generate combinations of 2 and 3 labels. 
(e.g., "a,b", "e,f", "c,d,e" etc) @@ -2737,6 +2741,8 @@ func TestCompareVariousMixedMetricsBinaryOperations(t *testing.T) { } func TestCompareVariousMixedMetricsAggregations(t *testing.T) { + t.Parallel() + labelsToUse, pointsPerSeries, seriesData := getMixedMetricsForTests(true) // Test each label individually to catch edge cases in with single series @@ -2765,6 +2771,8 @@ func TestCompareVariousMixedMetricsAggregations(t *testing.T) { } func TestCompareVariousMixedMetricsVectorSelectors(t *testing.T) { + t.Parallel() + labelsToUse, pointsPerSeries, seriesData := getMixedMetricsForTests(true) // Test each label individually to catch edge cases in with single series @@ -2790,6 +2798,8 @@ func TestCompareVariousMixedMetricsVectorSelectors(t *testing.T) { } func TestCompareVariousMixedMetricsComparisonOps(t *testing.T) { + t.Parallel() + labelsToUse, pointsPerSeries, seriesData := getMixedMetricsForTests(true) // Test each label individually to catch edge cases in with single series From 3a262bf20c39964f5a166b2a4da9958aa97443ce Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:41:53 +1100 Subject: [PATCH 38/43] Add `group_left` to mixed metrics tests --- pkg/streamingpromql/engine_test.go | 34 +++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index c3711c0983e..629e7ec3409 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -2711,30 +2711,44 @@ func TestCompareVariousMixedMetricsBinaryOperations(t *testing.T) { for _, labels := range labelCombinations { for _, op := range []string{"+", "-", "*", "/", "and", "unless", "or"} { - binaryExpr := fmt.Sprintf(`series{label="%s"}`, labels[0]) + expr := fmt.Sprintf(`series{label="%s"}`, labels[0]) for _, label := range labels[1:] { - binaryExpr += fmt.Sprintf(` %s series{label="%s"}`, op, label) + expr += fmt.Sprintf(` %s series{label="%s"}`, op, label) 
} - expressions = append(expressions, binaryExpr) + expressions = append(expressions, expr) // Same thing again, this time with grouping. - binaryExpr = fmt.Sprintf(`series{label="%s"}`, labels[0]) + expr = fmt.Sprintf(`series{label="%s"}`, labels[0]) for i, label := range labels[1:] { - binaryExpr += fmt.Sprintf(` %s ignoring (label, group) `, op) + expr += fmt.Sprintf(` %s ignoring (label, group) `, op) if i == 0 && len(labels) > 2 { - binaryExpr += "(" + expr += "(" } - binaryExpr += fmt.Sprintf(`{label="%s"}`, label) + expr += fmt.Sprintf(`{label="%s"}`, label) } - if len(labels) > 2 { - binaryExpr += ")" + expr += ")" + } + expressions = append(expressions, expr) + } + + // Similar thing again, this time with group_left + expr := fmt.Sprintf(`series{label="%s"}`, labels[0]) + for i, label := range labels[1:] { + expr += ` * on(group) group_left(label) ` + + if i == 0 && len(labels) > 2 { + expr += "(" } - expressions = append(expressions, binaryExpr) + expr += fmt.Sprintf(`{label="%s"}`, label) + } + if len(labels) > 2 { + expr += ")" } + expressions = append(expressions, expr) } runMixedMetricsTests(t, expressions, pointsPerSeries, seriesData, false) From d2e2a1c6585e9e31c1ec20bf4361bbcfeb3e82e4 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Tue, 10 Dec 2024 20:50:52 +1100 Subject: [PATCH 39/43] Address PR feedback: refactor `vectorVectorBinaryOperationEvaluator.computeResult` to reduce nesting --- .../operators/binops/binary_operation.go | 97 ++++++++++++------- 1 file changed, 60 insertions(+), 37 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index dc748f539cc..b499ad4f387 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -278,49 +278,72 @@ func (e *vectorVectorBinaryOperationEvaluator) computeResult(left types.InstantV // Get first sample from left and right lT, lF, lH, lOk 
:= e.leftIterator.Next() rT, rF, rH, rOk := e.rightIterator.Next() - // Continue iterating until we exhaust either the LHS or RHS - // denoted by lOk or rOk being false. - for lOk && rOk { - if lT == rT { - // We have samples on both sides at this timestep. - resultFloat, resultHist, keep, valid, err := e.opFunc(lF, rF, lH, rH) - if err != nil { - err = functions.NativeHistogramErrorToAnnotation(err, e.emitAnnotation) - if err != nil { - return types.InstantVectorSeriesData{}, err - } + appendHistogram := func(t int64, h *histogram.FloatHistogram) error { + if hPoints == nil { + if err := prepareHSlice(); err != nil { + return err + } + } + + hPoints = append(hPoints, promql.HPoint{ + H: h, + T: t, + }) - // Else: error was converted to an annotation, continue without emitting a sample here. - keep = false + return nil + } + + appendFloat := func(t int64, f float64) error { + if fPoints == nil { + if err := prepareFSlice(); err != nil { + return err } + } + + fPoints = append(fPoints, promql.FPoint{ + F: f, + T: t, + }) + + return nil + } + + appendNextSample := func() error { + resultFloat, resultHist, keep, valid, err := e.opFunc(lF, rF, lH, rH) - if !valid { - emitIncompatibleTypesAnnotation(e.annotations, e.op, lH, rH, e.expressionPosition) + if err != nil { + err = functions.NativeHistogramErrorToAnnotation(err, e.emitAnnotation) + if err != nil { + return err } - if keep { - if resultHist != nil { - if hPoints == nil { - if err = prepareHSlice(); err != nil { - return types.InstantVectorSeriesData{}, err - } - } - hPoints = append(hPoints, promql.HPoint{ - H: resultHist, - T: lT, - }) - } else { - if fPoints == nil { - if err = prepareFSlice(); err != nil { - return types.InstantVectorSeriesData{}, err - } - } - fPoints = append(fPoints, promql.FPoint{ - F: resultFloat, - T: lT, - }) - } + // Else: error was converted to an annotation, continue without emitting a sample here. 
+ keep = false + } + + if !valid { + emitIncompatibleTypesAnnotation(e.annotations, e.op, lH, rH, e.expressionPosition) + } + + if !keep { + return nil + } + + if resultHist != nil { + return appendHistogram(lT, resultHist) + } + + return appendFloat(lT, resultFloat) + } + + // Continue iterating until we exhaust either the LHS or RHS + // denoted by lOk or rOk being false. + for lOk && rOk { + if lT == rT { + // We have samples on both sides at this timestep. + if err := appendNextSample(); err != nil { + return types.InstantVectorSeriesData{}, err } } From 2f22459509b98467df66583334b3af4325e88249 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 13 Dec 2024 14:39:11 +1100 Subject: [PATCH 40/43] Add early filtering test case for group_right --- pkg/streamingpromql/testdata/ours/binary_operators.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index cb33558432e..59894e3900c 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1462,6 +1462,9 @@ load 6m # eval range from 0 to 18m step 6m left == ignoring(pod) right # left _ 2 7 _ +eval_fail range from 0 to 18m step 6m left == ignoring(pod) group_right right + expected_fail_regexp found duplicate series for the match group .* on the left (hand-)?side of the operation + clear # Same thing as above, but with no overlapping samples on left side. 
From f8b79e8e5c12d410d7137b66c833e6dc3467de74 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 13 Dec 2024 16:05:52 +1100 Subject: [PATCH 41/43] Fix issue where comparison operations without the bool modifier would return incorrect results if the left side contained series with different metric names --- .../operators/binops/binary_operation.go | 39 +++ .../grouped_vector_vector_binary_operation.go | 43 +--- ...e_to_one_vector_vector_binary_operation.go | 239 +++++++++++------- ...one_vector_vector_binary_operation_test.go | 100 ++++---- pkg/streamingpromql/query.go | 2 +- .../testdata/ours/binary_operators.test | 7 +- 6 files changed, 247 insertions(+), 183 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/binary_operation.go b/pkg/streamingpromql/operators/binops/binary_operation.go index b499ad4f387..3bc3a82e2f6 100644 --- a/pkg/streamingpromql/operators/binops/binary_operation.go +++ b/pkg/streamingpromql/operators/binops/binary_operation.go @@ -5,9 +5,11 @@ package binops import ( "fmt" "slices" + "time" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" @@ -82,6 +84,43 @@ func groupLabelsFunc(vectorMatching parser.VectorMatching, op parser.ItemType, r } } +func formatConflictError( + firstConflictingSeriesIndex int, + secondConflictingSeriesIndex int, + description string, + ts int64, + sourceSeriesMetadata []types.SeriesMetadata, + side string, + vectorMatching parser.VectorMatching, + op parser.ItemType, + returnBool bool, +) error { + firstConflictingSeriesLabels := sourceSeriesMetadata[firstConflictingSeriesIndex].Labels + groupLabels := groupLabelsFunc(vectorMatching, op, returnBool)(firstConflictingSeriesLabels) + + if secondConflictingSeriesIndex == -1 { + return fmt.Errorf( + "found %s 
for the match group %s on the %s side of the operation at timestamp %s", + description, + groupLabels, + side, + timestamp.Time(ts).Format(time.RFC3339Nano), + ) + } + + secondConflictingSeriesLabels := sourceSeriesMetadata[secondConflictingSeriesIndex].Labels + + return fmt.Errorf( + "found %s for the match group %s on the %s side of the operation at timestamp %s: %s and %s", + description, + groupLabels, + side, + timestamp.Time(ts).Format(time.RFC3339Nano), + firstConflictingSeriesLabels, + secondConflictingSeriesLabels, + ) +} + // filterSeries returns data filtered based on the mask provided. // // mask is expected to contain one value for each time step in the query time range. diff --git a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go index 206044c3051..2bdda7d501a 100644 --- a/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/grouped_vector_vector_binary_operation.go @@ -11,10 +11,8 @@ import ( "fmt" "slices" "sort" - "time" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" @@ -618,13 +616,13 @@ func (g *GroupedVectorVectorBinaryOperation) updateOneSidePresence(side *oneSide for _, p := range seriesData.Floats { if otherSeriesIdx := matchGroup.updatePresence(g.timeRange.PointIndex(p.T), seriesIdx); otherSeriesIdx != -1 { - return g.formatConflictError(otherSeriesIdx, seriesIdx, "duplicate series", p.T, g.oneSideMetadata, g.oneSideHandedness()) + return formatConflictError(otherSeriesIdx, seriesIdx, "duplicate series", p.T, g.oneSideMetadata, g.oneSideHandedness(), g.VectorMatching, g.Op, g.ReturnBool) } } for _, p := range seriesData.Histograms { if otherSeriesIdx := 
matchGroup.updatePresence(g.timeRange.PointIndex(p.T), seriesIdx); otherSeriesIdx != -1 { - return g.formatConflictError(otherSeriesIdx, seriesIdx, "duplicate series", p.T, g.oneSideMetadata, g.oneSideHandedness()) + return formatConflictError(otherSeriesIdx, seriesIdx, "duplicate series", p.T, g.oneSideMetadata, g.oneSideHandedness(), g.VectorMatching, g.Op, g.ReturnBool) } } } @@ -646,7 +644,8 @@ func (g *GroupedVectorVectorBinaryOperation) mergeOneSide(data []types.InstantVe } if conflict != nil { - return types.InstantVectorSeriesData{}, g.formatConflictError(conflict.FirstConflictingSeriesIndex, conflict.SecondConflictingSeriesIndex, conflict.Description, conflict.Timestamp, g.oneSideMetadata, g.oneSideHandedness()) + err := formatConflictError(conflict.FirstConflictingSeriesIndex, conflict.SecondConflictingSeriesIndex, conflict.Description, conflict.Timestamp, g.oneSideMetadata, g.oneSideHandedness(), g.VectorMatching, g.Op, g.ReturnBool) + return types.InstantVectorSeriesData{}, err } return merged, nil @@ -689,40 +688,6 @@ func (g *GroupedVectorVectorBinaryOperation) mergeManySide(data []types.InstantV return merged, nil } -func (g *GroupedVectorVectorBinaryOperation) formatConflictError( - firstConflictingSeriesIndex int, - secondConflictingSeriesIndex int, - description string, - ts int64, - sourceSeriesMetadata []types.SeriesMetadata, - side string, -) error { - firstConflictingSeriesLabels := sourceSeriesMetadata[firstConflictingSeriesIndex].Labels - groupLabels := groupLabelsFunc(g.VectorMatching, g.Op, g.ReturnBool)(firstConflictingSeriesLabels) - - if secondConflictingSeriesIndex == -1 { - return fmt.Errorf( - "found %s for the match group %s on the %s side of the operation at timestamp %s", - description, - groupLabels, - side, - timestamp.Time(ts).Format(time.RFC3339Nano), - ) - } - - secondConflictingSeriesLabels := sourceSeriesMetadata[secondConflictingSeriesIndex].Labels - - return fmt.Errorf( - "found %s for the match group %s on the %s side of 
the operation at timestamp %s: %s and %s", - description, - groupLabels, - side, - timestamp.Time(ts).Format(time.RFC3339Nano), - firstConflictingSeriesLabels, - secondConflictingSeriesLabels, - ) -} - func (g *GroupedVectorVectorBinaryOperation) oneSideHandedness() string { switch g.VectorMatching.Card { case parser.CardOneToMany: diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index 70f1204d7bf..8d99344a082 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -7,13 +7,10 @@ package binops import ( "context" - "fmt" "math" "sort" - "time" "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" @@ -48,13 +45,14 @@ type OneToOneVectorVectorBinaryOperation struct { expressionPosition posrange.PositionRange annotations *annotations.Annotations + timeRange types.QueryTimeRange } var _ types.InstantVectorOperator = &OneToOneVectorVectorBinaryOperation{} type oneToOneBinaryOperationOutputSeries struct { - leftSeriesIndices []int - rightSeriesIndices []int + leftSeriesIndices []int + rightSide *oneToOneBinaryOperationRightSide } // latestLeftSeries returns the index of the last series from the left source needed for this output series. @@ -66,9 +64,38 @@ func (s oneToOneBinaryOperationOutputSeries) latestLeftSeries() int { // latestRightSeries returns the index of the last series from the right source needed for this output series. // -// It assumes that rightSeriesIndices is sorted in ascending order. +// It assumes that rightSide.rightSeriesIndices is sorted in ascending order. 
func (s oneToOneBinaryOperationOutputSeries) latestRightSeries() int { - return s.rightSeriesIndices[len(s.rightSeriesIndices)-1] + return s.rightSide.rightSeriesIndices[len(s.rightSide.rightSeriesIndices)-1] +} + +type oneToOneBinaryOperationRightSide struct { + // If this right side is used for multiple output series and has not been populated, rightSeriesIndices will not be nil. + // If this right side has been populated, rightSeriesIndices will be nil. + rightSeriesIndices []int + mergedData types.InstantVectorSeriesData + + // The number of output series that use the same series from the right side. + // Will only be greater than 1 for comparison binary operations without the bool modifier + // where the input series on the left side have different metric names. + outputSeriesCount int + + // Time steps at which we've seen samples for any left side that matches with this right side. + // Each value is the index of the source series of the sample, or -1 if no sample has been seen for this time step yet. + leftSidePresence []int +} + +// updatePresence records the presence of a sample from the left side series with index seriesIdx at the timestamp with index timestampIdx. +// +// If there is already a sample present from another series at the same timestamp, updatePresence returns that series' index, or +// -1 if there was no sample present at the same timestamp from another series. 
+func (g *oneToOneBinaryOperationRightSide) updatePresence(timestampIdx int64, seriesIdx int) int { + if existing := g.leftSidePresence[timestampIdx]; existing != -1 { + return existing + } + + g.leftSidePresence[timestampIdx] = seriesIdx + return -1 } func NewOneToOneVectorVectorBinaryOperation( @@ -80,6 +107,7 @@ func NewOneToOneVectorVectorBinaryOperation( memoryConsumptionTracker *limiting.MemoryConsumptionTracker, annotations *annotations.Annotations, expressionPosition posrange.PositionRange, + timeRange types.QueryTimeRange, ) (*OneToOneVectorVectorBinaryOperation, error) { e, err := newVectorVectorBinaryOperationEvaluator(op, returnBool, memoryConsumptionTracker, annotations, expressionPosition) if err != nil { @@ -97,6 +125,7 @@ func NewOneToOneVectorVectorBinaryOperation( evaluator: e, expressionPosition: expressionPosition, annotations: annotations, + timeRange: timeRange, } return b, nil @@ -184,60 +213,21 @@ func (b *OneToOneVectorVectorBinaryOperation) loadSeriesMetadata(ctx context.Con func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.SeriesMetadata, []*oneToOneBinaryOperationOutputSeries, []bool, []bool, error) { labelsFunc := groupLabelsFunc(b.VectorMatching, b.Op, b.ReturnBool) groupKeyFunc := vectorMatchingGroupKeyFunc(b.VectorMatching) - outputSeriesMap := map[string]*oneToOneBinaryOperationOutputSeries{} - - // Use the smaller side to populate the map of possible output series first. - // This should ensure we don't unnecessarily populate the output series map with series that will never match in most cases. - // (It's possible that all the series on the larger side all belong to the same group, but this is expected to be rare.) 
- smallerSide := b.leftMetadata - largerSide := b.rightMetadata - smallerSideIsLeftSide := len(b.leftMetadata) < len(b.rightMetadata) - - if !smallerSideIsLeftSide { - smallerSide = b.rightMetadata - largerSide = b.leftMetadata - } + rightSeriesGroupsMap := map[string]*oneToOneBinaryOperationRightSide{} - for idx, s := range smallerSide { + for idx, s := range b.rightMetadata { groupKey := groupKeyFunc(s.Labels) - series, exists := outputSeriesMap[string(groupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. + group, exists := rightSeriesGroupsMap[string(groupKey)] // Important: don't extract the string(...) call here - passing it directly allows us to avoid allocating it. if !exists { - series = &oneToOneBinaryOperationOutputSeries{} - outputSeriesMap[string(groupKey)] = series - } - - if smallerSideIsLeftSide { - series.leftSeriesIndices = append(series.leftSeriesIndices, idx) - } else { - series.rightSeriesIndices = append(series.rightSeriesIndices, idx) - } - } - - for idx, s := range largerSide { - groupKey := groupKeyFunc(s.Labels) - - // Important: don't extract the string(...) call below - passing it directly allows us to avoid allocating it. - if series, exists := outputSeriesMap[string(groupKey)]; exists { - if smallerSideIsLeftSide { - // Currently iterating through right side. - series.rightSeriesIndices = append(series.rightSeriesIndices, idx) - } else { - series.leftSeriesIndices = append(series.leftSeriesIndices, idx) - } + group = &oneToOneBinaryOperationRightSide{} + rightSeriesGroupsMap[string(groupKey)] = group } - } - // Remove series that cannot produce samples. - for seriesLabels, outputSeries := range outputSeriesMap { - if len(outputSeries.leftSeriesIndices) == 0 || len(outputSeries.rightSeriesIndices) == 0 { - // No matching series on at least one side for this output series, so output series will have no samples. Remove it. 
- delete(outputSeriesMap, seriesLabels) - } + group.rightSeriesIndices = append(group.rightSeriesIndices, idx) } - allMetadata := types.GetSeriesMetadataSlice(len(outputSeriesMap)) - allSeries := make([]*oneToOneBinaryOperationOutputSeries, 0, len(outputSeriesMap)) + outputSeriesMap := map[string]*oneToOneBinaryOperationOutputSeries{} leftSeriesUsed, err := types.BoolSlicePool.Get(len(b.leftMetadata), b.MemoryConsumptionTracker) if err != nil { @@ -252,18 +242,45 @@ func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.Ser leftSeriesUsed = leftSeriesUsed[:len(b.leftMetadata)] rightSeriesUsed = rightSeriesUsed[:len(b.rightMetadata)] + for leftSeriesIndex, s := range b.leftMetadata { + outputSeriesLabels := labelsFunc(s.Labels) + outputSeries, exists := outputSeriesMap[outputSeriesLabels.String()] + + if !exists { + groupKey := groupKeyFunc(s.Labels) + + // Important: don't extract the string(...) call below - passing it directly allows us to avoid allocating it. + rightSide, exists := rightSeriesGroupsMap[string(groupKey)] + + if !exists { + // No matching series on the right side. + continue + } + + if rightSide.outputSeriesCount == 0 { + // First output series the right side has matched to. 
+ for _, rightSeriesIndex := range rightSide.rightSeriesIndices { + rightSeriesUsed[rightSeriesIndex] = true + } + } + + rightSide.outputSeriesCount++ + + outputSeries = &oneToOneBinaryOperationOutputSeries{rightSide: rightSide} + outputSeriesMap[outputSeriesLabels.String()] = outputSeries + } + + outputSeries.leftSeriesIndices = append(outputSeries.leftSeriesIndices, leftSeriesIndex) + leftSeriesUsed[leftSeriesIndex] = true + } + + allMetadata := types.GetSeriesMetadataSlice(len(outputSeriesMap)) + allSeries := make([]*oneToOneBinaryOperationOutputSeries, 0, len(outputSeriesMap)) + for _, outputSeries := range outputSeriesMap { firstSeriesLabels := b.leftMetadata[outputSeries.leftSeriesIndices[0]].Labels allMetadata = append(allMetadata, types.SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) allSeries = append(allSeries, outputSeries) - - for _, leftSeriesIndex := range outputSeries.leftSeriesIndices { - leftSeriesUsed[leftSeriesIndex] = true - } - - for _, rightSeriesIndex := range outputSeries.rightSeriesIndices { - rightSeriesUsed[rightSeriesIndex] = true - } } return allMetadata, allSeries, leftSeriesUsed, rightSeriesUsed, nil @@ -361,17 +378,84 @@ func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (t return types.InstantVectorSeriesData{}, err } - allRightSeries, err := b.rightBuffer.GetSeries(ctx, thisSeries.rightSeriesIndices) + rightSide := thisSeries.rightSide + + if rightSide.rightSeriesIndices != nil { + // Right side hasn't been populated yet. + if err := b.populateRightSide(ctx, rightSide); err != nil { + return types.InstantVectorSeriesData{}, err + } + } + + // We don't need to return thisSeries.rightSide.mergedData here - computeResult will return it below if this is the last output series that references this right side. 
+ rightSide.outputSeriesCount-- + canMutateRightSide := rightSide.outputSeriesCount == 0 + + result, err := b.evaluator.computeResult(mergedLeftSide, rightSide.mergedData, true, canMutateRightSide) if err != nil { return types.InstantVectorSeriesData{}, err } - mergedRightSide, err := b.mergeSingleSide(allRightSeries, thisSeries.rightSeriesIndices, b.rightMetadata, "right") + // If the right side matches to many output series, check for conflicts between those left side series. + if rightSide.leftSidePresence != nil { + seriesIdx := thisSeries.leftSeriesIndices[0] // FIXME: this isn't right, need to do this after applying early filtering + + if err := b.updateLeftSidePresence(rightSide, result, seriesIdx); err != nil { + return types.InstantVectorSeriesData{}, err + } + + if rightSide.outputSeriesCount == 0 { + types.IntSlicePool.Put(rightSide.leftSidePresence, b.MemoryConsumptionTracker) + } + } + + return result, nil +} + +func (b *OneToOneVectorVectorBinaryOperation) populateRightSide(ctx context.Context, rightSide *oneToOneBinaryOperationRightSide) error { + allRightSeries, err := b.rightBuffer.GetSeries(ctx, rightSide.rightSeriesIndices) + if err != nil { + return err + } + + rightSide.mergedData, err = b.mergeSingleSide(allRightSeries, rightSide.rightSeriesIndices, b.rightMetadata, "right") if err != nil { - return types.InstantVectorSeriesData{}, err + return err + } + + if rightSide.outputSeriesCount > 1 { + rightSide.leftSidePresence, err = types.IntSlicePool.Get(b.timeRange.StepCount, b.MemoryConsumptionTracker) + if err != nil { + return err + } + + rightSide.leftSidePresence = rightSide.leftSidePresence[:b.timeRange.StepCount] + + for i := range rightSide.leftSidePresence { + rightSide.leftSidePresence[i] = -1 + } + } + + // Signal that the right side has been populated. 
+ rightSide.rightSeriesIndices = nil + + return nil +} + +func (b *OneToOneVectorVectorBinaryOperation) updateLeftSidePresence(rightSide *oneToOneBinaryOperationRightSide, leftSideData types.InstantVectorSeriesData, leftSideSeriesIdx int) error { + for _, p := range leftSideData.Floats { + if otherSeriesIdx := rightSide.updatePresence(b.timeRange.PointIndex(p.T), leftSideSeriesIdx); otherSeriesIdx != -1 { + return formatConflictError(otherSeriesIdx, leftSideSeriesIdx, "duplicate series", p.T, b.leftMetadata, "left", b.VectorMatching, b.Op, b.ReturnBool) + } + } + + for _, p := range leftSideData.Histograms { + if otherSeriesIdx := rightSide.updatePresence(b.timeRange.PointIndex(p.T), leftSideSeriesIdx); otherSeriesIdx != -1 { + return formatConflictError(otherSeriesIdx, leftSideSeriesIdx, "duplicate series", p.T, b.leftMetadata, "left", b.VectorMatching, b.Op, b.ReturnBool) + } } - return b.evaluator.computeResult(mergedLeftSide, mergedRightSide, true, true) + return nil } // mergeSingleSide exists to handle the case where one side of an output series has different source series at different time steps. 
@@ -402,30 +486,7 @@ func (b *OneToOneVectorVectorBinaryOperation) mergeSingleSide(data []types.Insta } func (b *OneToOneVectorVectorBinaryOperation) mergeConflictToError(conflict *operators.MergeConflict, sourceSeriesMetadata []types.SeriesMetadata, side string) error { - firstConflictingSeriesLabels := sourceSeriesMetadata[conflict.FirstConflictingSeriesIndex].Labels - groupLabels := groupLabelsFunc(b.VectorMatching, b.Op, b.ReturnBool)(firstConflictingSeriesLabels) - - if conflict.SecondConflictingSeriesIndex == -1 { - return fmt.Errorf( - "found %s for the match group %s on the %s side of the operation at timestamp %s", - conflict.Description, - groupLabels, - side, - timestamp.Time(conflict.Timestamp).Format(time.RFC3339Nano), - ) - } - - secondConflictingSeriesLabels := sourceSeriesMetadata[conflict.SecondConflictingSeriesIndex].Labels - - return fmt.Errorf( - "found %s for the match group %s on the %s side of the operation at timestamp %s: %s and %s", - conflict.Description, - groupLabels, - side, - timestamp.Time(conflict.Timestamp).Format(time.RFC3339Nano), - firstConflictingSeriesLabels, - secondConflictingSeriesLabels, - ) + return formatConflictError(conflict.FirstConflictingSeriesIndex, conflict.SecondConflictingSeriesIndex, conflict.Description, conflict.Timestamp, sourceSeriesMetadata, side, b.VectorMatching, b.Op, b.ReturnBool) } func (b *OneToOneVectorVectorBinaryOperation) Close() { diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go index bd697398b3a..0559f775f23 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation_test.go @@ -230,8 +230,8 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "single output series": { series: []*oneToOneBinaryOperationOutputSeries{ 
{ - leftSeriesIndices: []int{4}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{4}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, }, @@ -241,12 +241,12 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "two output series, both with one input series, read from both sides in same order and already sorted correctly": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, }, @@ -256,12 +256,12 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "two output series, both with one input series, read from both sides in same order but sorted incorrectly": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, }, @@ -271,12 +271,12 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "two output series, both with one input series, read from both sides in different order": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, }, @@ -286,12 
+286,12 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "two output series, both with multiple input series": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{1, 2}, - rightSeriesIndices: []int{0, 3}, + leftSeriesIndices: []int{1, 2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{0, 3}}, }, { - leftSeriesIndices: []int{0, 3}, - rightSeriesIndices: []int{1, 2}, + leftSeriesIndices: []int{0, 3}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1, 2}}, }, }, @@ -301,16 +301,16 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "multiple output series, both with one input series, read from both sides in same order and already sorted correctly": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{3}, - rightSeriesIndices: []int{3}, + leftSeriesIndices: []int{3}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{3}}, }, }, @@ -320,16 +320,16 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "multiple output series, both with one input series, read from both sides in same order but sorted incorrectly": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{3}, - rightSeriesIndices: []int{3}, + leftSeriesIndices: []int{3}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{3}}, }, { - leftSeriesIndices: []int{1}, 
- rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, }, @@ -339,16 +339,16 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "multiple output series, both with one input series, read from both sides in different order": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{3}, - rightSeriesIndices: []int{3}, + leftSeriesIndices: []int{3}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{3}}, }, { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, }, @@ -358,16 +358,16 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "multiple output series, with multiple input series each": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{4, 5, 10}, - rightSeriesIndices: []int{2, 20}, + leftSeriesIndices: []int{4, 5, 10}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2, 20}}, }, { - leftSeriesIndices: []int{2, 4, 15}, - rightSeriesIndices: []int{3, 5, 50}, + leftSeriesIndices: []int{2, 4, 15}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{3, 5, 50}}, }, { - leftSeriesIndices: []int{3, 1}, - rightSeriesIndices: []int{1, 40}, + leftSeriesIndices: []int{3, 1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1, 40}}, }, }, @@ -377,20 +377,20 @@ func TestOneToOneVectorVectorBinaryOperation_Sorting(t *testing.T) { "multiple output series which depend on the same input series": { series: []*oneToOneBinaryOperationOutputSeries{ { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{2}, + 
leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{1}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{1}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{2}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{2}}, }, { - leftSeriesIndices: []int{2}, - rightSeriesIndices: []int{1}, + leftSeriesIndices: []int{2}, + rightSide: &oneToOneBinaryOperationRightSide{rightSeriesIndices: []int{1}}, }, }, diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index ee3d2ccd8e2..d24ccba656a 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -273,7 +273,7 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr, timeRange types case parser.CardOneToMany, parser.CardManyToOne: return binops.NewGroupedVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange(), timeRange) case parser.CardOneToOne: - return binops.NewOneToOneVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange()) + return binops.NewOneToOneVectorVectorBinaryOperation(lhs, rhs, *e.VectorMatching, e.Op, e.ReturnBool, q.memoryConsumptionTracker, q.annotations, e.PositionRange(), timeRange) default: return nil, compat.NewNotSupportedError(fmt.Sprintf("binary expression with %v matching for '%v'", e.VectorMatching.Card, e.Op)) } diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index 59894e3900c..a47550ed538 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1473,10 +1473,9 @@ load 6m 
left_side_b{env="test", pod="a"} _ _ 7 8 right_side{env="test", pod="a"} 2 2 7 7 -# FIXME: MQE currently does not correctly handle this case. -#eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side -# left_side_a{pod="a"} _ 2 _ _ -# left_side_b{pod="a"} _ _ 7 _ +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side + left_side_a{pod="a"} _ 2 _ _ + left_side_b{pod="a"} _ _ 7 _ eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side {pod="a"} 0 1 1 0 From cad9fa02ddfbd1795254a9835020afd6468c45a0 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 13 Dec 2024 16:18:35 +1100 Subject: [PATCH 42/43] Avoid expensive `labels.Labels.String()` call --- ...e_to_one_vector_vector_binary_operation.go | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index 8d99344a082..2f1ad2ded50 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -11,6 +11,7 @@ import ( "sort" "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/util/annotations" @@ -98,6 +99,11 @@ func (g *oneToOneBinaryOperationRightSide) updatePresence(timestampIdx int64, se return -1 } +type oneToOneBinaryOperationOutputSeriesWithLabels struct { + labels labels.Labels + series *oneToOneBinaryOperationOutputSeries +} + func NewOneToOneVectorVectorBinaryOperation( left types.InstantVectorOperator, right types.InstantVectorOperator, @@ -227,7 +233,7 @@ func (b *OneToOneVectorVectorBinaryOperation) 
computeOutputSeries() ([]types.Ser group.rightSeriesIndices = append(group.rightSeriesIndices, idx) } - outputSeriesMap := map[string]*oneToOneBinaryOperationOutputSeries{} + outputSeriesMap := map[string]oneToOneBinaryOperationOutputSeriesWithLabels{} leftSeriesUsed, err := types.BoolSlicePool.Get(len(b.leftMetadata), b.MemoryConsumptionTracker) if err != nil { @@ -241,10 +247,12 @@ func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.Ser leftSeriesUsed = leftSeriesUsed[:len(b.leftMetadata)] rightSeriesUsed = rightSeriesUsed[:len(b.rightMetadata)] + outputSeriesLabelsBytes := make([]byte, 0, 1024) for leftSeriesIndex, s := range b.leftMetadata { outputSeriesLabels := labelsFunc(s.Labels) - outputSeries, exists := outputSeriesMap[outputSeriesLabels.String()] + outputSeriesLabelsBytes = outputSeriesLabels.Bytes(outputSeriesLabelsBytes) // FIXME: it'd be better if we could just get the underlying byte slice without copying here + outputSeries, exists := outputSeriesMap[string(outputSeriesLabelsBytes)] if !exists { groupKey := groupKeyFunc(s.Labels) @@ -266,11 +274,15 @@ func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.Ser rightSide.outputSeriesCount++ - outputSeries = &oneToOneBinaryOperationOutputSeries{rightSide: rightSide} - outputSeriesMap[outputSeriesLabels.String()] = outputSeries + outputSeries = oneToOneBinaryOperationOutputSeriesWithLabels{ + labels: outputSeriesLabels, + series: &oneToOneBinaryOperationOutputSeries{rightSide: rightSide}, + } + + outputSeriesMap[string(outputSeriesLabelsBytes)] = outputSeries } - outputSeries.leftSeriesIndices = append(outputSeries.leftSeriesIndices, leftSeriesIndex) + outputSeries.series.leftSeriesIndices = append(outputSeries.series.leftSeriesIndices, leftSeriesIndex) leftSeriesUsed[leftSeriesIndex] = true } @@ -278,9 +290,8 @@ func (b *OneToOneVectorVectorBinaryOperation) computeOutputSeries() ([]types.Ser allSeries := make([]*oneToOneBinaryOperationOutputSeries, 0, 
len(outputSeriesMap)) for _, outputSeries := range outputSeriesMap { - firstSeriesLabels := b.leftMetadata[outputSeries.leftSeriesIndices[0]].Labels - allMetadata = append(allMetadata, types.SeriesMetadata{Labels: labelsFunc(firstSeriesLabels)}) - allSeries = append(allSeries, outputSeries) + allMetadata = append(allMetadata, types.SeriesMetadata{Labels: outputSeries.labels}) + allSeries = append(allSeries, outputSeries.series) } return allMetadata, allSeries, leftSeriesUsed, rightSeriesUsed, nil From d8b53a14be62e5d7a532006a7e306cd8278a9682 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 13 Dec 2024 16:34:46 +1100 Subject: [PATCH 43/43] Fix issue where comparison operations between two vectors incorrectly fail with a conflict if multiple left series match the same right series and only one left point remains after applying the comparison --- ...e_to_one_vector_vector_binary_operation.go | 41 ++++++++++--------- .../testdata/ours/binary_operators.test | 17 ++++---- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go index 2f1ad2ded50..5d7fd7873ff 100644 --- a/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go +++ b/pkg/streamingpromql/operators/binops/one_to_one_vector_vector_binary_operation.go @@ -378,17 +378,6 @@ func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (t thisSeries := b.remainingSeries[0] b.remainingSeries = b.remainingSeries[1:] - - allLeftSeries, err := b.leftBuffer.GetSeries(ctx, thisSeries.leftSeriesIndices) - if err != nil { - return types.InstantVectorSeriesData{}, err - } - - mergedLeftSide, err := b.mergeSingleSide(allLeftSeries, thisSeries.leftSeriesIndices, b.leftMetadata, "left") - if err != nil { - return types.InstantVectorSeriesData{}, err - } - rightSide := thisSeries.rightSide if 
rightSide.rightSeriesIndices != nil { @@ -402,25 +391,39 @@ func (b *OneToOneVectorVectorBinaryOperation) NextSeries(ctx context.Context) (t rightSide.outputSeriesCount-- canMutateRightSide := rightSide.outputSeriesCount == 0 - result, err := b.evaluator.computeResult(mergedLeftSide, rightSide.mergedData, true, canMutateRightSide) + allLeftSeries, err := b.leftBuffer.GetSeries(ctx, thisSeries.leftSeriesIndices) if err != nil { return types.InstantVectorSeriesData{}, err } - // If the right side matches to many output series, check for conflicts between those left side series. - if rightSide.leftSidePresence != nil { - seriesIdx := thisSeries.leftSeriesIndices[0] // FIXME: this isn't right, need to do this after applying early filtering + for i, leftSeries := range allLeftSeries { + isLastLeftSeries := i == len(allLeftSeries)-1 - if err := b.updateLeftSidePresence(rightSide, result, seriesIdx); err != nil { + allLeftSeries[i], err = b.evaluator.computeResult(leftSeries, rightSide.mergedData, true, canMutateRightSide && isLastLeftSeries) + if err != nil { return types.InstantVectorSeriesData{}, err } - if rightSide.outputSeriesCount == 0 { - types.IntSlicePool.Put(rightSide.leftSidePresence, b.MemoryConsumptionTracker) + // If the right side matches to many output series, check for conflicts between those left side series. 
+ if rightSide.leftSidePresence != nil { + seriesIdx := thisSeries.leftSeriesIndices[i] + + if err := b.updateLeftSidePresence(rightSide, allLeftSeries[i], seriesIdx); err != nil { + return types.InstantVectorSeriesData{}, err + } } } - return result, nil + mergedResult, err := b.mergeSingleSide(allLeftSeries, thisSeries.leftSeriesIndices, b.leftMetadata, "left") + if err != nil { + return types.InstantVectorSeriesData{}, err + } + + if rightSide.leftSidePresence != nil && rightSide.outputSeriesCount == 0 { + types.IntSlicePool.Put(rightSide.leftSidePresence, b.MemoryConsumptionTracker) + } + + return mergedResult, nil } func (b *OneToOneVectorVectorBinaryOperation) populateRightSide(ctx context.Context, rightSide *oneToOneBinaryOperationRightSide) error { diff --git a/pkg/streamingpromql/testdata/ours/binary_operators.test b/pkg/streamingpromql/testdata/ours/binary_operators.test index a47550ed538..f5725eb0399 100644 --- a/pkg/streamingpromql/testdata/ours/binary_operators.test +++ b/pkg/streamingpromql/testdata/ours/binary_operators.test @@ -1397,10 +1397,9 @@ load 6m left_side_b{env="test", pod="a"} 5 6 7 8 right_side{env="test", pod="a"} 2 2 7 7 -# FIXME: MQE currently does not correctly handle this case because it performs filtering after merging input series, whereas we should do it in the other order. 
-#eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side -# left_side_a{pod="a"} _ 2 _ _ -# left_side_b{pod="a"} _ _ 7 _ +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == ignoring(env) right_side + left_side_a{pod="a"} _ 2 _ _ + left_side_b{pod="a"} _ _ 7 _ eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool ignoring(env) right_side expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) @@ -1416,9 +1415,8 @@ eval_fail range from 0 to 18m step 6m right_side == bool ignoring(env) {__name__ # left_side_b{pod="a"} _ _ 7 _ # but instead both engines drop the metric names in the output. # This is accepted behaviour: https://github.com/prometheus/prometheus/issues/5326 -# FIXME: MQE currently does not correctly handle this case because it performs filtering after merging input series, whereas we should do it in the other order. -#eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side -# {pod="a"} _ 2 7 _ +eval range from 0 to 18m step 6m {__name__=~"left_side.*"} == on(pod) right_side + {pod="a"} _ 2 7 _ eval_fail range from 0 to 18m step 6m {__name__=~"left_side.*"} == bool on(pod) right_side expected_fail_regexp (multiple matches for labels: many-to-one matching must be explicit|found duplicate series for the match group .* on the left side of the operation) @@ -1458,9 +1456,8 @@ load 6m left{pod="b"} 5 6 7 8 right 2 2 7 7 -# FIXME: MQE currently does not correctly handle this case because it performs filtering after merging input series, whereas we should do it in the other order. 
-# eval range from 0 to 18m step 6m left == ignoring(pod) right -# left _ 2 7 _ +eval range from 0 to 18m step 6m left == ignoring(pod) right + left _ 2 7 _ eval_fail range from 0 to 18m step 6m left == ignoring(pod) group_right right expected_fail_regexp found duplicate series for the match group .* on the left (hand-)?side of the operation