From 1bfb5f95ea7a29874fc720ea948321747c37f8f8 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Tue, 3 Dec 2024 11:16:54 +0100 Subject: [PATCH 1/2] Fix subquery over avg The refactor in https://github.com/thanos-io/promql-engine/pull/477 introduces a bug where subquery on top of avg produces wrong results. This is because the parents are calculated before the query is rewritten, so the new nodes end up with no parents. Signed-off-by: Filip Petkovski --- engine/distributed_test.go | 2 ++ logicalplan/distribute.go | 13 ++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/engine/distributed_test.go b/engine/distributed_test.go index b85c0a15..b5e157f2 100644 --- a/engine/distributed_test.go +++ b/engine/distributed_test.go @@ -232,6 +232,8 @@ func TestDistributedAggregations(t *testing.T) { {name: "absent for existing metric with aggregation", query: `sum(absent(foo))`}, {name: "absent for existing metric", query: `absent(bar{pod="nginx-1"})`}, {name: "absent for existing metric with aggregation", query: `sum(absent(bar{pod="nginx-1"}))`}, + {name: "subquery with sum/count", query: `max_over_time((sum(bar)/count(bar))[30s:15s])`}, + {name: "subquery with avg", query: `max_over_time(avg(bar)[30s:15s])`}, {name: "subquery with window within engine range", query: `max_over_time(sum_over_time(bar[30s])[30s:15s])`}, {name: "subquery with window outside of engine range", query: `max_over_time(sum_over_time(bar[1m])[10m:1m])`}, {name: "subquery with misaligned ranges", rangeStart: time.Unix(7, 0), query: `max_over_time(sum(bar)[30s:15s])`}, diff --git a/logicalplan/distribute.go b/logicalplan/distribute.go index b19d3d9c..0f16847d 100644 --- a/logicalplan/distribute.go +++ b/logicalplan/distribute.go @@ -178,13 +178,6 @@ func (m DistributedExecutionOptimizer) Optimize(plan Node, opts *query.Options) } minEngineOverlap := labelRanges.minOverlap() - // TODO(fpetkovski): Consider changing TraverseBottomUp to pass in a list of parents in the transform function. - parents := make(map[*Node]*Node) - TraverseBottomUp(nil, &plan, func(parent, current *Node) (stop bool) { - parents[current] = parent - return false - }) - // Preprocess rewrite distributable averages as sum/count var warns = annotations.New() TraverseBottomUp(nil, &plan, func(parent, current *Node) (stop bool) { @@ -217,6 +210,12 @@ func (m DistributedExecutionOptimizer) Optimize(plan Node, opts *query.Options) return !(isDistributive(parent, m.SkipBinaryPushdown, engineLabels, warns) || isAvgAggregation(parent)) }) + // TODO(fpetkovski): Consider changing TraverseBottomUp to pass in a list of parents in the transform function. + parents := make(map[*Node]*Node) + TraverseBottomUp(nil, &plan, func(parent, current *Node) (stop bool) { + parents[current] = parent + return false + }) TraverseBottomUp(nil, &plan, func(parent, current *Node) (stop bool) { // If the current operation is not distributive, stop the traversal. if !isDistributive(current, m.SkipBinaryPushdown, engineLabels, warns) { From 99d1bdd3f9c6c36cd0f3034a3504ea6a19672fea Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Tue, 3 Dec 2024 11:25:24 +0100 Subject: [PATCH 2/2] Fix lint Signed-off-by: Filip Petkovski --- engine/distributed_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/distributed_test.go b/engine/distributed_test.go index b5e157f2..db9edf41 100644 --- a/engine/distributed_test.go +++ b/engine/distributed_test.go @@ -232,7 +232,7 @@ func TestDistributedAggregations(t *testing.T) { {name: "absent for existing metric with aggregation", query: `sum(absent(foo))`}, {name: "absent for existing metric", query: `absent(bar{pod="nginx-1"})`}, {name: "absent for existing metric with aggregation", query: `sum(absent(bar{pod="nginx-1"}))`}, - {name: "subquery with sum/count", query: `max_over_time((sum(bar)/count(bar))[30s:15s])`}, + {name: "subquery with sum/count", query: `max_over_time((sum(bar) / count(bar))[30s:15s])`}, {name: "subquery with avg", query: `max_over_time(avg(bar)[30s:15s])`}, {name: "subquery with window within engine range", query: `max_over_time(sum_over_time(bar[30s])[30s:15s])`}, {name: "subquery with window outside of engine range", query: `max_over_time(sum_over_time(bar[1m])[10m:1m])`},