From 6696706bffeea80eb9863cde0d1cafd58eb86f76 Mon Sep 17 00:00:00 2001 From: Benjamin Owad Date: Sun, 8 Dec 2024 16:36:20 -0500 Subject: [PATCH] Subquery Unnesting Agg NULL case workarounds (#257) - Add an outer join with the deduplicated "left side", and a corresponding projection node, to pass along NULL values as expected. - Add a specific workaround in the projection node to case NULL -> 0 if we have a COUNT(*). --- .../src/rules/subquery/depjoin_pushdown.rs | 103 ++++- optd-perfbench/src/datafusion_dbms.rs | 2 +- optd-sqllogictest/slt/unnest-agg-nulls.slt | 21 + optd-sqllogictest/slt/unnest-count-star.slt | 22 ++ optd-sqllogictest/src/lib.rs | 2 +- optd-sqlplannertest/src/lib.rs | 2 +- .../subqueries/subquery_unnesting.planner.sql | 362 +++++++++++------- .../tests/tpch/q17.planner.sql | 23 +- optd-sqlplannertest/tests/tpch/q2.planner.sql | 187 +++++---- 9 files changed, 492 insertions(+), 232 deletions(-) create mode 100644 optd-sqllogictest/slt/unnest-agg-nulls.slt create mode 100644 optd-sqllogictest/slt/unnest-count-star.slt diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index 1c087554..cd96f924 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -3,17 +3,16 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. -use optd_core::nodes::{PlanNodeOrGroup, PredNode}; -// TODO: No push past join -// TODO: Sideways information passing?? +use datafusion_expr::{AggregateFunction, BuiltinScalarFunction}; +use optd_core::nodes::{PlanNodeOrGroup, PredNode, Value}; use optd_core::optimizer::Optimizer; use optd_core::rules::{Rule, RuleMatcher}; use crate::plan_nodes::{ - ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, DependentJoin, - DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, JoinType, - ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection, - PredExt, RawDependentJoin, + ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, ConstantType, + DependentJoin, DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, + FuncPred, FuncType, JoinType, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, + LogicalJoin, LogicalProjection, PredExt, RawDependentJoin, }; use crate::rules::macros::define_rule; use crate::OptimizerExt; @@ -288,11 +287,8 @@ define_rule!( /// deduplicated set). /// For info on why we do the outer join, refer to the Unnesting Arbitrary Queries /// talk by Mark Raasveldt. The correlated columns are covered in the original paper. -/// -/// TODO: the outer join is not implemented yet, so some edge cases won't work. -/// Run SQLite tests to catch these, I guess. fn apply_dep_join_past_agg( - _optimizer: &impl Optimizer, + optimizer: &impl Optimizer, binding: ArcDfPlanNode, ) -> Vec> { let join = DependentJoin::from_plan_node(binding).unwrap(); @@ -305,6 +301,8 @@ fn apply_dep_join_past_agg( let groups = agg.groups(); let right = agg.child(); + let left_schema_size = optimizer.get_schema_of(left.clone()).len(); + // Cross join should always have true cond assert!(cond == ConstantPred::bool(true).into_pred_node()); @@ -345,11 +343,90 @@ fn apply_dep_join_past_agg( ); let new_dep_join = - DependentJoin::new_unchecked(left, right, cond, extern_cols, JoinType::Cross); + DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols, JoinType::Cross); + let new_agg_exprs_size = new_exprs.len(); + let new_agg_groups_size = new_groups.len(); + let new_agg_schema_size = new_agg_groups_size + new_agg_exprs_size; let new_agg = LogicalAgg::new(new_dep_join.into_plan_node(), new_exprs, new_groups); - vec![new_agg.into_plan_node().into()] + // Add left outer join above the agg node, joining the deduplicated set + // with the new agg node. + + // Both sides will have an agg now, so we want to match the correlated + // columns from the left with those from the right + let outer_join_cond = LogOpPred::new( + LogOpType::And, + correlated_col_indices + .iter() + .enumerate() + .map(|(i, _)| { + assert!(i + left_schema_size < left_schema_size + new_agg_schema_size); + BinOpPred::new( + ColumnRefPred::new(i).into_pred_node(), + // We *prepend* the correlated columns to the groups list, + // so we don't need to take into account the old + // group-by expressions to get the corresponding correlated + // column. + ColumnRefPred::new(left_schema_size + i).into_pred_node(), + BinOpType::Eq, + ) + .into_pred_node() + }) + .collect(), + ); + + let new_outer_join = LogicalJoin::new_unchecked( + left, + new_agg.into_plan_node(), + outer_join_cond.into_pred_node(), + JoinType::LeftOuter, + ); + + // We have to maintain the same schema above outer join as w/o it, but we + // also need to use the groups from the deduplicated left side, and the + // exprs from the new agg node. If we use everything from the new agg, + // we don't maintain nulls as desired. + let outer_join_proj = LogicalProjection::new( + // The meaning is to take everything from the left side, and everything + // from the right side *that is not in the left side*. I am unsure + // of the correctness of this project in every case. + new_outer_join.into_plan_node(), + ListPred::new( + (0..left_schema_size) + .chain(left_schema_size + left_schema_size..left_schema_size + new_agg_schema_size) + .map(|x| { + // Count(*) special case: We want all NULLs to be transformed into 0s. + if x >= left_schema_size + new_agg_groups_size { + // If this node corresponds to an agg function, and + // it's a count(*), apply the workaround + let expr = + exprs.to_vec()[x - left_schema_size - new_agg_groups_size].clone(); + if expr.typ == DfPredType::Func(FuncType::Agg(AggregateFunction::Count)) { + let expr_child = expr.child(0).child(0); + + if expr_child.typ == DfPredType::Constant(ConstantType::UInt8) + && expr_child.data == Some(Value::UInt8(1)) + { + return FuncPred::new( + FuncType::Scalar(BuiltinScalarFunction::Coalesce), + ListPred::new(vec![ + ColumnRefPred::new(x).into_pred_node(), + ConstantPred::int64(0).into_pred_node(), + ]), + ) + .into_pred_node(); + } + } + } + + ColumnRefPred::new(x).into_pred_node() + }) + .collect(), + ), + ); + + vec![outer_join_proj.into_plan_node().into()] } // Heuristics-only rule. If we don't have references to the external columns on the right side, diff --git a/optd-perfbench/src/datafusion_dbms.rs b/optd-perfbench/src/datafusion_dbms.rs index 2cf8e44c..0df98818 100644 --- a/optd-perfbench/src/datafusion_dbms.rs +++ b/optd-perfbench/src/datafusion_dbms.rs @@ -153,7 +153,7 @@ impl DatafusionDBMS { let batches = df.collect().await?; - let options = FormatOptions::default(); + let options = FormatOptions::default().with_null("NULL"); for batch in batches { let converters = batch diff --git a/optd-sqllogictest/slt/unnest-agg-nulls.slt b/optd-sqllogictest/slt/unnest-agg-nulls.slt new file mode 100644 index 00000000..8a5cac6e --- /dev/null +++ b/optd-sqllogictest/slt/unnest-agg-nulls.slt @@ -0,0 +1,21 @@ +include _basic_tables.slt.part + +# This query has NULL values from the subquery agg. It won't work without the +# outer join fix. +# It also has an out-of-order extern column [#1] +query +select + v1, + v2, + ( + select avg(v4) + from t2 + where v4 = v2 + ) as avg_v4 +from t1 order by v1; +---- +1 100 NULL +2 200 200.0 +2 250 250.0 +3 300 300.0 +3 300 300.0 diff --git a/optd-sqllogictest/slt/unnest-count-star.slt b/optd-sqllogictest/slt/unnest-count-star.slt new file mode 100644 index 00000000..a6a13a05 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-count-star.slt @@ -0,0 +1,22 @@ +include _basic_tables.slt.part + +# This query uses a count(*) agg function, with nulls. Nulls should be +# transformed from NULL to 0 when they come from count(*). +# It won't work without the outer join fix + a special case on count(*). +# It also has an out-of-order extern column [#1] +query +select + v1, + v2, + ( + select count(*) + from t2 + where v4 = v2 + ) as avg_v4 +from t1 order by v1; +---- +1 100 0 +2 200 1 +2 250 1 +3 300 1 +3 300 1 diff --git a/optd-sqllogictest/src/lib.rs b/optd-sqllogictest/src/lib.rs index 71506f57..66c0e301 100644 --- a/optd-sqllogictest/src/lib.rs +++ b/optd-sqllogictest/src/lib.rs @@ -107,7 +107,7 @@ impl DatafusionDBMS { }; let batches = df.collect().await?; - let options = FormatOptions::default(); + let options = FormatOptions::default().with_null("NULL"); for batch in batches { if types.is_empty() { diff --git a/optd-sqlplannertest/src/lib.rs b/optd-sqlplannertest/src/lib.rs index a6ba4da2..46f3316e 100644 --- a/optd-sqlplannertest/src/lib.rs +++ b/optd-sqlplannertest/src/lib.rs @@ -183,7 +183,7 @@ impl DatafusionDBMS { let batches = df.collect().await?; - let options = FormatOptions::default(); + let options = FormatOptions::default().with_null("NULL"); for batch in batches { let converters = batch diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 80d62ed2..471f28a9 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -41,37 +41,56 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── #0 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=18005,io=3000}, stat: {row_cnt=1} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=18002,io=3000}, stat: {row_cnt=1} } - ├── PhysicalFilter - │ ├── cond:Gt - │ │ ├── #1 - │ │ └── 100(i64) - │ ├── cost: {compute=17000,io=2000} - │ ├── stat: {row_cnt=1} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #0 ] - │ ├── cost: {compute=14000,io=2000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── #0 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalScan { table: t2 } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033003,io=4000}, stat: {row_cnt=1} } +└── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=4033000,io=4000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4018000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=14000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries with (scalar op group agg) @@ -113,48 +132,87 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ #2 ] - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } └── LogicalAgg ├── exprs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalProjection { exprs: [ #0, #2, #3 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0, #1 ] + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── #0 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalScan { table: t2 } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228003,io=5000}, stat: {row_cnt=1} } +└── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=44228000,io=5000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=44123000,io=4000} + ├── stat: {row_cnt=100000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ #2 ] + ├── groups: [ #0 ] + ├── cost: {compute=4119000,io=3000} + ├── stat: {row_cnt=10000} + └── PhysicalProjection { exprs: [ #0, #2, #3 ], cost: {compute=4059000,io=3000}, stat: {row_cnt=10000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4019000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) │ └── [ Cast { cast_to: Int64, child: #2 } ] ├── groups: [ #0, #1 ] - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── #0 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=25005,io=3000}, stat: {row_cnt=1} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=25002,io=3000}, stat: {row_cnt=1} } - ├── PhysicalFilter - │ ├── cond:Gt - │ │ ├── #1 - │ │ └── 100(i64) - │ ├── cost: {compute=24000,io=2000} - │ ├── stat: {row_cnt=1} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ #2 ] - │ ├── groups: [ #0 ] - │ ├── cost: {compute=21000,io=2000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #0, #1 ] - │ ├── cost: {compute=15000,io=2000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + ├── cost: {compute=15000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries with scalar agg in select list @@ -183,31 +241,50 @@ LogicalProjection { exprs: [ #0, #2 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - └── LogicalFilter - ├── cond:Eq - │ ├── #1 - │ └── #0 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalScan { table: t2 } -PhysicalProjection { exprs: [ #0, #3 ], cost: {compute=20000,io=3000}, stat: {row_cnt=1000} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=17000,io=3000}, stat: {row_cnt=1000} } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── #0 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalScan { table: t2 } +PhysicalProjection { exprs: [ #0, #4 ], cost: {compute=4033000,io=4000}, stat: {row_cnt=1000} } +└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} } ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalAgg - ├── aggrs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - ├── cost: {compute=14000,io=2000} - ├── stat: {row_cnt=1000} - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4018000,io=3000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=14000,io=2000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ -- Test whether the optimizer can unnest correlated subqueries. @@ -251,45 +328,64 @@ LogicalProjection { exprs: [ #0, #1 ] } │ └── #2 ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Sum) - │ └── [ Cast { cast_to: Int64, child: #2 } ] - ├── groups: [ #0 ] - └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } - └── LogicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #1 - │ │ └── #0 - │ └── Eq - │ ├── #2 - │ └── #3 - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalScan { table: t1 } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalScan { table: t2 } - └── LogicalScan { table: t3 } -PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=21005,io=4000}, stat: {row_cnt=1} } -└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=21002,io=4000}, stat: {row_cnt=1} } - ├── PhysicalFilter - │ ├── cond:Gt - │ │ ├── #1 - │ │ └── 100(i64) - │ ├── cost: {compute=20000,io=3000} - │ ├── stat: {row_cnt=1} - │ └── PhysicalAgg - │ ├── aggrs:Agg(Sum) - │ │ └── [ Cast { cast_to: Int64, child: #2 } ] - │ ├── groups: [ #0 ] - │ ├── cost: {compute=17000,io=3000} - │ ├── stat: {row_cnt=1000} - │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } - │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } - │ │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } - │ │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - │ └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } - └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] } + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #1 + │ │ └── #0 + │ └── Eq + │ ├── #2 + │ └── #3 + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalScan { table: t1 } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalScan { table: t2 } + └── LogicalScan { table: t3 } +PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036003,io=5000}, stat: {row_cnt=1} } +└── PhysicalFilter + ├── cond:Gt + │ ├── #4 + │ └── 100(i64) + ├── cost: {compute=4036000,io=5000} + ├── stat: {row_cnt=1} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4033000,io=5000}, stat: {row_cnt=1000} } + ├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── cost: {compute=4021000,io=4000} + ├── stat: {row_cnt=10000} + ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalAgg + ├── aggrs:Agg(Sum) + │ └── [ Cast { cast_to: Int64, child: #2 } ] + ├── groups: [ #0 ] + ├── cost: {compute=17000,io=3000} + ├── stat: {row_cnt=1000} + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} } + ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} } + │ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} } + │ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + │ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } + └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } */ diff --git a/optd-sqlplannertest/tests/tpch/q17.planner.sql b/optd-sqlplannertest/tests/tpch/q17.planner.sql index 913aa27d..72de706f 100644 --- a/optd-sqlplannertest/tests/tpch/q17.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q17.planner.sql @@ -104,15 +104,26 @@ PhysicalProjection │ │ ├── 0.2(float) │ │ └── Cast { cast_to: Float64, child: #1 } - └── PhysicalAgg - ├── aggrs:Agg(Avg) - │ └── [ #5 ] - ├── groups: [ #0 ] - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + └── PhysicalProjection { exprs: [ #0, #2 ] } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 ├── PhysicalAgg { aggrs: [], groups: [ #16 ] } │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } │ ├── PhysicalScan { table: lineitem } │ └── PhysicalScan { table: part } - └── PhysicalScan { table: lineitem } + └── PhysicalAgg + ├── aggrs:Agg(Avg) + │ └── [ #5 ] + ├── groups: [ #0 ] + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + ├── PhysicalAgg { aggrs: [], groups: [ #16 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: lineitem } + │ └── PhysicalScan { table: part } + └── PhysicalScan { table: lineitem } */ diff --git a/optd-sqlplannertest/tests/tpch/q2.planner.sql b/optd-sqlplannertest/tests/tpch/q2.planner.sql index 4fa160aa..113e3468 100644 --- a/optd-sqlplannertest/tests/tpch/q2.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q2.planner.sql @@ -172,45 +172,62 @@ LogicalLimit { skip: 0(u64), fetch: 100(u64) } │ │ └── LogicalScan { table: nation } │ └── LogicalScan { table: region } └── LogicalProjection { exprs: [ #0, #1 ] } - └── LogicalAgg - ├── exprs:Agg(Min) - │ └── [ #4 ] - ├── groups: [ #0 ] - └── LogicalFilter + └── LogicalProjection { exprs: [ #0, #2 ] } + └── LogicalJoin + ├── join_type: LeftOuter ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #1 - │ ├── Eq - │ │ ├── #6 - │ │ └── #2 - │ ├── Eq - │ │ ├── #9 - │ │ └── #13 - │ ├── Eq - │ │ ├── #15 - │ │ └── #17 │ └── Eq - │ ├── #18 - │ └── "AFRICA" - └── LogicalJoin { join_type: Inner, cond: true } - ├── LogicalAgg { exprs: [], groups: [ #0 ] } - │ └── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ │ │ ├── LogicalScan { table: part } - │ │ │ │ └── LogicalScan { table: supplier } - │ │ │ └── LogicalScan { table: partsupp } - │ │ └── LogicalScan { table: nation } - │ └── LogicalScan { table: region } - └── LogicalJoin { join_type: Cross, cond: true } - ├── LogicalJoin { join_type: Cross, cond: true } - │ ├── LogicalJoin { join_type: Cross, cond: true } - │ │ ├── LogicalScan { table: partsupp } - │ │ └── LogicalScan { table: supplier } - │ └── LogicalScan { table: nation } - └── LogicalScan { table: region } + │ ├── #0 + │ └── #1 + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalScan { table: part } + │ │ │ │ └── LogicalScan { table: supplier } + │ │ │ └── LogicalScan { table: partsupp } + │ │ └── LogicalScan { table: nation } + │ └── LogicalScan { table: region } + └── LogicalAgg + ├── exprs:Agg(Min) + │ └── [ #4 ] + ├── groups: [ #0 ] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #1 + │ ├── Eq + │ │ ├── #6 + │ │ └── #2 + │ ├── Eq + │ │ ├── #9 + │ │ └── #13 + │ ├── Eq + │ │ ├── #15 + │ │ └── #17 + │ └── Eq + │ ├── #18 + │ └── "AFRICA" + └── LogicalJoin { join_type: Inner, cond: true } + ├── LogicalAgg { exprs: [], groups: [ #0 ] } + │ └── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ │ │ ├── LogicalScan { table: part } + │ │ │ │ └── LogicalScan { table: supplier } + │ │ │ └── LogicalScan { table: partsupp } + │ │ └── LogicalScan { table: nation } + │ └── LogicalScan { table: region } + └── LogicalJoin { join_type: Cross, cond: true } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalJoin { join_type: Cross, cond: true } + │ │ ├── LogicalScan { table: partsupp } + │ │ └── LogicalScan { table: supplier } + │ └── LogicalScan { table: nation } + └── LogicalScan { table: region } PhysicalLimit { skip: 0(u64), fetch: 100(u64) } └── PhysicalSort ├── exprs: @@ -223,7 +240,7 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) } │ └── SortOrder { order: Asc } │ └── #3 └── PhysicalProjection { exprs: [ #21, #17, #4, #7, #9, #18, #20, #22 ] } - └── PhysicalHashJoin { join_type: Inner, left_keys: [ #26, #7 ], right_keys: [ #1, #0 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #26, #7 ], right_keys: [ #2, #0 ] } ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #7, #16 ], right_keys: [ #0, #1 ] } │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #12 ] } │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #2 ] } @@ -243,44 +260,60 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) } │ │ │ └── PhysicalScan { table: part } │ │ └── PhysicalScan { table: supplier } │ └── PhysicalScan { table: partsupp } - └── PhysicalAgg - ├── aggrs:Agg(Min) - │ └── [ #4 ] - ├── groups: [ #0 ] - └── PhysicalFilter - ├── cond:And - │ ├── Eq - │ │ ├── #0 - │ │ └── #1 - │ ├── Eq - │ │ ├── #6 - │ │ └── #2 - │ ├── Eq - │ │ ├── #9 - │ │ └── #13 - │ ├── Eq - │ │ ├── #15 - │ │ └── #17 - │ └── Eq - │ ├── #18 - │ └── "AFRICA" - └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } - ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } - │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ │ │ ├── PhysicalScan { table: part } - │ │ │ │ └── PhysicalScan { table: supplier } - │ │ │ └── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: nation } - │ └── PhysicalScan { table: region } - └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } - │ │ ├── PhysicalScan { table: partsupp } - │ │ └── PhysicalScan { table: supplier } - │ └── PhysicalScan { table: nation } - └── PhysicalScan { table: region } + └── PhysicalNestedLoopJoin + ├── join_type: LeftOuter + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #1 + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ │ ├── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: region } + └── PhysicalAgg + ├── aggrs:Agg(Min) + │ └── [ #4 ] + ├── groups: [ #0 ] + └── PhysicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── #1 + │ ├── Eq + │ │ ├── #6 + │ │ └── #2 + │ ├── Eq + │ │ ├── #9 + │ │ └── #13 + │ ├── Eq + │ │ ├── #15 + │ │ └── #17 + │ └── Eq + │ ├── #18 + │ └── "AFRICA" + └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ │ │ ├── PhysicalScan { table: part } + │ │ │ │ └── PhysicalScan { table: supplier } + │ │ │ └── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: nation } + │ └── PhysicalScan { table: region } + └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ │ ├── PhysicalScan { table: partsupp } + │ │ └── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: nation } + └── PhysicalScan { table: region } */