From a9fcdae94fb2fe1cb2231612de8d3839a253b32d Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Mon, 22 Jan 2024 18:00:14 +0100 Subject: [PATCH] Handle nested projection with derived column optimization --- .../optimizer/src/optimize_projections.rs | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs index ab0cb0a26551..bd7c0412c3ec 100644 --- a/datafusion/optimizer/src/optimize_projections.rs +++ b/datafusion/optimizer/src/optimize_projections.rs @@ -868,7 +868,20 @@ fn rewrite_projection_given_requirements( optimize_projections(&proj.input, config, &required_indices)? { if &projection_schema(&input, &exprs_used)? == input.schema() { - Ok(Some(input)) + if let LogicalPlan::Projection(_) = input { + if exprs_used != input.expressions() { + // Even if the schemas are the same, when the outer Projection + // plan has different expressions than the inner one it should + // remain in place since some non-trivial expression could be + // aliased with an existing field name. + Projection::try_new(exprs_used, Arc::new(input)) + .map(|proj| Some(LogicalPlan::Projection(proj))) + } else { + Ok(Some(input)) + } + } else { + Ok(Some(input)) + } } else { Projection::try_new(exprs_used, Arc::new(input)) .map(|proj| Some(LogicalPlan::Projection(proj))) @@ -899,7 +912,7 @@ mod tests { use datafusion_common::{Result, TableReference}; use datafusion_expr::{ binary_expr, col, count, lit, logical_plan::builder::LogicalPlanBuilder, not, - table_scan, try_cast, Expr, Like, LogicalPlan, Operator, + table_scan, try_cast, when, Expr, Like, LogicalPlan, Operator, }; fn assert_optimized_plan_equal(plan: &LogicalPlan, expected: &str) -> Result<()> { @@ -959,6 +972,27 @@ mod tests { assert_optimized_plan_equal(&plan, expected) } + // Test outer projection isn't discarded despite the same schema as inner + // https://github.com/apache/arrow-datafusion/issues/8942 + #[test] + fn test_derived_column() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), lit(0).alias("d")])? + .project(vec![ + col("a"), + when(col("a").eq(lit(1)), lit(10)) + .otherwise(col("d"))? + .alias("d"), + ])? + .build()?; + + let expected = "Projection: test.a, CASE WHEN test.a = Int32(1) THEN Int32(10) ELSE d END AS d\ + \n Projection: test.a, Int32(0) AS d\ + \n TableScan: test projection=[a]"; + assert_optimized_plan_equal(&plan, expected) + } + #[test] fn test_nested_count() -> Result<()> { let schema = Schema::new(vec![Field::new("foo", DataType::Int32, false)]);