diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs index 2b75c86260fb..c6a9b2e32275 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir.rs @@ -417,10 +417,11 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult let predicate_ae = to_expr_ir(predicate.clone(), ctxt.expr_arena)?; // TODO: We could do better here by using `pushdown_eligibility()` - return if permits_filter_pushdown_rec( - ctxt.expr_arena.get(predicate_ae.node()), - ctxt.expr_arena, - ) { + return if ctxt.opt_flags.predicate_pushdown() + && permits_filter_pushdown_rec( + ctxt.expr_arena.get(predicate_ae.node()), + ctxt.expr_arena, + ) { // Split expression that are ANDed into multiple Filter nodes as the optimizer can then // push them down independently. Especially if they refer columns from different tables // this will be more performant. @@ -429,6 +430,7 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult // filter [foo == bar] // filter [ham == spam] let mut predicates = vec![]; + let mut stack = vec![predicate_ae.node()]; while let Some(n) = stack.pop() { if let AExpr::BinaryExpr { @@ -443,7 +445,6 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult predicates.push(n) } } - let multiple_filters = predicates.len() > 1; for predicate in predicates { let predicate = ExprIR::from_node(predicate, ctxt.expr_arena); @@ -453,11 +454,6 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult input = run_conversion(lp, ctxt, "filter")?; } - // Ensure that predicate are combined by optimizer - if ctxt.opt_flags.eager() && multiple_filters { - ctxt.opt_flags.insert(OptFlags::EAGER); - } - Ok(input) } else { ctxt.conversion_optimizer diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs index 0b769d97dd0a..40824c24ee06 100644 --- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs @@ -278,6 +278,8 @@ impl PredicatePushDown<'_> { match lp { Filter { + // Note: We assume AND'ed predicates have already been split to separate IR filter + // nodes during DSL conversion so we don't do that here. ref predicate, input, } => { diff --git a/py-polars/tests/unit/test_predicates.py b/py-polars/tests/unit/test_predicates.py index 6850c6cf8870..bb5f95e2f2a5 100644 --- a/py-polars/tests/unit/test_predicates.py +++ b/py-polars/tests/unit/test_predicates.py @@ -638,3 +638,14 @@ def test_predicate_pushdown_scalar_20489() -> None: df.lazy().with_columns(b=pl.Series([2])).filter(mask).collect(), pl.DataFrame(schema={"a": pl.Int64, "b": pl.Int64}), ) + + +def test_predicates_not_split_when_pushdown_disabled_20475() -> None: + # This is important for the eager `DataFrame.filter()`, as that runs without + # predicate pushdown enabled. Splitting the predicates in that case can + # severely degrade performance. + q = pl.LazyFrame({"a": 1, "b": 1, "c": 1}).filter( + pl.col("a") > 0, pl.col("b") > 0, pl.col("c") > 0 + ) + + assert q.explain(predicate_pushdown=False).count("FILTER") == 1