Skip to content

Commit

Permalink
perf: Skip filter expansion in eager (#20586)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Jan 8, 2025
1 parent 785bb1e commit 92fd75d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 10 deletions.
16 changes: 6 additions & 10 deletions crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,11 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult
let predicate_ae = to_expr_ir(predicate.clone(), ctxt.expr_arena)?;

// TODO: We could do better here by using `pushdown_eligibility()`
return if permits_filter_pushdown_rec(
ctxt.expr_arena.get(predicate_ae.node()),
ctxt.expr_arena,
) {
return if ctxt.opt_flags.predicate_pushdown()
&& permits_filter_pushdown_rec(
ctxt.expr_arena.get(predicate_ae.node()),
ctxt.expr_arena,
) {
// Split expression that are ANDed into multiple Filter nodes as the optimizer can then
// push them down independently. Especially if they refer columns from different tables
// this will be more performant.
Expand All @@ -429,6 +430,7 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult
// filter [foo == bar]
// filter [ham == spam]
let mut predicates = vec![];

let mut stack = vec![predicate_ae.node()];
while let Some(n) = stack.pop() {
if let AExpr::BinaryExpr {
Expand All @@ -443,7 +445,6 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult
predicates.push(n)
}
}
let multiple_filters = predicates.len() > 1;

for predicate in predicates {
let predicate = ExprIR::from_node(predicate, ctxt.expr_arena);
Expand All @@ -453,11 +454,6 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult
input = run_conversion(lp, ctxt, "filter")?;
}

// Ensure that predicate are combined by optimizer
if ctxt.opt_flags.eager() && multiple_filters {
ctxt.opt_flags.insert(OptFlags::EAGER);
}

Ok(input)
} else {
ctxt.conversion_optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ impl PredicatePushDown<'_> {

match lp {
Filter {
// Note: We assume AND'ed predicates have already been split to separate IR filter
// nodes during DSL conversion so we don't do that here.
ref predicate,
input,
} => {
Expand Down
11 changes: 11 additions & 0 deletions py-polars/tests/unit/test_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,3 +638,14 @@ def test_predicate_pushdown_scalar_20489() -> None:
df.lazy().with_columns(b=pl.Series([2])).filter(mask).collect(),
pl.DataFrame(schema={"a": pl.Int64, "b": pl.Int64}),
)


def test_predicates_not_split_when_pushdown_disabled_20475() -> None:
# This is important for the eager `DataFrame.filter()`, as that runs without
# predicate pushdown enabled. Splitting the predicates in that case can
# severely degrade performance.
q = pl.LazyFrame({"a": 1, "b": 1, "c": 1}).filter(
pl.col("a") > 0, pl.col("b") > 0, pl.col("c") > 0
)

assert q.explain(predicate_pushdown=False).count("FILTER") == 1

0 comments on commit 92fd75d

Please sign in to comment.