Skip to content

Commit

Permalink
[UT][BugFix] fix PullUpScanPredicateRule (#53740)
Browse files Browse the repository at this point in the history
Signed-off-by: silverbullet233 <[email protected]>
  • Loading branch information
silverbullet233 committed Jan 7, 2025
1 parent 8c3c1df commit f385b3d
Show file tree
Hide file tree
Showing 6 changed files with 610 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,9 @@ private OptExpression logicalRuleRewrite(
ruleRewriteOnlyOnce(tree, rootTaskContext, RuleSetType.VECTOR_REWRITE);
// this rule should be after mv
// @TODO: it can also be applied to other table scan operator
ruleRewriteOnlyOnce(tree, rootTaskContext, PullUpScanPredicateRule.OLAP_SCAN);
if (context.getSessionVariable().isEnableScanPredicateExprReuse()) {
ruleRewriteOnlyOnce(tree, rootTaskContext, PullUpScanPredicateRule.OLAP_SCAN);
}

tree = SimplifyCaseWhenPredicateRule.INSTANCE.rewrite(tree, rootTaskContext);
deriveLogicalProperty(tree);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ private class CanFullyPushDownVisitorContext {
// currently, we only allow single-column predicates that not contain lambda expressions to be pushed down.
private class CanFullyPushDownVisitor extends ScalarOperatorVisitor<Boolean, CanFullyPushDownVisitorContext> {
private final Map<ColumnRefOperator, Column> columnRefOperatorColumnMap;

public CanFullyPushDownVisitor(Map<ColumnRefOperator, Column> columnRefOperatorColumnMap) {
this.columnRefOperatorColumnMap = columnRefOperatorColumnMap;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,25 @@
import com.google.common.collect.Lists;
import com.starrocks.sql.optimizer.OptExpression;
import com.starrocks.sql.optimizer.OptimizerContext;
import com.starrocks.sql.optimizer.base.ColumnRefFactory;
import com.starrocks.sql.optimizer.base.ColumnRefSet;
import com.starrocks.sql.optimizer.operator.Operator;
import com.starrocks.sql.optimizer.operator.OperatorBuilderFactory;
import com.starrocks.sql.optimizer.operator.OperatorType;
import com.starrocks.sql.optimizer.operator.Projection;
import com.starrocks.sql.optimizer.operator.logical.LogicalFilterOperator;
import com.starrocks.sql.optimizer.operator.logical.LogicalScanOperator;
import com.starrocks.sql.optimizer.operator.pattern.Pattern;
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
import com.starrocks.sql.optimizer.operator.scalar.CollectionElementOperator;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.operator.scalar.LambdaFunctionOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperatorVisitor;
import com.starrocks.sql.optimizer.operator.scalar.SubfieldOperator;
import com.starrocks.sql.optimizer.rewrite.TableScanPredicateExtractor;
import com.starrocks.sql.optimizer.rule.RuleType;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.PruneSubfieldRule;

import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -79,6 +87,12 @@ public List<OptExpression> transform(OptExpression input, OptimizerContext conte
return Lists.newArrayList();
}

Operator.Builder builder = OperatorBuilderFactory.build(logicalScanOperator);
LogicalScanOperator newScanOperator = (LogicalScanOperator) builder.withOperator(logicalScanOperator)
.setPredicate(pushedPredicates).build();
// these columns must keep in the final projections
List<ColumnRefOperator> outputColumns = newScanOperator.getOutputColumns();

// After applying some rules, the ScanOperator may generate some projection columns, e.g. `RemoveAggregationFromAggTableRule`
// In order to reuse existing columns as much as possible,
// we need to replace the expressions in the predicate with the column that appears in the projection.
Expand All @@ -89,17 +103,33 @@ public List<OptExpression> transform(OptExpression input, OptimizerContext conte
translatingMap.put(v, k);
}
});
if (!translatingMap.isEmpty()) {
reservedPredicates = replaceScalarOperator(reservedPredicates, translatingMap);
}

Map<ColumnRefOperator, ScalarOperator> newProjections = new HashMap();
// collect all expressions that can be used to prune subfield,
// we should replace these expressions in the predicate so that subfield pruning still works.
// for example, `cardinality(col1) + cardinality(col2) > 10`,
// we should make `cardinality(col1)` and `cardinality(col2)` appear in the scan projections
// instead of `col1` and `col2` so that we can have a correct column access path.
SubfieldCollector collector = new SubfieldCollector(context.getColumnRefFactory());
collector.visit(reservedPredicates, null);

collector.getExpressionMapping().forEach((k, v) -> {
if (!translatingMap.containsKey(k)) {
translatingMap.put(k, v);
newProjections.put(v, k);
}
});


if (!translatingMap.isEmpty()) {
reservedPredicates = replaceScalarOperator(reservedPredicates, translatingMap);
}

Operator.Builder builder = OperatorBuilderFactory.build(logicalScanOperator);
LogicalScanOperator newScanOperator = (LogicalScanOperator) builder.withOperator(logicalScanOperator)
.setPredicate(pushedPredicates).build();
ColumnRefSet predicateUsedColumnRefSet = reservedPredicates.getUsedColumns();
newScanOperator.buildColumnFilters(pushedPredicates);

List<ColumnRefOperator> predicateUsedColumns = reservedPredicates.getUsedColumns()
List<ColumnRefOperator> predicateUsedColumns = predicateUsedColumnRefSet
.getColumnRefOperators(context.getColumnRefFactory());
boolean allPredicatesColumnFromTableColumn = predicateUsedColumns.stream().allMatch(
columnRefOperator -> newScanOperator.getColRefToColumnMetaMap().containsKey(columnRefOperator));
Expand All @@ -115,22 +145,126 @@ public List<OptExpression> transform(OptExpression input, OptimizerContext conte
}
}

if (newScanOperator.getProjection() == null && allPredicatesColumnFromTableColumn) {
if (newScanOperator.getProjection() == null && newProjections.isEmpty() && allPredicatesColumnFromTableColumn) {
// if all predicate used columns are table columns and projection is null, we don't need to set projection too
} else {
if (newScanOperator.getProjection() == null) {
newScanOperator.setProjection(new Projection(new HashMap<>()));
}
scanProjectionMap = newScanOperator.getProjection().getColumnRefMap();
for (Map.Entry<ScalarOperator, ColumnRefOperator> entry : translatingMap.entrySet()) {
if (!scanProjectionMap.containsKey(entry.getValue())) {
scanProjectionMap.put(entry.getValue(), entry.getKey());
}
}
for (ColumnRefOperator columnRefOperator : predicateUsedColumns) {
if (!scanProjectionMap.containsKey(columnRefOperator)) {
scanProjectionMap.put(columnRefOperator, columnRefOperator);
}
}
for (ColumnRefOperator columnRefOperator : outputColumns) {
if (!scanProjectionMap.containsKey(columnRefOperator)) {
scanProjectionMap.put(columnRefOperator, columnRefOperator);
}
}

}

LogicalFilterOperator newFilterOperator = new LogicalFilterOperator(reservedPredicates);
if (newScanOperator.hasLimit()) {
newFilterOperator.setLimit(newScanOperator.getLimit());
newScanOperator.setLimit(Operator.DEFAULT_LIMIT);
}

OptExpression filter = OptExpression.create(newFilterOperator, OptExpression.create(newScanOperator));
return Lists.newArrayList(filter);
}

// collect all expressions that can be used to prune subfield and record them in expressionMap
private static class SubfieldCollector extends ScalarOperatorVisitor<Boolean, Void> {
private ColumnRefFactory columnRefFactory;
private Map<ScalarOperator, ColumnRefOperator> expressionMapping = new HashMap();
public SubfieldCollector(ColumnRefFactory columnRefFactory) {
this.columnRefFactory = columnRefFactory;
}
public Map<ScalarOperator, ColumnRefOperator> getExpressionMapping() {
return expressionMapping;
}

@Override
public Boolean visit(ScalarOperator scalarOperator, Void context) {
boolean result = true;
for (ScalarOperator child : scalarOperator.getChildren()) {
if (!child.accept(this, context)) {
result = false;
}
}
return result;
}

@Override
public Boolean visitVariableReference(ColumnRefOperator op, Void context) {
if (op.getType().isComplexType() || op.getType().isJsonType()) {
return true;
}
return false;
}

@Override
public Boolean visitCollectionElement(CollectionElementOperator op, Void context) {
if (op.getUsedColumns().isEmpty()) {
return false;
}
if (op.getChild(1).isConstant() && visit(op.getChild(0), context)) {
if (!expressionMapping.containsKey(op)) {
ColumnRefOperator columnRefOperator = columnRefFactory.create("element_at", op.getType(), op.isNullable());
expressionMapping.put(op, columnRefOperator);
}
return true;
}
return false;
}

@Override
public Boolean visitSubfield(SubfieldOperator op, Void context) {
if (op.getUsedColumns().isEmpty()) {
return false;
}
if (visit(op.getChild(0), context)) {
if (!expressionMapping.containsKey(op)) {
ColumnRefOperator columnRefOperator = columnRefFactory.create("subfield", op.getType(), op.isNullable());
expressionMapping.put(op, columnRefOperator);
}
return true;
}
return false;
}

@Override
public Boolean visitCall(CallOperator op, Void context) {
if (!PruneSubfieldRule.PRUNE_FUNCTIONS.contains(op.getFnName())) {
visit(op, context);
return false;
}
List<ScalarOperator> children = op.getChildren();
boolean allConstArgs = true;
for (int i = 1; i < children.size() && allConstArgs; i++) {
allConstArgs &= children.get(i).isConstant();
}
if (allConstArgs && visit(children.get(0), context)) {
if (!expressionMapping.containsKey(op)) {
ColumnRefOperator columnRefOperator =
columnRefFactory.create(op.getFnName(), op.getType(), op.isNullable());
expressionMapping.put(op, columnRefOperator);
}
return true;
}
return false;
}

@Override
public Boolean visitLambdaFunctionOperator(LambdaFunctionOperator op, Void context) {
return false;
}
}
}
Loading

0 comments on commit f385b3d

Please sign in to comment.