Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Pinot Connector] Support null predicate push down for Pinot connector #15406

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import io.trino.spi.expression.Variable;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.Type;
import org.apache.pinot.spi.data.Schema;
Expand Down Expand Up @@ -320,18 +319,9 @@ else if (isFilterPushdownUnsupported(entry.getValue())) {
return Optional.of(new ConstraintApplicationResult<>(handle, remainingFilter, constraint.getExpression(), false));
}

// IS NULL and IS NOT NULL are handled differently in Pinot, pushing down would lead to inconsistent results.
// See https://docs.pinot.apache.org/developers/advanced/null-value-support for more info.
private boolean isFilterPushdownUnsupported(Domain domain)
{
ValueSet valueSet = domain.getValues();
boolean isNotNull = valueSet.isAll() && !domain.isNullAllowed();
boolean isUnsupportedAlwaysFalse = domain.isNone() && !SUPPORTS_ALWAYS_FALSE.contains(domain.getType());
boolean isInOrNull = !valueSet.getRanges().getOrderedRanges().isEmpty() && domain.isNullAllowed();
return isNotNull ||
domain.isOnlyNull() ||
isUnsupportedAlwaysFalse ||
isInOrNull;
return domain.isNone() && !SUPPORTS_ALWAYS_FALSE.contains(domain.getType());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import java.util.OptionalLong;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.Iterables.getOnlyElement;
import static java.lang.Float.intBitsToFloat;
Expand Down Expand Up @@ -131,16 +130,11 @@ private static Optional<String> toPredicate(PinotColumnHandle pinotColumnHandle,
String predicateArgument = pinotColumnHandle.isAggregate() ? pinotColumnHandle.getExpression() : quoteIdentifier(pinotColumnHandle.getColumnName());
ValueSet valueSet = domain.getValues();
if (valueSet.isNone()) {
verify(!domain.isNullAllowed(), "IS NULL is not supported due to different null handling semantics. See https://docs.pinot.apache.org/developers/advanced/null-value-support");
if (domain.isNullAllowed()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be done only if the table has null handling enabled in the config? How would it know the difference between 'null', Long.MIN_VALUE, etc. vs a null value? Is that handled in v1.0.0?

return Optional.of(format("%s IS NULL", predicateArgument));
}
return Optional.of(format("(%s != %s)", predicateArgument, predicateArgument));
}
if (valueSet.isAll()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iirc this is related to apache/pinot#10600 and 601, since the connector supports 11 and 12.1 should this be put back until we upgrade to 1.0.0 (and remove support for all previous versions)?

verify(domain.isNullAllowed(), "IS NOT NULL is not supported due to different null handling semantics. See https://docs.pinot.apache.org/developers/advanced/null-value-support");
// Pinot does not support "1 = 1" syntax: see https://github.com/apache/pinot/issues/10600
// As a workaround, skip adding always true to conjuncts
return Optional.empty();
}
verify(!domain.getValues().getRanges().getOrderedRanges().isEmpty() && !domain.isNullAllowed(), "IS NULL is not supported due to different null handling semantics. See https://docs.pinot.apache.org/developers/advanced/null-value-support");
List<String> disjuncts = new ArrayList<>();
List<Object> singleValues = new ArrayList<>();
boolean invertPredicate = false;
Expand All @@ -152,23 +146,27 @@ private static Optional<String> toPredicate(PinotColumnHandle pinotColumnHandle,
}
}
for (Range range : valueSet.getRanges().getOrderedRanges()) {
checkState(!range.isAll()); // Already checked
if (range.isSingleValue()) {
singleValues.add(convertValue(range.getType(), range.getSingleValue()));
}
else {
List<String> rangeConjuncts = new ArrayList<>();
if (range.isAll()) {
rangeConjuncts.add(format("%s IS NOT NULL", predicateArgument));
}
if (!range.isLowUnbounded()) {
rangeConjuncts.add(toConjunct(predicateArgument, range.isLowInclusive() ? ">=" : ">", convertValue(range.getType(), range.getLowBoundedValue())));
}
if (!range.isHighUnbounded()) {
rangeConjuncts.add(toConjunct(predicateArgument, range.isHighInclusive() ? "<=" : "<", convertValue(range.getType(), range.getHighBoundedValue())));
}
// If rangeConjuncts is null, then the range was ALL, which is not supported in pql
checkState(!rangeConjuncts.isEmpty());
disjuncts.add("(" + Joiner.on(" AND ").join(rangeConjuncts) + ")");
}
}
// No ranges means the predicate is a null check
if (singleValues.isEmpty() && disjuncts.isEmpty()) {
disjuncts.add(format("%s IS NULL", predicateArgument));
}
// Add back all of the possible single values either as an equality or an IN predicate
if (singleValues.size() == 1) {
String operator = invertPredicate ? "!=" : "=";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1482,23 +1482,23 @@ public void testNullBehavior()
assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE string_col IS NULL"""))
.matches("VALUES (BIGINT '0')")
.isNotFullyPushedDown(FilterNode.class);
WHERE string_col IS NULL OR string_col = 'null'"""))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since null handling is enabled for alltypes should this be AND string_col = 'null' to verify null handling? i.e. this would have the same result as if the table had null handling disabled.

.matches("VALUES (BIGINT '1')")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This table has 'null' string in string_col, so pinot will return 1 row after pushing down.

Copy link
Member

@elonazoulay elonazoulay Jan 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the table config be checked for null handling enabled?

.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE string_col IS NOT NULL"""))
.matches("VALUES (BIGINT '11')")
.isNotFullyPushedDown(FilterNode.class);
WHERE string_col IS NOT NULL AND string_col != 'null'"""))
.matches("VALUES (BIGINT '10')")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would the behavior be different for a table with null handling disabled?

.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE string_col = 'string_0' OR string_col IS NULL"""))
.matches("VALUES (BIGINT '1')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
Expand All @@ -1512,7 +1512,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE string_col != 'string_0' OR string_col IS NULL"""))
.matches("VALUES (BIGINT '10')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
Expand All @@ -1526,7 +1526,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE string_col NOT IN ('null', 'array_null') OR string_col IS NULL"""))
.matches("VALUES (BIGINT '9')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// VARCHAR NOT IN is pushed down
assertThat(query("""
Expand All @@ -1541,7 +1541,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE string_col IN ('null', 'array_null') OR string_col IS NULL"""))
.matches("VALUES (BIGINT '2')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// VARCHAR IN is pushed down
assertThat(query("""
Expand All @@ -1554,23 +1554,23 @@ WHERE string_col IN ('null', 'array_null')"""))
assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE long_col IS NULL"""))
.matches("VALUES (BIGINT '0')")
.isNotFullyPushedDown(FilterNode.class);
WHERE long_col IS NULL OR long_col = 0"""))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, should this be tested: if null handling is disabled, shouldn't IS NULL be always false, and if enabled it would be min value?

.matches("VALUES (BIGINT '2')")
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE long_col IS NOT NULL"""))
.matches("VALUES (BIGINT '11')")
.isNotFullyPushedDown(FilterNode.class);
WHERE long_col IS NOT NULL AND long_col != 0"""))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, maybe have an explicit test for null handling enabled vs disabled in a table w the same data, and check for MIN_VALUE and/or the default null value. lmk, can talk offline about it when you have some time.

.matches("VALUES (BIGINT '9')")
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE long_col = -3147483645 OR long_col IS NULL"""))
.matches("VALUES (BIGINT '1')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
Expand All @@ -1584,7 +1584,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE long_col != -3147483645 OR long_col IS NULL"""))
.matches("VALUES (BIGINT '10')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
Expand All @@ -1606,7 +1606,7 @@ WHERE long_col NOT IN (-3147483645, -3147483646, -3147483647) OR long_col IS NUL
(BIGINT '-3147483639'),
(BIGINT '0'),
(BIGINT '0')""")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// BIGINT NOT IN is pushed down
assertThat(query("""
Expand All @@ -1629,7 +1629,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE long_col IN (-3147483645, -3147483646, -3147483647) OR long_col IS NULL"""))
.matches("VALUES (BIGINT '3')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// BIGINT IN is pushed down
assertThat(query("""
Expand All @@ -1644,7 +1644,7 @@ WHERE long_col IN (-3147483645, -3147483646, -3147483647)"""))
FROM alltypes
WHERE int_col NOT IN (0, 54, 56) OR int_col IS NULL"""))
.matches("VALUES (55), (55), (55)")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// INTEGER NOT IN is pushed down
assertThat(query("""
Expand All @@ -1659,7 +1659,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE int_col IN (0, 54, 56) OR int_col IS NULL"""))
.matches("VALUES (BIGINT '8')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// INTEGER IN is pushed down
assertThat(query("""
Expand All @@ -1674,7 +1674,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE bool_col OR bool_col IS NULL"""))
.matches("VALUES (BIGINT '9')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

// BOOLEAN values are pushed down
assertThat(query("""
Expand All @@ -1689,7 +1689,7 @@ SELECT COUNT(*)
FROM alltypes
WHERE NOT bool_col OR bool_col IS NULL"""))
.matches("VALUES (BIGINT '2')")
.isNotFullyPushedDown(FilterNode.class);
.isFullyPushedDown();

assertThat(query("""
SELECT COUNT(*)
Expand All @@ -1709,10 +1709,11 @@ WHERE float_col NOT IN (-2.33, -3.33, -4.33, -5.33, -6.33, -7.33) OR float_col I
assertThat(query("""
SELECT COUNT(*)
FROM alltypes
WHERE float_col NOT IN (-2.33, -3.33, -4.33, -5.33, -6.33, -7.33)"""))
.matches("VALUES (BIGINT '5')")
WHERE float_col NOT IN (0, -2.33, -3.33, -4.33, -5.33, -6.33, -7.33)"""))
.matches("VALUES (BIGINT '3')")
.isNotFullyPushedDown(FilterNode.class);

// DOUBLE values are not pushed down, applyFilter is not called
assertThat(query("""
SELECT COUNT(*)
FROM alltypes
Expand All @@ -1727,6 +1728,18 @@ SELECT COUNT(*)
WHERE double_col NOT IN (0.0, -16.33, -17.33)"""))
.matches("VALUES (BIGINT '7')")
.isNotFullyPushedDown(FilterNode.class);

assertThat(query("SELECT price, vendor FROM " + JSON_TABLE + " WHERE price IS NOT NULL"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This table has null handling disabled, should it check for all values? and/or just use the count query below?

.containsAll("VALUES (REAL '3.5', VARCHAR 'vendor1')")
.isFullyPushedDown();

assertThat(query("SELECT COUNT(*) FROM " + JSON_TABLE + " WHERE price IS NOT NULL"))
.matches("VALUES (BIGINT '7')")
.isFullyPushedDown();

assertThat(query("SELECT COUNT(*) FROM " + JSON_TABLE + " WHERE price IS NULL"))
.matches("VALUES (BIGINT '0')")
.isFullyPushedDown();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe do this for a copy of the table w null handling enabled? Or you can choose a simpler table (hybrid is pretty simple).
Also: what is the behavior of a hybrid table where null handling is enabled for realtime but disabled for offline or vice versa?

}

@Test
Expand Down
Loading