-
Notifications
You must be signed in to change notification settings - Fork 3.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Pinot Connector] Support null predicate push down for Pinot connector #15406
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,7 +38,6 @@ | |
import java.util.OptionalLong; | ||
|
||
import static com.google.common.base.Preconditions.checkState; | ||
import static com.google.common.base.Verify.verify; | ||
import static com.google.common.collect.ImmutableList.toImmutableList; | ||
import static com.google.common.collect.Iterables.getOnlyElement; | ||
import static java.lang.Float.intBitsToFloat; | ||
|
@@ -131,16 +130,11 @@ private static Optional<String> toPredicate(PinotColumnHandle pinotColumnHandle, | |
String predicateArgument = pinotColumnHandle.isAggregate() ? pinotColumnHandle.getExpression() : quoteIdentifier(pinotColumnHandle.getColumnName()); | ||
ValueSet valueSet = domain.getValues(); | ||
if (valueSet.isNone()) { | ||
verify(!domain.isNullAllowed(), "IS NULL is not supported due to different null handling semantics. See https://docs.pinot.apache.org/developers/advanced/null-value-support"); | ||
if (domain.isNullAllowed()) { | ||
return Optional.of(format("%s IS NULL", predicateArgument)); | ||
} | ||
return Optional.of(format("(%s != %s)", predicateArgument, predicateArgument)); | ||
} | ||
if (valueSet.isAll()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. iirc this is related to apache/pinot#10600 and 601, since the connector supports 11 and 12.1 should this be put back until we upgrade to 1.0.0 (and remove support for all previous versions)? |
||
verify(domain.isNullAllowed(), "IS NOT NULL is not supported due to different null handling semantics. See https://docs.pinot.apache.org/developers/advanced/null-value-support"); | ||
// Pinot does not support "1 = 1" syntax: see https://github.com/apache/pinot/issues/10600 | ||
// As a workaround, skip adding always true to conjuncts | ||
return Optional.empty(); | ||
} | ||
verify(!domain.getValues().getRanges().getOrderedRanges().isEmpty() && !domain.isNullAllowed(), "IS NULL is not supported due to different null handling semantics. See https://docs.pinot.apache.org/developers/advanced/null-value-support"); | ||
List<String> disjuncts = new ArrayList<>(); | ||
List<Object> singleValues = new ArrayList<>(); | ||
boolean invertPredicate = false; | ||
|
@@ -152,23 +146,27 @@ private static Optional<String> toPredicate(PinotColumnHandle pinotColumnHandle, | |
} | ||
} | ||
for (Range range : valueSet.getRanges().getOrderedRanges()) { | ||
checkState(!range.isAll()); // Already checked | ||
if (range.isSingleValue()) { | ||
singleValues.add(convertValue(range.getType(), range.getSingleValue())); | ||
} | ||
else { | ||
List<String> rangeConjuncts = new ArrayList<>(); | ||
if (range.isAll()) { | ||
rangeConjuncts.add(format("%s IS NOT NULL", predicateArgument)); | ||
} | ||
if (!range.isLowUnbounded()) { | ||
rangeConjuncts.add(toConjunct(predicateArgument, range.isLowInclusive() ? ">=" : ">", convertValue(range.getType(), range.getLowBoundedValue()))); | ||
} | ||
if (!range.isHighUnbounded()) { | ||
rangeConjuncts.add(toConjunct(predicateArgument, range.isHighInclusive() ? "<=" : "<", convertValue(range.getType(), range.getHighBoundedValue()))); | ||
} | ||
// If rangeConjuncts is null, then the range was ALL, which is not supported in pql | ||
checkState(!rangeConjuncts.isEmpty()); | ||
disjuncts.add("(" + Joiner.on(" AND ").join(rangeConjuncts) + ")"); | ||
} | ||
} | ||
// No ranges means the predicate is a null check | ||
if (singleValues.isEmpty() && disjuncts.isEmpty()) { | ||
disjuncts.add(format("%s IS NULL", predicateArgument)); | ||
} | ||
// Add back all of the possible single values either as an equality or an IN predicate | ||
if (singleValues.size() == 1) { | ||
String operator = invertPredicate ? "!=" : "="; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1482,23 +1482,23 @@ public void testNullBehavior() | |
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE string_col IS NULL""")) | ||
.matches("VALUES (BIGINT '0')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
WHERE string_col IS NULL OR string_col = 'null'""")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since null handling is enabled for alltypes should this be |
||
.matches("VALUES (BIGINT '1')") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This table has 'null' string in string_col, so pinot will return 1 row after pushing down. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the table config be checked for null handling enabled? |
||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE string_col IS NOT NULL""")) | ||
.matches("VALUES (BIGINT '11')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
WHERE string_col IS NOT NULL AND string_col != 'null'""")) | ||
.matches("VALUES (BIGINT '10')") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would the behavior be different for a table with null handling disabled? |
||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE string_col = 'string_0' OR string_col IS NULL""")) | ||
.matches("VALUES (BIGINT '1')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
|
@@ -1512,7 +1512,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE string_col != 'string_0' OR string_col IS NULL""")) | ||
.matches("VALUES (BIGINT '10')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
|
@@ -1526,7 +1526,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE string_col NOT IN ('null', 'array_null') OR string_col IS NULL""")) | ||
.matches("VALUES (BIGINT '9')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// VARCHAR NOT IN is pushed down | ||
assertThat(query(""" | ||
|
@@ -1541,7 +1541,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE string_col IN ('null', 'array_null') OR string_col IS NULL""")) | ||
.matches("VALUES (BIGINT '2')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// VARCHAR IN is pushed down | ||
assertThat(query(""" | ||
|
@@ -1554,23 +1554,23 @@ WHERE string_col IN ('null', 'array_null')""")) | |
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE long_col IS NULL""")) | ||
.matches("VALUES (BIGINT '0')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
WHERE long_col IS NULL OR long_col = 0""")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, should this be tested: if null handling is disabled, shouldn't |
||
.matches("VALUES (BIGINT '2')") | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE long_col IS NOT NULL""")) | ||
.matches("VALUES (BIGINT '11')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
WHERE long_col IS NOT NULL AND long_col != 0""")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, maybe have an explicit test for null handling enabled vs disabled in a table w the same data, and check for MIN_VALUE and/or the default null value. lmk, can talk offline about it when you have some time. |
||
.matches("VALUES (BIGINT '9')") | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE long_col = -3147483645 OR long_col IS NULL""")) | ||
.matches("VALUES (BIGINT '1')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
|
@@ -1584,7 +1584,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE long_col != -3147483645 OR long_col IS NULL""")) | ||
.matches("VALUES (BIGINT '10')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
|
@@ -1606,7 +1606,7 @@ WHERE long_col NOT IN (-3147483645, -3147483646, -3147483647) OR long_col IS NUL | |
(BIGINT '-3147483639'), | ||
(BIGINT '0'), | ||
(BIGINT '0')""") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// BIGINT NOT IN is pushed down | ||
assertThat(query(""" | ||
|
@@ -1629,7 +1629,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE long_col IN (-3147483645, -3147483646, -3147483647) OR long_col IS NULL""")) | ||
.matches("VALUES (BIGINT '3')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// BIGINT IN is pushed down | ||
assertThat(query(""" | ||
|
@@ -1644,7 +1644,7 @@ WHERE long_col IN (-3147483645, -3147483646, -3147483647)""")) | |
FROM alltypes | ||
WHERE int_col NOT IN (0, 54, 56) OR int_col IS NULL""")) | ||
.matches("VALUES (55), (55), (55)") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// INTEGER NOT IN is pushed down | ||
assertThat(query(""" | ||
|
@@ -1659,7 +1659,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE int_col IN (0, 54, 56) OR int_col IS NULL""")) | ||
.matches("VALUES (BIGINT '8')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// INTEGER IN is pushed down | ||
assertThat(query(""" | ||
|
@@ -1674,7 +1674,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE bool_col OR bool_col IS NULL""")) | ||
.matches("VALUES (BIGINT '9')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
// BOOLEAN values are pushed down | ||
assertThat(query(""" | ||
|
@@ -1689,7 +1689,7 @@ SELECT COUNT(*) | |
FROM alltypes | ||
WHERE NOT bool_col OR bool_col IS NULL""")) | ||
.matches("VALUES (BIGINT '2')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
|
@@ -1709,10 +1709,11 @@ WHERE float_col NOT IN (-2.33, -3.33, -4.33, -5.33, -6.33, -7.33) OR float_col I | |
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
WHERE float_col NOT IN (-2.33, -3.33, -4.33, -5.33, -6.33, -7.33)""")) | ||
.matches("VALUES (BIGINT '5')") | ||
WHERE float_col NOT IN (0, -2.33, -3.33, -4.33, -5.33, -6.33, -7.33)""")) | ||
.matches("VALUES (BIGINT '3')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
|
||
// DOUBLE values are not pushed down, applyFilter is not called | ||
assertThat(query(""" | ||
SELECT COUNT(*) | ||
FROM alltypes | ||
|
@@ -1727,6 +1728,18 @@ SELECT COUNT(*) | |
WHERE double_col NOT IN (0.0, -16.33, -17.33)""")) | ||
.matches("VALUES (BIGINT '7')") | ||
.isNotFullyPushedDown(FilterNode.class); | ||
|
||
assertThat(query("SELECT price, vendor FROM " + JSON_TABLE + " WHERE price IS NOT NULL")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This table has null handling disabled, should it check for all values? and/or just use the count query below? |
||
.containsAll("VALUES (REAL '3.5', VARCHAR 'vendor1')") | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query("SELECT COUNT(*) FROM " + JSON_TABLE + " WHERE price IS NOT NULL")) | ||
.matches("VALUES (BIGINT '7')") | ||
.isFullyPushedDown(); | ||
|
||
assertThat(query("SELECT COUNT(*) FROM " + JSON_TABLE + " WHERE price IS NULL")) | ||
.matches("VALUES (BIGINT '0')") | ||
.isFullyPushedDown(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe do this for a copy of the table w null handling enabled? Or you can choose a simpler table ( |
||
} | ||
|
||
@Test | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be done only if the table has null handling enabled in the config? How would it know the difference between
'null', Long.MIN_VALUE
, etc. vs a null value? Is that handled in v1.0.0?