Skip to content

Commit

Permalink
Add support for PG LIKE operators
Browse files Browse the repository at this point in the history
  • Loading branch information
gruuya committed Jan 17, 2024
1 parent ffaa679 commit 2500eae
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ parquet = { version = "50.0.0", default-features = false, features = ["arrow", "
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
sqlparser = { version = "0.41.0", features = ["visitor"] }
sqlparser = { git = "https://github.com/splitgraph/sqlparser-rs", branch = "pg-like-op-support", features = ["visitor"] }
tempfile = "3"
thiserror = "1.0.44"
url = "2.2"
Expand Down
6 changes: 2 additions & 4 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions datafusion/expr/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ pub enum Operator {
RegexNotMatch,
/// Case insensitive regex not match
RegexNotIMatch,
/// Case sensitive pattern match
LikeMatch,
/// Case insensitive pattern match
ILikeMatch,
/// Case sensitive pattern not match
NotLikeMatch,
/// Case insensitive pattern not match
NotILikeMatch,
/// Bitwise and, like `&`
BitwiseAnd,
/// Bitwise or, like `|`
Expand Down Expand Up @@ -100,6 +108,10 @@ impl Operator {
Operator::GtEq => Some(Operator::Lt),
Operator::IsDistinctFrom => Some(Operator::IsNotDistinctFrom),
Operator::IsNotDistinctFrom => Some(Operator::IsDistinctFrom),
Operator::LikeMatch => Some(Operator::NotLikeMatch),
Operator::ILikeMatch => Some(Operator::NotILikeMatch),
Operator::NotLikeMatch => Some(Operator::LikeMatch),
Operator::NotILikeMatch => Some(Operator::ILikeMatch),
Operator::Plus
| Operator::Minus
| Operator::Multiply
Expand Down Expand Up @@ -192,6 +204,10 @@ impl Operator {
| Operator::RegexIMatch
| Operator::RegexNotMatch
| Operator::RegexNotIMatch
| Operator::LikeMatch
| Operator::ILikeMatch
| Operator::NotLikeMatch
| Operator::NotILikeMatch
| Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseXor
Expand Down Expand Up @@ -221,6 +237,10 @@ impl Operator {
| Operator::RegexNotMatch
| Operator::RegexIMatch
| Operator::RegexNotIMatch
| Operator::LikeMatch
| Operator::ILikeMatch
| Operator::NotLikeMatch
| Operator::NotILikeMatch
| Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseShiftLeft
Expand Down Expand Up @@ -253,6 +273,10 @@ impl fmt::Display for Operator {
Operator::RegexIMatch => "~*",
Operator::RegexNotMatch => "!~",
Operator::RegexNotIMatch => "!~*",
Operator::LikeMatch => "~~",
Operator::ILikeMatch => "~~*",
Operator::NotLikeMatch => "!~~",
Operator::NotILikeMatch => "!~~*",
Operator::IsDistinctFrom => "IS DISTINCT FROM",
Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM",
Operator::BitwiseAnd => "&",
Expand Down
7 changes: 7 additions & 0 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature>
)
})
}
LikeMatch | ILikeMatch | NotLikeMatch | NotILikeMatch => {
regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
plan_datafusion_err!(
"Cannot infer common argument type for regex operation {lhs} {op} {rhs}"
)
})
}
BitwiseAnd | BitwiseOr | BitwiseXor | BitwiseShiftRight | BitwiseShiftLeft => {
bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
plan_datafusion_err!(
Expand Down
105 changes: 103 additions & 2 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::sort_properties::SortProperties;
use crate::PhysicalExpr;

use arrow::array::*;
use arrow::compute::cast;
use arrow::compute::{cast, like, ilike, nlike, nilike};
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
use arrow::compute::kernels::cmp::*;
use arrow::compute::kernels::comparison::regexp_is_match_utf8;
Expand Down Expand Up @@ -281,6 +281,10 @@ impl PhysicalExpr for BinaryExpr {
Operator::GtEq => return apply_cmp(&lhs, &rhs, gt_eq),
Operator::IsDistinctFrom => return apply_cmp(&lhs, &rhs, distinct),
Operator::IsNotDistinctFrom => return apply_cmp(&lhs, &rhs, not_distinct),
Operator::LikeMatch => return apply_cmp(&lhs, &rhs, like),
Operator::ILikeMatch => return apply_cmp(&lhs, &rhs, ilike),
Operator::NotLikeMatch => return apply_cmp(&lhs, &rhs, nlike),
Operator::NotILikeMatch => return apply_cmp(&lhs, &rhs, nilike),
_ => {}
}

Expand Down Expand Up @@ -554,7 +558,8 @@ impl BinaryExpr {
use Operator::*;
match &self.op {
IsDistinctFrom | IsNotDistinctFrom | Lt | LtEq | Gt | GtEq | Eq | NotEq
| Plus | Minus | Multiply | Divide | Modulo => unreachable!(),
| Plus | Minus | Multiply | Divide | Modulo | LikeMatch | ILikeMatch
| NotLikeMatch | NotILikeMatch => unreachable!(),
And => {
if left_data_type == &DataType::Boolean {
boolean_op!(&left, &right, and_kleene)
Expand Down Expand Up @@ -970,6 +975,102 @@ mod tests {
DataType::Boolean,
[false, false, false, false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["abc"; 5],
StringArray,
DataType::Utf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::LikeMatch,
BooleanArray,
DataType::Boolean,
[true, false, false, true, false],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["abc"; 5],
StringArray,
DataType::Utf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::ILikeMatch,
BooleanArray,
DataType::Boolean,
[true, true, false, true, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["abc"; 5],
StringArray,
DataType::Utf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::NotLikeMatch,
BooleanArray,
DataType::Boolean,
[false, true, true, false, true],
);
test_coercion!(
StringArray,
DataType::Utf8,
vec!["abc"; 5],
StringArray,
DataType::Utf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::NotILikeMatch,
BooleanArray,
DataType::Boolean,
[false, false, true, false, false],
);
test_coercion!(
LargeStringArray,
DataType::LargeUtf8,
vec!["abc"; 5],
LargeStringArray,
DataType::LargeUtf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::LikeMatch,
BooleanArray,
DataType::Boolean,
[true, false, false, true, false],
);
test_coercion!(
LargeStringArray,
DataType::LargeUtf8,
vec!["abc"; 5],
LargeStringArray,
DataType::LargeUtf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::ILikeMatch,
BooleanArray,
DataType::Boolean,
[true, true, false, true, true],
);
test_coercion!(
LargeStringArray,
DataType::LargeUtf8,
vec!["abc"; 5],
LargeStringArray,
DataType::LargeUtf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::NotLikeMatch,
BooleanArray,
DataType::Boolean,
[false, true, true, false, true],
);
test_coercion!(
LargeStringArray,
DataType::LargeUtf8,
vec!["abc"; 5],
LargeStringArray,
DataType::LargeUtf8,
vec!["a__", "A%BC", "A_BC", "abc", "a%C"],
Operator::NotILikeMatch,
BooleanArray,
DataType::Boolean,
[false, false, true, false, false],
);
test_coercion!(
Int16Array,
DataType::Int16,
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sql/src/expr/binary_op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch),
BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch),
BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch),
BinaryOperator::PGLikeMatch => Ok(Operator::LikeMatch),
BinaryOperator::PGILikeMatch => Ok(Operator::ILikeMatch),
BinaryOperator::PGNotLikeMatch => Ok(Operator::NotLikeMatch),
BinaryOperator::PGNotILikeMatch => Ok(Operator::NotILikeMatch),
BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd),
BinaryOperator::BitwiseOr => Ok(Operator::BitwiseOr),
BinaryOperator::BitwiseXor => Ok(Operator::BitwiseXor),
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::Int64
| SQLDataType::Float64
| SQLDataType::Struct(_)
| SQLDataType::JSONB
| SQLDataType::Unspecified
=> not_impl_err!(
"Unsupported SQL type {sql_type:?}"
),
Expand Down
3 changes: 3 additions & 0 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,10 +435,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
overwrite,
source,
partitioned,
priority: _,
after_columns,
table,
table_alias: _,
on,
returning,
replace_into: _,
ignore,
} => {
if or.is_some() {
Expand Down
24 changes: 24 additions & 0 deletions datafusion/sqllogictest/test_files/predicates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,30 @@ SELECT * FROM test WHERE column1 !~* 'z'
foo
Barrr

query T
SELECT * FROM test WHERE column1 ~~ '__z%'
----
Bazzz

query T
SELECT * FROM test WHERE column1 ~~* '__z%'
----
Bazzz
ZZZZZ

query T
SELECT * FROM test WHERE column1 !~~ '__z%'
----
foo
Barrr
ZZZZZ

query T
SELECT * FROM test WHERE column1 !~~* '__z%'
----
foo
Barrr

statement ok
DROP TABLE test;

Expand Down
4 changes: 4 additions & 0 deletions datafusion/substrait/src/logical_plan/producer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,10 @@ pub fn operator_to_name(op: Operator) -> &'static str {
Operator::RegexIMatch => "regex_imatch",
Operator::RegexNotMatch => "regex_not_match",
Operator::RegexNotIMatch => "regex_not_imatch",
Operator::LikeMatch => "like_match",
Operator::ILikeMatch => "like_imatch",
Operator::NotLikeMatch => "like_not_match",
Operator::NotILikeMatch => "like_not_imatch",
Operator::BitwiseAnd => "bitwise_and",
Operator::BitwiseOr => "bitwise_or",
Operator::StringConcat => "str_concat",
Expand Down
52 changes: 52 additions & 0 deletions docs/source/user-guide/sql/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,58 @@ Not Regex Case-Insensitive Match
+---------------------------------------------------+
```

### `~~`

Like Match

```sql
SELECT 'datafusion' ~~ 'dat_f%n';
+---------------------------------------+
| Utf8("datafusion") ~~ Utf8("dat_f%n") |
+---------------------------------------+
| true |
+---------------------------------------+
```

### `~~*`

Case-Insensitive Like Match

```sql
SELECT 'datafusion' ~~* 'Dat_F%n';
+----------------------------------------+
| Utf8("datafusion") ~~* Utf8("Dat_F%n") |
+----------------------------------------+
| true |
+----------------------------------------+
```

### `!~~`

Not Like Match

```sql
SELECT 'datafusion' !~~ 'Dat_F%n';
+----------------------------------------+
| Utf8("datafusion") !~~ Utf8("Dat_F%n") |
+----------------------------------------+
| true |
+----------------------------------------+
```

### `!~~*`

Not Case-Insensitive Like Match

```sql
SELECT 'datafusion' !~~* 'Dat%F_n';
+-----------------------------------------+
| Utf8("datafusion") !~~* Utf8("Dat%F_n") |
+-----------------------------------------+
| true |
+-----------------------------------------+
```

## Logical Operators

- [AND](#and)
Expand Down

0 comments on commit 2500eae

Please sign in to comment.