Skip to content

Commit

Permalink
fix: Ensure that SQL LIKE and ILIKE operators support multi-line …
Browse files Browse the repository at this point in the history
…matches (#20613)
  • Loading branch information
alexander-beedie authored Jan 9, 2025
1 parent 247f0b1 commit 09687e4
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 2 deletions.
2 changes: 1 addition & 1 deletion crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,7 +1355,7 @@ impl SQLContext {
.replace('%', ".*")
.replace('_', ".");

modifiers.ilike = Some(regex::Regex::new(format!("^(?i){}$", rx).as_str()).unwrap());
modifiers.ilike = Some(regex::Regex::new(format!("^(?is){}$", rx).as_str()).unwrap());
}

// SELECT * RENAME
Expand Down
6 changes: 5 additions & 1 deletion crates/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,11 @@ impl SQLExprVisitor<'_> {
.replace('%', ".*")
.replace('_', ".");

rx = format!("^{}{}$", if case_insensitive { "(?i)" } else { "" }, rx);
rx = format!(
"^{}{}$",
if case_insensitive { "(?is)" } else { "(?s)" },
rx
);

let expr = self.visit_expr(expr)?;
let matches = expr.str().contains(lit(rx), true);
Expand Down
26 changes: 26 additions & 0 deletions py-polars/tests/unit/sql/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,32 @@ def test_string_like(pattern: str, like: str, expected: list[int]) -> None:
assert res == expected


def test_string_like_multiline() -> None:
    """Check that SQL LIKE / ILIKE match values containing embedded newlines.

    Three fixture strings are used: one single-line value, one value split
    across two lines, and one two-line value with different casing. The
    wildcard patterns must match across the line break, LIKE must stay
    case-sensitive, and ILIKE must ignore case.
    """
    single_line = "Hello World"
    two_lines = "Hello\nWorld"
    two_lines_recased = "hello\nWORLD"

    df = pl.DataFrame(
        {
            "idx": [0, 1, 2],
            "txt": [single_line, two_lines, two_lines_recased],
        }
    )

    def matched_txt(query: str) -> list[str]:
        # Run the query against the frame and return the surviving "txt" values.
        return df.sql(query)["txt"].to_list()

    # Prefix patterns: the trailing '%' has to match the newline as well.
    prefix_like = matched_txt("SELECT * FROM self WHERE txt LIKE 'Hello%' ORDER BY idx")
    prefix_ilike = matched_txt("SELECT * FROM self WHERE txt ILIKE 'HELLO%' ORDER BY idx")

    assert prefix_like == [single_line, two_lines]
    assert prefix_ilike == [single_line, two_lines, two_lines_recased]

    # Suffix patterns: the leading '%' has to match everything before the
    # final line; the ILIKE pattern itself contains a literal newline.
    suffix_like = matched_txt("SELECT * FROM self WHERE txt LIKE '%WORLD' ORDER BY idx")
    suffix_ilike = matched_txt("SELECT * FROM self WHERE txt ILIKE '%\nWORLD' ORDER BY idx")

    assert suffix_like == [two_lines_recased]
    assert suffix_ilike == [two_lines, two_lines_recased]

    # Wildcard-free patterns: each value must select itself and nothing else
    # (``.item()`` only succeeds on a single-cell result).
    for expected in (single_line, two_lines, two_lines_recased):
        only_hit = df.sql(f"SELECT txt FROM self WHERE txt LIKE '{expected}'").item()
        assert only_hit == expected


def test_string_position() -> None:
df = pl.Series(
name="city",
Expand Down

0 comments on commit 09687e4

Please sign in to comment.