-
Notifications
You must be signed in to change notification settings - Fork 607
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* implement query validation logic * implement descriptor * implement test * fix typo at llm extras * Add sqlvalidator to requirements file * Moved sqlvalidator import into the is_valid_sql function * add ignore_missing_imports for sqlvalidator in setup.cfg * docs: add IsValidSQL() --------- Co-authored-by: Your Name <[email protected]> Co-authored-by: Sifr'un <[email protected]> Co-authored-by: Emeli Dral <[email protected]>
- Loading branch information
1 parent
a1318d1
commit 7c8fdfb
Showing
10 changed files
with
97 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from evidently.features import is_valid_sql_feature | ||
from evidently.features.generated_features import FeatureDescriptor | ||
from evidently.features.generated_features import GeneratedFeature | ||
|
||
|
||
class IsValidSQL(FeatureDescriptor):
    """Descriptor that produces the SQL-validity generated feature for a column."""

    class Config:
        type_alias = "evidently:descriptor:IsValidSQL"

    def feature(self, column_name: str) -> GeneratedFeature:
        """Build the ``is_valid_sql_feature.IsValidSQL`` feature for ``column_name``.

        The descriptor's own ``display_name`` is forwarded to the feature.
        """
        sql_feature = is_valid_sql_feature.IsValidSQL(
            column_name=column_name,
            display_name=self.display_name,
        )
        return sql_feature
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from typing import Any | ||
from typing import ClassVar | ||
from typing import Optional | ||
|
||
from evidently import ColumnType | ||
from evidently.features.generated_features import ApplyColumnGeneratedFeature | ||
|
||
|
||
def _split_sql_statements(query: str) -> list:
    """Split ``query`` into statements on semicolons that sit outside quotes.

    Single quotes, double quotes and backticks open a quoted region that ends
    at the next occurrence of the same character; a doubled quote (``''``)
    therefore closes and immediately reopens the region, which keeps the
    semicolon handling correct. Semicolons inside a quoted region are kept as
    part of the statement.

    Limitations: semicolons inside SQL comments or dollar-quoted strings are
    still treated as separators, and backslash-escaped quotes are not
    recognised.

    Returns the stripped, non-empty statements in their original order.
    """
    quote_chars = "'\"`"
    statements = []
    current = []
    open_quote = None

    for char in query:
        if open_quote is not None:
            # Inside a quoted region: only the matching quote can close it.
            if char == open_quote:
                open_quote = None
        elif char in quote_chars:
            open_quote = char
        elif char == ";":
            statements.append("".join(current).strip())
            current = []
            continue
        current.append(char)

    # Whatever follows the last separator (or an unterminated quote) is a
    # statement of its own and is left for the validator to judge.
    statements.append("".join(current).strip())
    return [statement for statement in statements if statement]


class IsValidSQL(ApplyColumnGeneratedFeature):
    """Generated feature flagging whether each value of a column is valid SQL.

    A value is reported as valid when it is a string that contains at least
    one statement and ``sqlvalidator.format_sql`` processes every statement
    without raising.
    """

    class Config:
        type_alias = "evidently:feature:IsValidSQL"

    __feature_type__: ClassVar = ColumnType.Categorical
    display_name_template: ClassVar = "SQL Validity Check for {column_name}"
    column_name: str

    def __init__(self, column_name: str, display_name: Optional[str] = None):
        """Store the source column name and optional display name."""
        self.column_name = column_name
        self.display_name = display_name
        super().__init__()

    def apply(self, value: Any):
        """Return the validity flag for a single cell value.

        Anything that is not a ``str`` (including ``None``) is reported as
        ``False``.
        """
        if not isinstance(value, str):
            return False

        return self.is_valid_sql(value)

    def is_valid_sql(self, query: str) -> bool:
        """Check every semicolon-separated statement in ``query``.

        Returns ``False`` when ``query`` holds no statement at all (empty,
        whitespace-only, or only separators) or when any statement makes
        ``sqlvalidator.format_sql`` raise; returns ``True`` otherwise.
        """
        # Imported lazily so the dependency is only needed when the feature
        # is actually used.
        import sqlvalidator

        statements = _split_sql_statements(query)
        if not statements:
            # Text without any statement is not a valid SQL query.
            return False

        for statement in statements:
            try:
                sqlvalidator.format_sql(statement)
            except Exception:
                # Any failure raised by the formatter marks the statement,
                # and therefore the whole value, as invalid.
                return False

        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import pandas as pd | ||
|
||
from evidently.features.is_valid_sql_feature import IsValidSQL | ||
from evidently.pipeline.column_mapping import ColumnMapping | ||
from evidently.utils.data_preprocessing import create_data_definition | ||
|
||
|
||
def test_is_valid_sql_feature():
    """IsValidSQL flags each query in the column as valid or invalid SQL."""
    # (query, expected validity) pairs, kept side by side for readability.
    cases = [
        ("SELECT * FROM users", True),  # simple query
        ("SELECT id, address FROM users; SELECT count(id) FROM users", True),  # multiple queries
        ("INSERT INTO table", False),  # incomplete query
        ("SLECT * FROM users", False),  # typo
        ("SLECT * FROM users; SELECT id, address FROM users", False),  # one invalid sub-query
    ]
    queries = [sql for sql, _ in cases]
    expected_flags = [is_valid for _, is_valid in cases]

    frame = pd.DataFrame({"column_1": queries})
    definition = create_data_definition(None, frame, ColumnMapping())

    generated = IsValidSQL("column_1").generate_feature(
        data=frame,
        data_definition=definition,
    )

    expected_frame = pd.DataFrame({"column_1": expected_flags})
    assert generated.equals(expected_frame)