Skip to content

Commit

Permalink
fix TestColumnNumberOfDifferentNulls (#312)
Browse files Browse the repository at this point in the history
* fix TestColumnNumberOfDifferentNulls

* style fix

* small unit test update

* fix defaults for TestColumnNumberOfDifferentNulls

* add unittests for TestColumnNumberOfDifferentNulls + style fixes with black
  • Loading branch information
Tapot authored Sep 6, 2022
1 parent dfad62f commit 69e98d0
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/evidently/tests/data_integrity_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,14 +548,19 @@ def get_condition(self) -> TestValueCondition:
reference_null_values = self.metric.get_result().reference_null_values

if reference_null_values is not None:
ref_value = len(reference_null_values.different_nulls_by_column[self.column_name])
if self.column_name not in reference_null_values.number_of_different_nulls_by_column:
raise ValueError(
f"Cannot define test default conditions: no column '{self.column_name}' in reference dataset."
)

ref_value = reference_null_values.number_of_different_nulls_by_column[self.column_name]
return TestValueCondition(lte=ref_value)

raise ValueError("Neither required test parameters nor reference data has been provided.")

def calculate_value_for_test(self) -> Numeric:
# Reads `current_null_values` from the metric result and returns the entry stored
# for `self.column_name`.
# NOTE(review): this span is a diff rendering whose +/- markers were stripped, so two
# `return` lines appear back to back. Going by the commit summary shown on this page
# (7 additions & 2 deletions), the `len(...different_nulls_by_column...)` line looks like
# the removed pre-commit version and the `number_of_different_nulls_by_column` line its
# replacement — confirm against the actual file.
metric_data = self.metric.get_result().current_null_values
return len(metric_data.different_nulls_by_column[self.column_name])
return metric_data.number_of_different_nulls_by_column[self.column_name]

def get_description(self, value: Numeric) -> str:
return (
Expand Down
28 changes: 28 additions & 0 deletions tests/tests/test_data_integrity_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,34 @@ def test_data_integrity_test_different_nulls_one_column() -> None:
assert suite


def test_data_integrity_test_different_nulls_one_column_no_nulls() -> None:
    """Check that the test passes with ``eq=0`` for a column holding only 1, 2, 3.

    The same frame is used as both current and reference data.
    """
    frame = pd.DataFrame({"feature1": [1, 2, 3], "feature2": ["b", "", None]})

    # Only "feature1" is inspected; "feature2" is present in the frame but not tested.
    nulls_check = TestColumnNumberOfDifferentNulls(
        column_name="feature1",
        null_values=["null", "n/a"],
        replace=False,
        eq=0,
    )
    suite = TestSuite(tests=[nulls_check])
    suite.run(current_data=frame, reference_data=frame, column_mapping=ColumnMapping())
    assert suite


def test_data_integrity_test_different_nulls_one_column_with_defaults() -> None:
    """Check the test's default condition, i.e. when no explicit condition is passed.

    The same suite instance is run twice against one reference frame: the first
    current frame is expected to fail the suite, the second to pass it.
    """
    reference = pd.DataFrame({"feature": ["n/a", "test", None]})
    mapping = ColumnMapping()

    # No eq/lte/... argument is given, so the test has to derive its own condition.
    nulls_check = TestColumnNumberOfDifferentNulls(
        column_name="feature",
        null_values=["null", "n/a"],
        replace=False,
    )
    suite = TestSuite(tests=[nulls_check])

    # First run: expected to fail (presumably more distinct null kinds than the reference).
    failing_current = pd.DataFrame({"feature": ["null", "", None]})
    suite.run(current_data=failing_current, reference_data=reference, column_mapping=mapping)
    assert not suite, suite.json()

    # Second run on the same suite object: expected to pass.
    passing_current = pd.DataFrame({"feature": ["null", "test", None]})
    suite.run(current_data=passing_current, reference_data=reference, column_mapping=mapping)
    assert suite, suite.json()


def test_data_integrity_test_number_of_nulls_one_column() -> None:
test_dataset = pd.DataFrame({"feature1": ["", None, "null", "a"], "feature2": ["b", "null", None, None]})

Expand Down

0 comments on commit 69e98d0

Please sign in to comment.