diff --git a/src/evidently/tests/data_integrity_tests.py b/src/evidently/tests/data_integrity_tests.py index 1f1cf5ce8a..55992ccad9 100644 --- a/src/evidently/tests/data_integrity_tests.py +++ b/src/evidently/tests/data_integrity_tests.py @@ -548,14 +548,19 @@ def get_condition(self) -> TestValueCondition: reference_null_values = self.metric.get_result().reference_null_values if reference_null_values is not None: - ref_value = len(reference_null_values.different_nulls_by_column[self.column_name]) + if self.column_name not in reference_null_values.number_of_different_nulls_by_column: + raise ValueError( + f"Cannot define test default conditions: no column '{self.column_name}' in reference dataset." + ) + + ref_value = reference_null_values.number_of_different_nulls_by_column[self.column_name] return TestValueCondition(lte=ref_value) raise ValueError("Neither required test parameters nor reference data has been provided.") def calculate_value_for_test(self) -> Numeric: metric_data = self.metric.get_result().current_null_values - return len(metric_data.different_nulls_by_column[self.column_name]) + return metric_data.number_of_different_nulls_by_column[self.column_name] def get_description(self, value: Numeric) -> str: return ( diff --git a/tests/tests/test_data_integrity_tests.py b/tests/tests/test_data_integrity_tests.py index 05c3f595f5..5eae8927ac 100644 --- a/tests/tests/test_data_integrity_tests.py +++ b/tests/tests/test_data_integrity_tests.py @@ -549,6 +549,34 @@ def test_data_integrity_test_different_nulls_one_column() -> None: assert suite +def test_data_integrity_test_different_nulls_one_column_no_nulls() -> None: + test_dataset = pd.DataFrame({"feature1": [1, 2, 3], "feature2": ["b", "", None]}) + + suite = TestSuite( + tests=[ + TestColumnNumberOfDifferentNulls(column_name="feature1", null_values=["null", "n/a"], replace=False, eq=0) + ] + ) + suite.run(current_data=test_dataset, reference_data=test_dataset, column_mapping=ColumnMapping()) + assert suite + + +def test_data_integrity_test_different_nulls_one_column_with_defaults() -> None: + test_dataset = pd.DataFrame({"feature": ["null", "", None]}) + reference_dataset = pd.DataFrame({"feature": ["n/a", "test", None]}) + data_mapping = ColumnMapping() + + suite = TestSuite( + tests=[TestColumnNumberOfDifferentNulls(column_name="feature", null_values=["null", "n/a"], replace=False)] + ) + suite.run(current_data=test_dataset, reference_data=reference_dataset, column_mapping=data_mapping) + assert not suite, suite.json() + + another_test_dataset = pd.DataFrame({"feature": ["null", "test", None]}) + suite.run(current_data=another_test_dataset, reference_data=reference_dataset, column_mapping=data_mapping) + assert suite, suite.json() + + def test_data_integrity_test_number_of_nulls_one_column() -> None: test_dataset = pd.DataFrame({"feature1": ["", None, "null", "a"], "feature2": ["b", "null", None, None]})