Add integration tests to code coverage report (#2268)
amontanez24 authored Oct 24, 2024
1 parent 7902fc9 commit 72153a5
Showing 23 changed files with 44 additions and 50 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/integration.yml
@@ -39,3 +39,10 @@ jobs:
run: |
invoke integration
invoke benchmark-dtypes
- if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.9
name: Upload integration codecov report
uses: codecov/codecov-action@v4
with:
flags: integration
file: './integration_cov.xml'
token: ${{ secrets.CODECOV_TOKEN }}
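As a hedged aside (not part of the diff): the uploaded file is the Cobertura-style XML that pytest-cov writes, so it can be sanity-checked locally before relying on the Codecov flag. A minimal Python sketch, assuming ./integration_cov.xml has already been generated by the integration task in tasks.py below:

# Sketch only: inspect the integration coverage report the workflow uploads.
# Assumes `invoke integration` (see tasks.py below) has written the file.
import xml.etree.ElementTree as ET

root = ET.parse('integration_cov.xml').getroot()
# Cobertura-style reports expose an overall line-rate attribute on the root element.
print('integration line coverage:', root.attrib.get('line-rate'))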
8 changes: 6 additions & 2 deletions .github/workflows/unit.yml
@@ -34,6 +34,10 @@ jobs:
python -m pip install invoke .[test]
- name: Run unit tests
run: invoke unit
- if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.8
name: Upload codecov report
- if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.9
name: Upload unit codecov report
uses: codecov/codecov-action@v4
with:
flags: unit
file: './unit_cov.xml'
token: ${{ secrets.CODECOV_TOKEN }}
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -92,7 +92,7 @@ dev = [
'Jinja2>=2,<4',

# style check
'ruff>=0.4.5,<1',
'ruff>=0.4.5,<0.7.2',

# distribute on PyPI
'twine>=1.10.0,<6',
5 changes: 1 addition & 4 deletions sdv/__init__.py
@@ -109,10 +109,7 @@ def _find_addons():
try:
addon = entry_point.load()
except Exception as e: # pylint: disable=broad-exception-caught
msg = (
f'Failed to load "{entry_point.name}" from "{entry_point.value}" '
f'with error:\n{e}'
)
msg = f'Failed to load "{entry_point.name}" from "{entry_point.value}" with error:\n{e}'
warnings.warn(msg)
continue

2 changes: 1 addition & 1 deletion sdv/_utils.py
@@ -351,7 +351,7 @@ def check_synthesizer_version(synthesizer, is_fit_method=False, compare_operator
static_message = 'Downgrading your SDV version is not supported.'
if is_fit_method:
static_message = (
'Fitting this synthesizer again is not supported. ' 'Please create a new synthesizer.'
'Fitting this synthesizer again is not supported. Please create a new synthesizer.'
)

fit_public_version = getattr(synthesizer, '_fitted_sdv_version', None)
3 changes: 1 addition & 2 deletions sdv/constraints/tabular.py
@@ -126,8 +126,7 @@ def _validate_inputs(cls, **kwargs):
if 'column_names' not in set(kwargs):
errors = [
ConstraintMetadataError(
"Missing required values {'column_names'} in a"
' CustomConstraint constraint.'
"Missing required values {'column_names'} in a CustomConstraint constraint."
)
]
raise AggregateConstraintsError(errors)
2 changes: 1 addition & 1 deletion sdv/io/local/local.py
@@ -120,7 +120,7 @@ def read(self, folder_name, file_names=None):
missing_files = [file for file in file_names if not (folder_path / file).exists()]
if missing_files:
raise FileNotFoundError(
f"The following files do not exist in the folder: {', '.join(missing_files)}."
f'The following files do not exist in the folder: {", ".join(missing_files)}.'
)

file_paths = [folder_path / file for file in file_names]
3 changes: 1 addition & 2 deletions sdv/lite/single_table.py
@@ -22,8 +22,7 @@
)

META_DEPRECATION_MSG = (
"The 'SingleTableMetadata' is deprecated. Please use the new "
"'Metadata' class for synthesizers."
"The 'SingleTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers."
)


4 changes: 2 additions & 2 deletions sdv/metadata/multi_table.py
@@ -1038,9 +1038,9 @@ def visualize(
foreign_keys = r'\l'.join(info.get('foreign_keys', []))
keys = r'\l'.join([info['primary_key'], foreign_keys])
if foreign_keys:
label = rf"{{{table}|{info['columns']}\l|{keys}\l}}"
label = rf'{{{table}|{info["columns"]}\l|{keys}\l}}'
else:
label = rf"{{{table}|{info['columns']}\l|{keys}}}"
label = rf'{{{table}|{info["columns"]}\l|{keys}}}'

else:
label = f'{table}'
7 changes: 3 additions & 4 deletions sdv/metadata/single_table.py
@@ -699,7 +699,7 @@ def _validate_keys_sdtype(self, keys, key_type):
bad_keys.add(key)
if bad_keys:
raise InvalidMetadataError(
f"The {key_type}_keys {sorted(bad_keys)} must be type 'id' or " 'another PII type.'
f"The {key_type}_keys {sorted(bad_keys)} must be type 'id' or another PII type."
)

def _validate_key(self, column_name, key_type):
@@ -714,7 +714,7 @@ def _validate_key(self, column_name, key_type):
if setting_sequence_as_primary or setting_primary_as_sequence:
raise InvalidMetadataError(
f'The column ({column_name}) cannot be set as {key_type}_key as it is already '
f"set as the {'sequence' if key_type == 'primary' else 'primary'}_key."
f'set as the {"sequence" if key_type == "primary" else "primary"}_key.'
)

invalid_ids = keys - set(self.columns)
@@ -743,8 +743,7 @@ def set_primary_key(self, column_name):

if self.primary_key is not None:
warnings.warn(
f"There is an existing primary key '{self.primary_key}'."
' This key will be removed.'
f"There is an existing primary key '{self.primary_key}'. This key will be removed."
)

self._updated = True
3 changes: 1 addition & 2 deletions sdv/metadata/utils.py
@@ -9,8 +9,7 @@ def read_json(filepath):
filepath = Path(filepath)
if not filepath.exists():
raise ValueError(
f"A file named '{filepath.name}' does not exist. "
'Please specify a different filename.'
f"A file named '{filepath.name}' does not exist. Please specify a different filename."
)

with open(filepath, 'r', encoding='utf-8') as metadata_file:
2 changes: 1 addition & 1 deletion sdv/metadata/visualization.py
@@ -20,7 +20,7 @@ def create_columns_node(columns):
str:
String representing the node that will be printed for the given columns.
"""
columns = [rf"{name} : {meta.get('sdtype')}" for name, meta in columns.items()]
columns = [rf'{name} : {meta.get("sdtype")}' for name, meta in columns.items()]
return r'\l'.join(columns)


3 changes: 1 addition & 2 deletions sdv/multi_table/base.py
@@ -32,8 +32,7 @@

SYNTHESIZER_LOGGER = get_sdv_logger('MultiTableSynthesizer')
DEPRECATION_MSG = (
"The 'MultiTableMetadata' is deprecated. Please use the new "
"'Metadata' class for synthesizers."
"The 'MultiTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers."
)


5 changes: 2 additions & 3 deletions sdv/single_table/base.py
@@ -50,8 +50,7 @@
)

DEPRECATION_MSG = (
"The 'SingleTableMetadata' is deprecated. Please use the new "
"'Metadata' class for synthesizers."
"The 'SingleTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers."
)


@@ -898,7 +897,7 @@ def _conditionally_sample_rows(

elif not graceful_reject_sampling:
user_msg = (
'Unable to sample any rows for the given conditions ' f"'{transformed_condition}'. "
f"Unable to sample any rows for the given conditions '{transformed_condition}'. "
)
if hasattr(self, '_model') and isinstance(self._model, GaussianMultivariate):
user_msg = user_msg + (
4 changes: 2 additions & 2 deletions tasks.py
@@ -26,12 +26,12 @@ def check_dependencies(c):

@task
def unit(c):
c.run('python -m pytest ./tests/unit --cov=sdv --cov-report=xml')
c.run('python -m pytest ./tests/unit --cov=sdv --cov-report=xml:./unit_cov.xml')


@task
def integration(c):
c.run('python -m pytest ./tests/integration --reruns 3')
c.run('python -m pytest ./tests/integration --reruns 3 --cov=sdv --cov-report=xml:./integration_cov.xml')


@task
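A hedged local check (not part of the commit) that both tasks now emit the separate coverage files the workflow upload steps expect, assuming invoke and the test extras are installed:

# Sketch only: run both tasks and confirm the coverage XML files exist.
import subprocess
from pathlib import Path

for task, report in [('unit', 'unit_cov.xml'), ('integration', 'integration_cov.xml')]:
    subprocess.run(['invoke', task], check=True)
    assert Path(report).exists(), f'{report} was not generated'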
2 changes: 1 addition & 1 deletion tests/integration/single_table/test_base.py
@@ -193,7 +193,7 @@ def test_sample_remaining_columns_with_some_nans():

# Assert
warn_msg = (
'Missing values are not yet supported. ' 'Rows with any missing values will not be created.'
'Missing values are not yet supported. Rows with any missing values will not be created.'
)
with pytest.warns(UserWarning, match=warn_msg):
synthesizer.sample_remaining_columns(known_columns=known_columns)
3 changes: 1 addition & 2 deletions tests/unit/data_processing/test_data_processor.py
@@ -1444,8 +1444,7 @@ def test_update_transformers_not_fitted(self):

# Run and Assert
error_msg = (
'The DataProcessor must be prepared for fitting before the transformers can be '
'updated.'
'The DataProcessor must be prepared for fitting before the transformers can be updated.'
)
with pytest.raises(NotFittedError, match=error_msg):
dp.update_transformers({'column': None})
3 changes: 1 addition & 2 deletions tests/unit/datasets/test_local.py
@@ -56,8 +56,7 @@ def test_load_csvs_no_csvs(tmp_path):
with open(json_file_path, 'w') as outfile:
json.dump(fake_json, outfile)
error_message = re.escape(
f"No CSV files exist in '{tmp_path}'. Please make sure your files end in the "
"'.csv' suffix."
f"No CSV files exist in '{tmp_path}'. Please make sure your files end in the '.csv' suffix."
)
with pytest.raises(ValueError, match=error_message):
load_csvs(tmp_path)
3 changes: 1 addition & 2 deletions tests/unit/metadata/test_multi_table.py
@@ -189,8 +189,7 @@ def test__validate_missing_relationship_keys_foreign_key(self):

# Run / Assert
error_msg = re.escape(
'Relationship between tables (users, sessions) contains '
"an unknown foreign key {'id'}."
"Relationship between tables (users, sessions) contains an unknown foreign key {'id'}."
)
with pytest.raises(InvalidMetadataError, match=error_msg):
MultiTableMetadata._validate_missing_relationship_keys(
15 changes: 6 additions & 9 deletions tests/unit/metadata/test_single_table.py
@@ -1660,7 +1660,7 @@ def test_set_primary_key_validation_columns(self):
instance.columns = {'a', 'd'}

err_msg = (
"Unknown primary key values {'b'}." ' Keys should be columns that exist in the table.'
"Unknown primary key values {'b'}. Keys should be columns that exist in the table."
)
# Run / Assert
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -1820,7 +1820,7 @@ def test_set_sequence_key_validation_columns(self):
instance.columns = {'a', 'd'}

err_msg = (
"Unknown sequence key values {'b'}." ' Keys should be columns that exist in the table.'
"Unknown sequence key values {'b'}. Keys should be columns that exist in the table."
)
# Run / Assert
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -1934,8 +1934,7 @@ def test_add_alternate_keys_validation_columns(self):
instance.columns = {'abc', '213', '312'}

err_msg = (
"Unknown alternate key values {'123'}."
' Keys should be columns that exist in the table.'
"Unknown alternate key values {'123'}. Keys should be columns that exist in the table."
)
# Run / Assert
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -2265,8 +2264,7 @@ def mock_relationship_validate(relationship):

# Run and Assert
err_msg = re.escape(
"Error in 'relationship_one' relationship.\n"
"Error in 'relationship_two' relationship."
"Error in 'relationship_one' relationship.\nError in 'relationship_two' relationship."
)
with pytest.raises(InvalidMetadataError, match=err_msg):
instance._validate_all_column_relationships(column_relationships)
@@ -2349,7 +2347,7 @@ def test_validate(self):
instance._validate_column_args = Mock(side_effect=InvalidMetadataError('column_error'))

err_msg = re.escape(
'The following errors were found in the metadata:' '\n\ncolumn_error' '\ncolumn_error'
'The following errors were found in the metadata:\n\ncolumn_error\ncolumn_error'
)
# Run
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -2891,8 +2889,7 @@ def test_load_from_json_schema_not_present(self, mock_json, mock_path, mock_open

# Run / Assert
error_msg = (
'This metadata file is incompatible with the ``SingleTableMetadata`` '
'class and version.'
'This metadata file is incompatible with the ``SingleTableMetadata`` class and version.'
)
with pytest.raises(InvalidMetadataError, match=error_msg):
SingleTableMetadata.load_from_json('filepath.json')
2 changes: 1 addition & 1 deletion tests/unit/multi_table/test_base.py
@@ -577,7 +577,7 @@ def test_validate_table_synthesizers_errors(self):

# Run and Assert
error_msg = (
'The provided data does not match the metadata:\n' 'Invalid data for PAR synthesizer.'
'The provided data does not match the metadata:\nInvalid data for PAR synthesizer.'
)
with pytest.raises(InvalidDataError, match=error_msg):
instance.validate(data)
2 changes: 1 addition & 1 deletion tests/unit/single_table/test_base.py
@@ -809,7 +809,7 @@ def test_update_transformers_warns_fitted(self):

# Run and Assert
warning_msg = re.escape(
'For this change to take effect, please refit the synthesizer ' 'using `fit`.'
'For this change to take effect, please refit the synthesizer using `fit`.'
)
with pytest.warns(UserWarning, match=warning_msg):
instance.update_transformers(column_name_to_transformer)
4 changes: 1 addition & 3 deletions tests/unit/single_table/test_utils.py
@@ -214,9 +214,7 @@ def test_unflatten_dict():
def test_handle_sampling_error_temp_file():
"""Test that an error is raised when temp dir is ``False``."""
# Run and Assert
error_msg = (
'Error: Sampling terminated. Partial results are stored in test.csv.' '\n' 'Test error'
)
error_msg = 'Error: Sampling terminated. Partial results are stored in test.csv.\nTest error'
with pytest.raises(ValueError, match=error_msg):
handle_sampling_error('test.csv', ValueError('Test error'))
