Add integration tests to code coverage report (#2268)
amontanez24 authored Oct 24, 2024
1 parent 7902fc9 commit 72153a5
Showing 23 changed files with 44 additions and 50 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/integration.yml
@@ -39,3 +39,10 @@ jobs:
run: |
invoke integration
invoke benchmark-dtypes
- if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.9
name: Upload integration codecov report
uses: codecov/codecov-action@v4
with:
flags: integration
file: './integration_cov.xml'
token: ${{ secrets.CODECOV_TOKEN }}
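As a hedged aside (not part of the diff): the uploaded file is the Cobertura-style XML that pytest-cov writes, so it can be sanity-checked locally before relying on the Codecov flag. A minimal Python sketch, assuming ./integration_cov.xml has already been generated by the integration task in tasks.py below:

# Sketch only: inspect the integration coverage report the workflow uploads.
# Assumes `invoke integration` (see tasks.py below) has written the file.
import xml.etree.ElementTree as ET

root = ET.parse('integration_cov.xml').getroot()
# Cobertura-style reports expose an overall line-rate attribute on the root element.
print('integration line coverage:', root.attrib.get('line-rate'))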
8 changes: 6 additions & 2 deletions .github/workflows/unit.yml
@@ -34,6 +34,10 @@ jobs:
python -m pip install invoke .[test]
- name: Run unit tests
run: invoke unit
- if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.8
name: Upload codecov report
- if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.9
name: Upload unit codecov report
uses: codecov/codecov-action@v4
with:
flags: unit
file: './unit_cov.xml'
token: ${{ secrets.CODECOV_TOKEN }}
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -92,7 +92,7 @@ dev = [
'Jinja2>=2,<4',

# style check
'ruff>=0.4.5,<1',
'ruff>=0.4.5,<0.7.2',

# distribute on PyPI
'twine>=1.10.0,<6',
5 changes: 1 addition & 4 deletions sdv/__init__.py
@@ -109,10 +109,7 @@ def _find_addons():
try:
addon = entry_point.load()
except Exception as e: # pylint: disable=broad-exception-caught
msg = (
f'Failed to load "{entry_point.name}" from "{entry_point.value}" '
f'with error:\n{e}'
)
msg = f'Failed to load "{entry_point.name}" from "{entry_point.value}" with error:\n{e}'
warnings.warn(msg)
continue

2 changes: 1 addition & 1 deletion sdv/_utils.py
@@ -351,7 +351,7 @@ def check_synthesizer_version(synthesizer, is_fit_method=False, compare_operator
static_message = 'Downgrading your SDV version is not supported.'
if is_fit_method:
static_message = (
'Fitting this synthesizer again is not supported. ' 'Please create a new synthesizer.'
'Fitting this synthesizer again is not supported. Please create a new synthesizer.'
)

fit_public_version = getattr(synthesizer, '_fitted_sdv_version', None)
3 changes: 1 addition & 2 deletions sdv/constraints/tabular.py
@@ -126,8 +126,7 @@ def _validate_inputs(cls, **kwargs):
if 'column_names' not in set(kwargs):
errors = [
ConstraintMetadataError(
"Missing required values {'column_names'} in a"
' CustomConstraint constraint.'
"Missing required values {'column_names'} in a CustomConstraint constraint."
)
]
raise AggregateConstraintsError(errors)
2 changes: 1 addition & 1 deletion sdv/io/local/local.py
@@ -120,7 +120,7 @@ def read(self, folder_name, file_names=None):
missing_files = [file for file in file_names if not (folder_path / file).exists()]
if missing_files:
raise FileNotFoundError(
f"The following files do not exist in the folder: {', '.join(missing_files)}."
f'The following files do not exist in the folder: {", ".join(missing_files)}.'
)

file_paths = [folder_path / file for file in file_names]
3 changes: 1 addition & 2 deletions sdv/lite/single_table.py
@@ -22,8 +22,7 @@
)

META_DEPRECATION_MSG = (
"The 'SingleTableMetadata' is deprecated. Please use the new "
"'Metadata' class for synthesizers."
"The 'SingleTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers."
)


4 changes: 2 additions & 2 deletions sdv/metadata/multi_table.py
@@ -1038,9 +1038,9 @@ def visualize(
foreign_keys = r'\l'.join(info.get('foreign_keys', []))
keys = r'\l'.join([info['primary_key'], foreign_keys])
if foreign_keys:
label = rf"{{{table}|{info['columns']}\l|{keys}\l}}"
label = rf'{{{table}|{info["columns"]}\l|{keys}\l}}'
else:
label = rf"{{{table}|{info['columns']}\l|{keys}}}"
label = rf'{{{table}|{info["columns"]}\l|{keys}}}'

else:
label = f'{table}'
7 changes: 3 additions & 4 deletions sdv/metadata/single_table.py
@@ -699,7 +699,7 @@ def _validate_keys_sdtype(self, keys, key_type):
bad_keys.add(key)
if bad_keys:
raise InvalidMetadataError(
f"The {key_type}_keys {sorted(bad_keys)} must be type 'id' or " 'another PII type.'
f"The {key_type}_keys {sorted(bad_keys)} must be type 'id' or another PII type."
)

def _validate_key(self, column_name, key_type):
@@ -714,7 +714,7 @@ def _validate_key(self, column_name, key_type):
if setting_sequence_as_primary or setting_primary_as_sequence:
raise InvalidMetadataError(
f'The column ({column_name}) cannot be set as {key_type}_key as it is already '
f"set as the {'sequence' if key_type == 'primary' else 'primary'}_key."
f'set as the {"sequence" if key_type == "primary" else "primary"}_key.'
)

invalid_ids = keys - set(self.columns)
@@ -743,8 +743,7 @@ def set_primary_key(self, column_name):

if self.primary_key is not None:
warnings.warn(
f"There is an existing primary key '{self.primary_key}'."
' This key will be removed.'
f"There is an existing primary key '{self.primary_key}'. This key will be removed."
)

self._updated = True
3 changes: 1 addition & 2 deletions sdv/metadata/utils.py
@@ -9,8 +9,7 @@ def read_json(filepath):
filepath = Path(filepath)
if not filepath.exists():
raise ValueError(
f"A file named '{filepath.name}' does not exist. "
'Please specify a different filename.'
f"A file named '{filepath.name}' does not exist. Please specify a different filename."
)

with open(filepath, 'r', encoding='utf-8') as metadata_file:
2 changes: 1 addition & 1 deletion sdv/metadata/visualization.py
@@ -20,7 +20,7 @@ def create_columns_node(columns):
str:
String representing the node that will be printed for the given columns.
"""
columns = [rf"{name} : {meta.get('sdtype')}" for name, meta in columns.items()]
columns = [rf'{name} : {meta.get("sdtype")}' for name, meta in columns.items()]
return r'\l'.join(columns)


3 changes: 1 addition & 2 deletions sdv/multi_table/base.py
@@ -32,8 +32,7 @@

SYNTHESIZER_LOGGER = get_sdv_logger('MultiTableSynthesizer')
DEPRECATION_MSG = (
"The 'MultiTableMetadata' is deprecated. Please use the new "
"'Metadata' class for synthesizers."
"The 'MultiTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers."
)


5 changes: 2 additions & 3 deletions sdv/single_table/base.py
@@ -50,8 +50,7 @@
)

DEPRECATION_MSG = (
"The 'SingleTableMetadata' is deprecated. Please use the new "
"'Metadata' class for synthesizers."
"The 'SingleTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers."
)


@@ -898,7 +897,7 @@ def _conditionally_sample_rows(

elif not graceful_reject_sampling:
user_msg = (
'Unable to sample any rows for the given conditions ' f"'{transformed_condition}'. "
f"Unable to sample any rows for the given conditions '{transformed_condition}'. "
)
if hasattr(self, '_model') and isinstance(self._model, GaussianMultivariate):
user_msg = user_msg + (
4 changes: 2 additions & 2 deletions tasks.py
@@ -26,12 +26,12 @@ def check_dependencies(c):

@task
def unit(c):
c.run('python -m pytest ./tests/unit --cov=sdv --cov-report=xml')
c.run('python -m pytest ./tests/unit --cov=sdv --cov-report=xml:./unit_cov.xml')


@task
def integration(c):
c.run('python -m pytest ./tests/integration --reruns 3')
c.run('python -m pytest ./tests/integration --reruns 3 --cov=sdv --cov-report=xml:./integration_cov.xml')


@task
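A hedged local check (not part of the commit) that both tasks now emit the separate coverage files the workflow upload steps expect, assuming invoke and the test extras are installed:

# Sketch only: run both tasks and confirm the coverage XML files exist.
import subprocess
from pathlib import Path

for task, report in [('unit', 'unit_cov.xml'), ('integration', 'integration_cov.xml')]:
    subprocess.run(['invoke', task], check=True)
    assert Path(report).exists(), f'{report} was not generated'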
2 changes: 1 addition & 1 deletion tests/integration/single_table/test_base.py
@@ -193,7 +193,7 @@ def test_sample_remaining_columns_with_some_nans():

# Assert
warn_msg = (
'Missing values are not yet supported. ' 'Rows with any missing values will not be created.'
'Missing values are not yet supported. Rows with any missing values will not be created.'
)
with pytest.warns(UserWarning, match=warn_msg):
synthesizer.sample_remaining_columns(known_columns=known_columns)
3 changes: 1 addition & 2 deletions tests/unit/data_processing/test_data_processor.py
@@ -1444,8 +1444,7 @@ def test_update_transformers_not_fitted(self):

# Run and Assert
error_msg = (
'The DataProcessor must be prepared for fitting before the transformers can be '
'updated.'
'The DataProcessor must be prepared for fitting before the transformers can be updated.'
)
with pytest.raises(NotFittedError, match=error_msg):
dp.update_transformers({'column': None})
3 changes: 1 addition & 2 deletions tests/unit/datasets/test_local.py
@@ -56,8 +56,7 @@ def test_load_csvs_no_csvs(tmp_path):
with open(json_file_path, 'w') as outfile:
json.dump(fake_json, outfile)
error_message = re.escape(
f"No CSV files exist in '{tmp_path}'. Please make sure your files end in the "
"'.csv' suffix."
f"No CSV files exist in '{tmp_path}'. Please make sure your files end in the '.csv' suffix."
)
with pytest.raises(ValueError, match=error_message):
load_csvs(tmp_path)
3 changes: 1 addition & 2 deletions tests/unit/metadata/test_multi_table.py
@@ -189,8 +189,7 @@ def test__validate_missing_relationship_keys_foreign_key(self):

# Run / Assert
error_msg = re.escape(
'Relationship between tables (users, sessions) contains '
"an unknown foreign key {'id'}."
"Relationship between tables (users, sessions) contains an unknown foreign key {'id'}."
)
with pytest.raises(InvalidMetadataError, match=error_msg):
MultiTableMetadata._validate_missing_relationship_keys(
15 changes: 6 additions & 9 deletions tests/unit/metadata/test_single_table.py
@@ -1660,7 +1660,7 @@ def test_set_primary_key_validation_columns(self):
instance.columns = {'a', 'd'}

err_msg = (
"Unknown primary key values {'b'}." ' Keys should be columns that exist in the table.'
"Unknown primary key values {'b'}. Keys should be columns that exist in the table."
)
# Run / Assert
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -1820,7 +1820,7 @@ def test_set_sequence_key_validation_columns(self):
instance.columns = {'a', 'd'}

err_msg = (
"Unknown sequence key values {'b'}." ' Keys should be columns that exist in the table.'
"Unknown sequence key values {'b'}. Keys should be columns that exist in the table."
)
# Run / Assert
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -1934,8 +1934,7 @@ def test_add_alternate_keys_validation_columns(self):
instance.columns = {'abc', '213', '312'}

err_msg = (
"Unknown alternate key values {'123'}."
' Keys should be columns that exist in the table.'
"Unknown alternate key values {'123'}. Keys should be columns that exist in the table."
)
# Run / Assert
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -2265,8 +2264,7 @@ def mock_relationship_validate(relationship):

# Run and Assert
err_msg = re.escape(
"Error in 'relationship_one' relationship.\n"
"Error in 'relationship_two' relationship."
"Error in 'relationship_one' relationship.\nError in 'relationship_two' relationship."
)
with pytest.raises(InvalidMetadataError, match=err_msg):
instance._validate_all_column_relationships(column_relationships)
@@ -2349,7 +2347,7 @@ def test_validate(self):
instance._validate_column_args = Mock(side_effect=InvalidMetadataError('column_error'))

err_msg = re.escape(
'The following errors were found in the metadata:' '\n\ncolumn_error' '\ncolumn_error'
'The following errors were found in the metadata:\n\ncolumn_error\ncolumn_error'
)
# Run
with pytest.raises(InvalidMetadataError, match=err_msg):
@@ -2891,8 +2889,7 @@ def test_load_from_json_schema_not_present(self, mock_json, mock_path, mock_open

# Run / Assert
error_msg = (
'This metadata file is incompatible with the ``SingleTableMetadata`` '
'class and version.'
'This metadata file is incompatible with the ``SingleTableMetadata`` class and version.'
)
with pytest.raises(InvalidMetadataError, match=error_msg):
SingleTableMetadata.load_from_json('filepath.json')
2 changes: 1 addition & 1 deletion tests/unit/multi_table/test_base.py
@@ -577,7 +577,7 @@ def test_validate_table_synthesizers_errors(self):

# Run and Assert
error_msg = (
'The provided data does not match the metadata:\n' 'Invalid data for PAR synthesizer.'
'The provided data does not match the metadata:\nInvalid data for PAR synthesizer.'
)
with pytest.raises(InvalidDataError, match=error_msg):
instance.validate(data)
2 changes: 1 addition & 1 deletion tests/unit/single_table/test_base.py
@@ -809,7 +809,7 @@ def test_update_transformers_warns_fitted(self):

# Run and Assert
warning_msg = re.escape(
'For this change to take effect, please refit the synthesizer ' 'using `fit`.'
'For this change to take effect, please refit the synthesizer using `fit`.'
)
with pytest.warns(UserWarning, match=warning_msg):
instance.update_transformers(column_name_to_transformer)
4 changes: 1 addition & 3 deletions tests/unit/single_table/test_utils.py
@@ -214,9 +214,7 @@ def test_unflatten_dict():
def test_handle_sampling_error_temp_file():
"""Test that an error is raised when temp dir is ``False``."""
# Run and Assert
error_msg = (
'Error: Sampling terminated. Partial results are stored in test.csv.' '\n' 'Test error'
)
error_msg = 'Error: Sampling terminated. Partial results are stored in test.csv.\nTest error'
with pytest.raises(ValueError, match=error_msg):
handle_sampling_error('test.csv', ValueError('Test error'))
