From bd190603566943eac7e015fd439edc61fe17c9bb Mon Sep 17 00:00:00 2001 From: Francesco Calcavecchia Date: Tue, 31 Oct 2023 14:51:39 +0100 Subject: [PATCH] allow optional arguments in load() --- CHANGELOG.md | 7 +++--- src/dac/_input/config.py | 2 +- test/data/__init__.py | 8 ++++++ .../parquet_as_pandas_with_sample_frac.py | 7 ++++++ .../load/parquet_as_pandas_with_sample_n.py | 7 ++++++ test/unit_test/_input/config_test.py | 25 +++++++++++++++++++ 6 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 test/data/load/parquet_as_pandas_with_sample_frac.py create mode 100644 test/data/load/parquet_as_pandas_with_sample_n.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e2130d..11b45d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,14 +10,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Anything MAY change at any time. The public API SHOULD NOT be considered stable."). While in this phase, we will denote breaking changes with a minor increase. -## Unreleased patch +## 0.4.0 ### Changed +* The `load` function in `load.py` can contain optional arguments. Previously no arguments were allowed. +* `load.py` and `schema.py` are publicly accessible under `dac_pkg_name.load` and `dac_pkg_name.schema` respectively. Previously they were marked as private modules, under `dac_pkg_name._load` and `dac_pkg_name._schema`. +* `Schema` does not have to be a `pandera.DataFrameModel` anymore, but any class that implements a `validate` method (see the `_input.interface.Validator` protocol). * `dac` does not rely on [`pydantic`](https://pypi.org/project/pydantic/) anymore, and uses [`dataclass`](https://docs.python.org/3/library/dataclasses.html#) instead. Changes affect `PackConfig` and `PyProjectConfig`. -* `Schema` does not have to be a `pandera.DataFrameModel` anymore, but any class that implements a `validate` method (see the `_input.interface.Validator` protocol). -* `load.py` and `schema.py` are publicly accessible under `dac_pkg_name.load` and `dac_pkg_name.schema` respectively. Previously they were marked as private modules, under `dac_pkg_name._load` and `dac_pkg_name._schema`. ## 0.3.3 diff --git a/src/dac/_input/config.py b/src/dac/_input/config.py index f9be80a..3c2f05b 100644 --- a/src/dac/_input/config.py +++ b/src/dac/_input/config.py @@ -45,7 +45,7 @@ def _check_load_contains_expected_function(self) -> None: try: signature = inspect.getfullargspec(pkg.load) - assert signature.args == [] + assert len(signature.args) == (len(signature.defaults) if signature.defaults is not None else 0) except Exception as e: raise ValueError((f"{self.load_path.as_posix()} does not contain the required `def load()`")) from e diff --git a/test/data/__init__.py b/test/data/__init__.py index 8ed4e34..e9732be 100644 --- a/test/data/__init__.py +++ b/test/data/__init__.py @@ -17,6 +17,14 @@ def get_path_to_sample_load_parquet_as_pandas() -> Path: return Path(__file__).parent / "load/parquet_as_pandas.py" +def get_path_to_sample_load_parquet_as_pandas_with_sample_frac() -> Path: + return Path(__file__).parent / "load/parquet_as_pandas_with_sample_frac.py" + + +def get_path_to_sample_load_parquet_as_pandas_with_sample_n() -> Path: + return Path(__file__).parent / "load/parquet_as_pandas_with_sample_n.py" + + def get_path_to_self_contained_load_as_pandas() -> Path: return Path(__file__).parent / "load/self_contained_as_pandas.py" diff --git a/test/data/load/parquet_as_pandas_with_sample_frac.py b/test/data/load/parquet_as_pandas_with_sample_frac.py new file mode 100644 index 0000000..da6c9bc --- /dev/null +++ b/test/data/load/parquet_as_pandas_with_sample_frac.py @@ -0,0 +1,7 @@ +from pathlib import Path + +import pandas as pd + + +def load(sample_frac: float = 1.0) -> pd.DataFrame: + return pd.read_parquet(Path(__file__).parent / "sample.parquet").sample(frac=sample_frac) diff --git a/test/data/load/parquet_as_pandas_with_sample_n.py b/test/data/load/parquet_as_pandas_with_sample_n.py new file mode 100644 index 0000000..8560c27 --- /dev/null +++ b/test/data/load/parquet_as_pandas_with_sample_n.py @@ -0,0 +1,7 @@ +from pathlib import Path + +import pandas as pd + + +def load(sample_n: int) -> pd.DataFrame: + return pd.read_parquet(Path(__file__).parent / "sample.parquet").sample(n=sample_n) diff --git a/test/unit_test/_input/config_test.py b/test/unit_test/_input/config_test.py index 112f17c..dae230a 100644 --- a/test/unit_test/_input/config_test.py +++ b/test/unit_test/_input/config_test.py @@ -4,6 +4,8 @@ get_path_to_invalid_load, get_path_to_invalid_schema, get_path_to_sample_load_parquet_as_pandas, + get_path_to_sample_load_parquet_as_pandas_with_sample_frac, + get_path_to_sample_load_parquet_as_pandas_with_sample_n, get_path_to_sample_parquet, get_path_to_sample_schema, get_path_to_schema_incompatible_with_sample_df, @@ -75,6 +77,29 @@ def test_if_load_does_not_contain_expected_function_then_raise_exception(pyproje ) +def test_if_load_contain_optional_arguments_then_do_not_raise_exception(pyproject: PyProjectConfig): + with TemporaryDirectory() as tmp_dir: + PackConfig( + data_path=get_path_to_sample_parquet(), + load_path=get_path_to_sample_load_parquet_as_pandas_with_sample_frac(), + schema_path=get_path_to_sample_schema(), + wheel_dir=Path(tmp_dir), + pyproject=pyproject, + ) + + +def test_if_load_contain_non_optional_arguments_then_raise_exception(pyproject: PyProjectConfig): + with TemporaryDirectory() as tmp_dir: + with pytest.raises(ValueError): + PackConfig( + data_path=get_path_to_sample_parquet(), + load_path=get_path_to_sample_load_parquet_as_pandas_with_sample_n(), + schema_path=get_path_to_sample_schema(), + wheel_dir=Path(tmp_dir), + pyproject=pyproject, + ) + + def test_if_invalid_schema_path_then_raise_exception(pyproject: PyProjectConfig): with TemporaryDirectory() as tmp_dir: with pytest.raises(ValueError):