diff --git a/.github/workflows/lint_and_test_and_bump.yml b/.github/workflows/lint_and_test_and_bump.yml
index 61d84e4..0e67837 100644
--- a/.github/workflows/lint_and_test_and_bump.yml
+++ b/.github/workflows/lint_and_test_and_bump.yml
@@ -77,7 +77,7 @@ jobs:
           echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV
           echo "venue=ops" >> $GITHUB_ENV
 
-      - name: Install bumblebee
+      - name: Install stitchee
         run: poetry install
 
       - name: Lint
@@ -87,7 +87,8 @@ jobs:
 
       - name: Test with pytest
         run: |
-          poetry run pytest
+          poetry run pytest tests/test_group_handling.py
+          # TODO: expand tests to include full concatenation runs, i.e., don't just run test_group_handling.py
 
 #      - name: Commit Version Bump
 #        # If building develop, a release branch, or main then we commit the version bump back to the repo
@@ -96,8 +97,8 @@ jobs:
 #          github.ref == 'refs/heads/main' ||
 #          startsWith(github.ref, 'refs/heads/release')
 #        run: |
-#          git config --global user.name 'bumblebee bot'
-#          git config --global user.email 'bumblebee@noreply.github.com'
+#          git config --global user.name 'stitchee bot'
+#          git config --global user.email 'stitchee@noreply.github.com'
 #          git commit -am "/version ${{ env.software_version }}"
 #          git push
 #
diff --git a/.github/workflows/lint_and_test_on_pull_request.yml b/.github/workflows/lint_and_test_on_pull_request.yml
index b94fb21..da85632 100644
--- a/.github/workflows/lint_and_test_on_pull_request.yml
+++ b/.github/workflows/lint_and_test_on_pull_request.yml
@@ -25,7 +25,7 @@ jobs:
         with:
           poetry-version: 1.3.2
 
-      - name: Install bumblebee
+      - name: Install stitchee
         run: poetry install
 
       - name: Lint
@@ -35,4 +35,5 @@ jobs:
 
       - name: Test with pytest
         run: |
-          poetry run pytest
+          poetry run pytest tests/test_group_handling.py
+          # TODO: expand tests to include full concatenation runs, i.e., don't just run test_group_handling.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..c56fe32
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,16 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+ - [PR #1](https://github.com/danielfromearth/stitchee/pull/1): An initial GitHub Actions workflow
+### Changed
+ - [PR #12](https://github.com/danielfromearth/stitchee/pull/12): Changed name to "stitchee"
+### Deprecated
+### Removed
+### Fixed
+- [PR #4](https://github.com/danielfromearth/stitchee/pull/4): Error with TEMPO ozone profile data because of duplicated dimension names
diff --git a/README.md b/README.md
index 70de8ce..aaeda04 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,21 @@
-# bumblebee
+[](stitchee_9_hex)
 
-Tool for concatenating netCDF data *along an existing dimension*,
-which is deigned as both a standalone utility and
-for use as a service in [Harmony](https://harmony.earthdata.nasa.gov/).
+# Overview
+_____
+
+_STITCHEE_ (STITCH by Extending a dimEnsion) is used for concatenating netCDF data *along an existing dimension*,
+and it is designed as both a standalone utility and for use as a service in [Harmony](https://harmony.earthdata.nasa.gov/).
 
 ## Getting started, with poetry
 
 1. Follow the instructions for installing `poetry` [here](https://python-poetry.org/docs/).
-2. Install `bumblebee`, with its dependencies, by running the following from the repository directory:
+2. 
Install `stitchee`, with its dependencies, by running the following from the repository directory: ```shell poetry install ``` -## How to test `bumblebee` locally +## How to test `stitchee` locally ```shell poetry run pytest tests/ @@ -22,25 +24,36 @@ poetry run pytest tests/ ## Usage (with poetry) ```shell -$ poetry run bumblebee --help -usage: bumblebee [-h] [--make_dir_copy] [-v] data_dir output_path +$ poetry run stitchee --help +usage: stitchee [-h] -o output_path [--concat_dim concat_dim] [--make_dir_copy] [--keep_tmp_files] [-O] [-v] + path/directory or path list [path/directory or path list ...] Run the along-existing-dimension concatenator. -positional arguments: - data_dir The directory containing the files to be merged. - output_path The output filename for the merged output. - options: - -h, --help show this help message and exit - --make_dir_copy Make a duplicate of the input directory to avoid modification of input files. This is useful for testing, but uses more disk space. - -v, --verbose Enable verbose output to stdout; useful for debugging + -h, --help show this help message and exit + --concat_dim concat_dim + Dimension to concatenate along, if possible. + --make_dir_copy Make a duplicate of the input directory to avoid modification of input files. This is useful for testing, but + uses more disk space. + --keep_tmp_files Prevents removal, after successful execution, of (1) the flattened concatenated file and (2) the input + directory copy if created by '--make_dir_copy'. + -O, --overwrite Overwrite output file if it already exists. + -v, --verbose Enable verbose output to stdout; useful for debugging + +Required: + path/directory or path list + Files to be concatenated, specified via a (1) single directory containing the files to be concatenated, (2) + single text file containing linebreak-separated paths of the files to be concatenated, or (3) multiple + filepaths of the files to be concatenated. + -o output_path, --output_path output_path + The output filename for the merged output. 
 ```
 
 For example:
 
 ```shell
-poetry run bumblebee /path/to/netcdf/directory/ /path/to/output.nc
+poetry run stitchee /path/to/netcdf/directory/ -o /path/to/output.nc
 ```
 
 ## Usage (without poetry)
diff --git a/concatenator/concat_with_nco.py b/concatenator/concat_with_nco.py
index fa3052c..923b571 100644
--- a/concatenator/concat_with_nco.py
+++ b/concatenator/concat_with_nco.py
@@ -4,7 +4,7 @@
 import netCDF4 as nc  # type: ignore
 from nco import Nco  # type: ignore
 
-from concatenator.bumblebee import _validate_workable_files
+from concatenator.stitchee import _validate_workable_files
 
 default_logger = getLogger(__name__)
 
diff --git a/concatenator/concat_with_nco_cli.py b/concatenator/concat_with_nco_cli.py
index 1537da7..ca39748 100644
--- a/concatenator/concat_with_nco_cli.py
+++ b/concatenator/concat_with_nco_cli.py
@@ -7,7 +7,7 @@
 import sys
 
 from concatenator.concat_with_nco import concat_netcdf_files
-from concatenator.run_bumblebee import parse_args
+from concatenator.run_stitchee import parse_args
 
 
 def run_nco_concat(args: list) -> None:
diff --git a/concatenator/run_bumblebee.py b/concatenator/run_stitchee.py
similarity index 92%
rename from concatenator/run_bumblebee.py
rename to concatenator/run_stitchee.py
index 2ccfc82..36bf678 100644
--- a/concatenator/run_bumblebee.py
+++ b/concatenator/run_stitchee.py
@@ -8,8 +8,8 @@
 from pathlib import Path
 from typing import Tuple, Union
 
-from concatenator.bumblebee import bumblebee
 from concatenator.file_ops import add_label_to_path
+from concatenator.stitchee import stitchee
 
 
 def parse_args(args: list) -> Tuple[list[str], str, str, bool, Union[str, None]]:
@@ -21,7 +21,7 @@ def parse_args(args: list) -> Tuple[list[str], str, str, bool, Union[str, None]
         tuple
     """
     parser = ArgumentParser(
-        prog='bumblebee',
+        prog='stitchee',
         description='Run the along-existing-dimension concatenator.')
 
     # Required arguments
@@ -132,19 +132,19 @@ def _get_list_of_filepaths_from_dir(data_dir: Path):
     return input_files
 
 
-def run_bumblebee(args: list) -> None:
+def run_stitchee(args: list) -> None:
     """
     Parse arguments and run subsetter on the specified input file
     """
     input_files, output_path, concat_dim, keep_tmp_files, temporary_dir_to_remove = parse_args(args)
     num_inputs = len(input_files)
 
-    logging.info('Executing bumblebee concatenation on %d files...', num_inputs)
-    bumblebee(input_files, output_path,
-              write_tmp_flat_concatenated=keep_tmp_files,
-              keep_tmp_files=keep_tmp_files,
-              concat_dim=concat_dim)
-    logging.info('BUMBLEBEE complete. Result in %s', output_path)
+    logging.info('Executing stitchee concatenation on %d files...', num_inputs)
+    stitchee(input_files, output_path,
+             write_tmp_flat_concatenated=keep_tmp_files,
+             keep_tmp_files=keep_tmp_files,
+             concat_dim=concat_dim)
+    logging.info('STITCHEE complete. 
Result in %s', output_path) if not keep_tmp_files and temporary_dir_to_remove: shutil.rmtree(temporary_dir_to_remove) @@ -157,7 +157,7 @@ def main() -> None: format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.DEBUG ) - run_bumblebee(sys.argv[1:]) + run_stitchee(sys.argv[1:]) if __name__ == '__main__': diff --git a/concatenator/bumblebee.py b/concatenator/stitchee.py similarity index 95% rename from concatenator/bumblebee.py rename to concatenator/stitchee.py index d7ff7db..cd20069 100644 --- a/concatenator/bumblebee.py +++ b/concatenator/stitchee.py @@ -18,12 +18,12 @@ default_logger = logging.getLogger(__name__) -def bumblebee(files_to_concat: list[str], - output_file: str, - write_tmp_flat_concatenated: bool = False, - keep_tmp_files: bool = True, - concat_dim: str = "", - logger: Logger = default_logger) -> str: +def stitchee(files_to_concat: list[str], + output_file: str, + write_tmp_flat_concatenated: bool = False, + keep_tmp_files: bool = True, + concat_dim: str = "", + logger: Logger = default_logger) -> str: """Concatenate netCDF data files along an existing dimension. Parameters diff --git a/entry.py b/entry.py index 7a12c40..e3a052a 100644 --- a/entry.py +++ b/entry.py @@ -2,7 +2,7 @@ import logging import sys -from concatenator.run_bumblebee import run_bumblebee +from concatenator.run_stitchee import run_stitchee def main() -> None: @@ -12,7 +12,7 @@ def main() -> None: format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.DEBUG ) - run_bumblebee(sys.argv[1:]) + run_stitchee(sys.argv[1:]) if __name__ == '__main__': diff --git a/poetry.lock b/poetry.lock index c27346c..ef50d47 100644 --- a/poetry.lock +++ b/poetry.lock @@ -101,13 +101,13 @@ files = [ [[package]] name = "dask" -version = "2023.8.1" +version = "2023.9.1" description = "Parallel PyData with Task Scheduling" optional = false python-versions = ">=3.9" files = [ - {file = "dask-2023.8.1-py3-none-any.whl", hash = "sha256:53627bb324975624835a0f2b64b3b7e24afb4947ca5b371e0092c7e98adad0e2"}, - {file = "dask-2023.8.1.tar.gz", hash = "sha256:5c4b402908938dc87506e0fc07fb0dfa329e59587adf34531cd300fe853b3bf8"}, + {file = "dask-2023.9.1-py3-none-any.whl", hash = "sha256:2b1085ef3a54cb8b96cb8651a803e644d77d992198bda2954715fd4fd6cb434c"}, + {file = "dask-2023.9.1.tar.gz", hash = "sha256:782825bbe52c23c5698166aa0cf95cfed02dcb9afa59b0814b69ae58e1bc9686"}, ] [package.dependencies] @@ -125,8 +125,8 @@ array = ["numpy (>=1.21)"] complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)"] dataframe = ["dask[array]", "pandas (>=1.3)"] diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2023.8.1)"] -test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist"] +distributed = ["distributed (==2023.9.1)"] +test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] [[package]] name = "dill" @@ -174,13 +174,13 @@ pyflakes = ">=3.1.0,<3.2.0" [[package]] name = "fsspec" -version = "2023.6.0" +version = "2023.9.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, - {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, + {file = "fsspec-2023.9.0-py3-none-any.whl", 
hash = "sha256:d55b9ab2a4c1f2b759888ae9f93e40c2aa72c0808132e87e282b549f9e6c4254"}, + {file = "fsspec-2023.9.0.tar.gz", hash = "sha256:4dbf0fefee035b7c6d3bbbe6bc99b2f201f40d4dca95b67c2b719be77bcd917f"}, ] [package.extras] @@ -481,41 +481,35 @@ files = [ [[package]] name = "pandas" -version = "2.0.3" +version = "2.1.0" description = "Powerful data structures for data analysis, time series, and statistics" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, - {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, - {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, - {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, - {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, - {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, - {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, - {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, - {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, - {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, - {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, - {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, - {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, - {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, - {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, - {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, - {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, - {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, + {file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"}, + {file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"}, + {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"}, + {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc"}, + {file = "pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421"}, + {file = "pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5"}, + {file = "pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd"}, + {file = "pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b"}, + {file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f"}, + {file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3"}, + {file = "pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c"}, + {file = "pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694"}, + {file = "pandas-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86f100b3876b8c6d1a2c66207288ead435dc71041ee4aea789e55ef0e06408cb"}, + {file = "pandas-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28f330845ad21c11db51e02d8d69acc9035edfd1116926ff7245c7215db57957"}, + {file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9a6ccf0963db88f9b12df6720e55f337447aea217f426a22d71f4213a3099a6"}, + {file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9"}, + {file = "pandas-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b31da36d376d50a1a492efb18097b9101bdbd8b3fbb3f49006e02d4495d4c644"}, + {file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"}, + {file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"}, ] [package.dependencies] numpy = [ - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version 
= ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.2" @@ -523,27 +517,28 @@ pytz = ">=2020.1" tzdata = ">=2022.1" [package.extras] -all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] -aws = ["s3fs (>=2021.08.0)"] -clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] -compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] -computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2021.07.0)"] -gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] -hdf5 = ["tables (>=3.6.1)"] -html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] -mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] -spss = ["pyreadstat (>=1.1.2)"] -sql-other = ["SQLAlchemy (>=1.4.16)"] -test 
= ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.6.3)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] [[package]] name = "partd" @@ -580,13 +575,13 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co [[package]] name = "pluggy" -version = "1.2.0" +version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"}, - {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"}, + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, ] [package.extras] @@ -645,13 +640,13 @@ testutils = ["gitpython (>3)"] [[package]] name = "pytest" -version = "7.4.0" +version = "7.4.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"}, - {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"}, + {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"}, + {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"}, ] [package.dependencies] @@ -681,13 +676,13 @@ six = ">=1.5" [[package]] name = "pytz" -version = "2023.3" +version = "2023.3.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, - {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index b49d24c..b05a997 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "bumblebee" +name = "stitchee" version = "0.1.0" description = "NetCDF4 Along-existing-dimension Concatenation Service" authors = ["Daniel Kaufman "] @@ -28,7 +28,7 @@ flake8 = "^6.1.0" nco = "^1.1.0" [tool.poetry.scripts] -bumblebee = 'concatenator.run_bumblebee:main' +stitchee = 'concatenator.run_stitchee:main' [build-system] requires = ["poetry-core"] diff --git a/tests/test_concat.py b/tests/test_concat.py index 855d3fa..b496d06 100644 --- a/tests/test_concat.py +++ b/tests/test_concat.py @@ -12,7 +12,7 @@ import pytest from concatenator import concat_with_nco -from concatenator.bumblebee import bumblebee +from concatenator.stitchee import stitchee 
@pytest.mark.usefixtures("pass_options") @@ -30,10 +30,10 @@ def tearDownClass(cls): if not cls.KEEP_TMP: # pylint: disable=no-member rmtree(cls.__output_path) - def run_verification_with_bumblebee(self, - data_dir, - output_name, - record_dim_name: str = 'mirror_step'): + def run_verification_with_stitchee(self, + data_dir, + output_name, + record_dim_name: str = 'mirror_step'): output_path = str(self.__output_path.joinpath(output_name)) # type: ignore data_path = self.__test_data_path.joinpath(data_dir) # type: ignore @@ -44,11 +44,11 @@ def run_verification_with_bumblebee(self, shutil.copyfile(filepath, copied_input_new_path) input_files.append(str(copied_input_new_path)) - output_path = bumblebee(files_to_concat=input_files, - output_file=output_path, - write_tmp_flat_concatenated=True, - keep_tmp_files=True, - concat_dim=record_dim_name) + output_path = stitchee(files_to_concat=input_files, + output_file=output_path, + write_tmp_flat_concatenated=True, + keep_tmp_files=True, + concat_dim=record_dim_name) merged_dataset = nc.Dataset(output_path) @@ -83,34 +83,34 @@ def run_verification_with_nco(self, data_dir, output_name, record_dim_name='mirr length_sum += len(nc.Dataset(file).variables[record_dim_name]) assert length_sum == len(merged_dataset.variables[record_dim_name]) - # def test_tempo_no2_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('tempo/no2', 'tempo_no2_bee_concatenated.nc') + def test_tempo_no2_concat_with_stitchee(self): + self.run_verification_with_stitchee('tempo/no2', 'tempo_no2_bee_concatenated.nc') - # def test_tempo_hcho_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('tempo/hcho', 'tempo_hcho_bee_concatenated.nc') + def test_tempo_hcho_concat_with_stitchee(self): + self.run_verification_with_stitchee('tempo/hcho', 'tempo_hcho_bee_concatenated.nc') - # def test_tempo_cld04_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('tempo/cld04', 'tempo_cld04_bee_concatenated.nc') + def test_tempo_cld04_concat_with_stitchee(self): + self.run_verification_with_stitchee('tempo/cld04', 'tempo_cld04_bee_concatenated.nc') - # def test_tempo_o3prof_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('tempo/o3prof', 'tempo_o3prof_bee_concatenated.nc') + def test_tempo_o3prof_concat_with_stitchee(self): + self.run_verification_with_stitchee('tempo/o3prof', 'tempo_o3prof_bee_concatenated.nc') - # def test_icesat_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('icesat', 'icesat_concat_with_bumblebee.nc') + # def test_icesat_concat_with_stitchee(self): + # self.run_verification_with_stitchee('icesat', 'icesat_concat_with_stitchee.nc') # - # def test_ceres_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('ceres-subsetter-output', + # def test_ceres_concat_with_stitchee(self): + # self.run_verification_with_stitchee('ceres-subsetter-output', # 'ceres_bee_concatenated.nc', # record_dim_name='time') # - # def test_ceres_flash_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('ceres_flash-subsetter-output', + # def test_ceres_flash_concat_with_stitchee(self): + # self.run_verification_with_stitchee('ceres_flash-subsetter-output', # 'ceres_flash_bee_concatenated.nc', # record_dim_name='time') # - # def test_ceres_flash_concat_with_bumblebee(self): - # self.run_verification_with_bumblebee('ceres_flash-subsetter-output', - # 'ceres_flash_concat_with_bumblebee.nc', + # def test_ceres_flash_concat_with_stitchee(self): + # 
self.run_verification_with_stitchee('ceres_flash-subsetter-output', + # 'ceres_flash_concat_with_stitchee.nc', # record_dim_name='time') # def test_tempo_no2_concat_with_nco(self):