From 9d7ce65fc3e5e97c6b1008fbbcbf2f3d94f304fb Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Thu, 21 Nov 2024 12:51:20 -0600 Subject: [PATCH 1/9] add python3.13 support on datasets --- .github/workflows/kedro-datasets.yml | 2 +- kedro-datasets/RELEASE.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kedro-datasets.yml b/.github/workflows/kedro-datasets.yml index d5aae0282..ca4a2abea 100644 --- a/.github/workflows/kedro-datasets.yml +++ b/.github/workflows/kedro-datasets.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.10", "3.11", "3.12" ] + python-version: [ "3.10", "3.11", "3.12", "3.13" ] uses: ./.github/workflows/unit-tests.yml with: plugin: kedro-datasets diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 482b3c76f..80392c0c0 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,6 +1,7 @@ # Upcoming Release 6.0.0 ## Major features and improvements +- Added support for Python 3.13 - Added functionality to save Pandas DataFrame directly to Snowflake, facilitating seemless `.csv` ingestion - Added Python 3.9, 3.10 and 3.11 support for SnowflakeTableDataset - Added the following new **experimental** datasets: From f4b824808d62e8ad408dcd806d2ee4caabc3e2c3 Mon Sep 17 00:00:00 2001 From: Nok Lam Chan Date: Thu, 28 Nov 2024 16:55:51 +0800 Subject: [PATCH 2/9] skip tensorfolow for py313 (#951) skip tensorflow for py313 Signed-off-by: Nok --- kedro-datasets/pyproject.toml | 6 +++--- .../tests/tensorflow/test_tensorflow_model_dataset.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 9ae3af9aa..1efb054f7 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -186,7 +186,7 @@ prophet = ["kedro-datasets[prophet]"] pytorch-dataset = ["torch"] pytorch = ["kedro-datasets[pytorch-dataset]"] -rioxarray-geotiffdataset 
= ["rioxarray>=0.15.0"] +rioxarray-geotiffdataset = ["rioxarraπtey>=0.15.0"] rioxarray = ["kedro-datasets[rioxarray-geotiffdataset]"] video-videodataset = ["opencv-python~=4.5.5.64"] @@ -254,8 +254,8 @@ test = [ "packaging", "SQLAlchemy>=1.2", "tables>=3.6", - "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'", - "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", + "tensorflow-macos~=2.12; platform_system == 'Darwin' and platform_machine == 'arm64' and python_version < '3.13'", + "tensorflow~=2.12; (platform_system != 'Darwin' or platform_machine != 'arm64') and python_version < '3.13'", "triad>=0.6.7, <1.0", "xarray>=2023.1.0", "xlsxwriter~=1.0", diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py index 8855dc4f3..d71a3648c 100644 --- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py +++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py @@ -14,6 +14,8 @@ "TensorFlow tests have become inexplicably flaky in Windows CI", allow_module_level=True, ) +elif sys.version.minor < 13: + pytest.skip("Tensorflow is not available in Python 3.13 yet") # In this test module, we wrap tensorflow and TensorFlowModelDataset imports into a module-scoped From cf8fa7a578648122a04045468c2abd330f9d5f5c Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Thu, 28 Nov 2024 05:02:52 -0700 Subject: [PATCH 3/9] build(datasets): fix rioxarray dependency spelling Signed-off-by: Deepyaman Datta --- kedro-datasets/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 4c85bea65..d6a0c89a1 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -186,7 +186,7 @@ prophet = ["kedro-datasets[prophet]"] pytorch-dataset = ["torch"] pytorch = ["kedro-datasets[pytorch-dataset]"] 
-rioxarray-geotiffdataset = ["rioxarraπtey>=0.15.0"] +rioxarray-geotiffdataset = ["rioxarray>=0.15.0"] rioxarray = ["kedro-datasets[rioxarray-geotiffdataset]"] safetensors-safetensorsdataset = ["safetensors", "numpy"] From 4c276a180a9826fc21af87c01815cf5b4c327b24 Mon Sep 17 00:00:00 2001 From: Nok Date: Fri, 29 Nov 2024 08:23:21 +0000 Subject: [PATCH 4/9] fix dependencies, networkx and import-linter both depends on grimp Signed-off-by: Nok --- kedro-datasets/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index d6a0c89a1..24f9ea5fe 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -98,7 +98,7 @@ pandas-jsondataset = ["kedro-datasets[pandas-base]"] pandas-parquetdataset = ["kedro-datasets[pandas-base]", "pyarrow>=6.0"] pandas-sqltabledataset = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0"] pandas-sqlquerydataset = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0", "pyodbc>=4.0"] -pandas-xmldataset = ["kedro-datasets[pandas-base]", "lxml~=4.6"] +pandas-xmldataset = ["kedro-datasets[pandas-base]", "lxml"] pandas = [ """kedro-datasets[pandas-csvdataset,\ pandas-deltatabledataset,\ @@ -273,7 +273,7 @@ lint = [ "bandit>=1.6.2, <2.0", "blacken-docs==1.9.2", "black~=22.0", - "import-linter[toml]==1.2.6", + "import-linter[toml]>=1.2.6", "mypy~=1.0", "pre-commit>=2.9.2", "ruff~=0.0.290", From f44037f348046b38f0a4cf93b40f21e817acf9e1 Mon Sep 17 00:00:00 2001 From: Nok Date: Fri, 29 Nov 2024 08:41:26 +0000 Subject: [PATCH 5/9] relax more dependencies Signed-off-by: Nok --- kedro-datasets/pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 24f9ea5fe..af2bed644 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -115,7 +115,7 @@ pandas = [ pandas-xmldataset]""" ] -pickle-pickledataset = 
["compress-pickle[lz4]~=2.1.0"] +pickle-pickledataset = ["compress-pickle[lz4]>=2.1.0"] pickle = ["kedro-datasets[pickle-pickledataset]"] pillow-imagedataset = ["Pillow>=9.0"] @@ -212,7 +212,7 @@ test = [ "compress-pickle[lz4]~=2.1.0", "coverage>=7.2.0", "dask[complete]>=2021.10", - "delta-spark>=1.0, <3.0", + "delta-spark>=1.0, <4.0", "deltalake>=0.10.0", "dill~=0.3.1", "filelock>=3.4.0, <4.0", @@ -237,7 +237,7 @@ test = [ "pandas>=2.0", "Pillow~=10.0", "plotly>=4.8.0, <6.0", - "polars[xlsx2csv, deltalake]~=0.18.0", + "polars[xlsx2csv, deltalake]>=0.18.0", "pyarrow>=1.0; python_version < '3.11'", "pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors "pyodbc~=5.0", From 77f06bd9bf843ca58cc0f3334e4b51616e343a17 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 16 Dec 2024 21:41:08 -0600 Subject: [PATCH 6/9] relax requirements for python 3.13 support --- kedro-datasets/pyproject.toml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index b5df6ac65..d00537da3 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -207,7 +207,7 @@ test = [ "accelerate<0.32", # Temporary pin "adlfs~=2023.1", "behave==1.2.6", - "biopython~=1.73", + "biopython~=1.73; python_version < '3.13'", "cloudpickle~=2.2.1", "compress-pickle[lz4]~=2.1.0", "coverage>=7.2.0", @@ -218,7 +218,7 @@ test = [ "filelock>=3.4.0, <4.0", "fiona >=1.8, <2.0", "gcsfs>=2023.1, <2023.3", - "geopandas>=0.8.0, <2.0", + "geopandas>=0.8.0, <2.0; python_version < '3.13'", "hdfs>=2.5.8, <3.0", "holoviews>=1.13.0", "ibis-framework[duckdb,examples]", @@ -227,7 +227,7 @@ test = [ "joblib>=0.14", "jupyterlab>=3.0", "jupyter~=1.0", - "lxml~=4.6", + "lxml~=5.3", "matplotlib>=3.5, <4.0", "memory_profiler>=0.50.0, <1.0", "moto==5.0.0", @@ -239,7 +239,8 @@ test = [ "plotly>=4.8.0, <6.0", "polars[deltalake,xlsx2csv]>=1.0", "pyarrow>=1.0; python_version < '3.11'", - "pyarrow>=7.0; 
python_version >= '3.11'", # Adding to avoid numpy build errors + "pyarrow>=7.0; python_version >= '3.11' and python_version < '3.13'", # Adding to avoid numpy build errors + "pyarrow>=18.0; python_version >= '3.13'", "pyodbc~=5.0", "pyspark>=3.0; python_version < '3.11'", "pyspark>=3.4; python_version >= '3.11'", @@ -266,7 +267,7 @@ test = [ # huggingface "datasets", "huggingface_hub", - "transformers[torch]", + "transformers[torch]; python_version < '3.13'", ] # Lint requirements From 3730a059ea3318a62cdba9b6516e1148884a71b0 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 16 Dec 2024 22:10:08 -0600 Subject: [PATCH 7/9] testing 3.13 --- kedro-datasets/pyproject.toml | 3 ++- kedro-datasets/tests/biosequence/test_biosequence_dataset.py | 5 ++++- kedro-datasets/tests/geopandas/test_generic_dataset.py | 4 ++++ kedro-datasets/tests/huggingface/conftest.py | 5 +++++ .../tests/tensorflow/test_tensorflow_model_dataset.py | 3 ++- 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index d00537da3..478ec72e5 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -251,7 +251,8 @@ test = [ "redis~=4.1", "requests-mock~=1.6", "requests~=2.20", - "s3fs>=2021.04", + "s3fs>=2021.04; python_version < '3.13'", + "s3fs>=2024.10; python_version >= '3.13'", "snowflake-snowpark-python>=1.23; python_version < '3.12'", "scikit-learn>=1.0.2,<2", "scipy>=1.7.3", diff --git a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py index b5c35bd8f..dd9caed1d 100644 --- a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py +++ b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py @@ -1,6 +1,6 @@ from io import StringIO from pathlib import PurePosixPath - +import sys import pytest from Bio import SeqIO from fsspec.implementations.http import HTTPFileSystem @@ -14,6 +14,9 @@ LOAD_ARGS = {"format": 
"fasta"} SAVE_ARGS = {"format": "fasta"} +if sys.version_info >= (3, 13): + pytest.skip("BioPython is not available in Python 3.13 yet") + @pytest.fixture def filepath_biosequence(tmp_path): diff --git a/kedro-datasets/tests/geopandas/test_generic_dataset.py b/kedro-datasets/tests/geopandas/test_generic_dataset.py index 5c4569e9c..c5ad69323 100644 --- a/kedro-datasets/tests/geopandas/test_generic_dataset.py +++ b/kedro-datasets/tests/geopandas/test_generic_dataset.py @@ -2,6 +2,7 @@ import geopandas as gpd import pytest +import sys from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem @@ -12,6 +13,9 @@ from kedro_datasets.geopandas import GenericDataset +if sys.version_info >= (3, 13): + pytest.skip("GeoPandas is not available in Python 3.13 yet") + @pytest.fixture(params=[None]) def load_version(request): diff --git a/kedro-datasets/tests/huggingface/conftest.py b/kedro-datasets/tests/huggingface/conftest.py index 8630b0dbb..6561dc6d3 100644 --- a/kedro-datasets/tests/huggingface/conftest.py +++ b/kedro-datasets/tests/huggingface/conftest.py @@ -4,3 +4,8 @@ discover them automatically. 
More info here: https://docs.pytest.org/en/latest/fixture.html """ +import sys +import pytest + +if sys.version_info >= (3, 13): + pytest.skip("Transformers is not available in Python 3.13 yet") diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py index d71a3648c..85e061dc6 100644 --- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py +++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py @@ -14,7 +14,8 @@ "TensorFlow tests have become inexplicably flaky in Windows CI", allow_module_level=True, ) -elif sys.version.minor < 13: + +if sys.version_info >= (3, 13): pytest.skip("Tensorflow is not available in Python 3.13 yet") From dbb7cb69ae115fa3f9fd8ca9d4ae2018db697148 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 17 Dec 2024 09:52:12 -0600 Subject: [PATCH 8/9] revert s3fs dep --- kedro-datasets/pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 478ec72e5..d00537da3 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -251,8 +251,7 @@ test = [ "redis~=4.1", "requests-mock~=1.6", "requests~=2.20", - "s3fs>=2021.04; python_version < '3.13'", - "s3fs>=2024.10; python_version >= '3.13'", + "s3fs>=2021.04", "snowflake-snowpark-python>=1.23; python_version < '3.12'", "scikit-learn>=1.0.2,<2", "scipy>=1.7.3", From 2fd26a2d460e505510204818d4a84cc65b3684d2 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 17 Dec 2024 10:13:18 -0600 Subject: [PATCH 9/9] revert dep relaxations --- kedro-datasets/pyproject.toml | 6 +++--- .../tests/biosequence/test_biosequence_dataset.py | 4 ---- kedro-datasets/tests/geopandas/test_generic_dataset.py | 4 ---- kedro-datasets/tests/huggingface/conftest.py | 5 ----- .../tests/tensorflow/test_tensorflow_model_dataset.py | 2 +- 5 files changed, 4 insertions(+), 17 deletions(-) diff 
--git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index d00537da3..84e6a6deb 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -207,7 +207,7 @@ test = [ "accelerate<0.32", # Temporary pin "adlfs~=2023.1", "behave==1.2.6", - "biopython~=1.73; python_version < '3.13'", + "biopython~=1.73", "cloudpickle~=2.2.1", "compress-pickle[lz4]~=2.1.0", "coverage>=7.2.0", @@ -218,7 +218,7 @@ test = [ "filelock>=3.4.0, <4.0", "fiona >=1.8, <2.0", "gcsfs>=2023.1, <2023.3", - "geopandas>=0.8.0, <2.0; python_version < '3.13'", + "geopandas>=0.8.0, <2.0", "hdfs>=2.5.8, <3.0", "holoviews>=1.13.0", "ibis-framework[duckdb,examples]", @@ -267,7 +267,7 @@ test = [ # huggingface "datasets", "huggingface_hub", - "transformers[torch]; python_version < '3.13'", + "transformers[torch]", ] # Lint requirements diff --git a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py index dd9caed1d..802578296 100644 --- a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py +++ b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py @@ -1,6 +1,5 @@ from io import StringIO from pathlib import PurePosixPath -import sys import pytest from Bio import SeqIO from fsspec.implementations.http import HTTPFileSystem @@ -14,9 +13,6 @@ LOAD_ARGS = {"format": "fasta"} SAVE_ARGS = {"format": "fasta"} -if sys.version_info >= (3, 13): - pytest.skip("BioPython is not available in Python 3.13 yet") - @pytest.fixture def filepath_biosequence(tmp_path): diff --git a/kedro-datasets/tests/geopandas/test_generic_dataset.py b/kedro-datasets/tests/geopandas/test_generic_dataset.py index c5ad69323..5c4569e9c 100644 --- a/kedro-datasets/tests/geopandas/test_generic_dataset.py +++ b/kedro-datasets/tests/geopandas/test_generic_dataset.py @@ -2,7 +2,6 @@ import geopandas as gpd import pytest -import sys from fsspec.implementations.http import HTTPFileSystem from fsspec.implementations.local import 
LocalFileSystem from gcsfs import GCSFileSystem @@ -13,9 +12,6 @@ from kedro_datasets.geopandas import GenericDataset -if sys.version_info >= (3, 13): - pytest.skip("GeoPandas is not available in Python 3.13 yet") - @pytest.fixture(params=[None]) def load_version(request): diff --git a/kedro-datasets/tests/huggingface/conftest.py b/kedro-datasets/tests/huggingface/conftest.py index 6561dc6d3..8630b0dbb 100644 --- a/kedro-datasets/tests/huggingface/conftest.py +++ b/kedro-datasets/tests/huggingface/conftest.py @@ -4,8 +4,3 @@ discover them automatically. More info here: https://docs.pytest.org/en/latest/fixture.html """ -import sys -import pytest - -if sys.version_info >= (3, 13): - pytest.skip("Transformers is not available in Python 3.13 yet") diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py index 85e061dc6..f48a67086 100644 --- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py +++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py @@ -15,7 +15,7 @@ allow_module_level=True, ) if sys.version_info >= (3, 13): - pytest.skip("Tensorflow is not available in Python 3.13 yet") + pytest.skip("Tensorflow is not available in Python 3.13 yet", allow_module_level=True)