From 984292bbd9812824486bdb61b1d2a50d75828b52 Mon Sep 17 00:00:00 2001
From: Мартынов Максим Сергеевич
Date: Tue, 3 Oct 2023 06:52:41 +0000
Subject: [PATCH] [DOP-9653] Update Excel package version to 0.20.2

---
 .github/workflows/data/local-fs/matrix.yml        |  8 --------
 .github/workflows/data/local-fs/tracked.txt       |  1 +
 docs/changelog/next_release/161.feature.rst       |  1 +
 onetl/file/format/excel.py                        | 14 +++++++-------
 tests/fixtures/spark.py                           |  7 +++----
 .../test_excel_integration.py                     |  6 ------
 .../test_file/test_format_unit/test_excel_unit.py | 14 +++++++-------
 7 files changed, 19 insertions(+), 32 deletions(-)
 create mode 100644 docs/changelog/next_release/161.feature.rst

diff --git a/.github/workflows/data/local-fs/matrix.yml b/.github/workflows/data/local-fs/matrix.yml
index 4329d6582..194150d89 100644
--- a/.github/workflows/data/local-fs/matrix.yml
+++ b/.github/workflows/data/local-fs/matrix.yml
@@ -16,12 +16,6 @@ min_excel: &min_excel
   java-version: 8
   os: ubuntu-latest
 
-max_excel: &max_excel
-  spark-version: 3.4.1
-  python-version: '3.11'
-  java-version: 20
-  os: ubuntu-latest
-
 max: &max
   spark-version: 3.5.0
   python-version: '3.11'
@@ -36,13 +30,11 @@ latest: &latest
 
 matrix:
   small:
-    - <<: *max_excel
     - <<: *max
   full:
     - <<: *min
     - <<: *min_avro
     - <<: *min_excel
-    - <<: *max_excel
     - <<: *max
   nightly:
     - <<: *min
diff --git a/.github/workflows/data/local-fs/tracked.txt b/.github/workflows/data/local-fs/tracked.txt
index c763aed1e..013c04894 100644
--- a/.github/workflows/data/local-fs/tracked.txt
+++ b/.github/workflows/data/local-fs/tracked.txt
@@ -1 +1,2 @@
 **/*local_fs*
+**/*local-fs*
diff --git a/docs/changelog/next_release/161.feature.rst b/docs/changelog/next_release/161.feature.rst
new file mode 100644
index 000000000..7204f9078
--- /dev/null
+++ b/docs/changelog/next_release/161.feature.rst
@@ -0,0 +1 @@
+Update ``Excel`` package version to 0.20.2.
diff --git a/onetl/file/format/excel.py b/onetl/file/format/excel.py
index ffd11a5da..cdad8a4f5 100644
--- a/onetl/file/format/excel.py
+++ b/onetl/file/format/excel.py
@@ -100,7 +100,7 @@ class Excel(ReadWriteFileFormat):
         from pyspark.sql import SparkSession
 
         # Create Spark session with Excel package loaded
-        maven_packages = Excel.get_packages(spark_version="3.4.1")
+        maven_packages = Excel.get_packages(spark_version="3.5.0")
         spark = (
             SparkSession.builder.appName("spark-app-name")
             .config("spark.jars.packages", ",".join(maven_packages))
@@ -150,7 +150,7 @@ def get_packages(
             If ``None``, ``spark_version`` is used to determine Scala version.
 
         version: str, optional
-            Package version in format ``major.minor.patch``. Default is ``0.19.0``.
+            Package version in format ``major.minor.patch``. Default is ``0.20.2``.
 
             .. warning::
 
@@ -168,12 +168,12 @@ def get_packages(
 
             from onetl.file.format import Excel
 
-            Excel.get_packages(spark_version="3.4.1")
-            Excel.get_packages(spark_version="3.4.1", scala_version="2.13")
+            Excel.get_packages(spark_version="3.5.0")
+            Excel.get_packages(spark_version="3.5.0", scala_version="2.13")
             Excel.get_packages(
-                spark_version="3.4.1",
+                spark_version="3.5.0",
                 scala_version="2.13",
-                package_version="0.19.0",
+                package_version="0.20.2",
             )
 
         """
@@ -187,7 +187,7 @@ def get_packages(
                 raise ValueError(f"Package version should be at least 0.15, got {package_version}")
             log.warning("Passed custom package version %r, it is not guaranteed to be supported", package_version)
         else:
-            version = Version.parse("0.19.0")
+            version = Version.parse("0.20.2")
 
         spark_ver = Version.parse(spark_version)
         if spark_ver < (3, 2):
diff --git a/tests/fixtures/spark.py b/tests/fixtures/spark.py
index 452c0f978..2dbe213a0 100644
--- a/tests/fixtures/spark.py
+++ b/tests/fixtures/spark.py
@@ -77,10 +77,9 @@ def maven_packages():
         # There is no MongoDB connector for Spark less than 3.2
         packages.extend(MongoDB.get_packages(spark_version=pyspark_version))
 
-    if pyspark_version < (3, 5):
-        # There is no Excel files support for Spark less than 3.2
-        # And there is still no package released for 3.5.0 https://github.com/crealytics/spark-excel/issues/787
-        packages.extend(Excel.get_packages(spark_version=pyspark_version))
+    # There is no Excel files support for Spark less than 3.2
+    # Spark 3.5.0 is supported since package version 0.20.2, see https://github.com/crealytics/spark-excel/issues/787
+    packages.extend(Excel.get_packages(spark_version=pyspark_version))
 
     return packages
 
diff --git a/tests/tests_integration/test_file_format_integration/test_excel_integration.py b/tests/tests_integration/test_file_format_integration/test_excel_integration.py
index f9aaad38f..de8cc9cf9 100644
--- a/tests/tests_integration/test_file_format_integration/test_excel_integration.py
+++ b/tests/tests_integration/test_file_format_integration/test_excel_integration.py
@@ -33,8 +33,6 @@ def test_excel_reader_with_infer_schema(
     spark_version = get_spark_version(spark)
     if spark_version < (3, 2):
         pytest.skip("Excel files are supported on Spark 3.2+ only")
-    if spark_version >= (3, 5):
-        pytest.skip("Excel files are not supported on Spark 3.5+ yet")
 
     file_df_connection, source_path, _ = local_fs_file_df_connection_with_path_and_files
     df = file_df_dataframe
@@ -83,8 +81,6 @@ def test_excel_reader_with_options(
     spark_version = get_spark_version(spark)
     if spark_version < (3, 2):
         pytest.skip("Excel files are supported on Spark 3.2+ only")
-    if spark_version >= (3, 5):
-        pytest.skip("Excel files are not supported on Spark 3.5+ yet")
 
     local_fs, source_path, _ = local_fs_file_df_connection_with_path_and_files
     df = file_df_dataframe
@@ -121,8 +117,6 @@ def test_excel_writer(
     spark_version = get_spark_version(spark)
     if spark_version < (3, 2):
         pytest.skip("Excel files are supported on Spark 3.2+ only")
-    if spark_version >= (3, 5):
-        pytest.skip("Excel files are not supported on Spark 3.5+ yet")
 
     file_df_connection, source_path = local_fs_file_df_connection_with_path
     df = file_df_dataframe
diff --git a/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py b/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py
index e94386120..17b1401e2 100644
--- a/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py
+++ b/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py
@@ -32,16 +32,16 @@ def test_excel_get_packages_package_version_not_supported():
     "spark_version, scala_version, package_version, packages",
     [
         # Detect Scala version by Spark version
-        ("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.19.0"]),
-        ("3.4.1", None, None, ["com.crealytics:spark-excel_2.12:3.4.1_0.19.0"]),
+        ("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.2"]),
+        ("3.5.0", None, None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]),
         # Override Scala version
-        ("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.19.0"]),
-        ("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.19.0"]),
-        ("3.4.1", "2.12", None, ["com.crealytics:spark-excel_2.12:3.4.1_0.19.0"]),
-        ("3.4.1", "2.13", None, ["com.crealytics:spark-excel_2.13:3.4.1_0.19.0"]),
+        ("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.2"]),
+        ("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.20.2"]),
+        ("3.5.0", "2.12", None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]),
+        ("3.5.0", "2.13", None, ["com.crealytics:spark-excel_2.13:3.5.0_0.20.2"]),
         # Override package version
         ("3.2.0", None, "0.16.0", ["com.crealytics:spark-excel_2.12:3.2.0_0.16.0"]),
-        ("3.4.1", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.4.1_0.18.0"]),
+        ("3.5.0", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.5.0_0.18.0"]),
     ],
 )
 def test_excel_get_packages(caplog, spark_version, scala_version, package_version, packages):
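
Below is a minimal sketch, not part of the patch itself, of how the updated default is consumed at runtime. It mirrors the docstring example changed above and assumes ``pyspark`` and ``onetl`` are installed; with Spark 3.5.0 and the new default ``0.20.2``, ``Excel.get_packages()`` resolves to the same Maven coordinate the updated unit tests expect:

    from pyspark.sql import SparkSession

    from onetl.file.format import Excel

    # With the default package_version (0.20.2) this returns
    # ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"], matching the updated unit tests
    maven_packages = Excel.get_packages(spark_version="3.5.0")

    # Start a Spark session with the Excel package on the classpath,
    # exactly as the updated class docstring example does
    spark = (
        SparkSession.builder.appName("spark-app-name")
        .config("spark.jars.packages", ",".join(maven_packages))
        .getOrCreate()
    )

Passing an explicit ``package_version`` (as in the ``0.18.0`` test case) still works, but the code logs a warning that custom versions are not guaranteed to be supported.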