Commit
[DOP-9653] Update Excel package version to 0.20.1
dolfinus committed Nov 14, 2023
1 parent 3810aaa commit 984292b
Showing 7 changed files with 19 additions and 32 deletions.
8 changes: 0 additions & 8 deletions .github/workflows/data/local-fs/matrix.yml
@@ -16,12 +16,6 @@ min_excel: &min_excel
   java-version: 8
   os: ubuntu-latest

-max_excel: &max_excel
-  spark-version: 3.4.1
-  python-version: '3.11'
-  java-version: 20
-  os: ubuntu-latest
-
 max: &max
   spark-version: 3.5.0
   python-version: '3.11'
@@ -36,13 +30,11 @@ latest: &latest

 matrix:
   small:
-    - <<: *max_excel
     - <<: *max
   full:
     - <<: *min
     - <<: *min_avro
     - <<: *min_excel
-    - <<: *max_excel
     - <<: *max
   nightly:
     - <<: *min
1 change: 1 addition & 0 deletions .github/workflows/data/local-fs/tracked.txt
@@ -1 +1,2 @@
 **/*local_fs*
+**/*local-fs*
1 change: 1 addition & 0 deletions docs/changelog/next_release/161.feature.rst
@@ -0,0 +1 @@
+Update ``Excel`` package version to 0.20.2.
14 changes: 7 additions & 7 deletions onetl/file/format/excel.py
@@ -100,7 +100,7 @@ class Excel(ReadWriteFileFormat):
    from pyspark.sql import SparkSession

    # Create Spark session with Excel package loaded
-   maven_packages = Excel.get_packages(spark_version="3.4.1")
+   maven_packages = Excel.get_packages(spark_version="3.5.0")
    spark = (
        SparkSession.builder.appName("spark-app-name")
        .config("spark.jars.packages", ",".join(maven_packages))
@@ -150,7 +150,7 @@ def get_packages(
        If ``None``, ``spark_version`` is used to determine Scala version.
    version: str, optional
-       Package version in format ``major.minor.patch``. Default is ``0.19.0``.
+       Package version in format ``major.minor.patch``. Default is ``0.20.2``.
    .. warning::
@@ -168,12 +168,12 @@
    from onetl.file.format import Excel

-   Excel.get_packages(spark_version="3.4.1")
-   Excel.get_packages(spark_version="3.4.1", scala_version="2.13")
+   Excel.get_packages(spark_version="3.5.0")
+   Excel.get_packages(spark_version="3.5.0", scala_version="2.13")
    Excel.get_packages(
-       spark_version="3.4.1",
+       spark_version="3.5.0",
        scala_version="2.13",
-       package_version="0.19.0",
+       package_version="0.20.2",
    )
    """
@@ -187,7 +187,7 @@
            raise ValueError(f"Package version should be at least 0.15, got {package_version}")
        log.warning("Passed custom package version %r, it is not guaranteed to be supported", package_version)
    else:
-       version = Version.parse("0.19.0")
+       version = Version.parse("0.20.2")

    spark_ver = Version.parse(spark_version)
    if spark_ver < (3, 2):
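For reference, a minimal usage sketch consistent with the updated docstring example above; the app name and the .getOrCreate() call are illustrative and not part of this diff.

from pyspark.sql import SparkSession

from onetl.file.format import Excel

# With this commit the default spark-excel version is 0.20.2,
# so no explicit package_version is needed for Spark 3.5.0.
maven_packages = Excel.get_packages(spark_version="3.5.0")

spark = (
    SparkSession.builder.appName("spark-app-name")
    .config("spark.jars.packages", ",".join(maven_packages))
    .getOrCreate()
)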
7 changes: 3 additions & 4 deletions tests/fixtures/spark.py
@@ -77,10 +77,9 @@ def maven_packages():
        # There is no MongoDB connector for Spark less than 3.2
        packages.extend(MongoDB.get_packages(spark_version=pyspark_version))

-       if pyspark_version < (3, 5):
-           # There is no Excel files support for Spark less than 3.2
-           # And there is still no package released for 3.5.0 https://github.com/crealytics/spark-excel/issues/787
-           packages.extend(Excel.get_packages(spark_version=pyspark_version))
+       # There is no Excel files support for Spark less than 3.2
+       # And there is still no package released for 3.5.0 https://github.com/crealytics/spark-excel/issues/787
+       packages.extend(Excel.get_packages(spark_version=pyspark_version))

    return packages

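The removed guard existed only because spark-excel previously published no artifact for Spark 3.5.0; with the default bumped to 0.20.2 the fixture calls Excel.get_packages unconditionally. A quick sanity check, mirroring the expected values in the unit tests further down in this commit:

from onetl.file.format import Excel

# Expected coordinate for Spark 3.5.0 with the new default package version,
# as asserted by the updated unit tests in this commit.
assert Excel.get_packages(spark_version="3.5.0") == ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]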
6 changes: 0 additions & 6 deletions (file path not shown)
@@ -33,8 +33,6 @@ def test_excel_reader_with_infer_schema(
    spark_version = get_spark_version(spark)
    if spark_version < (3, 2):
        pytest.skip("Excel files are supported on Spark 3.2+ only")
-   if spark_version >= (3, 5):
-       pytest.skip("Excel files are not supported on Spark 3.5+ yet")

    file_df_connection, source_path, _ = local_fs_file_df_connection_with_path_and_files
    df = file_df_dataframe
@@ -83,8 +81,6 @@ def test_excel_reader_with_options(
    spark_version = get_spark_version(spark)
    if spark_version < (3, 2):
        pytest.skip("Excel files are supported on Spark 3.2+ only")
-   if spark_version >= (3, 5):
-       pytest.skip("Excel files are not supported on Spark 3.5+ yet")

    local_fs, source_path, _ = local_fs_file_df_connection_with_path_and_files
    df = file_df_dataframe
@@ -121,8 +117,6 @@ def test_excel_writer(
    spark_version = get_spark_version(spark)
    if spark_version < (3, 2):
        pytest.skip("Excel files are supported on Spark 3.2+ only")
-   if spark_version >= (3, 5):
-       pytest.skip("Excel files are not supported on Spark 3.5+ yet")

    file_df_connection, source_path = local_fs_file_df_connection_with_path
    df = file_df_dataframe
14 changes: 7 additions & 7 deletions tests/tests_unit/test_file/test_format_unit/test_excel_unit.py
@@ -32,16 +32,16 @@ def test_excel_get_packages_package_version_not_supported():
    "spark_version, scala_version, package_version, packages",
    [
        # Detect Scala version by Spark version
-       ("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.19.0"]),
-       ("3.4.1", None, None, ["com.crealytics:spark-excel_2.12:3.4.1_0.19.0"]),
+       ("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.2"]),
+       ("3.5.0", None, None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]),
        # Override Scala version
-       ("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.19.0"]),
-       ("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.19.0"]),
-       ("3.4.1", "2.12", None, ["com.crealytics:spark-excel_2.12:3.4.1_0.19.0"]),
-       ("3.4.1", "2.13", None, ["com.crealytics:spark-excel_2.13:3.4.1_0.19.0"]),
+       ("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.2"]),
+       ("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.20.2"]),
+       ("3.5.0", "2.12", None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]),
+       ("3.5.0", "2.13", None, ["com.crealytics:spark-excel_2.13:3.5.0_0.20.2"]),
        # Override package version
        ("3.2.0", None, "0.16.0", ["com.crealytics:spark-excel_2.12:3.2.0_0.16.0"]),
-       ("3.4.1", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.4.1_0.18.0"]),
+       ("3.5.0", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.5.0_0.18.0"]),
    ],
)
def test_excel_get_packages(caplog, spark_version, scala_version, package_version, packages):
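The expected values above all follow the spark-excel coordinate scheme com.crealytics:spark-excel_<scala>:<spark>_<package>. A hypothetical helper illustrating that pattern, not the library's actual implementation:

# Hypothetical helper showing the coordinate pattern used in the expected values above;
# onetl's real implementation may differ.
def spark_excel_coordinate(spark_version: str, scala_version: str = "2.12", package_version: str = "0.20.2") -> str:
    return f"com.crealytics:spark-excel_{scala_version}:{spark_version}_{package_version}"

assert spark_excel_coordinate("3.5.0") == "com.crealytics:spark-excel_2.12:3.5.0_0.20.2"
assert spark_excel_coordinate("3.2.4", scala_version="2.13") == "com.crealytics:spark-excel_2.13:3.2.4_0.20.2"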
