From 86839980aa6ad1a79f933c5485fed897db5de71c Mon Sep 17 00:00:00 2001 From: clarkzinzow Date: Wed, 21 Feb 2024 16:46:01 -0800 Subject: [PATCH] Fixes. --- tests/io/delta_lake/conftest.py | 9 ++++++--- tests/io/delta_lake/test_table_read.py | 6 ++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/io/delta_lake/conftest.py b/tests/io/delta_lake/conftest.py index ff47bd9db0..40b2befe93 100644 --- a/tests/io/delta_lake/conftest.py +++ b/tests/io/delta_lake/conftest.py @@ -3,9 +3,11 @@ import datetime import pandas as pd +import pyarrow as pa import pytest deltalake = pytest.importorskip("deltalake") +PYARROW_GE_13_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) >= (13, 0, 0) @pytest.fixture(params=[None, "part_idx"]) @@ -25,9 +27,10 @@ def local_deltalake_table(request, tmp_path, partition_by) -> deltalake.DeltaTab "e": [datetime.datetime(2024, 2, 10), datetime.datetime(2024, 2, 11), datetime.datetime(2024, 2, 12)], } ) - # Delta Lake casts timestamps to microsecond resolution on ingress, so we preemptively cast the Pandas DataFrame here - # to make equality assertions easier later. - base_df["e"] = base_df["e"].astype("datetime64[us]") + if PYARROW_GE_13_0_0: + # Delta Lake casts timestamps to microsecond resolution on ingress with later pyarrow versions, so we + # preemptively cast the Pandas DataFrame here to make equality assertions easier later. + base_df["e"] = base_df["e"].astype("datetime64[us]") dfs = [] for part_idx in range(request.param): part_df = base_df.copy() diff --git a/tests/io/delta_lake/test_table_read.py b/tests/io/delta_lake/test_table_read.py index b3715fe3b6..fb1ad310d2 100644 --- a/tests/io/delta_lake/test_table_read.py +++ b/tests/io/delta_lake/test_table_read.py @@ -14,6 +14,7 @@ PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0) pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="deltalake only supported if pyarrow >= 8.0.0") +PYARROW_GE_13_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) >= (13, 0, 0) def test_deltalake_read_basic(tmp_path): @@ -28,8 +29,9 @@ def test_deltalake_read_basic(tmp_path): deltalake.write_deltalake(path, pd_df) df = daft.read_delta_lake(str(path)) assert df.schema() == Schema.from_pyarrow_schema(deltalake.DeltaTable(path).schema().to_pyarrow()) - # Delta Lake casts timestamps to microsecond resolution on ingress. - pd_df["c"] = pd_df["c"].astype("datetime64[us]") + if PYARROW_GE_13_0_0: + # Delta Lake casts timestamps to microsecond resolution on ingress with later pyarrow versions. + pd_df["c"] = pd_df["c"].astype("datetime64[us]") pd.testing.assert_frame_equal(df.to_pandas(), pd_df)