Skip to content

Commit

Permalink
Fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
clarkzinzow committed Feb 22, 2024
1 parent 9be424a commit 8683998
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
9 changes: 6 additions & 3 deletions tests/io/delta_lake/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import datetime

import pandas as pd
import pyarrow as pa
import pytest

deltalake = pytest.importorskip("deltalake")
PYARROW_GE_13_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) >= (13, 0, 0)


@pytest.fixture(params=[None, "part_idx"])
Expand All @@ -25,9 +27,10 @@ def local_deltalake_table(request, tmp_path, partition_by) -> deltalake.DeltaTab
"e": [datetime.datetime(2024, 2, 10), datetime.datetime(2024, 2, 11), datetime.datetime(2024, 2, 12)],
}
)
# Delta Lake casts timestamps to microsecond resolution on ingress, so we preemptively cast the Pandas DataFrame here
# to make equality assertions easier later.
base_df["e"] = base_df["e"].astype("datetime64[us]")
if PYARROW_GE_13_0_0:
# Delta Lake casts timestamps to microsecond resolution on ingress with later pyarrow versions, so we
# preemptively cast the Pandas DataFrame here to make equality assertions easier later.
base_df["e"] = base_df["e"].astype("datetime64[us]")
dfs = []
for part_idx in range(request.param):
part_df = base_df.copy()
Expand Down
6 changes: 4 additions & 2 deletions tests/io/delta_lake/test_table_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="deltalake only supported if pyarrow >= 8.0.0")
PYARROW_GE_13_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) >= (13, 0, 0)


def test_deltalake_read_basic(tmp_path):
Expand All @@ -28,8 +29,9 @@ def test_deltalake_read_basic(tmp_path):
deltalake.write_deltalake(path, pd_df)
df = daft.read_delta_lake(str(path))
assert df.schema() == Schema.from_pyarrow_schema(deltalake.DeltaTable(path).schema().to_pyarrow())
# Delta Lake casts timestamps to microsecond resolution on ingress.
pd_df["c"] = pd_df["c"].astype("datetime64[us]")
if PYARROW_GE_13_0_0:
# Delta Lake casts timestamps to microsecond resolution on ingress with later pyarrow versions.
pd_df["c"] = pd_df["c"].astype("datetime64[us]")
pd.testing.assert_frame_equal(df.to_pandas(), pd_df)


Expand Down

0 comments on commit 8683998

Please sign in to comment.