Skip to content

Commit

Permalink
move default_types_mapper to from_pyarrow_table_dispatch for pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora committed Aug 16, 2023
1 parent 214acfa commit b05d701
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions dask/dataframe/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,22 @@ def get_pyarrow_table_from_pandas(obj, **kwargs):


@from_pyarrow_table_dispatch.register((pd.DataFrame,))
def get_pandas_dataframe_from_pyarrow(meta, table, **kwargs):
    """Convert ``table`` to pandas via ``table.to_pandas``.

    Parameters
    ----------
    meta
        The object this dispatch implementation was selected for. Only
        ``meta.dtypes`` is read, to decide whether string columns should
        stay pyarrow-backed.
    table
        Object exposing ``to_pandas`` (presumably a ``pyarrow.Table``).
    **kwargs
        Forwarded to ``table.to_pandas``; must be supported by
        ``pyarrow.Table.to_pandas``. A caller-supplied ``types_mapper``
        replaces the default mapper defined below.

    Returns
    -------
    Whatever ``table.to_pandas`` returns for the given arguments.
    """
    # Imported locally, as in the original block, so that pyarrow is only
    # needed when this conversion is actually invoked.
    import pyarrow as pa

    def default_types_mapper(pyarrow_dtype: pa.DataType) -> object:
        # Avoid converting strings from `string[pyarrow]` to `string[python]`
        # if we have *some* `string[pyarrow]`
        if (
            pyarrow_dtype in {pa.large_string(), pa.string()}
            and pd.StringDtype("pyarrow") in meta.dtypes.values
        ):
            return pd.StringDtype("pyarrow")
        # `None` leaves the choice of dtype to pyarrow's default conversion.
        return None

    # An explicit `types_mapper` from the caller takes precedence.
    types_mapper = kwargs.pop("types_mapper", default_types_mapper)
    return table.to_pandas(types_mapper=types_mapper, **kwargs)


@meta_nonempty.register(pd.DatetimeTZDtype)
Expand Down

0 comments on commit b05d701

Please sign in to comment.