Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEAT] Enable MicroPartitions by default #1684

Merged
merged 3 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarking/parquet/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def boto3_get_object_read(path: str, columns: list[str] | None = None) -> pa.Tab


def daft_native_read(path: str, columns: list[str] | None = None) -> pa.Table:
tbl = daft.table.Table.read_parquet(path, columns=columns)
tbl = daft.table.MicroPartition.read_parquet(path, columns=columns)
return tbl.to_arrow()


Expand Down Expand Up @@ -76,7 +76,7 @@ def fn(files: list[str]) -> list[pa.Table]:


def daft_bulk_read(paths: list[str], columns: list[str] | None = None) -> list[pa.Table]:
tables = daft.table.LegacyTable.read_parquet_bulk(paths, columns=columns)
tables = daft.table.Table.read_parquet_bulk(paths, columns=columns)
return [t.to_arrow() for t in tables]


Expand Down
14 changes: 7 additions & 7 deletions daft/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from daft.logical.builder import LogicalPlanBuilder
from daft.runners.partitioning import PartitionCacheEntry, PartitionSet
from daft.runners.pyrunner import LocalPartitionSet
from daft.table import Table
from daft.table import MicroPartition
from daft.viz import DataFrameDisplay

if TYPE_CHECKING:
Expand Down Expand Up @@ -184,7 +184,7 @@ def __iter__(self) -> Iterator[Dict[str, Any]]:
yield row

@DataframePublicAPI
def iter_partitions(self) -> Iterator[Union[Table, "RayObjectRef"]]:
def iter_partitions(self) -> Iterator[Union[MicroPartition, "RayObjectRef"]]:
"""Begin executing this dataframe and return an iterator over the partitions.

Each partition will be returned as a daft.Table object (if using Python runner backend)
Expand Down Expand Up @@ -236,27 +236,27 @@ def _from_pydict(cls, data: Dict[str, InputListType]) -> "DataFrame":
f"Expected all columns to be of the same length, but received columns with lengths: {column_lengths}"
)

data_vpartition = Table.from_pydict(data)
data_vpartition = MicroPartition.from_pydict(data)
return cls._from_tables(data_vpartition)

@classmethod
def _from_arrow(cls, data: Union["pa.Table", List["pa.Table"]]) -> "DataFrame":
"""Creates a DataFrame from a pyarrow Table."""
if not isinstance(data, list):
data = [data]
data_vpartitions = [Table.from_arrow(table) for table in data]
data_vpartitions = [MicroPartition.from_arrow(table) for table in data]
return cls._from_tables(*data_vpartitions)

@classmethod
def _from_pandas(cls, data: Union["pd.DataFrame", List["pd.DataFrame"]]) -> "DataFrame":
"""Creates a Daft DataFrame from a pandas DataFrame."""
if not isinstance(data, list):
data = [data]
data_vpartitions = [Table.from_pandas(df) for df in data]
data_vpartitions = [MicroPartition.from_pandas(df) for df in data]
return cls._from_tables(*data_vpartitions)

@classmethod
def _from_tables(cls, *parts: Table) -> "DataFrame":
def _from_tables(cls, *parts: MicroPartition) -> "DataFrame":
"""Creates a Daft DataFrame from a single Table.

Args:
Expand Down Expand Up @@ -1036,7 +1036,7 @@ def _construct_show_display(self, n: int) -> "DataFrameDisplay":
if seen >= n:
break

preview_partition = Table.concat(tables)
preview_partition = MicroPartition.concat(tables)
if len(preview_partition) > n:
preview_partition = preview_partition.slice(0, n)
elif len(preview_partition) < n:
Expand Down
4 changes: 2 additions & 2 deletions daft/dataframe/preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from dataclasses import dataclass

from daft.table import Table
from daft.table import MicroPartition


@dataclass(frozen=True)
class DataFramePreview:
"""A class containing all the metadata/data required to preview a dataframe."""

preview_partition: Table | None
preview_partition: MicroPartition | None
dataframe_num_rows: int | None
Loading
Loading