-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEAT] daft-connect range use python generator (#3308)
- Loading branch information
1 parent
731a73e
commit 066cde1
Showing
20 changed files
with
281 additions
and
435 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING, Callable | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Iterator | ||
|
||
from daft import DataType | ||
from daft.io._generator import GeneratorScanOperator | ||
from daft.logical.schema import Schema | ||
from daft.table.table import Table | ||
|
||
|
||
def _range_generators(start: int, end: int, step: int, partitions: int) -> Iterator[Callable[[], Iterator[Table]]]: | ||
# TODO: Partitioning with range scan is currently untested and unused. | ||
# There may be issues with balanced partitions and step size. | ||
|
||
# Calculate partition bounds upfront | ||
partition_size = (end - start) // partitions | ||
partition_bounds = [ | ||
(start + (i * partition_size), start + ((i + 1) * partition_size) if i < partitions - 1 else end) | ||
for i in range(partitions) | ||
] | ||
|
||
def generator(partition_idx: int) -> Iterator[Table]: | ||
partition_start, partition_end = partition_bounds[partition_idx] | ||
values = list(range(partition_start, partition_end, step)) | ||
yield Table.from_pydict({"id": values}) | ||
|
||
from functools import partial | ||
|
||
for partition_idx in range(partitions): | ||
yield partial(generator, partition_idx) | ||
|
||
|
||
class RangeScanOperator(GeneratorScanOperator): | ||
def __init__(self, start: int, end: int, step: int = 1, partitions: int = 1) -> None: | ||
schema = Schema._from_field_name_and_types([("id", DataType.int64())]) | ||
|
||
super().__init__(schema=schema, generators=_range_generators(start, end, step, partitions)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.