Skip to content

Commit

Permalink
Implemented from_documents() method for IterableDataset
Browse files Browse the repository at this point in the history
  • Loading branch information
kai-car committed Aug 12, 2024
1 parent 2eaf29c commit a7a5f49
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/pie_datasets/core/dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -493,8 +493,19 @@ def from_documents(
         documents: List[Document],
         document_converters: Optional[DocumentConvertersType] = None,
         **dataset_kwargs,
-    ) -> "Dataset":
-        raise NotImplementedError("from_documents is not implemented for IterableDataset")
+    ) -> "IterableDataset":
+        if len(documents) == 0:
+            raise ValueError("No documents to create dataset from")
+        document_type = type(documents[0])
+        data = [doc.asdict() for doc in documents]
+        hf_dataset = datasets.Dataset.from_list(mapping=data, **dataset_kwargs)
+        hf_iterable_dataset = hf_dataset.to_iterable_dataset()
+        dataset = cls.from_hf_dataset(
+            hf_iterable_dataset,
+            document_type=document_type,
+            document_converters=document_converters,
+        )
+        return dataset

     def __iter__(self):
         for example in iter(super().__iter__()):
Expand Down

0 comments on commit a7a5f49

Please sign in to comment.