From a7a5f49f923c5e3fc8e704e0b4a5398b9e4304e0 Mon Sep 17 00:00:00 2001
From: Kai Carhuallanqui
Date: Mon, 12 Aug 2024 14:14:56 +0200
Subject: [PATCH] Implemented from_documents() method for IterableDataset

---
 src/pie_datasets/core/dataset.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/pie_datasets/core/dataset.py b/src/pie_datasets/core/dataset.py
index 0368ba51..6f58864b 100644
--- a/src/pie_datasets/core/dataset.py
+++ b/src/pie_datasets/core/dataset.py
@@ -493,8 +493,19 @@ def from_documents(
         documents: List[Document],
         document_converters: Optional[DocumentConvertersType] = None,
         **dataset_kwargs,
-    ) -> "Dataset":
-        raise NotImplementedError("from_documents is not implemented for IterableDataset")
+    ) -> "IterableDataset":
+        if len(documents) == 0:
+            raise ValueError("No documents to create dataset from")
+        document_type = type(documents[0])
+        data = [doc.asdict() for doc in documents]
+        hf_dataset = datasets.Dataset.from_list(mapping=data, **dataset_kwargs)
+        hf_iterable_dataset = hf_dataset.to_iterable_dataset()
+        dataset = cls.from_hf_dataset(
+            hf_iterable_dataset,
+            document_type=document_type,
+            document_converters=document_converters,
+        )
+        return dataset
 
     def __iter__(self):
         for example in iter(super().__iter__()):