Skip to content

Commit

Permalink
bugfix: IL-414 retry dataset creation on error
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixFehseTNG committed Apr 3, 2024
1 parent c75834a commit abee57c
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,16 @@ def __init__(self, filesystem: AbstractFileSystem, root_directory: Path) -> None
super().__init__(file_system=filesystem, root_directory=root_directory)

def create_dataset(
self, examples: Iterable[Example[Input, ExpectedOutput]], dataset_name: str
self,
examples: Iterable[Example[Input, ExpectedOutput]],
dataset_name: str,
id: str | None = None,
) -> Dataset:
dataset = Dataset(name=dataset_name)
if id is None:
dataset = Dataset(name=dataset_name)
else:
dataset = Dataset(name=dataset_name, id=id)

self.mkdir(self._dataset_directory(dataset.id))

dataset_path = self._dataset_path(dataset.id)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import time
from pathlib import Path
from typing import Optional
from typing import Iterable, Optional
import typing

import huggingface_hub # type: ignore
from huggingface_hub import HfFileSystem, create_repo
from huggingface_hub.utils import HfHubHTTPError

from intelligence_layer.evaluation.dataset.domain import Dataset
from intelligence_layer.core.task import Input
from intelligence_layer.evaluation.dataset.domain import (
Dataset,
Example,
ExpectedOutput,
)
from intelligence_layer.evaluation.dataset.file_dataset_repository import (
FileSystemDatasetRepository,
)
Expand Down Expand Up @@ -42,6 +50,25 @@ def __init__(self, repository_id: str, token: str, private: bool) -> None:
self._repository_id = repository_id
self._file_system = file_system # for better type checks

def create_dataset(
    self,
    examples: Iterable[Example[Input, ExpectedOutput]],
    dataset_name: str,
    id: str | None = None,
) -> Dataset:
    """Create a dataset, retrying a few times when the creation fails.

    Delegates to the parent class's ``create_dataset`` and retries on any
    exception, up to five attempts in total, pausing half a second
    between attempts.

    Args:
        examples: The examples to store. They are materialized into a list
            once, so that every attempt writes the complete set even if a
            one-shot iterable (e.g. a generator) was passed.
        dataset_name: Name of the dataset to create.
        id: Optional ID for the new dataset; forwarded unchanged to the
            parent implementation.

    Returns:
        The dataset returned by the first successful attempt.

    Raises:
        Exception: The error of the last attempt, re-raised unchanged
            once all attempts have failed.
    """
    max_attempts = 5
    retry_delay_seconds = 0.5

    # A generator would be exhausted by the first failed attempt, making
    # every retry silently write a truncated dataset. Materialize it once.
    materialized_examples = list(examples)

    # NOTE(review): when `id` is None the parent presumably generates a
    # fresh ID per attempt, so a failed attempt may leave partial files
    # under a different ID -- confirm and pass an explicit `id` if needed.
    for attempt in range(1, max_attempts + 1):
        try:
            return super().create_dataset(materialized_examples, dataset_name, id)
        except Exception:
            print(f"Failure {attempt}")
            if attempt == max_attempts:
                # Out of attempts: surface the original error with its
                # traceback instead of sleeping once more.
                raise
            time.sleep(retry_delay_seconds)

    # Unreachable: the loop either returns or re-raises on its last pass.
    raise RuntimeError("Cannot create dataset on Huggingface.")

def delete_repository(self) -> None:
huggingface_hub.delete_repo(
repo_id=self._repository_id,
Expand Down
13 changes: 9 additions & 4 deletions tests/evaluation/test_hugging_face_dataset_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,19 @@ def hugging_face_repository_with_dataset_and_examples(
Tuple[HuggingFaceDatasetRepository, Dataset, Sequence[Example[str, str]]]
]:
examples = [example_1, example_2]
dataset = hugging_face_dataset_repository.create_dataset(
examples=examples, dataset_name="test-hg-dataset"
)
id = str(uuid4())
try:
dataset = hugging_face_dataset_repository.create_dataset(
examples=examples, dataset_name="test-hg-dataset", id=id
)
except Exception as e:
hugging_face_dataset_repository.delete_dataset(id)
raise e

try:
yield hugging_face_dataset_repository, dataset, examples
finally:
hugging_face_dataset_repository.delete_dataset(dataset.id)
hugging_face_dataset_repository.delete_dataset(id)


def test_hugging_face_repository_can_create_and_delete_a_repository(
Expand Down

0 comments on commit abee57c

Please sign in to comment.