Skip to content

Commit

Permalink
Change delimiter from period to underscore
Browse files Browse the repository at this point in the history
  • Loading branch information
ljvmiranda921 committed Nov 17, 2023
1 parent d5bef23 commit e95f83e
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions seacrowd/sea_datasets/tatoeba/tatoeba.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TatoebaDataset(datasets.GeneratorBasedBuilder):

SEACROWD_SCHEMA_NAME = "t2t"

- dataset_names = sorted([f"tatoeba.{lang}" for lang in _LANGUAGES])
+ dataset_names = sorted([f"tatoeba_{lang}" for lang in _LANGUAGES])
BUILDER_CONFIGS = []
for name in dataset_names:
source_config = SEACrowdConfig(
Expand Down Expand Up @@ -94,8 +94,7 @@ def _info(self) -> datasets.DatasetInfo:

def _split_generators(self, dl_manager: DownloadManager) -> List[datasets.SplitGenerator]:
"""Return SplitGenerators."""
- lang_source = self.config.name.split(".")[1]
- lang = lang_source.split("_")[0]
+ lang = self.config.name.split("_")[1]
tatoeba_source_data = dl_manager.download_and_extract(_URL + f"tatoeba.{lang}-eng.{lang}")
tatoeba_eng_data = dl_manager.download_and_extract(_URL + f"tatoeba.{lang}-eng.eng")
return [
Expand Down

0 comments on commit e95f83e

Please sign in to comment.