Skip to content

Commit

Permalink
Change default texts value for blank samples
Browse files (browse the repository at this point in the history)
  • Loading branch information
Railey Montalan authored and committed on Feb 27, 2024
1 parent e9f57a0 commit 911a582
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
Empty file.
19 changes: 14 additions & 5 deletions seacrowd/sea_datasets/wit/wit.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@

_SEACROWD_VERSION = "1.0.0"

-logger = datasets.logging.get_logger(__name__)
-

class WITDataset(datasets.GeneratorBasedBuilder):
"""
Expand Down Expand Up @@ -205,7 +203,7 @@ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datase
name=datasets.Split.VALIDATION,
gen_kwargs={
"filepaths": val_paths,
-"split": "val",
+"split": "validation",
},
),
]
Expand Down Expand Up @@ -238,12 +236,23 @@ def _generate_examples(self, filepaths: Path, split: str) -> Tuple[int, Dict]:
if self.config.schema == "seacrowd_imtext":
for d in data:
if d["language"] in language_list:
+text = None
+context = None
+if d["caption_reference_description"] != "":
+text = d["caption_reference_description"]
+context = "caption_reference_description"
+elif d["caption_attribution_description"] != "":
+text = d["caption_attribution_description"]
+context = "caption_attribution_description"
+else:
+text = d["caption_alt_text_description"]
+context = "caption_alt_text_description"
x = {
"id": idx,
"image_paths": [d["image_url"]],
-"texts": d["page_title"],
+"texts": text,
"metadata": {
-"context": None,
+"context": context,
"labels": None,
},
}
Expand Down

0 comments on commit 911a582

Please sign in to comment.