Skip to content

Commit

Permalink
Merge pull request SEACrowd#85 from sabilmakbar/sabil/qa_backward_com…
Browse files Browse the repository at this point in the history
…patibility

Relates SEACrowd#36 | Add Commonsense Reasoning Task & Extend QA Schema to cater Metadata (as optional field)
  • Loading branch information
SamuelCahyawijaya authored Nov 20, 2023
2 parents bfd4f23 + 4202870 commit e5db66c
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 4 deletions.
3 changes: 2 additions & 1 deletion seacrowd/sea_datasets/belebele/belebele.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def _generate_examples(self, file: str) -> Tuple[int, Dict]:
"choices": choices,
"context": line['flores_passage'],
"answer": [answer],
"meta": {}
}
else:
raise ValueError(f"Invalid config {self.config.name}")
raise ValueError(f"Invalid config {self.config.name}")
1 change: 1 addition & 0 deletions seacrowd/sea_datasets/facqa/facqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,5 +151,6 @@ def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]:
"choices": [],
"context": listToString(ast.literal_eval(row.passage)),
"answer": [getAnswerString(ast.literal_eval(row.passage), ast.literal_eval(row.seq_label))],
"meta": {}
}
yield row.index, entry
1 change: 1 addition & 0 deletions seacrowd/sea_datasets/idk_mrc/idk_mrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,5 @@ def _generate_examples(self, filepath: Path) -> Tuple[int, Dict]:
"choices": [],
"context": example["context"],
"answer": [ans["text"] for ans in qa["answers"]],
"meta": {}
}
2 changes: 1 addition & 1 deletion seacrowd/sea_datasets/mlqa/mlqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,5 @@ def _generate_examples(self, filepath: Path, split: str, files=None) -> Tuple[in
count += 1

elif self.config.schema == "seacrowd_qa":
yield count, {"question_id": id_, "context": context, "question": question, "answer": {"answer_start": answers_start[0], "text": answers_text[0]}, "id": id_, "choices": [], "type": "extractive", "document_id": count}
yield count, {"question_id": id_, "context": context, "question": question, "answer": {"answer_start": answers_start[0], "text": answers_text[0]}, "id": id_, "choices": [], "type": "extractive", "document_id": count, "meta":{}}
count += 1
1 change: 1 addition & 0 deletions seacrowd/sea_datasets/squad_id/squad_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def _generate_examples(self, filepath: Path):
"choices": [],
"context": each_data["context"],
"answer": answers,
"meta": {}
}

else:
Expand Down
3 changes: 2 additions & 1 deletion seacrowd/sea_datasets/tydiqa_id/tydiqa_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ def _generate_examples(self, filepath: Path):
"type": 'abstractive',
"choices": [],
"context": example['context'],
"answer": [example['label']]
"answer": [example['label']],
"meta": {}
}
else:
raise ValueError(f"Invalid config: {self.config.name}")
3 changes: 2 additions & 1 deletion seacrowd/sea_datasets/xcopa/xcopa.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
]
}

_SUPPORTED_TASKS = [Tasks.QUESTION_ANSWERING]
_SUPPORTED_TASKS = [Tasks.COMMONSENSE_REASONING]

_SOURCE_VERSION = "1.0.0"

Expand Down Expand Up @@ -157,6 +157,7 @@ def _generate_examples(self, filepath):
"choices": [data["choice1"], data["choice2"]],
"context": data["premise"],
"answer": [data["choice1"] if data["label"] == 0 else data["choice2"]],
"meta": {}
}
yield idx, sample

Expand Down
2 changes: 2 additions & 0 deletions seacrowd/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class Tasks(Enum):
TOKEN_LEVEL_LANGUAGE_IDENTIFICATION = "LANGID"

# Pair Text Classification
COMMONSENSE_REASONING = "CR"
QUESTION_ANSWERING = "QA"
TEXTUAL_ENTAILMENT = "TE"
SEMANTIC_SIMILARITY = "STS"
Expand Down Expand Up @@ -191,6 +192,7 @@ class Licenses(Enum):
Tasks.KEYWORD_TAGGING: "SEQ_LABEL",
Tasks.SENTENCE_ORDERING: "SEQ_LABEL",
Tasks.TOKEN_LEVEL_LANGUAGE_IDENTIFICATION: "SEQ_LABEL",
Tasks.COMMONSENSE_REASONING: "QA",
Tasks.QUESTION_ANSWERING: "QA",
Tasks.TEXTUAL_ENTAILMENT: "PAIRS",
Tasks.SEMANTIC_SIMILARITY: "PAIRS_SCORE",
Expand Down
9 changes: 9 additions & 0 deletions seacrowd/utils/schemas/qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,14 @@
"choices": datasets.Sequence(datasets.Value("string")),
"context": datasets.Value("string"),
"answer": datasets.Sequence(datasets.Value("string")),

# the schema of 'meta' is intentionally left unspecified to allow some flexibility
"meta": {}

# notes on how to use the 'meta' field
# you can choose one of two options:
# 1. define it as an empty dict if you don't think it's usable in `_generate_examples`, or
# 2. define meta as a dict that maps each intended meta column name to its `datasets` feature type
# in the `_info` Dataloader method, then populate it with the values in the `_generate_examples` Dataloader method
}
)

0 comments on commit e5db66c

Please sign in to comment.