Skip to content

Commit

Permalink
Implement minor updates
Browse files Browse the repository at this point in the history
- Don't hardcode "annotator_1"; use _DEFAULT_ANNOTATOR instead, with comments explaining the choice.
- Fix the default config name (`{_DATASETNAME}_source` rather than `{_DATASETNAME}_1_source`).
  • Loading branch information
ljvmiranda921 committed Nov 20, 2023
1 parent 841c6f1 commit b0c0226
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions seacrowd/sea_datasets/cebuaner/cebuaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,14 @@
_HOMEPAGE = "https://github.com/mebzmoren/CebuaNER"
_LICENSE = Licenses.CC_BY_NC_SA_4_0.value
_URLS = {
"annotator_0": "https://github.com/mebzmoren/CebuaNER/raw/main/data/annotated_data/final-1.txt",
"annotator_1": "https://github.com/mebzmoren/CebuaNER/raw/main/data/annotated_data/final-2.txt",
"annotator_1": "https://github.com/mebzmoren/CebuaNER/raw/main/data/annotated_data/final-1.txt",
"annotator_2": "https://github.com/mebzmoren/CebuaNER/raw/main/data/annotated_data/final-2.txt",
}

# Agreement between the annotators is high, so either one can be used as gold-standard data.
# Hence, we default to the first annotator.
_DEFAULT_ANNOTATOR = "annotator_1"

_SUPPORTED_TASKS = [Tasks.NAMED_ENTITY_RECOGNITION]
_SOURCE_VERSION = "1.0.0"
_SEACROWD_VERSION = "1.0.0"
Expand Down Expand Up @@ -102,7 +106,7 @@ class CebuaNERDataset(datasets.GeneratorBasedBuilder):
),
])

DEFAULT_CONFIG_NAME = f"{_DATASETNAME}_1_source"
DEFAULT_CONFIG_NAME = f"{_DATASETNAME}_source"

def _info(self) -> datasets.DatasetInfo:
if self.config.schema == "source":
Expand All @@ -126,7 +130,7 @@ def _info(self) -> datasets.DatasetInfo:

def _split_generators(self, dl_manager: DownloadManager) -> List[datasets.SplitGenerator]:
if self.config.subset_id == _DATASETNAME:
url = _URLS["annotator_0"]
url = _URLS[_DEFAULT_ANNOTATOR]
else:
_, annotator = self.config.subset_id.split("_", 1)
url = _URLS[annotator]
Expand Down

0 comments on commit b0c0226

Please sign in to comment.