Support for NoisyNER dataset #2193
Annotations
5 errors and 1 warning
test:
flair/__init__.py#L1
mypy-status
mypy exited with status 1.
|
test:
flair/datasets/sequence_labeling.py#L1
Black format check
--- /home/runner/work/flair/flair/flair/datasets/sequence_labeling.py 2024-05-30 20:15:36.453299+00:00
+++ /home/runner/work/flair/flair/flair/datasets/sequence_labeling.py 2024-05-30 20:18:05.199025+00:00
@@ -4849,11 +4849,11 @@
base_path: Optional[Union[str, Path]] = None,
in_memory: bool = True,
**corpusargs,
) -> None:
"""Initialize the NoisyNER corpus.
-
+
:param version: Chooses the labelset for the data.
v0 (default): clean labels
v1 to v7: different kinds of noisy labelsets (details: https://ojs.aaai.org/index.php/AAAI/article/view/16938)
:param base_path: Default is None, meaning the corpus gets automatically downloaded and saved.
You can override this by passing a path to a directory containing the unprocessed files but typically this
@@ -4894,11 +4894,11 @@
**corpusargs,
)
@classmethod
def _set_path(cls, base_path) -> Path:
- base_path = flair.cache_root/"datasets"/"estner" if not base_path else Path(base_path)
+ base_path = flair.cache_root / "datasets" / "estner" if not base_path else Path(base_path)
return base_path
@classmethod
def _load_features(cls, base_path) -> List[str]:
print(base_path)
@@ -4966,11 +4966,11 @@
column_separator = "\t" # CoNLL format
with open(f"{base_path}/estner_noisy_labelset{version}_{split}.tsv", "w") as out_file:
for instance in data:
out_file.write(column_separator.join(instance))
out_file.write("\n")
-
+
class MASAKHA_POS(MultiCorpus):
def __init__(
self,
languages: Union[str, List[str]] = "bam",
|
test:
flair/datasets/sequence_labeling.py#L341
ruff
pytest_ruff.RuffError: flair/datasets/sequence_labeling.py:4854:1: W293 Blank line contains whitespace
|
4852 | ) -> None:
4853 | """Initialize the NoisyNER corpus.
4854 |
| ^^^^^^^^ W293
4855 | :param version: Chooses the labelset for the data.
4856 | v0 (default): clean labels
|
= help: Remove whitespace from blank line
flair/datasets/sequence_labeling.py:4971:1: W293 [*] Blank line contains whitespace
|
4969 | out_file.write(column_separator.join(instance))
4970 | out_file.write("\n")
4971 |
| ^^^^^^^^^^^^^^^^ W293
4972 |
4973 | class MASAKHA_POS(MultiCorpus):
|
= help: Remove whitespace from blank line
|
test:
flair/datasets/sequence_labeling.py#L1
flair/datasets/sequence_labeling.py
4909: error: Incompatible return value type (got "List[List[str]]", expected "List[str]") [return-value]
4914: error: Incompatible return value type (got "List[List[Any]]", expected "List[str]") [return-value]
4939: error: Argument 1 to "append" of "list" has incompatible type "List[Any]"; expected "str" [arg-type]
|
test
Process completed with exit code 1.
|
test
Node.js 16 actions are deprecated. Please update the following actions to use Node.js 20: actions/checkout@v3, actions/setup-python@v4, actions/cache@v3. For more information see: https://github.blog/changelog/2023-09-22-github-actions-transitioning-from-node-16-to-node-20/.
|