Edit types.py, cdcp.py, and test_cdcp.py; add requirements.txt

ArneBinder · Nov 9, 2023 · 8729583
1 parent 9f1a243
commit 8729583
Show file tree

Hide file tree

Showing 5 changed files with 8 additions and 162 deletions.
diff --git a/dataset_builders/hf/cdcp/README.md b/dataset_builders/hf/cdcp/README.md
diff --git a/dataset_builders/pie/cdcp/cdcp.py b/dataset_builders/pie/cdcp/cdcp.py
@@ -39,8 +39,8 @@ class CDCPDocument(TextBasedDocument):
 
 def example_to_document(
     example: Dict[str, Any],
-    relation_label: Callable[[int], str],
-    proposition_label: Callable[[int], str],
+    relation_label: datasets.ClassLabel,
+    proposition_label: datasets.ClassLabel,
 ):
     document = CDCPDocument(id=example["id"], text=example["text"])
     for proposition_dict in dl2ld(example["propositions"]):
@@ -67,8 +67,8 @@ def example_to_document(
 
 def document_to_example(
     document: CDCPDocument,
-    relation_label: Callable[[int], str],
-    proposition_label: Callable[[int], str],
+    relation_label: datasets.ClassLabel,
+    proposition_label: datasets.ClassLabel,
 ) -> Dict[str, Any]:
     result = {"id": document.id, "text": document.text}
     proposition2dict = {}

diff --git a/dataset_builders/pie/cdcp/requirements.txt b/dataset_builders/pie/cdcp/requirements.txt
@@ -0,0 +1 @@
+pie-datasets>=0.3.0
diff --git a/src/pie_datasets/document/types.py b/src/pie_datasets/document/types.py
@@ -2,33 +2,13 @@
 import logging
 from typing import Any, Dict, Optional
 
-from pytorch_ie.annotations import (
-    BinaryRelation,
-    LabeledMultiSpan,
-    LabeledSpan,
-    Span,
-    _post_init_single_label,
-)
+from pytorch_ie.annotations import BinaryRelation, LabeledSpan
 from pytorch_ie.core import Annotation, AnnotationList, Document, annotation_field
 from pytorch_ie.documents import TextBasedDocument, TokenBasedDocument
 
 logger = logging.getLogger(__name__)
 
 
-# ========================= Annotation Types ========================= #
-
-
-@dataclasses.dataclass(eq=True, frozen=True)
-class Attribute(Annotation):
-    target_annotation: Annotation
-    label: str
-    value: Optional[str] = None
-    score: float = 1.0
-
-
-# ========================= Document Types ========================= #
-
-
 @dataclasses.dataclass
 class TokenDocumentWithLabeledSpans(TokenBasedDocument):
     labeled_spans: AnnotationList[LabeledSpan] = annotation_field(target="tokens")

diff --git a/tests/dataset_builders/pie/test_cdcp.py b/tests/dataset_builders/pie/test_cdcp.py
@@ -29,7 +29,7 @@
 DATASET_NAME = "cdcp"
 SPLIT_SIZES = {"train": 581, "test": 150}
 HF_DATASET_PATH = CDCP.BASE_DATASET_PATH
-PIE_DATASET_PATH = PIE_BASE_PATH / DATASET_NAME  # "pie/cdcp"
+PIE_DATASET_PATH = PIE_BASE_PATH / DATASET_NAME
 DATA_PATH = FIXTURES_ROOT / "dataset_builders" / "cdcp_acl17.zip"
 
 HF_EXAMPLE_00195 = {
@@ -103,7 +103,7 @@ def generate_document_kwargs(hf_dataset, split):
 
 @pytest.fixture(scope="module")
 def generated_document(hf_example, generate_document_kwargs):
-    return example_to_document(hf_example, **generate_document_kwargs)
+    return CDCP()._generate_document(hf_example, **generate_document_kwargs)
 
 
 def test_generated_document(generated_document, split):