Skip to content

Commit

Permalink
edit types.py, cdcp.py, test_cdcp.py, add requirements.txt
Browse files (browse the repository at this point in the history)
  • Loading branch information
idalr authored and ArneBinder committed Nov 9, 2023
1 parent 9f1a243 commit 8729583
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 162 deletions.
135 changes: 0 additions & 135 deletions dataset_builders/hf/cdcp/README.md

This file was deleted.

8 changes: 4 additions & 4 deletions dataset_builders/pie/cdcp/cdcp.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,8 +39,8 @@ class CDCPDocument(TextBasedDocument):

def example_to_document(
example: Dict[str, Any],
relation_label: Callable[[int], str],
proposition_label: Callable[[int], str],
relation_label: datasets.ClassLabel,
proposition_label: datasets.ClassLabel,
):
document = CDCPDocument(id=example["id"], text=example["text"])
for proposition_dict in dl2ld(example["propositions"]):
Expand All @@ -67,8 +67,8 @@ def example_to_document(

def document_to_example(
document: CDCPDocument,
relation_label: Callable[[int], str],
proposition_label: Callable[[int], str],
relation_label: datasets.ClassLabel,
proposition_label: datasets.ClassLabel,
) -> Dict[str, Any]:
result = {"id": document.id, "text": document.text}
proposition2dict = {}
Expand Down
1 change: 1 addition & 0 deletions dataset_builders/pie/cdcp/requirements.txt
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
pie-datasets>=0.3.0
22 changes: 1 addition & 21 deletions src/pie_datasets/document/types.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,33 +2,13 @@
import logging
from typing import Any, Dict, Optional

from pytorch_ie.annotations import (
BinaryRelation,
LabeledMultiSpan,
LabeledSpan,
Span,
_post_init_single_label,
)
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.core import Annotation, AnnotationList, Document, annotation_field
from pytorch_ie.documents import TextBasedDocument, TokenBasedDocument

logger = logging.getLogger(__name__)


# ========================= Annotation Types ========================= #


@dataclasses.dataclass(eq=True, frozen=True)
class Attribute(Annotation):
target_annotation: Annotation
label: str
value: Optional[str] = None
score: float = 1.0


# ========================= Document Types ========================= #


@dataclasses.dataclass
class TokenDocumentWithLabeledSpans(TokenBasedDocument):
labeled_spans: AnnotationList[LabeledSpan] = annotation_field(target="tokens")
Expand Down
4 changes: 2 additions & 2 deletions tests/dataset_builders/pie/test_cdcp.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@
DATASET_NAME = "cdcp"
SPLIT_SIZES = {"train": 581, "test": 150}
HF_DATASET_PATH = CDCP.BASE_DATASET_PATH
PIE_DATASET_PATH = PIE_BASE_PATH / DATASET_NAME # "pie/cdcp"
PIE_DATASET_PATH = PIE_BASE_PATH / DATASET_NAME
DATA_PATH = FIXTURES_ROOT / "dataset_builders" / "cdcp_acl17.zip"

HF_EXAMPLE_00195 = {
Expand Down Expand Up @@ -103,7 +103,7 @@ def generate_document_kwargs(hf_dataset, split):

@pytest.fixture(scope="module")
def generated_document(hf_example, generate_document_kwargs):
return example_to_document(hf_example, **generate_document_kwargs)
return CDCP()._generate_document(hf_example, **generate_document_kwargs)


def test_generated_document(generated_document, split):
Expand Down

0 comments on commit 8729583

Please sign in to comment.