diff --git a/dataset_builders/pie/cdcp/cdcp.py b/dataset_builders/pie/cdcp/cdcp.py
index 28af654b..3ebe9aed 100644
--- a/dataset_builders/pie/cdcp/cdcp.py
+++ b/dataset_builders/pie/cdcp/cdcp.py
@@ -9,7 +9,6 @@
 from pytorch_ie.core import Annotation, AnnotationList, Document, annotation_field
 from pytorch_ie.documents import TextDocumentWithLabeledSpansAndBinaryRelations
 
-
 log = logging.getLogger(__name__)
 
 
@@ -127,6 +126,7 @@ class CDCPConfig(datasets.BuilderConfig):
 
     def __init__(self, **kwargs):
         """BuilderConfig for CDCP.
+
         Args:
             **kwargs: keyword arguments forwarded to super.
         """
diff --git a/tests/dataset_builders/pie/test_cdcp.py b/tests/dataset_builders/pie/test_cdcp.py
index efd21202..8749ec60 100644
--- a/tests/dataset_builders/pie/test_cdcp.py
+++ b/tests/dataset_builders/pie/test_cdcp.py
@@ -6,25 +6,28 @@
 from pytorch_ie import DatasetDict, tokenize_document
 from pytorch_ie.annotations import LabeledSpan
 from pytorch_ie.core import AnnotationList, Document, annotation_field
-from pytorch_ie.documents import TextBasedDocument, TextDocumentWithLabeledSpansAndBinaryRelations
+from pytorch_ie.documents import (
+    TextBasedDocument,
+    TextDocumentWithLabeledSpansAndBinaryRelations,
+)
 from transformers import AutoTokenizer, PreTrainedTokenizer
 
 from dataset_builders.pie.cdcp.cdcp import (
+    CDCP,
     CDCPDocument,
     convert_to_text_document_with_labeled_spans_and_binary_relations,
     document_to_example,
-    example_to_document, CDCP,
+    example_to_document,
 )
+from src.document.types import TokenDocumentWithLabeledSpansAndBinaryRelations
 from tests import FIXTURES_ROOT
 from tests.dataset_builders.common import _deep_compare
-from src.document.types import TokenDocumentWithLabeledSpansAndBinaryRelations
-
 
 disable_caching()
 
 DATASET_NAME = "cdcp"
 SPLIT_SIZES = {"train": 581, "test": 150}
-#HF_DATASET_PATH = "DFKI-SLT/cdcp"
+# HF_DATASET_PATH = "DFKI-SLT/cdcp"
 HF_DATASET_PATH = CDCP.BASE_DATASET_PATH
 PIE_DATASET_PATH = "pie/cdcp"
 DATA_PATH = FIXTURES_ROOT / "dataset_builders" / "cdcp_acl17.zip"
@@ -425,4 +428,4 @@ def test_tokenized_documents_with_entities_and_relations_all(
     )
     # we just ensure that we get at least one tokenized document
     assert tokenized_docs is not None
-    assert len(tokenized_docs) > 0
\ No newline at end of file
+    assert len(tokenized_docs) > 0