Skip to content

Commit

Permalink
don't export core and fix README examples (#173)
Browse files Browse the repository at this point in the history
* don't export core and fix README examples

* move PIEDatasetDict from data.datasets.__init__ to data.__init__ and rename to DatasetDict

Co-authored-by: Arne Binder <[email protected]>
  • Loading branch information
ArneBinder authored May 5, 2022
1 parent 0964642 commit 9d83359
Show file tree
Hide file tree
Showing 38 changed files with 65 additions and 59 deletions.
24 changes: 13 additions & 11 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,22 @@ Span-classification-based Named Entity Recognition
from dataclasses import dataclass
from pytorch_ie import AnnotationList, LabeledSpan, TextDocument, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.auto import AutoPipeline
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
@dataclass
class ExampleDocument(TextDocument):
entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
# see below for the long version
ner_pipeline = AutoPipeline.from_pretrained("pie/example-ner-spanclf-conll03", device=-1, num_workers=0)
document = ExampleDocument(
"“Making a super tasty alt-chicken wing is only half of it,” said Po Bronson, general partner at SOSV and managing director of IndieBio."
)
# see below for the long version
ner_pipeline = AutoPipeline.from_pretrained("pie/example-ner-spanclf-conll03", device=-1, num_workers=0)
ner_pipeline(document, predict_field="entities")
for entity in document.entities.predictions:
Expand All @@ -89,8 +90,8 @@ To create the same pipeline as above without `AutoPipeline`:

.. code:: python
from pytorch_ie import Pipeline
from pytorch_ie.auto import AutoTaskModule, AutoModel
from pytorch_ie.pipeline import Pipeline
model_name_or_path = "pie/example-ner-spanclf-conll03"
ner_taskmodule = AutoTaskModule.from_pretrained(model_name_or_path)
Expand All @@ -101,7 +102,7 @@ Or, without `Auto` classes at all:

.. code:: python
from pytorch_ie import Pipeline
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.models import TransformerSpanClassificationModel
from pytorch_ie.taskmodules import TransformerSpanClassificationTaskModule
Expand All @@ -118,22 +119,23 @@ Text-classification-based Relation Extraction
from dataclasses import dataclass
from pytorch_ie import AnnotationList, BinaryRelation, LabeledSpan, TextDocument, annotation_field
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.auto import AutoPipeline
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
@dataclass
class ExampleDocument(TextDocument):
entities: AnnotationList[LabeledSpan] = annotation_field(target="text")
relations: AnnotationList[BinaryRelation] = annotation_field(target="entities")
re_pipeline = AutoPipeline.from_pretrained("pie/example-re-textclf-tacred", device=-1, num_workers=0)
document = ExampleDocument(
"“Making a super tasty alt-chicken wing is only half of it,” said Po Bronson, general partner at SOSV and managing director of IndieBio."
)
re_pipeline = AutoPipeline.from_pretrained("pie/example-re-textclf-tacred", device=-1, num_workers=0)
for start, end, label in [(65, 75, "PER"), (96, 100, "ORG"), (126, 134, "ORG")]:
document.entities.append(LabeledSpan(start=start, end=end, label=label))
Expand Down
2 changes: 1 addition & 1 deletion datasets/conll2002/conll2002.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/conll2003/conll2003.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/conllpp/conllpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/germaner/germaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/germeval_14/germeval_14.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/ncbi_disease/ncbi_disease.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/wikiann/wikiann.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
2 changes: 1 addition & 1 deletion datasets/wnut_17/wnut_17.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import datasets
import pytorch_ie.data.builder
from pytorch_ie import AnnotationList, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.utils.span import tokens_and_tags_to_text_and_labeled_spans

Expand Down
3 changes: 2 additions & 1 deletion examples/predict/ner_span_classification.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from dataclasses import dataclass

from pytorch_ie import AnnotationList, Pipeline, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import TransformerSpanClassificationModel
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import TransformerSpanClassificationTaskModule


Expand Down
3 changes: 2 additions & 1 deletion examples/predict/re_generative.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from dataclasses import dataclass

from pytorch_ie import AnnotationList, Pipeline, annotation_field
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import TransformerSeq2SeqModel
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import TransformerSeq2SeqTaskModule


Expand Down
3 changes: 2 additions & 1 deletion examples/predict/re_text_classification.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from dataclasses import dataclass

from pytorch_ie import AnnotationList, Pipeline, annotation_field
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import TransformerTextClassificationModel
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import TransformerRETextClassificationTaskModule


Expand Down
3 changes: 2 additions & 1 deletion src/pytorch_ie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# flake8: noqa

from pytorch_ie.auto import AutoModel, AutoPipeline, AutoTaskModule
from pytorch_ie.core import *
from pytorch_ie.data import *
from pytorch_ie.models import *
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import *
7 changes: 7 additions & 0 deletions src/pytorch_ie/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
from typing import Dict, Union

from datasets import Split

from .builder import GeneratorBasedBuilder
from .dataset import Dataset
from .dataset_formatter import DocumentFormatter

DatasetDict = Dict[Union[str, Split], Dataset]

__all__ = [
"GeneratorBasedBuilder",
"Dataset",
"DatasetDict",
"DocumentFormatter",
]
6 changes: 0 additions & 6 deletions src/pytorch_ie/data/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
import pathlib
from typing import Dict, List, Union

from datasets import Split
from pytorch_ie import Document

HF_DATASETS_ROOT = pathlib.Path(__file__).parent / "hf_datasets"

PIEDatasetDict = Dict[Union[str, Split], List[Document]]
2 changes: 1 addition & 1 deletion src/pytorch_ie/documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import dataclasses
from typing import Any, Dict, Optional

from pytorch_ie import Document
from pytorch_ie.core import Document


@dataclasses.dataclass
Expand Down
2 changes: 1 addition & 1 deletion src/pytorch_ie/models/transformer_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from transformers import AutoModelForSeq2SeqLM, BatchEncoding
from transformers.modeling_outputs import Seq2SeqLMOutput

from pytorch_ie import PyTorchIEModel
from pytorch_ie.core import PyTorchIEModel
from pytorch_ie.core.taskmodule import Metadata
from pytorch_ie.documents import TextDocument

Expand Down
2 changes: 1 addition & 1 deletion src/pytorch_ie/models/transformer_span_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
get_linear_schedule_with_warmup,
)

from pytorch_ie import PyTorchIEModel
from pytorch_ie.core import PyTorchIEModel
from pytorch_ie.models.modules.mlp import MLP

TransformerSpanClassificationModelBatchEncoding = BatchEncoding
Expand Down
2 changes: 1 addition & 1 deletion src/pytorch_ie/models/transformer_text_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from torch import Tensor, nn
from transformers import AdamW, AutoConfig, AutoModel, get_linear_schedule_with_warmup

from pytorch_ie import PyTorchIEModel
from pytorch_ie.core import PyTorchIEModel

TransformerTextClassificationModelBatchEncoding = MutableMapping[str, Any]
TransformerTextClassificationModelBatchOutput = Dict[str, Any]
Expand Down
2 changes: 1 addition & 1 deletion src/pytorch_ie/models/transformer_token_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from torch import Tensor, nn
from transformers import AutoConfig, AutoModelForTokenClassification, BatchEncoding

from pytorch_ie import PyTorchIEModel
from pytorch_ie.core import PyTorchIEModel

TransformerTokenClassificationModelBatchEncoding = BatchEncoding
TransformerTokenClassificationModelBatchOutput = Dict[str, Any]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from transformers.file_utils import PaddingStrategy
from transformers.tokenization_utils_base import BatchEncoding, TruncationStrategy

from pytorch_ie import TaskEncoding, TaskModule
from pytorch_ie.annotations import BinaryRelation, LabeledSpan, MultiLabeledBinaryRelation, Span
from pytorch_ie.core import TaskEncoding, TaskModule
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import (
TransformerTextClassificationModelBatchOutput,
Expand Down
2 changes: 1 addition & 1 deletion src/pytorch_ie/taskmodules/transformer_seq2seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from transformers.file_utils import PaddingStrategy
from transformers.tokenization_utils_base import TruncationStrategy

from pytorch_ie import Annotation, TaskEncoding, TaskModule
from pytorch_ie.annotations import BinaryRelation, LabeledSpan, Span
from pytorch_ie.core import Annotation, TaskEncoding, TaskModule
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import (
TransformerSeq2SeqModelBatchOutput,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from transformers.file_utils import PaddingStrategy
from transformers.tokenization_utils_base import BatchEncoding, TruncationStrategy

from pytorch_ie import TaskEncoding, TaskModule
from pytorch_ie.annotations import LabeledSpan, MultiLabeledSpan, Span
from pytorch_ie.core import TaskEncoding, TaskModule
from pytorch_ie.documents import TextDocument
from pytorch_ie.models.transformer_span_classification import (
TransformerSpanClassificationModelBatchOutput,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from transformers.file_utils import PaddingStrategy
from transformers.tokenization_utils_base import TruncationStrategy

from pytorch_ie import TaskEncoding, TaskModule
from pytorch_ie.annotations import Label, MultiLabel
from pytorch_ie.core import TaskEncoding, TaskModule
from pytorch_ie.documents import TextDocument
from pytorch_ie.models.transformer_text_classification import (
TransformerTextClassificationModelBatchOutput,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from transformers.file_utils import PaddingStrategy
from transformers.tokenization_utils_base import BatchEncoding, TruncationStrategy

from pytorch_ie import TaskEncoding, TaskModule
from pytorch_ie.annotations import LabeledSpan, Span
from pytorch_ie.core import TaskEncoding, TaskModule
from pytorch_ie.documents import TextDocument
from pytorch_ie.models.transformer_token_classification import (
TransformerTokenClassificationModelBatchOutput,
Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import pytest

import datasets
from pytorch_ie import AnnotationList, Dataset, annotation_field
from pytorch_ie.annotations import BinaryRelation, LabeledSpan, Span
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.data import Dataset
from pytorch_ie.documents import TextDocument
from tests import FIXTURES_ROOT

Expand Down
3 changes: 2 additions & 1 deletion tests/data/datasets/test_brat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# type: ignore

"""
import os
import pytest
Expand Down Expand Up @@ -29,6 +29,7 @@
"T2\tperson 25 37\tJenny Durkan\n",
"R1\tmayor_of head:T2 tail:T1\n",
]
"""


# def get_doc1(with_ids: bool = False, **kwargs) -> TextDocument:
Expand Down
3 changes: 2 additions & 1 deletion tests/pipeline/test_ner_span_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import pytest

from pytorch_ie import AnnotationList, Pipeline, annotation_field
from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import TransformerSpanClassificationModel
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import TransformerSpanClassificationTaskModule


Expand Down
3 changes: 2 additions & 1 deletion tests/pipeline/test_re_generative.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import pytest

from pytorch_ie import AnnotationList, Pipeline, annotation_field
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import TransformerSeq2SeqModel
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import TransformerSeq2SeqTaskModule


Expand Down
3 changes: 2 additions & 1 deletion tests/pipeline/test_re_text_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

import pytest

from pytorch_ie import AnnotationList, Pipeline, annotation_field
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
from pytorch_ie.models import TransformerTextClassificationModel
from pytorch_ie.pipeline import Pipeline
from pytorch_ie.taskmodules import TransformerRETextClassificationTaskModule


Expand Down
Loading

0 comments on commit 9d83359

Please sign in to comment.