-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add DOCUMENT_TYPE and document_type to DocumentMetric
* allow setting document_type for DocumentStatistic as an init parameter
* set document_type to TextBasedDocument for TokenCountCollector if text_field=text
* implement RequiresDocumentTypeMixin
* use RequiresDocumentTypeMixin in metrics and taskmodules
* default to super().document_type
* resolve document_type in DocumentStatistic.__init__
* adjust method name
* _get_best_dataset_converter_with_types(): accept matches with super classes
* fix test_to_document_type_not_found()
- Loading branch information
1 parent
8b4ba0e
commit 4f26eca
Showing
8 changed files
with
89 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from .document import Annotation, AnnotationList, Document, annotation_field | ||
from .metric import DocumentMetric | ||
from .model import PyTorchIEModel | ||
from .module_mixins import RequiresDocumentTypeMixin | ||
from .statistic import DocumentStatistic | ||
from .taskmodule import TaskEncoding, TaskModule |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import logging | ||
from typing import Optional, Type | ||
|
||
from pytorch_ie.core.document import Document | ||
from pytorch_ie.data.dataset_dict import DatasetDict | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class RequiresDocumentTypeMixin:
    """Mixin for components that may declare the document type they work with.

    The declared type is exposed through the ``document_type`` property, which
    returns the ``DOCUMENT_TYPE`` class attribute. ``convert_dataset`` uses it
    to decide whether a dataset has to be converted.
    """

    # Document class declared by this component; None means nothing is declared.
    DOCUMENT_TYPE: Optional[Type[Document]] = None

    @property
    def document_type(self) -> Optional[Type[Document]]:
        """Return the value of the ``DOCUMENT_TYPE`` class attribute."""
        return self.DOCUMENT_TYPE

    def convert_dataset(self, dataset: DatasetDict) -> DatasetDict:
        """Convert ``dataset`` to the declared document type where necessary.

        Args:
            dataset: The dataset to check and, if needed, convert.

        Returns:
            The unchanged ``dataset`` if no document type is declared (a
            warning is logged) or if ``dataset.document_type`` is already a
            subclass of the declared type; otherwise the result of
            ``dataset.to_document_type(...)`` for the declared type.
        """
        owner = type(self).__name__
        required_type = self.document_type

        # Without a declared type there is nothing to convert to.
        if required_type is None:
            logger.warning(
                f"{owner} does not specify a document type. The dataset can not be automatically converted "
                "to a document type."
            )
            return dataset

        # Subclasses of the declared type are accepted as-is.
        if issubclass(dataset.document_type, required_type):
            logger.info(
                f"the dataset is already of the document type that is specified by {owner}: "
                f"{required_type}"
            )
            return dataset

        logger.info(
            f"convert the dataset to the document type that is specified by {owner}: "
            f"{required_type}"
        )
        return dataset.to_document_type(required_type)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters