Merge pull request #37 from ArneBinder/create_core_package
create core package
ArneBinder authored Nov 8, 2023
2 parents f2925d1 + d813fbb commit ffe3ad5
Showing 13 changed files with 47 additions and 36 deletions.
14 changes: 2 additions & 12 deletions src/pie_datasets/__init__.py
@@ -1,13 +1,3 @@
-from .builder import ArrowBasedBuilder, GeneratorBasedBuilder
-from .dataset import Dataset, IterableDataset
-from .dataset_dict import DatasetDict
-from .document_formatter import DocumentFormatter
+# flake8: noqa

-__all__ = [
-    "GeneratorBasedBuilder",
-    "ArrowBasedBuilder",
-    "Dataset",
-    "IterableDataset",
-    "DatasetDict",
-    "DocumentFormatter",
-]
+from .core import *
11 changes: 11 additions & 0 deletions src/pie_datasets/core/__init__.py
@@ -0,0 +1,11 @@
+from .builder import ArrowBasedBuilder, GeneratorBasedBuilder
+from .dataset import Dataset, IterableDataset
+from .dataset_dict import DatasetDict
+
+__all__ = [
+    "GeneratorBasedBuilder",
+    "ArrowBasedBuilder",
+    "Dataset",
+    "IterableDataset",
+    "DatasetDict",
+]
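
Taken together, the two __init__.py diffs above keep the public import surface stable: the top-level package now simply re-exports the new core subpackage. A minimal usage sketch (assuming the package is installed from this branch; only the five names in core's __all__ are shown) of how both import paths resolve to the same objects:

# Top-level imports keep working because pie_datasets/__init__.py does `from .core import *`.
from pie_datasets import ArrowBasedBuilder, Dataset, DatasetDict, GeneratorBasedBuilder, IterableDataset

# The same classes are importable from their new canonical location.
from pie_datasets.core import Dataset as CoreDataset

assert Dataset is CoreDataset  # one class, two import paths

Note that DocumentFormatter, dropped from the old top-level __all__, is not part of this re-export chain; whether it is still exposed elsewhere is not visible in this diff.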
File renamed without changes.
File renamed without changes.
@@ -19,7 +19,7 @@
from pytorch_ie.core.document import Document
from pytorch_ie.utils.hydra import resolve_target, serialize_document_type

-from pie_datasets.dataset import Dataset, IterableDataset, get_pie_dataset_type
+from .dataset import Dataset, IterableDataset, get_pie_dataset_type

logger = logging.getLogger(__name__)

File renamed without changes.
2 changes: 1 addition & 1 deletion src/pie_datasets/document/processing/regex_partitioner.py
@@ -10,7 +10,7 @@
from pytorch_ie.documents import TextBasedDocument

from pie_datasets import Dataset, IterableDataset
-from pie_datasets.dataset_dict import EnterDatasetMixin, ExitDatasetMixin
+from pie_datasets.core.dataset_dict import EnterDatasetMixin, ExitDatasetMixin

logger = logging.getLogger(__name__)

@@ -6,7 +6,7 @@
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument

-from pie_datasets.builder import ArrowBasedBuilder
+from pie_datasets import ArrowBasedBuilder
from tests import FIXTURES_ROOT


1 change: 1 addition & 0 deletions tests/unit/core/__init__.py
@@ -0,0 +1 @@
+TEST_PACKAGE = "tests.unit.core"
12 changes: 8 additions & 4 deletions tests/unit/test_builder.py → tests/unit/core/test_builder.py
@@ -1,6 +1,7 @@
import re
import tempfile
from dataclasses import dataclass
+from pathlib import Path
from typing import Type

import pytest
@@ -10,11 +11,14 @@
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextBasedDocument, TextDocumentWithSpans

-from pie_datasets.builder import PieDatasetBuilder
+from pie_datasets.core.builder import PieDatasetBuilder
from tests import FIXTURES_ROOT
+from tests.unit.core import TEST_PACKAGE

DATASETS_ROOT = FIXTURES_ROOT / "builder" / "datasets"

+TEST_MODULE = f"{TEST_PACKAGE}.{Path(__file__).stem}"


def test_builder_class():
dataset_module = dataset_module_factory(str(DATASETS_ROOT / "single_config"))
@@ -193,7 +197,7 @@ class RenamedExampleDocument(TextBasedDocument):
builder = builder_cls(
cache_dir=tmp_cache_dir,
document_converters={
"tests.unit.test_builder.ExampleDocumentWithSimpleSpans": "tests.unit.test_builder.convert_example_document_to_example_document_with_simple_spans",
f"{TEST_MODULE}.ExampleDocumentWithSimpleSpans": f"{TEST_MODULE}.convert_example_document_to_example_document_with_simple_spans",
},
)
assert isinstance(builder, PieDatasetBuilder)
@@ -213,12 +217,12 @@ def test_builder_with_document_converters_resolve_wrong_document_type():
with pytest.raises(
TypeError,
match=re.escape(
"The key 'tests.unit.test_builder.NoDocumentType' for one of the converters can not be resolved to a document type."
f"The key '{TEST_MODULE}.NoDocumentType' for one of the converters can not be resolved to a document type."
),
):
builder = builder_cls(
cache_dir=tmp_cache_dir,
document_converters={
"tests.unit.test_builder.NoDocumentType": convert_example_document_to_example_document_with_simple_spans,
f"{TEST_MODULE}.NoDocumentType": convert_example_document_to_example_document_with_simple_spans,
},
)
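
The TEST_MODULE pattern introduced above (and repeated in the other relocated test files below) derives the module's dotted path from its own location instead of hard-coding "tests.unit.test_builder", so the converter keys stay valid after the move to tests/unit/core. A small self-contained sketch of how the value is composed (the file path literal is illustrative only):

from pathlib import Path

TEST_PACKAGE = "tests.unit.core"  # defined in tests/unit/core/__init__.py above

# Inside tests/unit/core/test_builder.py, Path(__file__).stem evaluates to "test_builder",
# so TEST_MODULE becomes "tests.unit.core.test_builder".
stem = Path("tests/unit/core/test_builder.py").stem
TEST_MODULE = f"{TEST_PACKAGE}.{stem}"

# A converter key from the diff above then reads:
print(f"{TEST_MODULE}.ExampleDocumentWithSimpleSpans")
# -> tests.unit.core.test_builder.ExampleDocumentWithSimpleSpans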
14 changes: 10 additions & 4 deletions tests/unit/test_dataset.py → tests/unit/core/test_dataset.py
@@ -1,5 +1,6 @@
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
+from pathlib import Path
from typing import Union

import numpy
@@ -16,8 +17,11 @@
from pytorch_ie.taskmodules import TransformerSpanClassificationTaskModule

from pie_datasets import Dataset, IterableDataset
-from pie_datasets.dataset import get_pie_dataset_type
+from pie_datasets.core.dataset import get_pie_dataset_type
from tests.conftest import TestDocument
+from tests.unit.core import TEST_PACKAGE

+TEST_MODULE = f"{TEST_PACKAGE}.{Path(__file__).stem}"


def test_dataset(maybe_iterable_dataset):
@@ -281,12 +285,14 @@ class TestDocumentWithSpans(TestDocument):
assert (
str(excinfo.value)
== "No valid key (either subclass or superclass) was found for the document type "
"'<class 'tests.unit.test_dataset.test_to_document_type_not_found.<locals>.TestDocumentWithSpans'>' "
f"'<class '{TEST_MODULE}.test_to_document_type_not_found.<locals>.TestDocumentWithSpans'>' "
"in the document_converters of the dataset. Available keys: "
"{<class 'tests.unit.test_dataset.TestDocumentWithLabel'>}. Consider adding a respective converter "
"{<class '"
+ TEST_MODULE
+ ".TestDocumentWithLabel'>}. Consider adding a respective converter "
"to the dataset with dataset.register_document_converter(my_converter_method) where "
"my_converter_method should accept <class 'tests.conftest.TestDocument'> as input and return "
"'<class 'tests.unit.test_dataset.test_to_document_type_not_found.<locals>.TestDocumentWithSpans'>'."
f"'<class '{TEST_MODULE}.test_to_document_type_not_found.<locals>.TestDocumentWithSpans'>'."
)


File renamed without changes.
@@ -10,14 +10,17 @@
from pytorch_ie.documents import TextBasedDocument, TextDocument

from pie_datasets import Dataset, DatasetDict, IterableDataset
-from pie_datasets.dataset_dict import (
+from pie_datasets.core.dataset_dict import (
EnterDatasetDictMixin,
EnterDatasetMixin,
ExitDatasetDictMixin,
ExitDatasetMixin,
)
from tests import DATASET_BUILDERS_ROOT, FIXTURES_ROOT
from tests.conftest import CREATE_FIXTURE_DATA, TestDocument
+from tests.unit.core import TEST_PACKAGE

+TEST_MODULE = f"{TEST_PACKAGE}.{Path(__file__).stem}"

logger = logging.getLogger(__name__)

@@ -27,8 +30,6 @@
PIE_DATASET_PATH = DATASET_BUILDERS_ROOT / "pie" / DATASET_NAME
FIXTURE_DATA_PATH = FIXTURES_ROOT / "dataset_dict" / f"{DATASET_NAME}_extract"

-TEST_CLASS_PREFIX = "tests.unit.test_dataset_dict"


@pytest.mark.skipif(condition=not CREATE_FIXTURE_DATA, reason="don't create fixture data again")
def test_create_fixture_data():
@@ -167,7 +168,7 @@ def map_fn(doc):

@pytest.mark.parametrize(
"function",
[map_fn, f"{TEST_CLASS_PREFIX}.map_fn"],
[map_fn, f"{TEST_MODULE}.map_fn"],
)
def test_map(dataset_dict, function):
dataset_dict_mapped = dataset_dict.map(function)
@@ -479,8 +480,8 @@ def test_register_document_converter(dataset_dict):

def test_register_document_converter_resolve(dataset_dict):
dataset_dict.register_document_converter(
f"{TEST_CLASS_PREFIX}.convert_to_document_with_label",
document_type=f"{TEST_CLASS_PREFIX}.TestDocumentWithLabel",
f"{TEST_MODULE}.convert_to_document_with_label",
document_type=f"{TEST_MODULE}.TestDocumentWithLabel",
)

for name, split in dataset_dict.items():
@@ -494,11 +495,11 @@ class NoDocument:
def test_register_document_converter_resolve_wrong_document_type(dataset_dict):
with pytest.raises(TypeError) as excinfo:
dataset_dict.register_document_converter(
-convert_to_document_with_label, document_type=f"{TEST_CLASS_PREFIX}.NoDocument"
+convert_to_document_with_label, document_type=f"{TEST_MODULE}.NoDocument"
)
assert (
str(excinfo.value)
== f"document_type must be or resolve to a subclass of Document, but is '{TEST_CLASS_PREFIX}.NoDocument'"
== f"document_type must be or resolve to a subclass of Document, but is '{TEST_MODULE}.NoDocument'"
)


@@ -518,9 +519,7 @@ def test_to_document_type(dataset_dict):

def test_to_document_resolve(dataset_dict):
dataset_dict.register_document_converter(convert_to_document_with_label)
-dataset_dict_converted = dataset_dict.to_document_type(
-f"{TEST_CLASS_PREFIX}.TestDocumentWithLabel"
-)
+dataset_dict_converted = dataset_dict.to_document_type(f"{TEST_MODULE}.TestDocumentWithLabel")
assert dataset_dict_converted.document_type == TestDocumentWithLabel
for split in dataset_dict_converted.values():
assert all(isinstance(doc, TestDocumentWithLabel) for doc in split)
@@ -529,11 +528,11 @@ def test_to_document_resolve(dataset_dict):
def test_to_document_type_resolve_wrong_document_type(dataset_dict):
dataset_dict.register_document_converter(convert_to_document_with_label)
with pytest.raises(TypeError) as excinfo:
dataset_dict.to_document_type(f"{TEST_CLASS_PREFIX}.NoDocument")
dataset_dict.to_document_type(f"{TEST_MODULE}.NoDocument")
assert (
str(excinfo.value)
== f"document_type must be a document type or a string that can be resolved to such a type, but got "
f"{TEST_CLASS_PREFIX}.NoDocument."
f"{TEST_MODULE}.NoDocument."
)


