raise exception when datasets would like to cache pipeline result
ArneBinder committed May 9, 2022
1 parent d3340ea commit e8d0d1d
Showing 1 changed file with 5 additions and 0 deletions.
src/pytorch_ie/pipeline.py
@@ -11,6 +11,7 @@
 from torch import Tensor
 from torch.utils.data import DataLoader
 
+from datasets import is_caching_enabled
 from pytorch_ie.core.document import Document
 from pytorch_ie.core.model import PyTorchIEModel
 from pytorch_ie.core.taskmodule import (
@@ -390,6 +391,10 @@ def __call__(
                 batched=True,
                 **dataset_map_params,
             )
+            # For now, we do not allow caching of pipeline results since fingerprinting may be incorrect
+            # TODO: elaborate why it may be incorrect
+            if is_caching_enabled() and documents._fingerprint == processed_documents._fingerprint:
+                raise Exception("Caching is not allowed for pipeline calls")
         else:
             processed_documents = self._process_documents(
                 documents=documents,
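Context for readers: the Hugging Face datasets library identifies each Dataset by a fingerprint hash and uses that fingerprint to look up cached map() results on disk. The commit message and the in-code comment state that fingerprinting of pipeline results may be incorrect, so the safest behavior is to forbid serving pipeline calls from the cache. Since the new check only fires when is_caching_enabled() returns True, a caller can avoid the exception by disabling the datasets cache globally before invoking the pipeline. A minimal sketch under that assumption; the pipeline and dataset names are hypothetical placeholders, disable_caching() is the current datasets API, and set_caching_enabled(False) is the older equivalent:

    import datasets

    # Disable the datasets cache globally.
    # Older datasets releases: datasets.set_caching_enabled(False)
    datasets.disable_caching()
    assert not datasets.is_caching_enabled()

    # With caching disabled, the check added in Pipeline.__call__ never raises,
    # and map() recomputes the pipeline results instead of reusing cached files.
    # processed = pipeline(dataset)  # hypothetical call
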
