From db64b7e48eb9f1763d522e3ed20fc3d8f912736e Mon Sep 17 00:00:00 2001 From: Jochen Ott Date: Tue, 24 Nov 2020 08:04:55 +0100 Subject: [PATCH] Fix conversion of 'store' argument Resolves #375 --- CHANGES.rst | 10 ++++++ kartothek/core/utils.py | 52 ++++++++++++++++++++------------ kartothek/io/dask/common_cube.py | 1 - tests/core/test_utils.py | 17 +++++++++++ 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a8d5defd..725416be 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,16 @@ Changelog ========= + +Version 3.17.1 (2020-11-24) +=========================== + +Bugfixes +^^^^^^^^ + +* Fix GitHub #375 by loosening checks of the supplied store argument + + Version 3.17.0 (2020-11-23) =========================== diff --git a/kartothek/core/utils.py b/kartothek/core/utils.py index a7e591b9..c8d282cc 100644 --- a/kartothek/core/utils.py +++ b/kartothek/core/utils.py @@ -1,6 +1,6 @@ import pickle from functools import partial -from typing import cast +from typing import Any, cast from simplekv import KeyValueStore from storefact import get_store_from_url @@ -51,22 +51,33 @@ def ensure_string_type(obj): return str(obj) +def _is_simplekv_key_value_store(obj: Any) -> bool: + """ + Check whether ``obj`` is the ``simplekv.KeyValueStore``-like class. + + simplekv uses duck-typing, e.g. for decorators. Therefore, + avoid `isinstance(store, KeyValueStore)`, as it would be unreliable. Instead, + only roughly verify that `store` looks like a KeyValueStore. + """ + return hasattr(obj, "iter_prefixes") + + def ensure_store(store: StoreInput) -> KeyValueStore: """ - Ensure that the input is a valid KeyValueStore. + Convert the ``store`` argument to a ``KeyValueStore``, without pickle test. """ - # This function is often used in an eager context where we may allow non-serializable stores, i.e. we do not need to check for serializability - if isinstance(store, KeyValueStore): + # This function is often used in an eager context where we may allow + # non-serializable stores, so skip the pickle test. + if _is_simplekv_key_value_store(store): return store return lazy_store(store)() -def _factory_from_store(pickled_store): +def _identity(store: KeyValueStore) -> KeyValueStore: """ - Ensure that we're using internally only deserialized stores / proper - factories to avoid funny business with unix sockets in forked subprocesses. + Helper function for `lazy_store`. """ - return pickle.loads(pickled_store) + return store def lazy_store(store: StoreInput) -> StoreFactory: @@ -77,23 +88,26 @@ def lazy_store(store: StoreInput) -> StoreFactory: * KeyValueStore If a KeyValueStore is provided, it is verified that the store is serializable + (i.e. that pickle.dumps does not raise an exception). """ if callable(store): return cast(StoreFactory, store) - elif isinstance(store, KeyValueStore): + elif isinstance(store, str): + ret_val = partial(get_store_from_url, store) + ret_val = cast(StoreFactory, ret_val) # type: ignore + return ret_val + else: + + if not _is_simplekv_key_value_store(store): + raise TypeError( + f"Provided incompatible store type. Got {type(store)} but expected {StoreInput}." + ) + try: - pickled_store = pickle.dumps(store) + pickle.dumps(store, pickle.HIGHEST_PROTOCOL) except Exception as exc: raise TypeError( """KeyValueStore not serializable. Please consult https://kartothek.readthedocs.io/en/stable/spec/store_interface.html for more information.""" ) from exc - return partial(_factory_from_store, pickled_store) - elif isinstance(store, str): - ret_val = partial(get_store_from_url, store) - ret_val = cast(StoreFactory, ret_val) # type: ignore - return ret_val - else: - raise TypeError( - f"Provided incompatible store type. Got {type(store)} but expected {StoreInput}." - ) + return partial(_identity, store) diff --git a/kartothek/io/dask/common_cube.py b/kartothek/io/dask/common_cube.py index 22563012..ee93a89e 100644 --- a/kartothek/io/dask/common_cube.py +++ b/kartothek/io/dask/common_cube.py @@ -532,7 +532,6 @@ def _multiplex_store_dataset_from_partitions_flat( store_factory=store, ) else: - print(f"_multiplex_store_dataset_from_partitions_flat: {k} no update") result[k] = store_dataset_from_partitions( v, dataset_metadata=metadata[k], diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index ad26ab23..98f94bf4 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -2,6 +2,7 @@ import pytest from simplekv import KeyValueStore +from simplekv.decorator import PrefixDecorator from storefact import get_store_from_url from kartothek.core.utils import ensure_store, lazy_store @@ -63,3 +64,19 @@ def test_ensure_store_fact(store_input_types): assert value == store.get(key) assert store_fact is lazy_store(store_fact) + + +def test_ensure_store_returns_same_store(): + store = get_store_from_url("memory://") + assert ensure_store(lambda: store) is store + + +def test_lazy_store_returns_same_store(): + store = get_store_from_url("memory://") + assert lazy_store(lambda: store)() is store + + +def test_lazy_store_accepts_decorated_store(): + store = get_store_from_url("memory://") + pstore = PrefixDecorator("pre", store) + assert lazy_store(pstore)() is pstore