Skip to content

Commit

Permalink
Merge pull request #376 from jochen-ott-by/fix-store-argument-conversion
Browse files Browse the repository at this point in the history
Fix conversion of 'store' argument
  • Loading branch information
marco-neumann-by authored Nov 24, 2020
2 parents 6bd5047 + db64b7e commit 082ace7
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 20 deletions.
10 changes: 10 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@
Changelog
=========


Version 3.17.1 (2020-11-24)
===========================

Bugfixes
^^^^^^^^

* Fix GitHub #375 by loosening checks of the supplied store argument


Version 3.17.0 (2020-11-23)
===========================

Expand Down
52 changes: 33 additions & 19 deletions kartothek/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pickle
from functools import partial
from typing import cast
from typing import Any, cast

from simplekv import KeyValueStore
from storefact import get_store_from_url
Expand Down Expand Up @@ -51,22 +51,33 @@ def ensure_string_type(obj):
return str(obj)


def _is_simplekv_key_value_store(obj: Any) -> bool:
"""
Check whether ``obj`` is the ``simplekv.KeyValueStore``-like class.
simplekv uses duck-typing, e.g. for decorators. Therefore,
avoid `isinstance(store, KeyValueStore)`, as it would be unreliable. Instead,
only roughly verify that `store` looks like a KeyValueStore.
"""
return hasattr(obj, "iter_prefixes")


def ensure_store(store: StoreInput) -> KeyValueStore:
"""
Ensure that the input is a valid KeyValueStore.
Convert the ``store`` argument to a ``KeyValueStore``, without pickle test.
"""
# This function is often used in an eager context where we may allow non-serializable stores, i.e. we do not need to check for serializability
if isinstance(store, KeyValueStore):
# This function is often used in an eager context where we may allow
# non-serializable stores, so skip the pickle test.
if _is_simplekv_key_value_store(store):
return store
return lazy_store(store)()


def _factory_from_store(pickled_store):
def _identity(store: KeyValueStore) -> KeyValueStore:
"""
Ensure that we're using internally only deserialized stores / proper
factories to avoid funny business with unix sockets in forked subprocesses.
Helper function for `lazy_store`.
"""
return pickle.loads(pickled_store)
return store


def lazy_store(store: StoreInput) -> StoreFactory:
Expand All @@ -77,23 +88,26 @@ def lazy_store(store: StoreInput) -> StoreFactory:
* KeyValueStore
If a KeyValueStore is provided, it is verified that the store is serializable
(i.e. that pickle.dumps does not raise an exception).
"""
if callable(store):
return cast(StoreFactory, store)
elif isinstance(store, KeyValueStore):
elif isinstance(store, str):
ret_val = partial(get_store_from_url, store)
ret_val = cast(StoreFactory, ret_val) # type: ignore
return ret_val
else:

if not _is_simplekv_key_value_store(store):
raise TypeError(
f"Provided incompatible store type. Got {type(store)} but expected {StoreInput}."
)

try:
pickled_store = pickle.dumps(store)
pickle.dumps(store, pickle.HIGHEST_PROTOCOL)
except Exception as exc:
raise TypeError(
"""KeyValueStore not serializable.
Please consult https://kartothek.readthedocs.io/en/stable/spec/store_interface.html for more information."""
) from exc
return partial(_factory_from_store, pickled_store)
elif isinstance(store, str):
ret_val = partial(get_store_from_url, store)
ret_val = cast(StoreFactory, ret_val) # type: ignore
return ret_val
else:
raise TypeError(
f"Provided incompatible store type. Got {type(store)} but expected {StoreInput}."
)
return partial(_identity, store)
1 change: 0 additions & 1 deletion kartothek/io/dask/common_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,6 @@ def _multiplex_store_dataset_from_partitions_flat(
store_factory=store,
)
else:
print(f"_multiplex_store_dataset_from_partitions_flat: {k} no update")
result[k] = store_dataset_from_partitions(
v,
dataset_metadata=metadata[k],
Expand Down
17 changes: 17 additions & 0 deletions tests/core/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from simplekv import KeyValueStore
from simplekv.decorator import PrefixDecorator
from storefact import get_store_from_url

from kartothek.core.utils import ensure_store, lazy_store
Expand Down Expand Up @@ -63,3 +64,19 @@ def test_ensure_store_fact(store_input_types):
assert value == store.get(key)

assert store_fact is lazy_store(store_fact)


def test_ensure_store_returns_same_store():
store = get_store_from_url("memory://")
assert ensure_store(lambda: store) is store


def test_lazy_store_returns_same_store():
store = get_store_from_url("memory://")
assert lazy_store(lambda: store)() is store


def test_lazy_store_accepts_decorated_store():
store = get_store_from_url("memory://")
pstore = PrefixDecorator("pre", store)
assert lazy_store(pstore)() is pstore

0 comments on commit 082ace7

Please sign in to comment.