From cc71dd51f56fe559429fbfbb9a6bce89ffe1d520 Mon Sep 17 00:00:00 2001 From: fransik Date: Tue, 5 May 2020 22:59:48 +0200 Subject: [PATCH] Init numpy caching --- README.md | 63 ++++++++-- pandas_cacher/__init__.py | 4 +- pandas_cacher/pandas_cache.py | 218 ++++++++++++++++++++-------------- poetry.lock | 150 +++++++++++++++++------ pyproject.toml | 5 +- tests/test_pandas_cache.py | 117 +++++++++++++++++- 6 files changed, 413 insertions(+), 144 deletions(-) diff --git a/README.md b/README.md index b0aa8e4..a867479 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -# Pandas cache +# Table cache Works by hashing the combinations of arguments of a function call with -the function name to create a unique id of a DataFrame retrieval. If +the function name to create a unique id of a table retrieval. If the function call is new the original function will be called, and the -resulting DataFrame(s) will be stored in a HDFStore indexed by the +resulting table(s) will be stored in a HDFStore indexed by the hashed key. Next time the function is called with the same args the -DataFrame(s) will be retrieved from the store instead of executing the +table(s) will be retrieved from the store instead of executing the function. The hashing of the arguments is done by first applying str() on the @@ -23,12 +23,12 @@ database-clients. The module automatically creates a `cache/data.h5` relative to `__main__`, to change this set the environment variable -`PANDAS_CACHE_PATH` to be the desired directory of the `data.h5` file. +`CACHE_PATH` to be the desired directory of the `data.h5` file. #### Disabling the cache with env-variable -To disable the pandas cache set the environment variable -`DISABLE_PANDAS_CACHE` to `TRUE`. +To disable the cache set the environment variable +`DISABLE_CACHE` to `TRUE`. 
### Usage @@ -154,8 +154,7 @@ import pandas as pd @pandas_cache("a", "c") def simple_func(a, *args, **kwargs): sleep(5) - return pd.DataFrame([[1,2,3], [2,3,4]]), \ - pd.DataFrame([[1,2,3], [2,3,4]]) * 10 + return pd.DataFrame([[1,2,3], [2,3,4]]), pd.DataFrame([[1,2,3], [2,3,4]]) * 10 t0 = datetime.now() @@ -180,3 +179,49 @@ print(datetime.now() - t0) 1 20 30 40) 0:00:00.019578 ``` + +#### Disabling cache for tests + +Caching can be disabled by setting the environment variable `DISABLE_CACHE` to `TRUE` + +```python +from mock import patch +def test_cached_function(): + with patch.dict("os.environ", {"DISABLE_CACHE": "TRUE"}, clear=True): + assert cached_function() == target +``` + +#### Numpy caching + +```python +from pandas_cacher import numpy_cache +from time import sleep +from datetime import datetime +import numpy as np + + +@numpy_cache("a", "c") +def simple_func(a, *args, **kwargs): + sleep(5) + return np.array([[1, 2, 3], [2, 3, 4]]), np.array([[1, 2, 3], [2, 3, 4]]) * 10 + + +t0 = datetime.now() +print(simple_func(1, b=2, c=True)) +print(datetime.now() - t0) + +t0 = datetime.now() +print(simple_func(a=1, b=3, c=True)) +print(datetime.now() - t0) +``` + +```commandline +(array([[1, 2, 3], + [2, 3, 4]]), array([[10, 20, 30], + [20, 30, 40]])) +0:00:05.009084 +(array([[1, 2, 3], + [2, 3, 4]]), array([[10, 20, 30], + [20, 30, 40]])) +0:00:00.002000 +``` diff --git a/pandas_cacher/__init__.py b/pandas_cacher/__init__.py index 2e0f085..fd604fb 100644 --- a/pandas_cacher/__init__.py +++ b/pandas_cacher/__init__.py @@ -1,3 +1,3 @@ -from pandas_cacher.pandas_cache import pandas_cache # noqa: F401 +from pandas_cacher.pandas_cache import numpy_cache, pandas_cache # noqa: F401 -VERSION = "0.1.2" +VERSION = "0.1.3" diff --git a/pandas_cacher/pandas_cache.py b/pandas_cacher/pandas_cache.py index 3f70235..accf12a 100644 --- a/pandas_cacher/pandas_cache.py +++ b/pandas_cacher/pandas_cache.py @@ -5,117 +5,159 @@ import os import pathlib from collections import defaultdict 
-from typing import Any, Callable, Tuple, Union +from typing import Any, Callable, Dict, Iterable, Tuple, Type, Union +import h5py +import numpy as np import pandas as pd pandas_function = Callable[..., Union[Tuple[pd.DataFrame], pd.DataFrame]] +numpy_function = Callable[..., Union[Tuple[np.ndarray], np.ndarray]] +cached_data_type = Union[Tuple[Any], Any] +cache_able_function = Callable[..., cached_data_type] +store_function = Callable[[str, Callable[..., Any], Tuple[Any], Dict[str, Any]], Any] def get_path() -> pathlib.Path: - cache_path = os.environ.get("PANDAS_CACHE_PATH", "") + cache_path = os.environ.get("CACHE_PATH", "") cache_path = pathlib.Path.cwd() if cache_path == "" else pathlib.Path(cache_path) cache_path.mkdir(parents=True, exist_ok=True) return cache_path -def get_df_hdf( - key: str, func: pandas_function, f_args: Any, f_kwargs: Any -) -> Union[Tuple[pd.DataFrame], pd.DataFrame]: - """Retrieves the DataFrames from the HDFStore if the key exists, - else run the function then store & return the resulting DataFrames. +class StoreClass: + def __init__(self, file_path: str, mode: str): + raise NotImplementedError - Args: - key: Unique str hash of function call - func: Wrapped function, should return a DataFrame or tuple of them. - f_args: Arguments passed along to the function - f_kwargs: Keyword-Arguments passed along to the function + def __enter__(self): + raise NotImplementedError - Returns: DataFrames that func would originally return. 
+ def __exit__(self, exc_type, exc_val, exc_tb): + raise NotImplementedError - """ - file_path = get_path() / "data.h5" - mode = "r+" if file_path.exists() else "w" - with pd.HDFStore(file_path, mode=mode) as store: - keys = defaultdict(list) - for s_key in store.keys(): - keys[s_key.split("/")[1]].append(s_key) - if key in keys.keys(): - dfs = [pd.read_hdf(store, key=key_) for key_ in keys[key]] - return tuple(dfs) if len(dfs) > 1 else dfs[0] - df = func(*f_args, **f_kwargs) - with pd.HDFStore(file_path, mode=mode) as store: - if isinstance(df, tuple): - for i, df_ in enumerate(df): - df_.to_hdf(store, key=f"{key}/df{i}") - else: - df.to_hdf(store, key=key) - return df - - -# pylint: disable=keyword-arg-before-vararg -def pandas_cache(orig_func: pandas_function = None, *args: str) -> pandas_function: - """Decorator for caching function calls that return pandas DataFrames. + def keys(self) -> Iterable: + raise NotImplementedError - Args: - *args: arguments of the function to use as filename - **kwargs: keyword-arguments of the function to use as filename + def create_dataset(self, key: str, data: ...) -> None: + raise NotImplementedError + + def __getitem__(self, key: str) -> ...: + raise NotImplementedError + + +class PandasStore(pd.HDFStore): + def create_dataset(self, key: str, data: pd.DataFrame) -> None: + data.to_hdf(self, key) + def __getitem__(self, key: str) -> pd.DataFrame: + return pd.read_hdf(self, key=key) - Returns: decorated function + +def store_factory(data_storer: Type[StoreClass]) -> Type[store_function]: + """Factory function for creating storing functions for the cache decorator. + + Args: + data_storer: class with a context manager, and file_path + mode parameters. + + Returns: function for storing tables """ - if isinstance(orig_func, str): - args = list(args) + [orig_func] - orig_func = None - def decorated(func: pandas_function) -> pandas_function: - """Wrapper of function that returns pandas DataFrames. 
+ def store_func( + key: str, func: cache_able_function, f_args: Tuple[Any], f_kwargs: Dict[str, Any], + ) -> cached_data_type: + """Retrieves stored data if key exists in stored data if the key is new, retrieves data from + decorated function & stores the result with the given key. Args: - func: function to be wrapped, should return a DataFrame or tuple of them. + key: unique key used to retrieve/store data + func: original cached function + f_args: args to pass to the function + f_kwargs: kwargs to pass to the function - Returns: wrapped function + Returns: + Data retrieved from the store if existing else from function """ - - @functools.wraps(func) - def wrapped(*f_args: ..., **f_kwargs: ...) -> Union[Tuple[pd.DataFrame], pd.DataFrame]: - """ Hashes function arguments to a unique key, and uses the key - to store/retrieve DataFrames from the HDFStore. - - Args: - *f_args: Arguments passed along to the function - **f_kwargs: Keyword-Arguments passed along to the function - - Returns: DataFrame(s) - - """ - if os.environ.get("DISABLE_PANDAS_CACHE", "FALSE") == "TRUE": - return func(*f_args, **f_kwargs) - argspec = inspect.getfullargspec(func) - defaults = ( - dict(zip(argspec.args[::-1], argspec.defaults[::-1])) if argspec.defaults else {} - ) - kw_defaults = argspec.kwonlydefaults if argspec.kwonlydefaults else {} - full_args = { - **kw_defaults, - **defaults, - **f_kwargs, - **dict(zip(argspec.args, f_args)), - **{"arglist": f_args[len(argspec.args) :]}, - } - full_args = full_args if not args else {arg: full_args[arg] for arg in args} - full_args.pop("self", "") - full_args = {k: str(v) for k, v in full_args.items()} - key = ( - "df" - + hashlib.md5((func.__name__ + json.dumps(full_args)).encode("utf-8")).hexdigest() - ) - return get_df_hdf(key, func, f_args, f_kwargs) - - return wrapped - - if orig_func: - return decorated(orig_func) - return decorated + file_path = get_path() / "data.h5" + mode = "r+" if file_path.exists() else "w" + with 
data_storer(file_path, mode=mode) as store: + keys = defaultdict(list) + for s_key in store.keys(): + s_key_ = s_key.split("-")[0] if "-" in s_key else s_key + keys[s_key_.strip("/")].append(s_key) + if key in keys.keys(): + arrays = [store[key_][:] for key_ in keys[key]] + return tuple(arrays) if len(arrays) > 1 else arrays[0] + data = func(*f_args, **f_kwargs) + with data_storer(file_path, mode=mode) as store: + if isinstance(data, tuple): + for i, data_ in enumerate(data): + store.create_dataset(f"{key}-data{i}", data=data_) + else: + store.create_dataset(key, data=data) + return data + + return store_func + + +def cache_decorator_factory(table_getter: Type[store_function]) -> Type[cache_able_function]: + # pylint: disable=keyword-arg-before-vararg + def cache_decorator( + orig_func: cache_able_function = None, *args: str + ) -> Type[cache_able_function]: + if isinstance(orig_func, str): + args = list(args) + [orig_func] + orig_func = None + + def decorated(func: cache_able_function) -> Type[cache_able_function]: + @functools.wraps(func) + def wrapped(*f_args: Tuple[Any], **f_kwargs: Dict[str, Any]) -> cached_data_type: + """Hashes function arguments to a unique key, and uses the key to store/retrieve + data from the configured store. 
+ + Args: + *f_args: Arguments passed along to the function + **f_kwargs: Keyword-Arguments passed along to the function + + Returns: Stored data if existing, else result from the function + + """ + if os.environ.get("DISABLE_CACHE", "FALSE") == "TRUE": + return func(*f_args, **f_kwargs) + argspec = inspect.getfullargspec(func) + defaults = ( + dict(zip(argspec.args[::-1], argspec.defaults[::-1])) + if argspec.defaults + else {} + ) + kw_defaults = argspec.kwonlydefaults if argspec.kwonlydefaults else {} + full_args = { + **kw_defaults, + **defaults, + **f_kwargs, + **dict(zip(argspec.args, f_args)), + **{"arglist": f_args[len(argspec.args) :]}, + } + full_args = full_args if not args else {arg: full_args[arg] for arg in args} + full_args.pop("self", "") + full_args = {k: str(v) for k, v in full_args.items()} + key = ( + "df" + + hashlib.md5( + (func.__name__ + json.dumps(full_args)).encode("utf-8") + ).hexdigest() + ) + return table_getter(key, func, f_args, f_kwargs) + + return wrapped + + if orig_func: + return decorated(orig_func) + return decorated + + return cache_decorator + + +pandas_cache = cache_decorator_factory(store_factory(PandasStore)) +numpy_cache = cache_decorator_factory(store_factory(h5py.File)) diff --git a/poetry.lock b/poetry.lock index 94abd40..d2b83f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -95,8 +95,8 @@ category = "dev" description = "Validate configuration and produce human readable error messages." 
name = "cfgv" optional = false -python-versions = ">=3.6.1" -version = "3.1.0" +python-versions = ">=3.6" +version = "3.0.0" [[package]] category = "dev" @@ -191,6 +191,18 @@ mccabe = ">=0.6.0,<0.7.0" pycodestyle = ">=2.5.0,<2.6.0" pyflakes = ">=2.1.0,<2.2.0" +[[package]] +category = "main" +description = "Read and write HDF5 files from Python" +name = "h5py" +optional = false +python-versions = "*" +version = "2.10.0" + +[package.dependencies] +numpy = ">=1.7" +six = "*" + [[package]] category = "dev" description = "File identification library for Python" @@ -218,6 +230,27 @@ zipp = ">=0.5" docs = ["sphinx", "rst.linker"] testing = ["packaging", "importlib-resources"] +[[package]] +category = "dev" +description = "Read resources from Python packages" +marker = "python_version < \"3.7\"" +name = "importlib-resources" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +version = "1.5.0" + +[package.dependencies] +[package.dependencies.importlib-metadata] +python = "<3.8" +version = "*" + +[package.dependencies.zipp] +python = "<3.8" +version = ">=0.4" + +[package.extras] +docs = ["sphinx", "rst.linker", "jaraco.packaging"] + [[package]] category = "dev" description = "IPython Kernel for Jupyter" @@ -597,8 +630,8 @@ category = "main" description = "Powerful data structures for data analysis, time series, and statistics" name = "pandas" optional = false -python-versions = ">=3.6.1" -version = "1.0.3" +python-versions = ">=3.5.3" +version = "0.25.3" [package.dependencies] numpy = ">=1.13.3" @@ -709,8 +742,8 @@ category = "dev" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
name = "pre-commit" optional = false -python-versions = ">=3.6.1" -version = "2.3.0" +python-versions = ">=3.6" +version = "2.1.1" [package.dependencies] cfgv = ">=2.0.0" @@ -724,6 +757,10 @@ virtualenv = ">=15.2" python = "<3.8" version = "*" +[package.dependencies.importlib-resources] +python = "<3.7" +version = "*" + [[package]] category = "dev" description = "Python client for the Prometheus monitoring system." @@ -740,8 +777,8 @@ category = "dev" description = "Library for building powerful interactive command lines in Python" name = "prompt-toolkit" optional = false -python-versions = ">=3.6.1" -version = "3.0.5" +python-versions = ">=3.6" +version = "3.0.3" [package.dependencies] wcwidth = "*" @@ -749,7 +786,7 @@ wcwidth = "*" [[package]] category = "dev" description = "Run a subprocess in a pseudo terminal" -marker = "sys_platform != \"win32\" or os_name != \"nt\" or python_version >= \"3.3\" and sys_platform != \"win32\"" +marker = "python_version >= \"3.3\" and sys_platform != \"win32\" or sys_platform != \"win32\" or os_name != \"nt\" or python_version >= \"3.3\" and sys_platform != \"win32\" and (python_version >= \"3.3\" and sys_platform != \"win32\" or sys_platform != \"win32\")" name = "ptyprocess" optional = false python-versions = "*" @@ -1059,6 +1096,10 @@ six = ">=1.9.0,<2" python = "<3.8" version = ">=0.12,<2" +[package.dependencies.importlib-resources] +python = "<3.7" +version = ">=1.0,<2" + [package.extras] docs = ["sphinx (>=2.0.0,<3)", "sphinx-argparse (>=0.2.5,<1)", "sphinx-rtd-theme (>=0.4.3,<1)", "towncrier (>=19.9.0rc1)", "proselint (>=0.10.2,<1)"] testing = ["pytest (>=4.0.0,<6)", "coverage (>=4.5.1,<6)", "pytest-mock (>=2.0.0,<3)", "pytest-env (>=0.6.2,<1)", "pytest-timeout (>=1.3.4,<2)", "packaging (>=20.0)", "xonsh (>=0.9.16,<1)"] @@ -1112,8 +1153,8 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] testing = ["jaraco.itertools", "func-timeout"] [metadata] -content-hash = 
"6048aeb19c7035aff50b31e79b10a72732a19339c13aee9684289c81cd5c3f84" -python-versions = "^3.7" +content-hash = "4751692649c28a83723ad8a6a3f02161c333096cc5e323b7d88bf2e35feeaa87" +python-versions = "^3.6" [metadata.files] appdirs = [ @@ -1148,8 +1189,8 @@ bleach = [ {file = "bleach-3.1.5.tar.gz", hash = "sha256:3c4c520fdb9db59ef139915a5db79f8b51bc2a7257ea0389f30c846883430a4b"}, ] cfgv = [ - {file = "cfgv-3.1.0-py2.py3-none-any.whl", hash = "sha256:1ccf53320421aeeb915275a196e23b3b8ae87dea8ac6698b1638001d4a486d53"}, - {file = "cfgv-3.1.0.tar.gz", hash = "sha256:c8e8f552ffcc6194f4e18dd4f68d9aef0c0d58ae7e7be8c82bee3c5e9edfa513"}, + {file = "cfgv-3.0.0-py2.py3-none-any.whl", hash = "sha256:f22b426ed59cd2ab2b54ff96608d846c33dfb8766a67f0b4a6ce130ce244414f"}, + {file = "cfgv-3.0.0.tar.gz", hash = "sha256:04b093b14ddf9fd4d17c53ebfd55582d27b76ed30050193c14e560770c5360eb"}, ] click = [ {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, @@ -1218,6 +1259,37 @@ flake8 = [ {file = "flake8-3.7.9-py2.py3-none-any.whl", hash = "sha256:49356e766643ad15072a789a20915d3c91dc89fd313ccd71802303fd67e4deca"}, {file = "flake8-3.7.9.tar.gz", hash = "sha256:45681a117ecc81e870cbf1262835ae4af5e7a8b08e40b944a8a6e6b895914cfb"}, ] +h5py = [ + {file = "h5py-2.10.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:ecf4d0b56ee394a0984de15bceeb97cbe1fe485f1ac205121293fc44dcf3f31f"}, + {file = "h5py-2.10.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:86868dc07b9cc8cb7627372a2e6636cdc7a53b7e2854ad020c9e9d8a4d3fd0f5"}, + {file = "h5py-2.10.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:aac4b57097ac29089f179bbc2a6e14102dd210618e94d77ee4831c65f82f17c0"}, + {file = "h5py-2.10.0-cp27-cp27m-win32.whl", hash = "sha256:7be5754a159236e95bd196419485343e2b5875e806fe68919e087b6351f40a70"}, + {file = "h5py-2.10.0-cp27-cp27m-win_amd64.whl", hash = "sha256:13c87efa24768a5e24e360a40e0bc4c49bcb7ce1bb13a3a7f9902cec302ccd36"}, + {file = 
"h5py-2.10.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:79b23f47c6524d61f899254f5cd5e486e19868f1823298bc0c29d345c2447172"}, + {file = "h5py-2.10.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cbf28ae4b5af0f05aa6e7551cee304f1d317dbed1eb7ac1d827cee2f1ef97a99"}, + {file = "h5py-2.10.0-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:c0d4b04bbf96c47b6d360cd06939e72def512b20a18a8547fa4af810258355d5"}, + {file = "h5py-2.10.0-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:549ad124df27c056b2e255ea1c44d30fb7a17d17676d03096ad5cd85edb32dc1"}, + {file = "h5py-2.10.0-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:a5f82cd4938ff8761d9760af3274acf55afc3c91c649c50ab18fcff5510a14a5"}, + {file = "h5py-2.10.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:3dad1730b6470fad853ef56d755d06bb916ee68a3d8272b3bab0c1ddf83bb99e"}, + {file = "h5py-2.10.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:063947eaed5f271679ed4ffa36bb96f57bc14f44dd4336a827d9a02702e6ce6b"}, + {file = "h5py-2.10.0-cp35-cp35m-win32.whl", hash = "sha256:c54a2c0dd4957776ace7f95879d81582298c5daf89e77fb8bee7378f132951de"}, + {file = "h5py-2.10.0-cp35-cp35m-win_amd64.whl", hash = "sha256:6998be619c695910cb0effe5eb15d3a511d3d1a5d217d4bd0bebad1151ec2262"}, + {file = "h5py-2.10.0-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:ff7d241f866b718e4584fa95f520cb19405220c501bd3a53ee11871ba5166ea2"}, + {file = "h5py-2.10.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:54817b696e87eb9e403e42643305f142cd8b940fe9b3b490bbf98c3b8a894cf4"}, + {file = "h5py-2.10.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d3c59549f90a891691991c17f8e58c8544060fdf3ccdea267100fa5f561ff62f"}, + {file = "h5py-2.10.0-cp36-cp36m-win32.whl", hash = "sha256:d7ae7a0576b06cb8e8a1c265a8bc4b73d05fdee6429bffc9a26a6eb531e79d72"}, + {file = "h5py-2.10.0-cp36-cp36m-win_amd64.whl", hash = "sha256:bffbc48331b4a801d2f4b7dac8a72609f0b10e6e516e5c480a3e3241e091c878"}, + {file = "h5py-2.10.0-cp37-cp37m-macosx_10_6_intel.whl", hash = 
"sha256:51ae56894c6c93159086ffa2c94b5b3388c0400548ab26555c143e7cfa05b8e5"}, + {file = "h5py-2.10.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:16ead3c57141101e3296ebeed79c9c143c32bdd0e82a61a2fc67e8e6d493e9d1"}, + {file = "h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0e25bb91e7a02efccb50aba6591d3fe2c725479e34769802fcdd4076abfa917"}, + {file = "h5py-2.10.0-cp37-cp37m-win32.whl", hash = "sha256:f23951a53d18398ef1344c186fb04b26163ca6ce449ebd23404b153fd111ded9"}, + {file = "h5py-2.10.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8bb1d2de101f39743f91512a9750fb6c351c032e5cd3204b4487383e34da7f75"}, + {file = "h5py-2.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:64f74da4a1dd0d2042e7d04cf8294e04ddad686f8eba9bb79e517ae582f6668d"}, + {file = "h5py-2.10.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:d35f7a3a6cefec82bfdad2785e78359a0e6a5fbb3f605dd5623ce88082ccd681"}, + {file = "h5py-2.10.0-cp38-cp38-win32.whl", hash = "sha256:6ef7ab1089e3ef53ca099038f3c0a94d03e3560e6aff0e9d6c64c55fb13fc681"}, + {file = "h5py-2.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:769e141512b54dee14ec76ed354fcacfc7d97fea5a7646b709f7400cf1838630"}, + {file = "h5py-2.10.0.tar.gz", hash = "sha256:84412798925dc870ffd7107f045d7659e60f5d46d1c70c700375248bf6bf512d"}, +] identify = [ {file = "identify-1.4.15-py2.py3-none-any.whl", hash = "sha256:88ed90632023e52a6495749c6732e61e08ec9f4f04e95484a5c37b9caf40283c"}, {file = "identify-1.4.15.tar.gz", hash = "sha256:23c18d97bb50e05be1a54917ee45cc61d57cb96aedc06aabb2b02331edf0dbf0"}, @@ -1226,6 +1298,10 @@ importlib-metadata = [ {file = "importlib_metadata-1.6.0-py2.py3-none-any.whl", hash = "sha256:2a688cbaa90e0cc587f1df48bdc97a6eadccdcd9c35fb3f976a09e3b5016d90f"}, {file = "importlib_metadata-1.6.0.tar.gz", hash = "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e"}, ] +importlib-resources = [ + {file = "importlib_resources-1.5.0-py2.py3-none-any.whl", hash = 
"sha256:85dc0b9b325ff78c8bef2e4ff42616094e16b98ebd5e3b50fe7e2f0bbcdcde49"}, + {file = "importlib_resources-1.5.0.tar.gz", hash = "sha256:6f87df66833e1942667108628ec48900e02a4ab4ad850e25fbf07cb17cf734ca"}, +] ipykernel = [ {file = "ipykernel-5.2.1-py3-none-any.whl", hash = "sha256:003c9c1ab6ff87d11f531fee2b9ca59affab19676fc6b2c21da329aef6e73499"}, {file = "ipykernel-5.2.1.tar.gz", hash = "sha256:2937373c356fa5b634edb175c5ea0e4b25de8008f7c194f2d49cfbd1f9c970a8"}, @@ -1303,11 +1379,6 @@ markupsafe = [ {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2"}, {file = "MarkupSafe-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-win32.whl", hash = "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"}, {file = "MarkupSafe-1.1.1.tar.gz", hash = "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b"}, ] mccabe = [ @@ -1403,22 +1474,25 @@ packaging = [ {file = "packaging-20.3.tar.gz", hash = "sha256:3c292b474fda1671ec57d46d739d072bfd495a4f51ad01a055121d81e952b7a3"}, ] pandas = [ - {file = "pandas-1.0.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:d234bcf669e8b4d6cbcd99e3ce7a8918414520aeb113e2a81aeb02d0a533d7f7"}, - {file = "pandas-1.0.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ca84a44cf727f211752e91eab2d1c6c1ab0f0540d5636a8382a3af428542826e"}, - {file = "pandas-1.0.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1fa4bae1a6784aa550a1c9e168422798104a85bf9c77a1063ea77ee6f8452e3a"}, - {file = "pandas-1.0.3-cp36-cp36m-win32.whl", hash = "sha256:863c3e4b7ae550749a0bb77fa22e601a36df9d2905afef34a6965bed092ba9e5"}, - {file = "pandas-1.0.3-cp36-cp36m-win_amd64.whl", hash = "sha256:a210c91a02ec5ff05617a298ad6f137b9f6f5771bf31f2d6b6367d7f71486639"}, - {file = "pandas-1.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11c7cb654cd3a0e9c54d81761b5920cdc86b373510d829461d8f2ed6d5905266"}, - {file = "pandas-1.0.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6597df07ea361231e60c00692d8a8099b519ed741c04e65821e632bc9ccb924c"}, - {file = "pandas-1.0.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:743bba36e99d4440403beb45a6f4f3a667c090c00394c176092b0b910666189b"}, - {file = "pandas-1.0.3-cp37-cp37m-win32.whl", hash = "sha256:07c1b58936b80eafdfe694ce964ac21567b80a48d972879a359b3ebb2ea76835"}, - {file = "pandas-1.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:12f492dd840e9db1688126216706aa2d1fcd3f4df68a195f9479272d50054645"}, - {file = "pandas-1.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0ebe327fb088df4d06145227a4aa0998e4f80a9e6aed4b61c1f303bdfdf7c722"}, - {file = "pandas-1.0.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:858a0d890d957ae62338624e4aeaf1de436dba2c2c0772570a686eaca8b4fc85"}, - {file = "pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:387dc7b3c0424327fe3218f81e05fc27832772a5dffbed385013161be58df90b"}, - {file = "pandas-1.0.3-cp38-cp38-win32.whl", hash = "sha256:167a1315367cea6ec6a5e11e791d9604f8e03f95b57ad227409de35cf850c9c5"}, - {file = "pandas-1.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:1a7c56f1df8d5ad8571fa251b864231f26b47b59cbe41aa5c0983d17dbb7a8e4"}, - 
{file = "pandas-1.0.3.tar.gz", hash = "sha256:32f42e322fb903d0e189a4c10b75ba70d90958cc4f66a1781ed027f1a1d14586"}, + {file = "pandas-0.25.3-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:df8864824b1fe488cf778c3650ee59c3a0d8f42e53707de167ba6b4f7d35f133"}, + {file = "pandas-0.25.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:7458c48e3d15b8aaa7d575be60e1e4dd70348efcd9376656b72fecd55c59a4c3"}, + {file = "pandas-0.25.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:61741f5aeb252f39c3031d11405305b6d10ce663c53bc3112705d7ad66c013d0"}, + {file = "pandas-0.25.3-cp35-cp35m-win32.whl", hash = "sha256:adc3d3a3f9e59a38d923e90e20c4922fc62d1e5a03d083440468c6d8f3f1ae0a"}, + {file = "pandas-0.25.3-cp35-cp35m-win_amd64.whl", hash = "sha256:975c461accd14e89d71772e89108a050fa824c0b87a67d34cedf245f6681fc17"}, + {file = "pandas-0.25.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ee50c2142cdcf41995655d499a157d0a812fce55c97d9aad13bc1eef837ed36c"}, + {file = "pandas-0.25.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4545467a637e0e1393f7d05d61dace89689ad6d6f66f267f86fff737b702cce9"}, + {file = "pandas-0.25.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:bbe3eb765a0b1e578833d243e2814b60c825b7fdbf4cdfe8e8aae8a08ed56ecf"}, + {file = "pandas-0.25.3-cp36-cp36m-win32.whl", hash = "sha256:8153705d6545fd9eb6dd2bc79301bff08825d2e2f716d5dced48daafc2d0b81f"}, + {file = "pandas-0.25.3-cp36-cp36m-win_amd64.whl", hash = "sha256:26382aab9c119735908d94d2c5c08020a4a0a82969b7e5eefb92f902b3b30ad7"}, + {file = "pandas-0.25.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:00dff3a8e337f5ed7ad295d98a31821d3d0fe7792da82d78d7fd79b89c03ea9d"}, + {file = "pandas-0.25.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e45055c30a608076e31a9fcd780a956ed3b1fa20db61561b8d88b79259f526f7"}, + {file = "pandas-0.25.3-cp37-cp37m-win32.whl", hash = "sha256:255920e63850dc512ce356233081098554d641ba99c3767dde9e9f35630f994b"}, + {file = "pandas-0.25.3-cp37-cp37m-win_amd64.whl", hash = 
"sha256:22361b1597c8c2ffd697aa9bf85423afa9e1fcfa6b1ea821054a244d5f24d75e"}, + {file = "pandas-0.25.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9962957a27bfb70ab64103d0a7b42fa59c642fb4ed4cb75d0227b7bb9228535d"}, + {file = "pandas-0.25.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:78bf638993219311377ce9836b3dc05f627a666d0dbc8cec37c0ff3c9ada673b"}, + {file = "pandas-0.25.3-cp38-cp38-win32.whl", hash = "sha256:6a3ac2c87e4e32a969921d1428525f09462770c349147aa8e9ab95f88c71ec71"}, + {file = "pandas-0.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:33970f4cacdd9a0ddb8f21e151bfb9f178afb7c36eb7c25b9094c02876f385c2"}, + {file = "pandas-0.25.3.tar.gz", hash = "sha256:52da74df8a9c9a103af0a72c9d5fdc8e0183a90884278db7f386b5692a2220a4"}, ] pandocfilters = [ {file = "pandocfilters-1.4.2.tar.gz", hash = "sha256:b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9"}, @@ -1453,15 +1527,15 @@ ppft = [ {file = "ppft-1.6.6.1.tar.gz", hash = "sha256:9e2173042edd5cc9c7bee0d7731873f17fcdce0e42e4b7ab68857d0de7b631fc"}, ] pre-commit = [ - {file = "pre_commit-2.3.0-py2.py3-none-any.whl", hash = "sha256:979b53dab1af35063a483bfe13b0fcbbf1a2cf8c46b60e0a9a8d08e8269647a1"}, - {file = "pre_commit-2.3.0.tar.gz", hash = "sha256:f3e85e68c6d1cbe7828d3471896f1b192cfcf1c4d83bf26e26beeb5941855257"}, + {file = "pre_commit-2.1.1-py2.py3-none-any.whl", hash = "sha256:09ebe467f43ce24377f8c2f200fe3cd2570d328eb2ce0568c8e96ce19da45fa6"}, + {file = "pre_commit-2.1.1.tar.gz", hash = "sha256:f8d555e31e2051892c7f7b3ad9f620bd2c09271d87e9eedb2ad831737d6211eb"}, ] prometheus-client = [ {file = "prometheus_client-0.7.1.tar.gz", hash = "sha256:71cd24a2b3eb335cb800c7159f423df1bd4dcd5171b234be15e3f31ec9f622da"}, ] prompt-toolkit = [ - {file = "prompt_toolkit-3.0.5-py3-none-any.whl", hash = "sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04"}, - {file = "prompt_toolkit-3.0.5.tar.gz", hash = "sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8"}, + {file = 
"prompt_toolkit-3.0.3-py3-none-any.whl", hash = "sha256:c93e53af97f630f12f5f62a3274e79527936ed466f038953dfa379d4941f651a"}, + {file = "prompt_toolkit-3.0.3.tar.gz", hash = "sha256:a402e9bf468b63314e37460b68ba68243d55b2f8c4d0192f85a019af3945050e"}, ] ptyprocess = [ {file = "ptyprocess-0.6.0-py2.py3-none-any.whl", hash = "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"}, diff --git a/pyproject.toml b/pyproject.toml index 2cbd674..573e19b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,15 @@ [tool.poetry] name = "pandas_cacher" -version = "0.1.2" +version = "0.1.3" description = "Pandas cacher" authors = ["Statnett Datascience "] [tool.poetry.dependencies] numpy = "*" pandas = "*" -python = "^3.5" +python = "^3.6" tables = "*" +h5py = "^2.10.0" [tool.poetry.dev-dependencies] autopep8 = "*" diff --git a/tests/test_pandas_cache.py b/tests/test_pandas_cache.py index 8930f97..8a41b67 100644 --- a/tests/test_pandas_cache.py +++ b/tests/test_pandas_cache.py @@ -3,11 +3,12 @@ import tempfile from unittest.mock import Mock, patch +import numpy as np import pandas as pd import pathos.multiprocessing as mp from pandas.testing import assert_frame_equal -from pandas_cacher import pandas_cache +from pandas_cacher import numpy_cache, pandas_cache def test_pd_cache(): @@ -34,7 +35,7 @@ def pandas_getter_2(self, a, b, *args): c = ClassFunc() with tempfile.TemporaryDirectory() as d: - with patch.dict("os.environ", {"PANDAS_CACHE_PATH": str(d)}, clear=True): + with patch.dict("os.environ", {"CACHE_PATH": str(d)}, clear=True): df1 = pandas_getter_clean(1, 32) df2 = pandas_getter_clean(1, 32) @@ -58,12 +59,66 @@ def pandas_getter_2(self, a, b, *args): df_getter.reset_mock() - os.environ["DISABLE_PANDAS_CACHE"] = "TRUE" + os.environ["DISABLE_CACHE"] = "TRUE" pandas_getter_clean(1, 2) pandas_getter_clean(1, 2) assert 2 == df_getter.call_count +def test_np_cache(): + array_getter = Mock() + array_getter.return_value = np.array([[1, 2, 3], [2, 3, 4]]) + 
+ @numpy_cache("a", "b", "c") + def numpy_getter(a, b, *args, c=False, **kwargs): + return array_getter() + + @numpy_cache + def numpy_getter_clean(a, b): + return array_getter() + + class ClassFunc: + @numpy_cache + def numpy_getter(self, a, b, *args): + return array_getter() + + @numpy_cache + def numpy_getter_2(self, a, b, *args): + return array_getter() + + c = ClassFunc() + + with tempfile.TemporaryDirectory() as d: + with patch.dict("os.environ", {"CACHE_PATH": str(d)}, clear=True): + + a1 = numpy_getter_clean(1, 32) + a2 = numpy_getter_clean(1, 32) + np.testing.assert_equal(a1, a2) + array_getter.assert_called_once() + numpy_getter(1, 32, 3, c=True) + numpy_getter(1, 32, 4, c=True) + assert 2 == array_getter.call_count + + array_getter.reset_mock() + + date = datetime.datetime(2019, 1, 1) + c.numpy_getter(date, 2) + c.numpy_getter(date, 2) + c.numpy_getter(date, 2, [1, 2, 3]) + c.numpy_getter_2(date, 2, [1, 2, 3]) + assert 3 == array_getter.call_count + c.numpy_getter(1, 2, 3) + c.numpy_getter(1, 2, 4, 5) + assert 5 == array_getter.call_count + + array_getter.reset_mock() + + os.environ["DISABLE_CACHE"] = "TRUE" + numpy_getter_clean(1, 2) + numpy_getter_clean(1, 2) + assert 2 == array_getter.call_count + + def test_multiple_pd_cache(): df_getter = Mock() df_getter.return_value = ( @@ -88,7 +143,7 @@ def pandas_getter_2(self, a, b, *args): c = ClassFunc() with tempfile.TemporaryDirectory() as d: - with patch.dict("os.environ", {"PANDAS_CACHE_PATH": str(d)}, clear=True): + with patch.dict("os.environ", {"CACHE_PATH": str(d)}, clear=True): df1, df2, df3 = pandas_getter(1, 2) df11, df12, df13 = pandas_getter(1, 2) @@ -116,6 +171,58 @@ def pandas_getter_2(self, a, b, *args): assert 5 == df_getter.call_count +def test_multiple_np_cache(): + array_getter = Mock() + array_getter.return_value = ( + np.array([[1, 2, 3], [2, 3, 4]]), + np.array([[1, 2, 3], [2, 3, 4]]) + 1, + np.array([[1, 2, 3], [2, 3, 4]]) + 2, + ) + + @numpy_cache("a", "b", "c") + def numpy_getter(a, 
b, *args, c=False, **kwargs): + return array_getter() + + class ClassFunc: + @numpy_cache + def numpy_getter(self, a, b, *args): + return array_getter() + + @numpy_cache + def numpy_getter_2(self, a, b, *args): + return array_getter() + + c = ClassFunc() + + with tempfile.TemporaryDirectory() as d: + with patch.dict("os.environ", {"CACHE_PATH": str(d)}, clear=True): + + df1, df2, df3 = numpy_getter(1, 2) + df11, df12, df13 = numpy_getter(1, 2) + + np.testing.assert_equal(df1, df11) + np.testing.assert_equal(df2, df12) + np.testing.assert_equal(df3, df13) + + numpy_getter(1, 2, d=[1, 2, 3]) + array_getter.assert_called_once() + numpy_getter(1, 2, 3, c=True) + numpy_getter(1, 2, 4, c=True) + assert 2 == array_getter.call_count + + array_getter.reset_mock() + + date = datetime.datetime(2019, 1, 1) + c.numpy_getter(date, 2) + c.numpy_getter(date, 2) + c.numpy_getter(date, 2, [1, 2, 3]) + c.numpy_getter_2(date, 2, [1, 2, 3]) + assert 3 == array_getter.call_count + c.numpy_getter(1, 2, 3) + c.numpy_getter(1, 2, 4, 5) + assert 5 == array_getter.call_count + + def test_pathos(): def df_getter(*args, **kwargs): return pd.DataFrame([[1, 2, 3], [4, 5, 6]]) @@ -130,7 +237,7 @@ def pandas_getter(a, b, *args, c=False, **kwargs): return df_getter() with tempfile.TemporaryDirectory() as d: - with patch.dict("os.environ", {"PANDAS_CACHE_PATH": str(d)}, clear=True): + with patch.dict("os.environ", {"CACHE_PATH": str(d)}, clear=True): pandas_multi_getter(1, 2) pandas_getter(1, 2)