diff --git a/cf_pandas/__init__.py b/cf_pandas/__init__.py index 4c249a8..2638fa9 100644 --- a/cf_pandas/__init__.py +++ b/cf_pandas/__init__.py @@ -8,7 +8,7 @@ from .options import set_options # noqa from .reg import Reg from .utils import always_iterable, astype, match_criteria_key, standard_names -from .vocab import Vocab +from .vocab import Vocab, merge from .widget import Selector, dropdown diff --git a/cf_pandas/vocab.py b/cf_pandas/vocab.py index 0574322..bd8568c 100644 --- a/cf_pandas/vocab.py +++ b/cf_pandas/vocab.py @@ -4,7 +4,7 @@ import pathlib from collections import defaultdict -from typing import DefaultDict, Dict, Optional, Union +from typing import DefaultDict, Dict, Optional, Sequence, Union from .utils import astype @@ -51,22 +51,37 @@ def make_entry( expressions = astype(expressions, list) entry: DefaultDict[str, Dict[str, str]] = defaultdict(dict) entry[nickname][attr] = "|".join(expressions) - self.__add__(entry) + self.__iadd__(entry) - def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): + def add( + self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], method: str + ) -> "Vocab": """Add two Vocab objects together... by adding their `.vocab`s together. Expressions are piped together but otherwise not changed. + This is used for both `__add__` and `__iadd__`. Parameters ---------- other_vocab: Vocab Other Vocab object to combine with. + method : str + Whether to run as "add" which returns a new Vocab object or "iadd" which adds to the original object. + + Returns + ------- + Vocab + vocab + other_vocab either as a new object or in place. 
""" if isinstance(other_vocab, Vocab): other_vocab = other_vocab.vocab + if method == "add": + output = Vocab() + elif method == "iadd": + output = self + nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys())) for nickname in nicknames: @@ -82,8 +97,22 @@ def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]) + "|" + other_vocab[nickname].get(attribute, "") ).strip("|") - self.vocab[nickname][attribute] = new_expressions - return self + output.vocab[nickname][attribute] = new_expressions + return output + + def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): + """vocab1 + vocab2""" + return self.add(other_vocab, "add") + + def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]): + """vocab1 += vocab2""" + return self.add(other_vocab, "iadd") + + def __radd__( + self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"] + ) -> "Vocab": + """right add?""" + return self.__add__(other_vocab) def save(self, savename: Union[str, pathlib.PurePath]): """Save to file. @@ -108,3 +137,23 @@ def open_file(self, openname: Union[str, pathlib.PurePath]): return json.loads( open(pathlib.PurePath(openname).with_suffix(".json"), "r").read() ) + + +def merge(vocabs: Sequence[Vocab]) -> Vocab: + """Add together multiple Vocab objects. + + Parameters + ---------- + vocabs : Sequence[Vocab] + Sequence of Vocab objects to merge. + + Returns + ------- + Vocab + Single Vocab object made up of input vocabs. 
+ """ + + final_vocab = Vocab() + for vocab in vocabs: + final_vocab += vocab + return final_vocab diff --git a/docs/demo_vocab.md b/docs/demo_vocab.md index e370eda..113d784 100644 --- a/docs/demo_vocab.md +++ b/docs/demo_vocab.md @@ -6,7 +6,7 @@ jupytext: format_version: 0.13 jupytext_version: 1.14.0 kernelspec: - display_name: Python 3 (ipykernel) + display_name: Python 3.10.6 ('cf-pandas') language: python name: python3 --- @@ -139,6 +139,19 @@ vocab2.make_entry("other_variable_nickname", "match_that_string", attr="standard vocab1 + vocab2 ``` +Merge two or more Vocab objects: + +```{code-cell} ipython3 +cfp.merge([vocab1, vocab2]) +``` + +You can also add in place, which modifies `vocab1`: + +```{code-cell} ipython3 +# also works +vocab1 += vocab2 +``` + ## Use the `Reg` class to write regular expressions We used simple exact matching regular expressions above, but for anything more complicated it can be hard to write regular expressions. You can use the `Reg` class in `cf-pandas` to write regular expressions with several options, as demonstrated more in [another doc page](https://cf-pandas.readthedocs.io/en/latest/demo_reg.html), and briefly here. 
diff --git a/tests/test_vocab.py b/tests/test_vocab.py index ebcb6d6..ffe4e09 100644 --- a/tests/test_vocab.py +++ b/tests/test_vocab.py @@ -20,19 +20,27 @@ def test_make_entry(): def test_add_vocabs(): vocab = cfp.Vocab() - vocab.make_entry("temp", ["a", "b"], attr="standard_name") - vocab.make_entry("salt", ["a", "b"], attr="name") + vocab.vocab = defaultdict( + dict, {"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}} + ) compare = {"temp": {"standard_name": "a|b|a|b"}, "salt": {"name": "a|b|a|b"}} assert (vocab + vocab).vocab == compare vocab2 = cfp.Vocab() - vocab2.make_entry("temp", ["a", "b"], attr="name") + vocab2.vocab = defaultdict(dict, {"temp": {"name": "a|b"}}) compare = { - "temp": {"name": "a|b", "standard_name": "a|b|a|b"}, - "salt": {"name": "a|b|a|b"}, + "temp": {"standard_name": "a|b", "name": "a|b"}, + "salt": {"name": "a|b"}, } assert (vocab + vocab2).vocab == compare + # also merge + assert cfp.merge([vocab, vocab2]).vocab == compare + + # also iadd + vocab += vocab2 + assert vocab.vocab == compare + def test_make_more_entries(): vocab = cfp.Vocab()