Skip to content

Commit

Permalink
Merge pull request #19 from kthyng/add
Browse files Browse the repository at this point in the history
Added merge and iadd
  • Loading branch information
kthyng authored Dec 10, 2022
2 parents 39ab8fe + 3281df4 commit 0845a12
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cf_pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .options import set_options # noqa
from .reg import Reg
from .utils import always_iterable, astype, match_criteria_key, standard_names
from .vocab import Vocab
from .vocab import Vocab, merge
from .widget import Selector, dropdown


Expand Down
59 changes: 54 additions & 5 deletions cf_pandas/vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pathlib

from collections import defaultdict
from typing import DefaultDict, Dict, Optional, Union
from typing import DefaultDict, Dict, Optional, Sequence, Union

from .utils import astype

Expand Down Expand Up @@ -51,22 +51,37 @@ def make_entry(
expressions = astype(expressions, list)
entry: DefaultDict[str, Dict[str, str]] = defaultdict(dict)
entry[nickname][attr] = "|".join(expressions)
self.__add__(entry)
self.__iadd__(entry)

def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]):
def add(
self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], method: str
) -> "Vocab":
"""Add two Vocab objects together...
by adding their `.vocab`s together. Expressions are piped together but otherwise not changed.
This is used for both `__add__` and `__iadd__`.
Parameters
----------
other_vocab: Vocab
Other Vocab object to combine with.
method : str
Whether to run as "add" which returns a new Vocab object or "iadd" which adds to the original object.
Returns
-------
Vocab
vocab + other_vocab either as a new object or in place.
"""

if isinstance(other_vocab, Vocab):
other_vocab = other_vocab.vocab

if method == "add":
output = Vocab()
elif method == "iadd":
output = self

nicknames = set(list(self.vocab.keys()) + list(other_vocab.keys()))
for nickname in nicknames:

Expand All @@ -82,8 +97,22 @@ def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"])
+ "|"
+ other_vocab[nickname].get(attribute, "")
).strip("|")
self.vocab[nickname][attribute] = new_expressions
return self
output.vocab[nickname][attribute] = new_expressions
return output

def __add__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]):
    """Implement ``vocab1 + vocab2``.

    Delegates to :meth:`add` with ``method="add"``, the mode in which the
    combined result is written into a newly created ``Vocab`` rather than
    into ``self``.

    Parameters
    ----------
    other_vocab : Vocab or DefaultDict[str, Dict[str, str]]
        Right-hand operand to combine with this object.

    Returns
    -------
    Vocab
        Whatever ``self.add(other_vocab, "add")`` returns.
    """
    combined = self.add(other_vocab, "add")
    return combined

def __iadd__(self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]):
    """Implement ``vocab1 += vocab2``.

    Delegates to :meth:`add` with ``method="iadd"``, the mode in which the
    combined result is written into ``self`` and ``self`` is returned.

    Parameters
    ----------
    other_vocab : Vocab or DefaultDict[str, Dict[str, str]]
        Operand whose entries are added to this object.

    Returns
    -------
    Vocab
        Whatever ``self.add(other_vocab, "iadd")`` returns.
    """
    updated = self.add(other_vocab, "iadd")
    return updated

def __radd__(
    self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"]
) -> "Vocab":
    """Implement reflected addition, ``other_vocab + vocab``.

    Python calls this when the left-hand operand does not handle ``+`` with
    a ``Vocab`` itself. The work is forwarded unchanged to
    :meth:`__add__`, so ``self`` is treated as the first operand.

    NOTE(review): because of that forwarding, expressions from ``self``
    presumably come before those from ``other_vocab`` in the piped result,
    even though ``other_vocab`` is written on the left -- confirm against
    ``add``.

    Parameters
    ----------
    other_vocab : Vocab or DefaultDict[str, Dict[str, str]]
        Left-hand operand of the ``+`` expression.

    Returns
    -------
    Vocab
        The result of ``self.__add__(other_vocab)``.
    """
    combined = self.__add__(other_vocab)
    return combined

def save(self, savename: Union[str, pathlib.PurePath]):
"""Save to file.
Expand All @@ -108,3 +137,23 @@ def open_file(self, openname: Union[str, pathlib.PurePath]):
return json.loads(
open(pathlib.PurePath(openname).with_suffix(".json"), "r").read()
)


def merge(vocabs: Sequence[Vocab]) -> Vocab:
    """Combine several Vocab objects into one.

    Parameters
    ----------
    vocabs : Sequence[Vocab]
        Vocab objects to combine, processed in the order given.

    Returns
    -------
    Vocab
        A newly constructed Vocab to which every element of ``vocabs`` has
        been added in turn with ``+=``.
    """

    combined = Vocab()
    for item in vocabs:
        # Accumulate each input into the running result via in-place addition.
        combined += item
    return combined
15 changes: 14 additions & 1 deletion docs/demo_vocab.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jupytext:
format_version: 0.13
jupytext_version: 1.14.0
kernelspec:
display_name: Python 3 (ipykernel)
display_name: Python 3.10.6 ('cf-pandas')
language: python
name: python3
---
Expand Down Expand Up @@ -139,6 +139,19 @@ vocab2.make_entry("other_variable_nickname", "match_that_string", attr="standard
vocab1 + vocab2
```

Merge 2 or more Vocab objects:

```{code-cell} ipython3
cfp.merge([vocab1, vocab2])
```

You can also add one Vocab to another in place with `+=`:

```{code-cell} ipython3
# also works
vocab1 += vocab2
```

## Use the `Reg` class to write regular expressions

We used simple exact matching regular expressions above, but for anything more complicated it can be hard to write regular expressions. You can use the `Reg` class in `cf-pandas` to write regular expressions with several options, as demonstrated more in [another doc page](https://cf-pandas.readthedocs.io/en/latest/demo_reg.html), and briefly here.
Expand Down
18 changes: 13 additions & 5 deletions tests/test_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,27 @@ def test_make_entry():

def test_add_vocabs():
vocab = cfp.Vocab()
vocab.make_entry("temp", ["a", "b"], attr="standard_name")
vocab.make_entry("salt", ["a", "b"], attr="name")
vocab.vocab = defaultdict(
dict, {"temp": {"standard_name": "a|b"}, "salt": {"name": "a|b"}}
)
compare = {"temp": {"standard_name": "a|b|a|b"}, "salt": {"name": "a|b|a|b"}}
assert (vocab + vocab).vocab == compare

vocab2 = cfp.Vocab()
vocab2.make_entry("temp", ["a", "b"], attr="name")
vocab2.vocab = defaultdict(dict, {"temp": {"name": "a|b"}})
compare = {
"temp": {"name": "a|b", "standard_name": "a|b|a|b"},
"salt": {"name": "a|b|a|b"},
"temp": {"standard_name": "a|b", "name": "a|b"},
"salt": {"name": "a|b"},
}
assert (vocab + vocab2).vocab == compare

# also merge
assert cfp.merge([vocab, vocab2]).vocab == compare

# also iadd
vocab += vocab2
assert vocab.vocab == compare


def test_make_more_entries():
vocab = cfp.Vocab()
Expand Down

0 comments on commit 0845a12

Please sign in to comment.