Skip to content

Commit

Permalink
Merge pull request #17 from kthyng/split
Browse files Browse the repository at this point in the history
Split
  • Loading branch information
kthyng authored Dec 9, 2022
2 parents f5a5e70 + aa5b6b9 commit 6a79ff3
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ repos:
- id: file-contents-sorter
files: requirements-dev.txt

- repo: https://gitlab.com/pycqa/flake8
- repo: https://github.com/pycqa/flake8
rev: 3.7.9
hooks:
- id: flake8
Expand Down
7 changes: 7 additions & 0 deletions cf_pandas/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
import cf_pandas as cfp


# Remove any "cf" attribute already set on DataFrame before the accessor is
# registered again below; the aim is to avoid the warning that would
# otherwise be emitted.
try:
    delattr(pd.DataFrame, "cf")
except AttributeError:
    # No "cf" attribute on DataFrame yet, so there is nothing to remove.
    pass


@pd.api.extensions.register_dataframe_accessor("cf")
class CFAccessor:
"""Dataframe accessor analogous to cf-xarray accessor."""
Expand Down
36 changes: 27 additions & 9 deletions cf_pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def match_criteria_key(
available_values: list,
keys_to_match: Union[str, list],
criteria: Optional[dict] = None,
split: bool = False,
) -> list:
"""Use criteria to choose match to key from available_values.
Expand All @@ -72,6 +73,8 @@ def match_criteria_key(
Key(s) from criteria to match with available_values.
criteria : dict, optional
Criteria to use to map from variable to attributes describing the variable. If user has defined custom_criteria, this will be used by default.
split : bool, optional
If split is True, split each of the available_values on whitespace before performing matches. This is helpful e.g. when column headers have the form "standard_name (units)" and you want to match standard_name.
Returns
-------
Expand All @@ -93,17 +96,32 @@ def match_criteria_key(
# criterion is the attribute type — in this function we don't use it,
# instead we use all the patterns available in criteria to match with available_values
for criterion, patterns in custom_criteria[key].items():
results.extend(
list(
set(
[
value
for value in available_values
if regex.match(patterns, value)
]
if split:
results.extend(
list(
set(
[
value
for value in available_values
for value_part in value.split()
if regex.match(patterns, value_part)
]
)
)
)

else:
results.extend(
list(
set(
[
value
for value in available_values
if regex.match(patterns, value)
]
)
)
)
)

# catch scenario that user input valid reader variable names
else:
Expand Down
16 changes: 16 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@ def test_match_criteria_key():
assert cfp.match_criteria_key(vals, ["wind_s"], criteria) == ["wind_speed"]


def test_match_criteria_key_split():
    """Check ``match_criteria_key`` with ``split=True``.

    The same expectation is checked twice: once with the criteria supplied
    through ``cfp.set_options`` and once with the criteria passed directly
    as an argument.
    """

    candidates = ["wind_speed (m/s)", "WIND_SPEED", "wind_speed_status"]
    expected = ["wind_speed (m/s)"]

    # criteria provided via set_options
    with cfp.set_options(custom_criteria=criteria):
        found = cfp.match_criteria_key(candidates, ["wind_s"], split=True)
        assert found == expected

    # criteria provided as a direct input
    found = cfp.match_criteria_key(candidates, ["wind_s"], criteria, split=True)
    assert found == expected


def test_standard_names():

names = cfp.standard_names()
Expand Down

0 comments on commit 6a79ff3

Please sign in to comment.