Skip to content

Commit

Permalink
Merge pull request #22 from kthyng/main
Browse files Browse the repository at this point in the history
get axes/coords use regex matching too
  • Loading branch information
kthyng authored Jan 3, 2023
2 parents df4d94a + a1f3aa8 commit 27c07f0
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 4 deletions.
30 changes: 26 additions & 4 deletions cf_pandas/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import itertools
from collections import ChainMap
from typing import (
Any,
Callable,
Expand All @@ -25,9 +26,14 @@

import cf_pandas as cfp

from .criteria import coordinate_criteria
from .criteria import coordinate_criteria, guess_regex
from .options import OPTIONS
from .utils import always_iterable, match_criteria_key, set_up_criteria
from .utils import (
_is_datetime_like,
always_iterable,
match_criteria_key,
set_up_criteria,
)
from .vocab import Vocab

#: `axis` names understood by cf_xarray
Expand Down Expand Up @@ -195,7 +201,6 @@ def axes(self) -> Dict[str, List[str]]:
"""
# vardict = {key: self.__getitem__(key) for key in _AXIS_NAMES}
vardict = {key: _get_all(self._obj, key) for key in _AXIS_NAMES}

return {k: sorted(v) for k, v in vardict.items() if v}

@property
Expand Down Expand Up @@ -275,23 +280,28 @@ def standard_names(self):

def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
"""
Translate from axis or coord name to variable name
Translate from axis or coord name to variable name. After matching based on coordinate_criteria,
if there are no matches for key, then guess_regex is used to search for matches.
Parameters
----------
obj : DataFrame, Series
pandas object whose columns are checked against `key`
key : str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"]
key to check for.
Returns
-------
List[str], Variable name(s) in parent xarray object that matches axis or coordinate `key`
Notes
-----
This function checks for the following attributes in order
- `standard_name` (CF option)
- `_CoordinateAxisType` (from THREDDS)
- `axis` (CF option)
- `positive` (CF standard for non-pressure vertical coordinate)
References
----------
MetPy's parse_cf
Expand Down Expand Up @@ -340,6 +350,18 @@ def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
# units = getattr(col.data, "units", None)
# if units in expected:
# results.update((col,))

# also use the guess_regex approach by default, but only if no results so far
# this takes the logic from cf-xarray guess_coord_axis
if len(results) == 0:
if obj[col].ndim == 1 and _is_datetime_like(obj[col]):
results.update((col,))
continue # prevent second detection

pattern = guess_regex[key]
if pattern.match(col.lower()):
results.update((col,))

return list(results)


Expand Down
11 changes: 11 additions & 0 deletions cf_pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from collections import ChainMap
from typing import Any, Iterable, Optional, Union

import numpy as np
import pandas as pd
import regex
from pandas import Series

from .options import OPTIONS

Expand Down Expand Up @@ -152,3 +154,12 @@ def standard_names():
standard_names = [entry.get("id") for entry in soup.find_all("entry")]

return standard_names


def _is_datetime_like(da: Series) -> bool:
    """Return whether ``da`` has a datetime-like or timedelta-like dtype.

    Parameters
    ----------
    da : Series
        Object exposing a ``dtype`` attribute (typically a DataFrame column).

    Returns
    -------
    bool
        True for ``datetime64`` (timezone-naive or timezone-aware) and
        ``timedelta64`` dtypes, False for everything else.

    Notes
    -----
    The pandas dtype predicates are used instead of ``np.issubdtype`` because
    the latter raises ``TypeError`` when handed a pandas extension dtype
    (e.g. ``datetime64[ns, UTC]`` or ``category``), which NumPy cannot
    interpret as one of its own dtypes.
    """
    dtype = da.dtype
    return bool(
        pd.api.types.is_datetime64_any_dtype(dtype)
        or pd.api.types.is_timedelta64_dtype(dtype)
    )
7 changes: 7 additions & 0 deletions tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,10 @@ def test_set_item():
assert all(df.cf["temp"].values == np.arange(8))
df.cf["longitude"] = np.arange(8)
assert all(df.cf["longitude"].values == np.arange(8))


def test_get_by_guess_regex():
    """Check that the accessor resolves coordinate keys from column names.

    The frame is built with column labels only (no rows), and each
    coordinate key is expected to map to the column named alongside it.
    """
    expected_column = {"longitude": "lon", "latitude": "lat", "time": "min"}
    df = pd.DataFrame(columns=list(expected_column.values()))

    for key, column in expected_column.items():
        assert df.cf[key].name == column
12 changes: 12 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from unittest import mock

import pandas as pd
import requests

import cf_pandas as cfp
Expand Down Expand Up @@ -49,3 +50,14 @@ def test_standard_names(mock_requests):
mock_requests.return_value = resp
names = cfp.standard_names()
assert "wind_speed" in names


def test__is_datetime_like():
    """Exercise ``cfp.utils._is_datetime_like`` on datetime and string columns."""
    is_datetime_like = cfp.utils._is_datetime_like

    # A column built from a date range is recognised as datetime-like.
    stamps = pd.DataFrame(
        {"time": pd.date_range(start="2001-1-1", end="2001-1-5", freq="1D")}
    )
    assert is_datetime_like(stamps["time"])

    # Date-looking strings are not datetime-like until they are parsed.
    raw = pd.DataFrame({"time": ["2001-1-1", "2001-1-2", "2001-1-3"]})
    assert not is_datetime_like(raw["time"])
    assert is_datetime_like(pd.to_datetime(raw["time"]))

0 comments on commit 27c07f0

Please sign in to comment.