From f57caad117f42893441957f2105d6e26bfce866e Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Thu, 15 Dec 2022 16:52:58 -0600 Subject: [PATCH 1/4] tons of new stuff from cf-xarray! --- cf_pandas/accessor.py | 414 +++++++++++++++++++++++++++++++++++++++-- cf_pandas/utils.py | 3 + cf_pandas/vocab.py | 2 + docs/demo_overview.md | 65 +++++-- tests/test_accessor.py | 86 +++++++++ tests/test_utils.py | 12 +- 6 files changed, 554 insertions(+), 28 deletions(-) diff --git a/cf_pandas/accessor.py b/cf_pandas/accessor.py index 05cd085..e124d4b 100644 --- a/cf_pandas/accessor.py +++ b/cf_pandas/accessor.py @@ -2,10 +2,46 @@ From cf-xarray. """ +import itertools + +from typing import ( + Any, + Callable, + Dict, + Hashable, + Iterable, + List, + Mapping, + MutableMapping, + Sequence, + Set, + Tuple, + TypeVar, + Union, + cast, +) + import pandas as pd +from pandas import DataFrame, Series + import cf_pandas as cfp +from .criteria import coordinate_criteria +from .options import OPTIONS +from .utils import always_iterable, match_criteria_key, set_up_criteria +from .vocab import Vocab + + +#: `axis` names understood by cf_xarray +_AXIS_NAMES = ("X", "Y", "Z", "T") + +#: `coordinate` types understood by cf_xarray. +_COORD_NAMES = ("longitude", "latitude", "vertical", "time") + +# Type for Mapper functions +Mapper = Callable[[DataFrame, str], List[str]] + try: # delete the accessor to avoid warning @@ -19,28 +55,376 @@ class CFAccessor: """Dataframe accessor analogous to cf-xarray accessor.""" def __init__(self, pandas_obj): - # self._validate(pandas_obj) + self._validate(pandas_obj) self._obj = pandas_obj - # @staticmethod - # def _validate(obj): - # # verify there is a column latitude and a column longitude - # if "latitude" not in obj.columns or "longitude" not in obj.columns: - # raise AttributeError("Must have 'latitude' and 'longitude'.") + @staticmethod + def _validate(obj): + """what is necessary for basic use.""" + + # verify that necessary keys are present. Z would also be nice but might be missing. + # but don't use the accessor to check + keys = ["T", "longitude", "latitude"] + missing_keys = [key for key in keys if len(_get_axis_coord(obj, key)) == 0] + if len(missing_keys) > 0: + raise AttributeError( + f'{"longitude", "latitude", "time"} must be identifiable in DataFrame but {missing_keys} are missing.' + ) + + # for key in keys: + # if len(_get_axis_coord(obj, "T")) == 0: + + # if (len(_get_axis_coord(obj, "T")) == 0) or (len(_get_axis_coord(obj, "longitude")) == 0) + + # if not {"longitude", "latitude", "time"} <= obj.cf.coordinates(): + # raise AttributeError(f'{"longitude", "latitude", "time"} must be identifiable in DataFrame but recognized keys are {obj.cf.keys()}.') + + def __getitem__(self, key: str) -> Union[pd.Series, pd.DataFrame]: + """Select columns or columns by alias. + + If one column matches key, return a Series. Otherwise return a DataFrame. - def __getitem__(self, key: str): - """Redefinition of dict-like behavior. - This enables user to use syntax `reader[dataset_id]` to read in and - save dataset into the object. Parameters ---------- key: str - dataset_id for a dataset that is available in the search/reader - object. + key in custom criteria/vocabulary to match with columns of DataFrame, or in axes or coordinates. + Returns ------- - xarray Dataset of the data associated with key + Series, DataFrame + with matching column(s) included. + + Example + ------- + >>> df.cf[alias] """ - col_name = cfp.match_criteria_key(self._obj.columns.values, key, split=True) - return self._obj[col_name] + # if key is a coordinate or axes, use a different method to match + valid_keys = _COORD_NAMES + _AXIS_NAMES + if key in valid_keys: + col_names = _get_axis_coord(self._obj, key) + + else: + col_names = _get_custom_criteria(self._obj, key) + + # return series + if len(col_names) == 1: + return self._obj[col_names[0]] + # return DataFrame + elif len(col_names) > 1: + return self._obj[col_names] + else: + raise ValueError("Some error has occurred.") + + def __setitem__(self, key: str, values: Union[Sequence, Series]): + """Set column by alias. + + Parameters + ---------- + key: str + key in custom criteria/vocabulary to match with columns of DataFrame, or in axes or coordinates. + values : Union[Sequence, pd.Series] + Values to set into object. + + Raises + ------ + ValueError + Can only set one column at once. + """ + + col = self.__getitem__(key) + if isinstance(col, Series): + self._obj[col.name] = values + # return self._obj[col.name] + elif col is None: + # make new column + self._obj[key] = values + # return self._obj[key] + else: + raise ValueError("Setting item only works if key matches one column only.") + + def __contains__(self, item: str) -> bool: + """ + Check whether item is a valid key for indexing with .cf + """ + return item in self.keys() + + def keys(self) -> Set[str]: + """ + Utility function that returns valid keys for .cf[]. + + This is useful for checking whether a key is valid for indexing, i.e. + that the attributes necessary to allow indexing by that key exist. + + Returns + ------- + set + Set of valid key names that can be used with __getitem__ or .cf[key]. + """ + + varnames = list(self.axes) + list(self.coordinates) + try: + varnames.extend(list(self.custom_keys)) + except ValueError: + # don't have criteria defined, then no custom keys to report + pass + # varnames.extend(list(self.cell_measures)) + # varnames.extend(list(self.standard_names)) + # varnames.extend(list(self.cf_roles)) + + return set(varnames) + + @property + def axes(self) -> Dict[str, List[str]]: + """ + Property that returns a dictionary mapping valid Axis standard names for ``.cf[]`` + to variable names. + + This is useful for checking whether a key is valid for indexing, i.e. + that the attributes necessary to allow indexing by that key exist. + It will return the Axis names ``("X", "Y", "Z", "T")`` + present in ``.columns``. + + Returns + ------- + dict + Dictionary with keys that can be used with ``__getitem__`` or as ``.cf[key]``. + Keys will be the appropriate subset of ("X", "Y", "Z", "T"). + Values are lists of variable names that match that particular key. + """ + # vardict = {key: self.__getitem__(key) for key in _AXIS_NAMES} + vardict = {key: _get_all(self._obj, key) for key in _AXIS_NAMES} + + return {k: sorted(v) for k, v in vardict.items() if v} + + @property + def coordinates(self) -> Dict[str, List[str]]: + """ + Property that returns a dictionary mapping valid Coordinate standard names for ``.cf[]`` + to variable names. + + This is useful for checking whether a key is valid for indexing, i.e. + that the attributes necessary to allow indexing by that key exist. + It will return the Coordinate names ``("latitude", "longitude", "vertical", "time")`` + present in ``.columns``. + + Returns + ------- + dict + Dictionary of valid Coordinate names that can be used with ``__getitem__`` or ``.cf[key]``. + Keys will be the appropriate subset of ``("latitude", "longitude", "vertical", "time")``. + Values are lists of variable names that match that particular key. + """ + # vardict = {key: self.__getitem__(key) for key in _COORD_NAMES} + vardict = {key: _get_all(self._obj, key) for key in _COORD_NAMES} + + return {k: sorted(v) for k, v in vardict.items() if v} + + @property + def custom_keys(self): + """ + Returns a dictionary mapping criteria keys to variable names. + + Returns + ------- + dict + Dictionary mapping criteria keys to variable names. + + Notes + ----- + Need to use this with context manager version of providing custom_criteria. + """ + + custom_criteria = set_up_criteria() + vardict = { + key: _get_custom_criteria(self._obj, key) for key in custom_criteria.keys() + } + + return vardict + + @property + def standard_names(self): + """ + Returns a dictionary mapping standard_names to variable names, if there is a match. Compares with all cf-standard names. + + Returns + ------- + dict + Dictionary mapping standard_names to variable names. + + Notes + ----- + This is not the same as the cf-xarray accessor method of the same name, which searches for variables with standard_name attributes and surfaces those values to map to the variable name. + """ + + names = cfp.standard_names() + + vardict = {} + for key in names: + local_criteria = Vocab().make_entry(key, f"{key}$") + key_match = _get_custom_criteria( + self._obj, key, criteria=local_criteria.vocab + ) + + if len(key_match) > 0: + vardict[key] = key_match + + return vardict + + +def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list: + """ + Translate from axis or coord name to variable name + Parameters + ---------- + obj : DataArray, Dataset + DataArray belonging to the coordinate to be checked + key : str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"] + key to check for. + Returns + ------- + List[str], Variable name(s) in parent xarray object that matches axis or coordinate `key` + Notes + ----- + This functions checks for the following attributes in order + - `standard_name` (CF option) + - `_CoordinateAxisType` (from THREDDS) + - `axis` (CF option) + - `positive` (CF standard for non-pressure vertical coordinate) + References + ---------- + MetPy's parse_cf + """ + + valid_keys = _COORD_NAMES + _AXIS_NAMES + if key not in valid_keys: + raise KeyError( + f"cf_xarray did not understand key {key!r}. Expected one of {valid_keys!r}" + ) + + # search_in = set() + # attrs_or_encoding = ChainMap(obj.attrs, obj.encoding) + # coordinates = attrs_or_encoding.get("coordinates", None) + + # # Handles case where the coordinates attribute is None + # # This is used to tell xarray to not write a coordinates attribute + # if coordinates: + # search_in.update(coordinates.split(" ")) + # if not search_in: + # search_in = set(obj.coords) + + # # maybe only do this for key in _AXIS_NAMES? + # search_in.update(obj.indexes) + + # search_in = search_in & set(obj.coords) + results: set = set() + for col in obj.columns: + # var = obj.coords[coord] + if key in coordinate_criteria: + # import pdb; pdb.set_trace() + for criterion, expected in coordinate_criteria[key].items(): + # allow for the column header having a space in it that separate + # the name from the units, for example + strings = col.split() + for string in strings: + string = string.lower() + if string.startswith("(") and string.endswith(")"): + if string.strip(")(") in expected: + results.update((col,)) + if string in expected: + # if col.attrs.get(criterion, None) in expected: + results.update((col,)) + # if criterion == "units": + # # deal with pint-backed objects + # units = getattr(col.data, "units", None) + # if units in expected: + # results.update((col,)) + return list(results) + + +def _get_all(obj: DataFrame, key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names + """ + all_mappers = ( + _get_custom_criteria, + # functools.partial(_get_custom_criteria, criteria=cf_role_criteria), + _get_axis_coord, + # _get_measure, + # _get_with_standard_name, + ) + results = apply_mapper(all_mappers, obj, key, error=False, default=None) + return list(set(results)) + + +def apply_mapper( + mappers: Union[Mapper, Tuple[Mapper, ...]], + obj: DataFrame, + key: Hashable, + error: bool = True, + default: Any = None, +) -> List[Any]: + """ + Applies a mapping function; does error handling / returning defaults. + Expects the mapper function to raise an error if passed a bad key. + It should return a list in all other cases including when there are no + results for a good key. + """ + + if not isinstance(key, Hashable): + if default is None: + raise ValueError( + "`default` must be provided when `key` is not not a valid DataArray name (of hashable type)." + ) + return list(always_iterable(default)) + + default = [] if default is None else list(always_iterable(default)) + + def _apply_single_mapper(mapper): + + try: + results = mapper(obj, key) + except (KeyError, ValueError) as e: + if error or "I expected only one." in repr(e): + raise e + else: + results = [] + return results + + if not isinstance(mappers, Iterable): + mappers = (mappers,) + + # apply a sequence of mappers + # if the mapper fails, it *should* return an empty list + # if the mapper raises an error, that is processed based on `error` + results = [] + for mapper in mappers: + results.append(_apply_single_mapper(mapper)) + + flat = list(itertools.chain(*results)) + # # de-duplicate + # if all(not isinstance(r, DataArray) for r in flat): + # results = list(set(flat)) + # else: + # results = flat + results = flat + + nresults = any(bool(v) for v in [results]) + if not nresults: + if error: + raise KeyError( + f"cf-xarray cannot interpret key {key!r}. Perhaps some needed attributes are missing." + ) + else: + # none of the mappers worked. Return the default + return default + return results + + +# Already use match_criteria_key in other functions, and it is a bit more generic so can be used +# without accessor. +def _get_custom_criteria(obj: DataFrame, key: str, criteria=None) -> List[str]: + + results = match_criteria_key(obj.columns, key, criteria, split=True) + return results diff --git a/cf_pandas/utils.py b/cf_pandas/utils.py index 00b84e3..0cbb039 100644 --- a/cf_pandas/utils.py +++ b/cf_pandas/utils.py @@ -54,6 +54,9 @@ def set_up_criteria(criteria: Union[dict, Iterable] = None) -> ChainMap: else: criteria_it = always_iterable(criteria, allowed=(tuple, list, set)) + # # Add in coordinate_criteria to be able to identify coordinates too + # criteria_it[0].update(coordinate_criteria) + return ChainMap(*criteria_it) diff --git a/cf_pandas/vocab.py b/cf_pandas/vocab.py index bd8568c..384e058 100644 --- a/cf_pandas/vocab.py +++ b/cf_pandas/vocab.py @@ -53,6 +53,8 @@ def make_entry( entry[nickname][attr] = "|".join(expressions) self.__iadd__(entry) + return self + def add( self, other_vocab: Union[DefaultDict[str, Dict[str, str]], "Vocab"], method: str ) -> "Vocab": diff --git a/docs/demo_overview.md b/docs/demo_overview.md index b2a8e31..e6e1fc7 100644 --- a/docs/demo_overview.md +++ b/docs/demo_overview.md @@ -6,22 +6,65 @@ jupytext: format_version: 0.13 jupytext_version: 1.14.0 kernelspec: - display_name: Python 3 (ipykernel) + display_name: Python 3.10.6 ('cf-pandas') language: python name: python3 --- # How to use `cf-pandas` -The main use of `cf-pandas` currently is for selecting a variable from a `pandas DataFrame` using the accessor and a custom vocabulary that searches column names for a match to the regular expressions. There are several class and utilities that support this functionality that are used internally but are also helpful for other packages. +The main use of `cf-pandas` currently is for selecting columns of a DataFrame that represent axes or coordinates of the dataset and for selecting a variable from a `pandas DataFrame` using the accessor and a custom vocabulary that searches column names for a match to the regular expressions, as well as some other capabilities that have been ported over from `cf-xarray`. There are several class and utilities that support this functionality that are used internally but are also helpful for other packages. ```{code-cell} ipython3 import cf_pandas as cfp import pandas as pd ``` +## Get some data + +```{code-cell} ipython3 +# Some data +url = "https://files.stage.platforms.axds.co/axiom/netcdf_harvest/basis/2013/BE2013_/data.csv.gz" +df = pd.read_csv(url) +df +``` + +## Basic accessor usage + +The terminology all comes from `cf-xarray` which deals with multi-dimensional data and has more layers of standardized attributes. This package ports over useful functionality, retaining some of the complexity of terminology and syntax from `cf-xarray` which doesn't always apply. The perspective is to be able to think about and use DataFrames of data in a similar manner to Datasets of data/model output. + +When you use the `cf-pandas` accessor it will first validate that columns representing time, latitude, and longitude are present and identifiable (by validating the object). + +Using an approach copied directly from `cf-xarray`, `cf-pandas` contains a mapping of names from the CF conventions that define the axes ("T", "Z", "Y", "X") and coordinates ("time", "vertical", "latitude", "longitude"). These are built in and used to identify columns containing axes and coordinates using name matching (column names are split by white space for the comparison). + +Check axes and coordinates mappings of the dataset: + +```{code-cell} ipython3 +df.cf.axes, df.cf.coordinates +``` + +Check all available keys: + +```{code-cell} ipython3 +df.cf.keys() +``` + +Is a certain key in the DataFrame? + +```{code-cell} ipython3 +"T" in df.cf, "X" in df.cf +``` + +What CF standard names can be identified with strict matching in the column names? Column names will be split by white space for this comparison. + +```{code-cell} ipython3 +df.cf.standard_names +``` + ## Select variable +Selecting a variable typically requires knowing the name of the column representing the variable. What is demonstrated here is an approach to selecting a column name containing the variable using regular expression matching. In this case, the user defines the regular expression matching that will be used to identify matches to a variable. There are helper functions for this process available in `cf-pandas`; see the `Reg`, `Vocab`, and `widget` classes and below for more information. + +++ ### Create custom vocabulary @@ -40,16 +83,10 @@ reg = cfp.Reg(include="salinity", exclude="soil", exclude_end="_qc") # Make an entry to add to your vocabulary vocab.make_entry("salt", reg.pattern(), attr="standard_name") -vocab -``` - -### Get some data +# Add another entry to vocab +vocab.make_entry("temp", "temp") -```{code-cell} ipython3 -# Some data -url = "https://files.stage.platforms.axds.co/axiom/netcdf_harvest/basis/2013/BE2013_/data.csv.gz" -df = pd.read_csv(url) -df +vocab ``` ### Access variable @@ -70,6 +107,12 @@ cfp.set_options(custom_criteria=vocab.vocab) df.cf["salt"] ``` +Display mapping of all variables in the dataset that can be identified using the custom criteria/vocab we defined above: + +```{code-cell} ipython3 +df.cf.custom_keys +``` + ## Other utilities +++ diff --git a/tests/test_accessor.py b/tests/test_accessor.py index 5212897..78b920a 100644 --- a/tests/test_accessor.py +++ b/tests/test_accessor.py @@ -1,7 +1,11 @@ """Test cf-pandas.""" +from unittest import mock + +import numpy as np import pandas as pd import pytest +import requests import cf_pandas as cfp @@ -10,6 +14,9 @@ "wind_s": { "standard_name": "wind_speed$", }, + "temp2": { + "standard_name": "temp$", + }, } @@ -19,6 +26,16 @@ def test_options(): cfp.set_options(DISPLAY_WIDTH=80) +def test_validate(): + df = pd.DataFrame( + columns=[ + "temp", + ] + ) + with pytest.raises(AttributeError): + df.cf.keys() + + def test_match_criteria_key_accessor(): df = pd.DataFrame( @@ -28,9 +45,78 @@ def test_match_criteria_key_accessor(): "wind_speed (m/s)", "WIND_SPEED", "wind_speed_status", + "longitude (degrees_east)", + "X", + "latitude", + "time", ] ) # test accessor with set_options criteria with cfp.set_options(custom_criteria=criteria): assert sorted(df.cf["wind_s"].columns) == ["wind_speed", "wind_speed (m/s)"] + assert isinstance(df.cf["wind_s"], pd.DataFrame) + assert df.cf["temp2"].name == "temp" + assert isinstance(df.cf["temp2"], pd.Series) + assert df.cf["longitude"].name == "longitude (degrees_east)" + assert df.cf.custom_keys["temp2"] == ["temp"] + assert sorted(df.cf.custom_keys["wind_s"]) == ["wind_speed", "wind_speed (m/s)"] + assert sorted(df.cf.axes) == ["T", "X"] + assert sorted(df.cf.coordinates) == ["latitude", "longitude", "time"] + assert sorted(df.cf.keys()) == [ + "T", + "X", + "latitude", + "longitude", + "temp2", + "time", + "wind_s", + ] + assert "X" in df.cf + assert "Y" not in df.cf + + +@mock.patch("requests.get") +def test_standard_names(mock_requests): + + resp = requests.models.Response + resp.content = b"""\n\n 79\n 2022-03-19T15:25:54Z\n Centre for Environmental Data Analysis\n support@ceda.ac.uk\n\n \n \n \n \n \n """ + mock_requests.return_value = resp + df = pd.DataFrame( + columns=[ + "temp", + "wind_speed", + "wind_speed (m/s)", + "WIND_SPEED", + "wind_speed_status", + "longitude (degrees_east)", + "X", + "latitude", + "time", + ] + ) + assert df.cf.standard_names["longitude"] == ["longitude (degrees_east)"] + assert sorted(df.cf.standard_names["wind_speed"]) == [ + "wind_speed", + "wind_speed (m/s)", + ] + + +def test_set_item(): + df = pd.DataFrame( + columns=[ + "temp", + "wind_speed", + "wind_speed (m/s)", + "WIND_SPEED", + "wind_speed_status", + "longitude", + "latitude", + "time", + ] + ) + with cfp.set_options(custom_criteria=criteria): + df.cf["temp"] = np.arange(8) + assert all(df.cf["temp"].values == np.arange(8)) + df.cf["longitude"] = np.arange(8) + assert all(df.cf["longitude"].values == np.arange(8)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 0a1d806..7750498 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,9 @@ """Test cf-pandas utils.""" +from unittest import mock + +import requests + import cf_pandas as cfp @@ -38,7 +42,11 @@ def test_match_criteria_key_split(): ] -def test_standard_names(): +@mock.patch("requests.get") +def test_standard_names(mock_requests): + resp = requests.models.Response + resp.content = b"""\n\n 79\n 2022-03-19T15:25:54Z\n Centre for Environmental Data Analysis\n support@ceda.ac.uk\n\n \n \n \n \n \n """ + mock_requests.return_value = resp names = cfp.standard_names() - assert "sea_water_temperature" in names + assert "wind_speed" in names From 6197165fa116ed7606bf8a9206033248a7453470 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Thu, 15 Dec 2022 17:11:15 -0600 Subject: [PATCH 2/4] forgot criteria --- cf_pandas/criteria.py | 117 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 cf_pandas/criteria.py diff --git a/cf_pandas/criteria.py b/cf_pandas/criteria.py new file mode 100644 index 0000000..e158574 --- /dev/null +++ b/cf_pandas/criteria.py @@ -0,0 +1,117 @@ +""" +Criteria for identifying axes and coordinate variables. +Reused with modification from cf-xarray which modified from MetPy under the terms of the BSD 3-Clause License. +Copyright (c) 2017 MetPy Developers. +""" + +from typing import Mapping, MutableMapping, Tuple +import re + +coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { + "latitude": { + "standard_name": ("latitude",), + "units": ( + "degree_north", + "degree_N", + "degreeN", + "degrees_north", + "degrees_N", + "degreesN", + ), + "_CoordinateAxisType": ("Lat",), + }, + "longitude": { + "standard_name": ("longitude",), + "units": ( + "degree_east", + "degree_E", + "degreeE", + "degrees_east", + "degrees_E", + "degreesE", + ), + "_CoordinateAxisType": ("Lon",), + }, + "Z": { + "standard_name": ( + "model_level_number", + "atmosphere_ln_pressure_coordinate", + "atmosphere_sigma_coordinate", + "atmosphere_hybrid_sigma_pressure_coordinate", + "atmosphere_hybrid_height_coordinate", + "atmosphere_sleve_coordinate", + "ocean_sigma_coordinate", + "ocean_s_coordinate", + "ocean_s_coordinate_g1", + "ocean_s_coordinate_g2", + "ocean_sigma_z_coordinate", + "ocean_double_sigma_coordinate", + ), + "_CoordinateAxisType": ( + "GeoZ", + "Height", + "Pressure", + ), + "axis": ("Z",), + "cartesian_axis": ("Z",), + "grads_dim": ("z",), + }, + "vertical": { + "standard_name": ( + "air_pressure", + "height", + "depth", + "geopotential_height", + # computed dimensional coordinate name + "altitude", + "height_above_geopotential_datum", + "height_above_reference_ellipsoid", + "height_above_mean_sea_level", + ), + "positive": ("up", "down"), + }, + "X": { + "standard_name": ("projection_x_coordinate",), + "_CoordinateAxisType": ("GeoX",), + "axis": ("X",), + "cartesian_axis": ("X",), + "grads_dim": ("x",), + }, + "Y": { + "standard_name": ("projection_y_coordinate",), + "_CoordinateAxisType": ("GeoY",), + "axis": ("Y",), + "cartesian_axis": ("Y",), + "grads_dim": ("y",), + }, + "T": { + "standard_name": ("time",), + "_CoordinateAxisType": ("Time",), + "axis": ("T",), + "cartesian_axis": ("T",), + "grads_dim": ("t",), + }, +} + +coordinate_criteria["time"] = coordinate_criteria["T"] + +# "long_name" and "standard_name" criteria are the same. For convenience. +for coord, attrs in coordinate_criteria.items(): + coordinate_criteria[coord]["long_name"] = coordinate_criteria[coord][ + "standard_name" + ] +coordinate_criteria["X"]["long_name"] += ("cell index along first dimension",) +coordinate_criteria["Y"]["long_name"] += ("cell index along second dimension",) + +guess_regex = { + "time": re.compile("\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*"), + "Z": re.compile( + "(z|nav_lev|gdep|lv_|[o]*lev|bottom_top|sigma|h(ei)?ght|altitude|depth|" + "isobaric|pres|isotherm)[a-z_]*[0-9]*" + ), + "Y": re.compile("y|j|nlat|nj"), + "latitude": re.compile("y?(nav_lat|lat|gphi)[a-z0-9]*"), + "X": re.compile("x|i|nlon|ni"), + "longitude": re.compile("x?(nav_lon|lon|glam)[a-z0-9]*"), +} +guess_regex["T"] = guess_regex["time"] \ No newline at end of file From cc74b0c00e2213ef842edef0b4799e61475a28d1 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Thu, 15 Dec 2022 17:16:42 -0600 Subject: [PATCH 3/4] forgot to lint criteria --- .pre-commit-config.yaml | 1 + cf_pandas/criteria.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1db000c..9b19465 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -58,5 +58,6 @@ repos: rev: v1.16.0 hooks: - id: codespell + exclude: cf_pandas/criteria.py args: - --quiet-level=2 diff --git a/cf_pandas/criteria.py b/cf_pandas/criteria.py index e158574..27c5cd4 100644 --- a/cf_pandas/criteria.py +++ b/cf_pandas/criteria.py @@ -4,9 +4,11 @@ Copyright (c) 2017 MetPy Developers. """ -from typing import Mapping, MutableMapping, Tuple import re +from typing import Mapping, MutableMapping, Tuple + + coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { "latitude": { "standard_name": ("latitude",), @@ -114,4 +116,4 @@ "X": re.compile("x|i|nlon|ni"), "longitude": re.compile("x?(nav_lon|lon|glam)[a-z0-9]*"), } -guess_regex["T"] = guess_regex["time"] \ No newline at end of file +guess_regex["T"] = guess_regex["time"] From 36ca7d0f17d82ab47fef82e768442f29a4ed24c3 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Thu, 15 Dec 2022 17:26:50 -0600 Subject: [PATCH 4/4] updated isort and everything lint changed --- .pre-commit-config.yaml | 21 +++++++++++---------- cf_pandas/__init__.py | 1 - cf_pandas/accessor.py | 3 --- cf_pandas/criteria.py | 2 -- cf_pandas/options.py | 2 -- cf_pandas/vocab.py | 1 - docs/conf.py | 1 - setup.py | 1 - tests/test_accessor.py | 1 - tests/test_reg.py | 2 -- tests/test_utils.py | 1 - tests/test_vocab.py | 1 - 12 files changed, 11 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b19465..a0b788d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,17 +28,18 @@ repos: exclude: docs/source/conf.py args: [--max-line-length=105, --ignore=E203,E501,W503, --select=select=C,E,F,W,B,B950] -- repo: https://github.com/pre-commit/mirrors-isort - rev: v4.3.21 +- repo: https://github.com/pycqa/isort + rev: 5.8.0 hooks: - - id: isort - additional_dependencies: [toml] - args: [--project=gcm_filters, --multi-line=3, --lines-after-imports=2, --lines-between-types=1, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=88] - -# - repo: https://github.com/asottile/seed-isort-config -# rev: v2.1.1 -# hooks: -# - id: seed-isort-config + - id: isort + name: isort (python) + args: ["--profile", "black", "--filter-files"] + - id: isort + name: isort (cython) + types: [cython] + - id: isort + name: isort (pyi) + types: [pyi] - repo: https://github.com/psf/black rev: 22.3.0 diff --git a/cf_pandas/__init__.py b/cf_pandas/__init__.py index 2638fa9..a318508 100644 --- a/cf_pandas/__init__.py +++ b/cf_pandas/__init__.py @@ -11,7 +11,6 @@ from .vocab import Vocab, merge from .widget import Selector, dropdown - try: __version__ = get_distribution("cf-pandas").version except DistributionNotFound: diff --git a/cf_pandas/accessor.py b/cf_pandas/accessor.py index e124d4b..899d10c 100644 --- a/cf_pandas/accessor.py +++ b/cf_pandas/accessor.py @@ -3,7 +3,6 @@ """ import itertools - from typing import ( Any, Callable, @@ -22,7 +21,6 @@ ) import pandas as pd - from pandas import DataFrame, Series import cf_pandas as cfp @@ -32,7 +30,6 @@ from .utils import always_iterable, match_criteria_key, set_up_criteria from .vocab import Vocab - #: `axis` names understood by cf_xarray _AXIS_NAMES = ("X", "Y", "Z", "T") diff --git a/cf_pandas/criteria.py b/cf_pandas/criteria.py index 27c5cd4..5b30f35 100644 --- a/cf_pandas/criteria.py +++ b/cf_pandas/criteria.py @@ -5,10 +5,8 @@ """ import re - from typing import Mapping, MutableMapping, Tuple - coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { "latitude": { "standard_name": ("latitude",), diff --git a/cf_pandas/options.py b/cf_pandas/options.py index 5fd9bd6..9d72b67 100644 --- a/cf_pandas/options.py +++ b/cf_pandas/options.py @@ -3,12 +3,10 @@ """ import copy - from typing import Any, MutableMapping import cf_pandas as cfp - OPTIONS: MutableMapping[str, Any] = { "custom_criteria": [], # "warn_on_missing_variables": True, diff --git a/cf_pandas/vocab.py b/cf_pandas/vocab.py index 384e058..0c2f43c 100644 --- a/cf_pandas/vocab.py +++ b/cf_pandas/vocab.py @@ -2,7 +2,6 @@ import json import pathlib - from collections import defaultdict from typing import DefaultDict, Dict, Optional, Sequence, Union diff --git a/docs/conf.py b/docs/conf.py index e82cee8..9708e90 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,7 +20,6 @@ # see https://pypi.org/project/setuptools-scm/ for details from pkg_resources import get_distribution - print("python exec:", sys.executable) print("sys.path:", sys.path) root = pathlib.Path(__file__).parent.parent.absolute() diff --git a/setup.py b/setup.py index c8dcf2e..03d17c1 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ from setuptools import setup - setup( use_scm_version={ "write_to": "cf_pandas/_version.py", diff --git a/tests/test_accessor.py b/tests/test_accessor.py index 78b920a..dba8455 100644 --- a/tests/test_accessor.py +++ b/tests/test_accessor.py @@ -9,7 +9,6 @@ import cf_pandas as cfp - criteria = { "wind_s": { "standard_name": "wind_speed$", diff --git a/tests/test_reg.py b/tests/test_reg.py index a1323ef..9fc2ffa 100644 --- a/tests/test_reg.py +++ b/tests/test_reg.py @@ -2,12 +2,10 @@ import pandas as pd import pytest - from pandas import testing as tm import cf_pandas as cfp - strings = [ "sea_water_temperature", "sea_water_temperature [celsius]", diff --git a/tests/test_utils.py b/tests/test_utils.py index 7750498..3b63842 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,7 +6,6 @@ import cf_pandas as cfp - criteria = { "wind_s": { "standard_name": "wind_speed$", diff --git a/tests/test_vocab.py b/tests/test_vocab.py index ffe4e09..cf1007d 100644 --- a/tests/test_vocab.py +++ b/tests/test_vocab.py @@ -1,7 +1,6 @@ """Test vocab""" import os - from collections import defaultdict import cf_pandas as cfp