From 2f21ae076ca1ee8c146464b2ba4cf17edcfd8e87 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Thu, 23 Feb 2023 17:17:34 -0600 Subject: [PATCH 1/9] tons of changes --- intake_axds/axds.py | 70 +++++++---- intake_axds/axds_cat.py | 29 +++-- intake_axds/utils.py | 249 ++++++++++++++++++++++++++------------ tests/test_axds_sensor.py | 56 +++++---- tests/test_utils.py | 63 ++-------- 5 files changed, 270 insertions(+), 197 deletions(-) diff --git a/intake_axds/axds.py b/intake_axds/axds.py index 0709a1c..e05611e 100644 --- a/intake_axds/axds.py +++ b/intake_axds/axds.py @@ -6,6 +6,7 @@ from . import __version__ from .utils import ( + check_station, load_metadata, make_data_url, make_filter, @@ -22,9 +23,9 @@ class AXDSSensorSource(base.DataSource): Parameters ---------- internal_id : Optional[int], optional - Internal station id for Axiom, by default None. Not the UUID or dataset_id. Need to input internal_id or dataset_id. If both are input, be sure they are for the same station. - dataset_id : Optional[str], optional - The UUID for the station, by default None. Not the internal_id. Need to input internal_id or dataset_id. If both are input, be sure they are for the same station. + Internal station id for Axiom, by default None. Not the UUID. Need to input internal_id or UUID. If both are input, be sure they are for the same station. + uuid : Optional[str], optional + The UUID for the station, by default None. Not the internal_id. Need to input internal_id or UUID. If both are input, be sure they are for the same station. Note that there may also be a "datasetId" parameter which is sometimes but not always the same as the UUID. start_time : Optional[str], optional At what datetime for data to start, by default None. Must be interpretable by pandas ``Timestamp``. If not input, the datetime at which the dataset starts will be used. end_time : Optional[str], optional @@ -72,7 +73,7 @@ class AXDSSensorSource(base.DataSource): def __init__( self, internal_id: Optional[int] = None, - dataset_id: Optional[str] = None, + uuid: Optional[str] = None, start_time: Optional[str] = None, end_time: Optional[str] = None, qartod: Union[int, List[int], bool] = False, @@ -83,12 +84,12 @@ def __init__( only_pgids: Optional[List[int]] = None, ): - if internal_id is None and dataset_id is None: + if internal_id is None and uuid is None: raise ValueError( - "internal_id and dataset_id cannot both be None. Input one of them." + "internal_id and uuid cannot both be None. Input one of them." 
) - self.dataset_id = dataset_id + self.uuid = uuid self.start_time = start_time self.end_time = end_time self.internal_id = internal_id @@ -109,31 +110,48 @@ def __init__( metadata = metadata or {} + if self.internal_id is None or self.uuid is None: + # uses whichever id is not None + url = make_search_docs_url(internal_id=self.internal_id, uuid=self.uuid) + result = response_from_url(url)[0] + assert isinstance(result, dict) # for mypy + metadata.update(load_metadata("sensor_station", result)) + self.internal_id = metadata["internal_id"] + self.uuid = metadata["uuid"] + self.search_docs_url = url + # need dataset_id to get metadata - if self.dataset_id is None: - assert self.internal_id is not None - res = response_from_url(make_metadata_url(make_filter(self.internal_id))) - assert isinstance(res, dict) - self.dataset_id = res["data"]["stations"][0]["uuid"] - metadata["version"] = res["data"]["stations"][0]["version"] - - # need internal_id to get data - elif self.internal_id is None: - assert self.dataset_id is not None - res = response_from_url(make_search_docs_url(self.dataset_id))[0] - assert isinstance(res, dict) # for mypy - self.internal_id = res["id"] - metadata["version"] = res["data"]["version"] + # or use UUID as another approach like if you have the dataset_id. Not sure why they are + # sometimes different + # if self.dataset_id is None or self.uuid is None: + # assert self.internal_id is not None + # # this works but maybe better to match in the two cases + # result = response_from_url(make_metadata_url(make_filter(self.internal_id))) + # assert isinstance(result, dict) + # metadata.update(load_metadata("sensor_station", result)) + # self.dataset_id = metadata["datasetID"] + # self.uuid = metadata["uuid"] + + # # need internal_id to get data + # elif self.internal_id is None: + # assert self.dataset_id is not None or self.uuid is not None + # result = response_from_url(make_search_docs_url(self.dataset_id))[0] + # assert isinstance(result, dict) # for mypy + # metadata.update(load_metadata("sensor_station", result)) + # self.internal_id = metadata["internal_id"] + # self.uuid = metadata["uuid"] + + # not checking for now + # # check station for if we want the output or not — for when source is used directly. 
+ # _ = check_station(metadata, verbose=True) self._dataframe = None - metadata["dataset_id"] = self.dataset_id - # this is what shows in the source if you print it self._captured_init_kwargs.update( { "internal_id": self.internal_id, - "dataset_id": self.dataset_id, + "uuid": self.uuid, "start_time": self.start_time, "end_time": self.end_time, "qartod": self.qartod, @@ -358,7 +376,7 @@ def data_urls(self): if not hasattr(self, "_data_urls"): # get extended metadata which we need both for reading the data and as metadata - result = response_from_url(make_search_docs_url(self.dataset_id))[0] + result = response_from_url(make_search_docs_url(uuid=self.uuid))[0] self.metadata.update(load_metadata("sensor_station", result)) start_time = self.start_time or self.metadata["minTime"] @@ -374,7 +392,7 @@ def data_urls(self): return self._data_urls def _load(self): - """How to load in a specific station once you know it by dataset_id""" + """How to load in a specific station once you know it by uuid""" dfs = [self._load_to_dataframe(url) for url in self.data_urls] diff --git a/intake_axds/axds_cat.py b/intake_axds/axds_cat.py index d7cb03a..8ea03e2 100644 --- a/intake_axds/axds_cat.py +++ b/intake_axds/axds_cat.py @@ -21,6 +21,7 @@ match_key_to_parameter, match_std_names_to_parameter, response_from_url, + check_station, ) @@ -441,20 +442,20 @@ def _load_all_results(self) -> list: return combined_results def _load(self): - """Find all dataset ids and create catalog.""" + """Find all UUIDs and create catalog.""" results = self._load_all_results() self._entries = {} for result in results: - dataset_id = result["uuid"] + uuid = result["uuid"] # don't repeat an entry (it won't actually allow you to, but probably saves time not to try) - if dataset_id in self._entries: + if uuid in self._entries: continue if self.verbose: - print(f"Dataset ID: {dataset_id}") + print(f"Dataset ID: {uuid}") # # quick check if OPENDAP is in the access methods for this uuid, otherwise move on # if self.datatype == "module": @@ -476,14 +477,12 @@ def _load(self): # ) # continue - description = f"AXDS dataset_id {dataset_id} of datatype {self.datatype}" + description = f"AXDS dataset_id {uuid} of datatype {self.datatype}" metadata = load_metadata(self.datatype, result) - - # don't save Camera sensor data for now - if "webcam" in metadata["variables"]: - if self.verbose: - print(f"Dataset_id {dataset_id} is a webcam so is being skipped.") + + keep_station = check_station(metadata, verbose=self.verbose) + if not keep_station: continue # Find urlpath @@ -508,8 +507,8 @@ def _load(self): # this Source has different arg requirements elif self.datatype == "sensor_station": args = { - "dataset_id": dataset_id, "internal_id": metadata["internal_id"], + "uuid": uuid, "start_time": self.kwargs_search.get("min_time", None), "end_time": self.kwargs_search.get("max_time", None), # "kwargs_search": self.kwargs_search, @@ -564,7 +563,7 @@ def _load(self): # urlpath = urlpaths[0] entry = LocalCatalogEntry( - name=dataset_id, + name=uuid, description=description, driver=plugin, direct_access="allow", @@ -582,4 +581,8 @@ def _load(self): entry._plugin = [plugin] - self._entries[dataset_id] = entry + self._entries[uuid] = entry + + # final tally + if self.verbose: + print(f"Final number of stations found after removing some: {len(self._entries)}.") diff --git a/intake_axds/utils.py b/intake_axds/utils.py index a29c940..bdc3107 100644 --- a/intake_axds/utils.py +++ b/intake_axds/utils.py @@ -3,6 +3,7 @@ from importlib.metadata import 
PackageNotFoundError, version from operator import itemgetter from typing import Optional, Union +from nested_lookup import nested_lookup import cf_pandas as cfp import pandas as pd @@ -155,91 +156,136 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] dict Metadata to store with catalog entry. """ - # matching names in intake-erddap - keys = ["datasetID", "title", "summary", "type", "minTime", "maxTime"] - # names of keys in Axiom system. - items = [ - "uuid", - "label", - "description", - "type", - "start_date_time", - "end_date_time", - ] - values = itemgetter(*items)(results) - metadata = dict(zip(keys, values)) + + # mostly matching names in intake-erddap + metadata = {} + keys = ["uuid", "label", "description"] + new_names = ["uuid", "title", "summary"] + # keys = ["datasetID", "title", "summary"]#, "minTime", "maxTime"] + for new_name, key in zip(new_names, keys): + found = [value for value in nested_lookup(key, results) if value is not None] + if len(found) > 0: + metadata[new_name] = found[0] # take first instance + + new_name = "minTime" + found = nested_lookup('start', results, wild=True, with_keys=True) + for key, values in found.items(): + if values == [None]: + continue + if len(values) == 1: + metadata[new_name] = values[0] + elif len(values) > 1: + metadata[new_name] = min(values) + + new_name = "maxTime" + found = nested_lookup('end', results, wild=True, with_keys=True) + for key, values in found.items(): + if values == [None]: + continue + if len(values) == 1: + metadata[new_name] = values[0] + elif len(values) > 1: + metadata[new_name] = max(values) + + # # matching names in intake-erddap + # keys = ["datasetID", "title", "summary", "type", "minTime", "maxTime"] + # # names of keys in Axiom system. + # items = [ + # "uuid", + # "label", + # "description", + # "type", + # "start_date_time", + # "end_date_time", + # ] + # values = itemgetter(*items)(results) + # metadata = dict(zip(keys, values)) if datatype == "platform2": - metadata["institution"] = ( - results["source"]["meta"]["attributes"]["institution"] - if "institution" in results["source"]["meta"]["attributes"] - else None - ) - metadata["geospatial_bounds"] = results["source"]["meta"]["attributes"][ - "geospatial_bounds" - ] + # import pdb; pdb.set_trace() + metadata["institution"] = nested_lookup("institution", results) + metadata["geospatial_bounds"] = nested_lookup("geospatial_bounds", results)[0] + + # metadata["institution"] = ( + # results["source"]["meta"]["attributes"]["institution"] + # if "institution" in results["source"]["meta"]["attributes"] + # else None + # ) + # metadata["geospatial_bounds"] = results["source"]["meta"]["attributes"][ + # "geospatial_bounds" + # ] p1 = wkt.loads(metadata["geospatial_bounds"]) keys = ["minLongitude", "minLatitude", "maxLongitude", "maxLatitude"] metadata.update(dict(zip(keys, p1.bounds))) - - # save variable details if they have a standard_name - # some platforms have lots of variables that seem irrelevant - out = { - attrs["attributes"]["standard_name"]: { - "variable_name": varname, - "units": attrs["attributes"]["units"] - if "units" in attrs["attributes"] - else None, - "unit_id": attrs["attributes"]["unit_id"] - if "unit_id" in attrs["attributes"] - else None, - "long_name": attrs["attributes"]["long_name"], - "parameter_id": attrs["attributes"]["parameter_id"] - if "parameter_id" in attrs["attributes"] - else None, - } - for varname, attrs in results["source"]["meta"]["variables"].items() - if "standard_name" in attrs["attributes"] -
} - - metadata["variables_details"] = out - metadata["variables"] = list(out.keys()) + + metadata["variables_details"] = nested_lookup('variables', results) + metadata["variables"] = nested_lookup("standard_name", results) + + # # save variable details if they have a standard_name + # # some platforms have lots of variables that seem irrelevant + # out = { + # attrs["attributes"]["standard_name"]: { + # "variable_name": varname, + # "units": attrs["attributes"]["units"] + # if "units" in attrs["attributes"] + # else None, + # "unit_id": attrs["attributes"]["unit_id"] + # if "unit_id" in attrs["attributes"] + # else None, + # "long_name": attrs["attributes"]["long_name"], + # "parameter_id": attrs["attributes"]["parameter_id"] + # if "parameter_id" in attrs["attributes"] + # else None, + # } + # for varname, attrs in results["source"]["meta"]["variables"].items() + # if "standard_name" in attrs["attributes"] + # } + + # metadata["variables_details"] = out + # metadata["variables"] = list(out.keys()) elif datatype == "sensor_station": + + lon, lat, depth = nested_lookup('location', results)[0]["coordinates"] - # INSTITUTION? - # location is lon, lat, depth and type - # e.g. {'coordinates': [-123.711083, 38.914556, 0.0], 'type': 'Point'} - lon, lat, depth = results["data"]["location"]["coordinates"] + # # location is lon, lat, depth and type + # # e.g. {'coordinates': [-123.711083, 38.914556, 0.0], 'type': 'Point'} + # lon, lat, depth = results["data"]["location"]["coordinates"] keys = ["minLongitude", "minLatitude", "maxLongitude", "maxLatitude"] metadata.update(dict(zip(keys, [lon, lat, lon, lat]))) - # e.g. 106793 - metadata["internal_id"] = results["data"]["id"] - - # variables, standard_names (or at least parameterNames) - figs = results["data"]["figures"] - - out = { - subPlot["datasetVariableId"]: { - "parameterGroupLabel": fig["label"], - "parameterGroupId": fig["parameterGroupId"], - "datasetVariableId": subPlot["datasetVariableId"], - "parameterId": subPlot["parameterId"], - "label": subPlot["label"], - "deviceId": subPlot["deviceId"], - } - for fig in figs - for plot in fig["plots"] - for subPlot in plot["subPlots"] - } - metadata["variables_details"] = out - metadata["variables"] = list(out.keys()) - - # include datumConversion info if present - if len(results["data"]["datumConversions"]) > 0: - metadata["datumConversions"] = results["data"]["datumConversions"] + metadata["internal_id"] = int([value for value in nested_lookup('id', results) if value is not None][0]) + # # e.g. 
106793 + # metadata["internal_id"] = results["data"]["id"] + + metadata["variables_details"] = nested_lookup('figures', results)[0] + metadata["variables"] = list(set(nested_lookup('datasetVariableId', results))) + + # # variables, standard_names (or at least parameterNames) + # figs = results["data"]["figures"] + + # out = { + # subPlot["datasetVariableId"]: { + # "parameterGroupLabel": fig["label"], + # "parameterGroupId": fig["parameterGroupId"], + # "datasetVariableId": subPlot["datasetVariableId"], + # "parameterId": subPlot["parameterId"], + # "label": subPlot["label"], + # "deviceId": subPlot["deviceId"], + # } + # for fig in figs + # for plot in fig["plots"] + # for subPlot in plot["subPlots"] + # } + # metadata["variables_details"] = out + # metadata["variables"] = list(out.keys()) + + metadata["datumConversions"] = nested_lookup('datumConversions', results)[0] + + # # include datumConversion info if present + # if len(results["data"]["datumConversions"]) > 0: + # metadata["datumConversions"] = results["data"]["datumConversions"] filter = f"%7B%22stations%22:%5B%22{metadata['internal_id']}%22%5D%7D" baseurl = "https://sensors.axds.co/api" @@ -249,11 +295,49 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] # also save units here # 1 or 2? - metadata["version"] = results["data"]["version"] + metadata["version"] = nested_lookup('version', results)[0] + # metadata["version"] = results["data"]["version"] + + # name on other sites, esp for ERDDAP + metadata["foreignNames"] = list(set(nested_lookup('foreignName', results, wild=True))) + # metadata["foreignNames"] = [aff["~foreignName"] for aff in results["data"]["affiliations"] if aff["~foreignName"] is not None] return metadata +def check_station(metadata: dict, verbose: bool) -> bool: + """Whether to keep station or not. + + Parameters + ---------- + metadata : dict + metadata about station. + verbose : bool, optional + Set to True for helpful information. + + Returns + ------- + bool + True to keep station, False to skip. + """ + + keep = True + # don't save Camera sensor data for now + if "webcam" in metadata["variables"]: + keep = False + if verbose: + print(f"UUID {metadata['uuid']} is a webcam and should be skipped.") + + # these are IOOS ERDDAP and were setup to be different stations so we can see which stations + # are successfully being served through IOOS RAs. It duplicates the data (purposely) + elif "ism-" in metadata["uuid"]: + keep = False + if verbose: + print(f"UUID {metadata['uuid']} is a duplicate station from IOOS and should be skipped.") + + return keep + + def make_label(label: str, units: Optional[str] = None, use_units: bool = True) -> str: """making column name @@ -284,7 +368,7 @@ def make_filter(internal_id: int, parameterGroupId: Optional[int] = None) -> str Parameters ---------- internal_id : int - internal id for station. Not the dataset_id or uuid. + internal id for station. Not the uuid. parameterGroupId : Optional[int], optional Parameter Group ID to narrow search, by default None @@ -359,20 +443,27 @@ def make_metadata_url(filter: str) -> str: return f"{baseurl}/metadata/filter/custom?filter={filter}" -def make_search_docs_url(dataset_id: str) -> str: +def make_search_docs_url(internal_id: Optional[int] = None, uuid: Optional[str] = None) -> str: """Url for Axiom Search docs. + + Uses whichever of internal_id and uuid is not None to formulate url. Parameters ---------- - dataset_id : str - dataset_id or uuid. 
+ internal_id : Optional[int], optional + Internal station id for Axiom. Not the UUID. + uuid : str + uuid for station. Returns ------- str Url for finding Axiom Search docs """ - return f"https://search.axds.co/v2/docs?verbose=false&id={dataset_id}" + if internal_id is not None: + return f"https://search.axds.co/v2/docs?verbose=false&id=sensor_station:{internal_id}" + elif uuid is not None: + return f"https://search.axds.co/v2/docs?verbose=false&id={uuid}" def response_from_url(url: str) -> Union[list, dict]: diff --git a/tests/test_axds_sensor.py b/tests/test_axds_sensor.py index 9f74669..d69d6ca 100644 --- a/tests/test_axds_sensor.py +++ b/tests/test_axds_sensor.py @@ -134,7 +134,7 @@ def test_intake_opener(): def test_binned(): source = AXDSSensorSource( - internal_id=123456, dataset_id="test", bin_interval="monthly" + internal_id=123456, uuid="test", bin_interval="monthly" ) assert source.binned @@ -142,14 +142,15 @@ def test_binned(): @mock.patch("requests.get") def test_ids(mock_requests): mock_requests.side_effect = [ - FakeResponseSensorAPI123456(), + # FakeResponseSensorAPI123456(), + FakeResponseSearchDocsV2(), FakeResponseSearchDocsV2(), ] source = AXDSSensorSource(internal_id=123456) - assert source.dataset_id == "test_sensor" + assert source.uuid == "test_platform_parquet" - source = AXDSSensorSource(dataset_id="test_sensor") - assert source.internal_id == "123456" + source = AXDSSensorSource(uuid="test_platform_parquet") + assert source.internal_id == 123456 with pytest.raises(ValueError): source = AXDSSensorSource() @@ -158,38 +159,40 @@ def test_ids(mock_requests): def test_times(): # doesn't need response because both internal_id and dataset_id are faked upon init source = AXDSSensorSource( - internal_id=123456, dataset_id="fake", start_time="2000-1-1" + internal_id=123456, uuid="fake", start_time="2000-1-1" ) assert source.end_time is None source = AXDSSensorSource( - internal_id=123456, dataset_id="fake", end_time="2000-1-1" + internal_id=123456, uuid="fake", end_time="2000-1-1" ) assert source.start_time is None -@mock.patch("requests.get") -def test_filters(mock_requests): +# not using this approach now +# @mock.patch("requests.get") +# def test_filters(mock_requests): - mock_requests.side_effect = [ - FakeResponseSensorAPI123456(), - FakeResponseSensorAPI111111(), - ] - # V2 - source = AXDSSensorSource(internal_id=123456) - assert source.get_filters() == ["%7B%22stations%22%3A%5B%22123456%22%5D%7D"] +# mock_requests.side_effect = [ +# FakeResponseSensorAPI123456(), +# FakeResponseSensorAPI111111(), +# ] +# # V2 +# source = AXDSSensorSource(internal_id=123456) +# assert source.get_filters() == ["%7B%22stations%22%3A%5B%22123456%22%5D%7D"] - # V1 - source = AXDSSensorSource(internal_id=1111111, only_pgids=[7]) - assert source.get_filters() == [ - "%7B%22stations%22%3A%5B%221111111%22%5D%2C%22parameterGroups%22%3A%5B7%5D%7D" - ] +# # V1 +# source = AXDSSensorSource(internal_id=1111111, only_pgids=[7]) +# assert source.get_filters() == [ +# "%7B%22stations%22%3A%5B%221111111%22%5D%2C%22parameterGroups%22%3A%5B7%5D%7D" +# ] @mock.patch("requests.get") def test_data_urls_V2(mock_requests): mock_requests.side_effect = [ - FakeResponseSensorAPI123456(), + # FakeResponseSensorAPI123456(), + FakeResponseSearchDocsV2(), FakeResponseSearchDocsV2(), ] source = AXDSSensorSource(internal_id=123456) @@ -204,14 +207,15 @@ def test_data_urls_V2(mock_requests): def test_data_urls_V1(mock_requests): mock_requests.side_effect = [ - FakeResponseSensorAPI111111(), + # 
FakeResponseSensorAPI111111(), + FakeResponseSearchDocsV1(), FakeResponseSearchDocsV1(), ] - source = AXDSSensorSource(internal_id=1111111, only_pgids=[7]) + source = AXDSSensorSource(internal_id=123456, only_pgids=[7]) assert source.get_filters() == [ - "%7B%22stations%22%3A%5B%221111111%22%5D%2C%22parameterGroups%22%3A%5B7%5D%7D" + "%7B%22stations%22%3A%5B%22123456%22%5D%2C%22parameterGroups%22%3A%5B7%5D%7D" ] urls = [ - "https://sensors.axds.co/api/observations/filter/custom?filter=%7B%22stations%22%3A%5B%221111111%22%5D%2C%22parameterGroups%22%3A%5B7%5D%7D&start=2019-03-15T02:58:51Z&end=2019-04-08T07:54:56Z" + "https://sensors.axds.co/api/observations/filter/custom?filter=%7B%22stations%22%3A%5B%22123456%22%5D%2C%22parameterGroups%22%3A%5B7%5D%7D&start=2019-03-15T02:58:51Z&end=2019-04-08T07:54:56Z" ] assert source.data_urls == urls diff --git a/tests/test_utils.py b/tests/test_utils.py index e1981ec..143eb1b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -135,10 +135,15 @@ def test_parameters_and_std_names(mock_requests): def test_make_search_docs_url(): - dataset_id = "test_id" + uuid = "test_id" assert ( - f"https://search.axds.co/v2/docs?verbose=false&id={dataset_id}" - == utils.make_search_docs_url(dataset_id) + f"https://search.axds.co/v2/docs?verbose=false&id={uuid}" + == utils.make_search_docs_url(uuid=uuid) + ) + internal_id = "test_internal_id" + assert ( + f"https://search.axds.co/v2/docs?verbose=false&id=sensor_station:{internal_id}" + == utils.make_search_docs_url(internal_id=internal_id) ) @@ -195,32 +200,7 @@ def test_load_metadata(): }, } metadata = utils.load_metadata(datatype, results) - test_results = { - "datasetID": "uuid", - "title": "label", - "summary": "desc", - "type": "type", - "minTime": "2000-1-1", - "maxTime": "2000-1-2", - "minLongitude": -123.711083, - "minLatitude": 38.914556, - "maxLongitude": -123.711083, - "maxLatitude": 38.914556, - "internal_id": 106793, - "variables_details": { - "datasetVariableId": { - "parameterGroupLabel": "label", - "parameterGroupId": "parameterGroupId", - "datasetVariableId": "datasetVariableId", - "parameterId": "parameterId", - "label": "label", - "deviceId": "deviceId", - } - }, - "variables": ["datasetVariableId"], - "metadata_url": "https://sensors.axds.co/api/metadata/filter/custom?filter=%7B%22stations%22:%5B%22106793%22%5D%7D", - "version": 2, - } + test_results = {'uuid': 'uuid', 'title': 'label', 'summary': 'desc', 'minTime': '2000-1-1', 'maxTime': '2000-1-2', 'minLongitude': -123.711083, 'minLatitude': 38.914556, 'maxLongitude': -123.711083, 'maxLatitude': 38.914556, 'internal_id': 106793, 'variables_details': [{'label': 'label', 'parameterGroupId': 'parameterGroupId', 'plots': [{'subPlots': [{'datasetVariableId': 'datasetVariableId', 'parameterId': 'parameterId', 'label': 'label', 'deviceId': 'deviceId'}]}]}], 'variables': ['datasetVariableId'], 'datumConversions': [], 'metadata_url': 'https://sensors.axds.co/api/metadata/filter/custom?filter=%7B%22stations%22:%5B%22106793%22%5D%7D', 'version': 2, 'foreignNames': []} assert metadata == test_results datatype = "platform2" @@ -252,28 +232,5 @@ def test_load_metadata(): }, } metadata = utils.load_metadata(datatype, results) - test_results = { - "datasetID": "uuid", - "title": "label", - "summary": "desc", - "type": "type", - "minTime": "2000-1-1", - "maxTime": "2000-1-2", - "institution": "institution", - "geospatial_bounds": "POLYGON ((0 -80, 0 90, 359.9200439453125 90, 359.9200439453125 -80, 0 -80))", - "minLongitude": 0.0, - "minLatitude": -80.0, - 
"maxLongitude": 359.9200439453125, - "maxLatitude": 90.0, - "variables_details": { - "standard_name": { - "variable_name": "variable_name", - "units": "units", - "unit_id": "unit_id", - "long_name": "long_name", - "parameter_id": "parameter_id", - } - }, - "variables": ["standard_name"], - } + test_results = {'uuid': 'uuid', 'title': 'label', 'summary': 'desc', 'minTime': '2000-1-1', 'maxTime': '2000-1-2', 'institution': ['institution'], 'geospatial_bounds': 'POLYGON ((0 -80, 0 90, 359.9200439453125 90, 359.9200439453125 -80, 0 -80))', 'minLongitude': 0.0, 'minLatitude': -80.0, 'maxLongitude': 359.9200439453125, 'maxLatitude': 90.0, 'variables_details': [{'variable_name': {'attributes': {'standard_name': 'standard_name', 'units': 'units', 'unit_id': 'unit_id', 'long_name': 'long_name', 'parameter_id': 'parameter_id'}}}], 'variables': ['standard_name']} assert metadata == test_results From 94ac2e17ce8a7a73c7f3d34be5e95b0bd6a846ca Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Thu, 23 Feb 2023 17:32:16 -0600 Subject: [PATCH 2/9] cleaned up code --- intake_axds/axds.py | 23 ----------- intake_axds/utils.py | 90 ++------------------------------------------ 2 files changed, 3 insertions(+), 110 deletions(-) diff --git a/intake_axds/axds.py b/intake_axds/axds.py index e05611e..61a7fb3 100644 --- a/intake_axds/axds.py +++ b/intake_axds/axds.py @@ -120,27 +120,6 @@ def __init__( self.uuid = metadata["uuid"] self.search_docs_url = url - # need dataset_id to get metadata - # or use UUID as another approach like if you have the dataset_id. Not sure why they are - # sometimes different - # if self.dataset_id is None or self.uuid is None: - # assert self.internal_id is not None - # # this works but maybe better to match in the two cases - # result = response_from_url(make_metadata_url(make_filter(self.internal_id))) - # assert isinstance(result, dict) - # metadata.update(load_metadata("sensor_station", result)) - # self.dataset_id = metadata["datasetID"] - # self.uuid = metadata["uuid"] - - # # need internal_id to get data - # elif self.internal_id is None: - # assert self.dataset_id is not None or self.uuid is not None - # result = response_from_url(make_search_docs_url(self.dataset_id))[0] - # assert isinstance(result, dict) # for mypy - # metadata.update(load_metadata("sensor_station", result)) - # self.internal_id = metadata["internal_id"] - # self.uuid = metadata["uuid"] - # not checking for now # # check station for if we want the output or not — for when source is used directly. # _ = check_station(metadata, verbose=True) @@ -249,8 +228,6 @@ def _load_to_dataframe(self, url: str) -> pd.DataFrame: ): continue - # import pdb; pdb.set_trace() - columns = {} # all non-index columns in dataframe indices = {} # indices for dataframe diff --git a/intake_axds/utils.py b/intake_axds/utils.py index bdc3107..46de5e7 100644 --- a/intake_axds/utils.py +++ b/intake_axds/utils.py @@ -93,9 +93,6 @@ def match_key_to_parameter( # want unique but ordered returned return list(zip(*sorted(set(zip(pglabels, pgids))))) - # return pglabels, pgids - # return list(set(pglabels)) - def match_std_names_to_parameter(standard_names: list) -> list: """Find Parameter Group values that match standard_names. 
@@ -139,9 +136,6 @@ def match_std_names_to_parameter(standard_names: list) -> list: # want unique but ordered returned return list(zip(*sorted(set(zip(pglabels, pgids))))) - # return pglabels, pgids - # return list(set(pglabels)) - def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] """Load metadata for catalog entry. @@ -161,7 +155,6 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] metadata = {} keys = ["uuid", "label", "description"] new_names = ["uuid", "title", "summary"] - # keys = ["datasetID", "title", "summary"]#, "minTime", "maxTime"] for new_name, key in zip(new_names, keys): found = [value for value in nested_lookup(key, results) if value is not None] if len(found) > 0: metadata[new_name] = found[0] # take first instance @@ -187,33 +180,9 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] elif len(values) > 1: metadata[new_name] = max(values) - # # matching names in intake-erddap - # keys = ["datasetID", "title", "summary", "type", "minTime", "maxTime"] - # # names of keys in Axiom system. - # items = [ - # "uuid", - # "label", - # "description", - # "type", - # "start_date_time", - # "end_date_time", - # ] - # values = itemgetter(*items)(results) - # metadata = dict(zip(keys, values)) if datatype == "platform2": - # import pdb; pdb.set_trace() metadata["institution"] = nested_lookup("institution", results) metadata["geospatial_bounds"] = nested_lookup("geospatial_bounds", results)[0] - - # metadata["institution"] = ( - # results["source"]["meta"]["attributes"]["institution"] - # if "institution" in results["source"]["meta"]["attributes"] - # else None - # ) - # metadata["geospatial_bounds"] = results["source"]["meta"]["attributes"][ - # "geospatial_bounds" - # ] p1 = wkt.loads(metadata["geospatial_bounds"]) keys = ["minLongitude", "minLatitude", "maxLongitude", "maxLatitude"] metadata.update(dict(zip(keys, p1.bounds))) @@ -222,85 +191,32 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] metadata["variables_details"] = nested_lookup('variables', results) metadata["variables"] = nested_lookup("standard_name", results) - # # save variable details if they have a standard_name - # # some platforms have lots of variables that seem irrelevant - # out = { - # attrs["attributes"]["standard_name"]: { - # "variable_name": varname, - # "units": attrs["attributes"]["units"] - # if "units" in attrs["attributes"] - # else None, - # "unit_id": attrs["attributes"]["unit_id"] - # if "unit_id" in attrs["attributes"] - # else None, - # "long_name": attrs["attributes"]["long_name"], - # "parameter_id": attrs["attributes"]["parameter_id"] - # if "parameter_id" in attrs["attributes"] - # else None, - # } - # for varname, attrs in results["source"]["meta"]["variables"].items() - # if "standard_name" in attrs["attributes"] - # } - # metadata["variables_details"] = out - # metadata["variables"] = list(out.keys()) elif datatype == "sensor_station": + # location is lon, lat, depth and type + # e.g. {'coordinates': [-123.711083, 38.914556, 0.0], 'type': 'Point'} lon, lat, depth = nested_lookup('location', results)[0]["coordinates"] - - # # location is lon, lat, depth and type - # # e.g. {'coordinates': [-123.711083, 38.914556, 0.0], 'type': 'Point'} - # lon, lat, depth = results["data"]["location"]["coordinates"] keys = ["minLongitude", "minLatitude", "maxLongitude", "maxLatitude"] metadata.update(dict(zip(keys, [lon, lat, lon, lat]))) + # e.g. 106793 metadata["internal_id"] = int([value for value in nested_lookup('id', results) if value is not None][0]) - # # e.g.
106793 - # metadata["internal_id"] = results["data"]["id"] metadata["variables_details"] = nested_lookup('figures', results)[0] metadata["variables"] = list(set(nested_lookup('datasetVariableId', results))) - - # # variables, standard_names (or at least parameterNames) - # figs = results["data"]["figures"] - - # out = { - # subPlot["datasetVariableId"]: { - # "parameterGroupLabel": fig["label"], - # "parameterGroupId": fig["parameterGroupId"], - # "datasetVariableId": subPlot["datasetVariableId"], - # "parameterId": subPlot["parameterId"], - # "label": subPlot["label"], - # "deviceId": subPlot["deviceId"], - # } - # for fig in figs - # for plot in fig["plots"] - # for subPlot in plot["subPlots"] - # } - # metadata["variables_details"] = out - # metadata["variables"] = list(out.keys()) metadata["datumConversions"] = nested_lookup('datumConversions', results)[0] - # # include datumConversion info if present - # if len(results["data"]["datumConversions"]) > 0: - # metadata["datumConversions"] = results["data"]["datumConversions"] - filter = f"%7B%22stations%22:%5B%22{metadata['internal_id']}%22%5D%7D" baseurl = "https://sensors.axds.co/api" metadata_url = f"{baseurl}/metadata/filter/custom?filter={filter}" metadata["metadata_url"] = metadata_url - # also save units here - # 1 or 2? metadata["version"] = nested_lookup('version', results)[0] - # metadata["version"] = results["data"]["version"] # name on other sites, esp for ERDDAP metadata["foreignNames"] = list(set(nested_lookup('foreignName', results, wild=True))) - # metadata["foreignNames"] = [aff["~foreignName"] for aff in results["data"]["affiliations"] if aff["~foreignName"] is not None] return metadata From 095a4d9f848bb2187fc8d4107ed0e8a520403a8c Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 11:10:15 -0500 Subject: [PATCH 3/9] linting --- intake_axds/axds.py | 4 +-- intake_axds/axds_cat.py | 10 +++--- intake_axds/utils.py | 61 +++++++++++++++++++++---------------- tests/test_axds_sensor.py | 12 ++------ tests/test_utils.py | 64 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 107 insertions(+), 44 deletions(-) diff --git a/intake_axds/axds.py b/intake_axds/axds.py index 61a7fb3..d63cd4f 100644 --- a/intake_axds/axds.py +++ b/intake_axds/axds.py @@ -6,12 +6,10 @@ from . import __version__ from .utils import ( - check_station, load_metadata, make_data_url, make_filter, make_label, - make_metadata_url, make_search_docs_url, response_from_url, ) @@ -120,7 +118,7 @@ def __init__( self.uuid = metadata["uuid"] self.search_docs_url = url - # not checking for now + # not checking for now # # check station for if we want the output or not — for when source is used directly. # _ = check_station(metadata, verbose=True) diff --git a/intake_axds/axds_cat.py b/intake_axds/axds_cat.py index 8ea03e2..a1265e3 100644 --- a/intake_axds/axds_cat.py +++ b/intake_axds/axds_cat.py @@ -17,11 +17,11 @@ from . 
import __version__ from .axds import AXDSSensorSource from .utils import ( + check_station, load_metadata, match_key_to_parameter, match_std_names_to_parameter, response_from_url, - check_station, ) @@ -480,7 +480,7 @@ def _load(self): description = f"AXDS dataset_id {uuid} of datatype {self.datatype}" metadata = load_metadata(self.datatype, result) - + keep_station = check_station(metadata, verbose=self.verbose) if not keep_station: continue @@ -582,7 +582,9 @@ def _load(self): entry._plugin = [plugin] self._entries[uuid] = entry - + # final tally if self.verbose: - print(f"Final number of stations found after removing some: {len(self._entries)}.") + print( + f"Final number of stations found after removing some: {len(self._entries)}." + ) diff --git a/intake_axds/utils.py index 46de5e7..9763619 100644 --- a/intake_axds/utils.py +++ b/intake_axds/utils.py @@ -1,14 +1,13 @@ """Utils to run.""" from importlib.metadata import PackageNotFoundError, version -from operator import itemgetter from typing import Optional, Union -from nested_lookup import nested_lookup import cf_pandas as cfp import pandas as pd import requests +from nested_lookup import nested_lookup from shapely import wkt @@ -159,20 +158,20 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] found = [value for value in nested_lookup(key, results) if value is not None] if len(found) > 0: metadata[new_name] = found[0] # take first instance - + new_name = "minTime" - found = nested_lookup("start", results, wild=True, with_keys=True) - for key, values in found.items(): + found_dict = nested_lookup("start", results, wild=True, with_keys=True) + for key, values in found_dict.items(): if values == [None]: continue if len(values) == 1: metadata[new_name] = values[0] elif len(values) > 1: metadata[new_name] = min(values) - + new_name = "maxTime" - found = nested_lookup('end', results, wild=True, with_keys=True) - for key, values in found.items(): + found_dict = nested_lookup("end", results, wild=True, with_keys=True) + for key, values in found_dict.items(): if values == [None]: continue if len(values) == 1: @@ -187,25 +186,27 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] elif len(values) > 1: metadata[new_name] = max(values) - if datatype == "platform2": - # import pdb; pdb.set_trace() metadata["institution"] = nested_lookup("institution", results) metadata["geospatial_bounds"] = nested_lookup("geospatial_bounds", results)[0] p1 = wkt.loads(metadata["geospatial_bounds"]) keys = ["minLongitude", "minLatitude", "maxLongitude", "maxLatitude"] metadata.update(dict(zip(keys, p1.bounds))) - - metadata["variables_details"] = nested_lookup('variables', results) + + metadata["variables_details"] = nested_lookup("variables", results) metadata["variables"] = nested_lookup("standard_name", results) elif datatype == "sensor_station": - + # location is lon, lat, depth and type # e.g. {'coordinates': [-123.711083, 38.914556, 0.0], 'type': 'Point'} - lon, lat, depth = nested_lookup('location', results)[0]["coordinates"] + lon, lat, depth = nested_lookup("location", results)[0]["coordinates"] keys = ["minLongitude", "minLatitude", "maxLongitude", "maxLatitude"] metadata.update(dict(zip(keys, [lon, lat, lon, lat]))) # e.g.
106793 - metadata["internal_id"] = int([value for value in nested_lookup('id', results) if value is not None][0]) - - metadata["variables_details"] = nested_lookup('figures', results)[0] - metadata["variables"] = list(set(nested_lookup('datasetVariableId', results))) - - metadata["datumConversions"] = nested_lookup('datumConversions', results)[0] + metadata["internal_id"] = int( + [value for value in nested_lookup("id", results) if value is not None][0] + ) + + metadata["variables_details"] = nested_lookup("figures", results)[0] + metadata["variables"] = list(set(nested_lookup("datasetVariableId", results))) + + metadata["datumConversions"] = nested_lookup("datumConversions", results)[0] filter = f"%7B%22stations%22:%5B%22{metadata['internal_id']}%22%5D%7D" baseurl = "https://sensors.axds.co/api" @@ -213,10 +214,12 @@ def load_metadata(datatype: str, results: dict) -> dict: #: Dict[str, str] metadata["metadata_url"] = metadata_url # 1 or 2? - metadata["version"] = nested_lookup('version', results)[0] - + metadata["version"] = nested_lookup("version", results)[0] + # name on other sites, esp for ERDDAP - metadata["foreignNames"] = list(set(nested_lookup('foreignName', results, wild=True))) + metadata["foreignNames"] = list( + set(nested_lookup("foreignName", results, wild=True)) + ) return metadata @@ -243,13 +246,15 @@ def check_station(metadata: dict, verbose: bool) -> bool: keep = False if verbose: print(f"UUID {metadata['uuid']} is a webcam and should be skipped.") - - # these are IOOS ERDDAP and were setup to be different stations so we can see which stations + + # these are IOOS ERDDAP and were setup to be different stations so we can see which stations # are successfully being served through IOOS RAs. It duplicates the data (purposely) elif "ism-" in metadata["uuid"]: keep = False if verbose: - print(f"UUID {metadata['uuid']} is a duplicate station from IOOS and should be skipped.") + print( + f"UUID {metadata['uuid']} is a duplicate station from IOOS and should be skipped." + ) return keep @@ -359,9 +364,11 @@ def make_metadata_url(filter: str) -> str: return f"{baseurl}/metadata/filter/custom?filter={filter}" -def make_search_docs_url(internal_id: Optional[int] = None, uuid: Optional[str] = None) -> str: +def make_search_docs_url( + internal_id: Optional[int] = None, uuid: Optional[str] = None +) -> str: """Url for Axiom Search docs. - + Uses whichever of internal_id and uuid is not None to formulate url. 
Parameters @@ -380,6 +387,8 @@ def make_search_docs_url(internal_id: Optional[int] = None, uuid: Optional[str] return f"https://search.axds.co/v2/docs?verbose=false&id=sensor_station:{internal_id}" elif uuid is not None: return f"https://search.axds.co/v2/docs?verbose=false&id={uuid}" + else: + raise KeyError("Correct key was not input for return") def response_from_url(url: str) -> Union[list, dict]: diff --git a/tests/test_axds_sensor.py b/tests/test_axds_sensor.py index d69d6ca..dabc480 100644 --- a/tests/test_axds_sensor.py +++ b/tests/test_axds_sensor.py @@ -133,9 +133,7 @@ def test_intake_opener(): def test_binned(): - source = AXDSSensorSource( - internal_id=123456, uuid="test", bin_interval="monthly" - ) + source = AXDSSensorSource(internal_id=123456, uuid="test", bin_interval="monthly") assert source.binned @@ -158,13 +156,9 @@ def test_ids(mock_requests): def test_times(): # doesn't need response because both internal_id and dataset_id are faked upon init - source = AXDSSensorSource( - internal_id=123456, uuid="fake", start_time="2000-1-1" - ) + source = AXDSSensorSource(internal_id=123456, uuid="fake", start_time="2000-1-1") assert source.end_time is None - source = AXDSSensorSource( - internal_id=123456, uuid="fake", end_time="2000-1-1" - ) + source = AXDSSensorSource(internal_id=123456, uuid="fake", end_time="2000-1-1") assert source.start_time is None diff --git a/tests/test_utils.py b/tests/test_utils.py index 143eb1b..e06d81e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -200,7 +200,41 @@ def test_load_metadata(): }, } metadata = utils.load_metadata(datatype, results) - test_results = {'uuid': 'uuid', 'title': 'label', 'summary': 'desc', 'minTime': '2000-1-1', 'maxTime': '2000-1-2', 'minLongitude': -123.711083, 'minLatitude': 38.914556, 'maxLongitude': -123.711083, 'maxLatitude': 38.914556, 'internal_id': 106793, 'variables_details': [{'label': 'label', 'parameterGroupId': 'parameterGroupId', 'plots': [{'subPlots': [{'datasetVariableId': 'datasetVariableId', 'parameterId': 'parameterId', 'label': 'label', 'deviceId': 'deviceId'}]}]}], 'variables': ['datasetVariableId'], 'datumConversions': [], 'metadata_url': 'https://sensors.axds.co/api/metadata/filter/custom?filter=%7B%22stations%22:%5B%22106793%22%5D%7D', 'version': 2, 'foreignNames': []} + test_results = { + "uuid": "uuid", + "title": "label", + "summary": "desc", + "minTime": "2000-1-1", + "maxTime": "2000-1-2", + "minLongitude": -123.711083, + "minLatitude": 38.914556, + "maxLongitude": -123.711083, + "maxLatitude": 38.914556, + "internal_id": 106793, + "variables_details": [ + { + "label": "label", + "parameterGroupId": "parameterGroupId", + "plots": [ + { + "subPlots": [ + { + "datasetVariableId": "datasetVariableId", + "parameterId": "parameterId", + "label": "label", + "deviceId": "deviceId", + } + ] + } + ], + } + ], + "variables": ["datasetVariableId"], + "datumConversions": [], + "metadata_url": "https://sensors.axds.co/api/metadata/filter/custom?filter=%7B%22stations%22:%5B%22106793%22%5D%7D", + "version": 2, + "foreignNames": [], + } assert metadata == test_results datatype = "platform2" @@ -232,5 +266,31 @@ def test_load_metadata(): }, } metadata = utils.load_metadata(datatype, results) - test_results = {'uuid': 'uuid', 'title': 'label', 'summary': 'desc', 'minTime': '2000-1-1', 'maxTime': '2000-1-2', 'institution': ['institution'], 'geospatial_bounds': 'POLYGON ((0 -80, 0 90, 359.9200439453125 90, 359.9200439453125 -80, 0 -80))', 'minLongitude': 0.0, 'minLatitude': -80.0, 'maxLongitude': 
359.9200439453125, 'maxLatitude': 90.0, 'variables_details': [{'variable_name': {'attributes': {'standard_name': 'standard_name', 'units': 'units', 'unit_id': 'unit_id', 'long_name': 'long_name', 'parameter_id': 'parameter_id'}}}], 'variables': ['standard_name']} + test_results = { + "uuid": "uuid", + "title": "label", + "summary": "desc", + "minTime": "2000-1-1", + "maxTime": "2000-1-2", + "institution": ["institution"], + "geospatial_bounds": "POLYGON ((0 -80, 0 90, 359.9200439453125 90, 359.9200439453125 -80, 0 -80))", + "minLongitude": 0.0, + "minLatitude": -80.0, + "maxLongitude": 359.9200439453125, + "maxLatitude": 90.0, + "variables_details": [ + { + "variable_name": { + "attributes": { + "standard_name": "standard_name", + "units": "units", + "unit_id": "unit_id", + "long_name": "long_name", + "parameter_id": "parameter_id", + } + } + } + ], + "variables": ["standard_name"], + } assert metadata == test_results From 5fd04922ed02359c51fdc3d4aa007ae62f41c160 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 11:25:58 -0500 Subject: [PATCH 4/9] added nested_lookup to envs --- ci/environment-py3.10.yml | 1 + ci/environment-py3.8.yml | 1 + ci/environment-py3.9.yml | 1 + docs/environment.yml | 1 + environment.yml | 1 + 5 files changed, 5 insertions(+) diff --git a/ci/environment-py3.10.yml b/ci/environment-py3.10.yml index 38e655f..8f51ce8 100644 --- a/ci/environment-py3.10.yml +++ b/ci/environment-py3.10.yml @@ -5,6 +5,7 @@ dependencies: - python=3.10 ############## These will have to be adjusted to your specific project - cf_pandas + - nested_lookup - pandas - requests - shapely diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml index 955cce4..304406c 100644 --- a/ci/environment-py3.8.yml +++ b/ci/environment-py3.8.yml @@ -5,6 +5,7 @@ dependencies: - python=3.8 ############## These will have to be adjusted to your specific project - cf_pandas + - nested_lookup - pandas - requests - shapely diff --git a/ci/environment-py3.9.yml b/ci/environment-py3.9.yml index a93b32a..dcb2d2a 100644 --- a/ci/environment-py3.9.yml +++ b/ci/environment-py3.9.yml @@ -5,6 +5,7 @@ dependencies: - python=3.9 ############## These will have to be adjusted to your specific project - cf_pandas + - nested_lookup - pandas - requests - shapely diff --git a/docs/environment.yml b/docs/environment.yml index f0b3902..a1ea15a 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -8,6 +8,7 @@ dependencies: - aiohttp - cf_pandas - h5netcdf + - nested_lookup - pandas - requests - shapely diff --git a/environment.yml b/environment.yml index 6fecb39..367dc93 100644 --- a/environment.yml +++ b/environment.yml @@ -8,6 +8,7 @@ dependencies: - aiohttp - cf_pandas - h5netcdf + - nested_lookup - pandas - pip - requests From 37d4c1b4c55c1e3cc71d77065714b1a1ed5f2249 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 11:32:54 -0500 Subject: [PATCH 5/9] shifted nested-lookup to pip --- ci/environment-py3.10.yml | 3 ++- ci/environment-py3.8.yml | 3 ++- ci/environment-py3.9.yml | 3 ++- docs/environment.yml | 2 +- environment.yml | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ci/environment-py3.10.yml b/ci/environment-py3.10.yml index 8f51ce8..2e3a597 100644 --- a/ci/environment-py3.10.yml +++ b/ci/environment-py3.10.yml @@ -5,14 +5,15 @@ dependencies: - python=3.10 ############## These will have to be adjusted to your specific project - cf_pandas - - nested_lookup - pandas - requests - shapely ############## + - pip - pytest - pip: - codecov + - 
nested_lookup - pytest-cov - coverage[toml] - git+https://github.com/intake/intake diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml index 304406c..6b5191a 100644 --- a/ci/environment-py3.8.yml +++ b/ci/environment-py3.8.yml @@ -5,14 +5,15 @@ dependencies: - python=3.8 ############## These will have to be adjusted to your specific project - cf_pandas - - nested_lookup - pandas - requests - shapely ############## + - pip - pytest - pip: - codecov + - nested_lookup - pytest-cov - coverage[toml] - git+https://github.com/intake/intake diff --git a/ci/environment-py3.9.yml b/ci/environment-py3.9.yml index dcb2d2a..5180f42 100644 --- a/ci/environment-py3.9.yml +++ b/ci/environment-py3.9.yml @@ -5,14 +5,15 @@ dependencies: - python=3.9 ############## These will have to be adjusted to your specific project - cf_pandas - - nested_lookup - pandas - requests - shapely ############## + - pip - pytest - pip: - codecov + - nested_lookup - pytest-cov - coverage[toml] - git+https://github.com/intake/intake diff --git a/docs/environment.yml b/docs/environment.yml index a1ea15a..057b420 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -8,7 +8,6 @@ dependencies: - aiohttp - cf_pandas - h5netcdf - - nested_lookup - pandas - requests - shapely @@ -30,6 +29,7 @@ dependencies: - intake-parquet - intake-xarray - intake-axds + - nested_lookup # - "dask[complete]" - docrep<=0.2.7 - furo diff --git a/environment.yml b/environment.yml index 367dc93..ecc7729 100644 --- a/environment.yml +++ b/environment.yml @@ -8,12 +8,12 @@ dependencies: - aiohttp - cf_pandas - h5netcdf - - nested_lookup - pandas - pip - requests - shapely - pip: + - nested_lookup - git+https://github.com/intake/intake - intake-parquet - intake-xarray From 45eeeff7c5e5cea9cfa500d4a5da674bac7d30a8 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 11:40:42 -0500 Subject: [PATCH 6/9] trying again with intake now --- ci/environment-py3.10.yml | 2 +- ci/environment-py3.8.yml | 2 +- ci/environment-py3.9.yml | 2 +- environment.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/environment-py3.10.yml b/ci/environment-py3.10.yml index 2e3a597..bf83eee 100644 --- a/ci/environment-py3.10.yml +++ b/ci/environment-py3.10.yml @@ -16,7 +16,7 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] - - git+https://github.com/intake/intake + - intake - intake-parquet - intake-xarray # - "dask[complete]" diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml index 6b5191a..9451e3f 100644 --- a/ci/environment-py3.8.yml +++ b/ci/environment-py3.8.yml @@ -16,7 +16,7 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] - - git+https://github.com/intake/intake + - intake - intake-parquet - intake-xarray # - "dask[complete]" diff --git a/ci/environment-py3.9.yml b/ci/environment-py3.9.yml index 5180f42..44720f3 100644 --- a/ci/environment-py3.9.yml +++ b/ci/environment-py3.9.yml @@ -16,7 +16,7 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] - - git+https://github.com/intake/intake + - intake - intake-parquet - intake-xarray # - "dask[complete]" diff --git a/environment.yml b/environment.yml index ecc7729..a75d055 100644 --- a/environment.yml +++ b/environment.yml @@ -14,7 +14,7 @@ dependencies: - shapely - pip: - nested_lookup - - git+https://github.com/intake/intake + - intake - intake-parquet - intake-xarray # - "dask[complete]" From e5f35c45b022d3f8b92fbdb94627b88a76c7f8cd Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 12:18:55 -0500 
Subject: [PATCH 7/9] now trying all intakes from conda-forge --- ci/environment-py3.10.yml | 6 +++--- ci/environment-py3.8.yml | 6 +++--- ci/environment-py3.9.yml | 6 +++--- environment.yml | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ci/environment-py3.10.yml b/ci/environment-py3.10.yml index bf83eee..aa8df21 100644 --- a/ci/environment-py3.10.yml +++ b/ci/environment-py3.10.yml @@ -5,6 +5,9 @@ dependencies: - python=3.10 ############## These will have to be adjusted to your specific project - cf_pandas + - intake + - intake-parquet + - intake-xarray - pandas - requests - shapely @@ -16,7 +19,4 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] - - intake - - intake-parquet - - intake-xarray # - "dask[complete]" diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml index 9451e3f..67022b3 100644 --- a/ci/environment-py3.8.yml +++ b/ci/environment-py3.8.yml @@ -5,6 +5,9 @@ dependencies: - python=3.8 ############## These will have to be adjusted to your specific project - cf_pandas + - intake + - intake-parquet + - intake-xarray - pandas - requests - shapely @@ -16,7 +19,4 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] - - intake - - intake-parquet - - intake-xarray # - "dask[complete]" diff --git a/ci/environment-py3.9.yml b/ci/environment-py3.9.yml index 44720f3..e95914c 100644 --- a/ci/environment-py3.9.yml +++ b/ci/environment-py3.9.yml @@ -5,6 +5,9 @@ dependencies: - python=3.9 ############## These will have to be adjusted to your specific project - cf_pandas + - intake + - intake-parquet + - intake-xarray - pandas - requests - shapely @@ -16,7 +19,4 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] - - intake - - intake-parquet - - intake-xarray # - "dask[complete]" diff --git a/environment.yml b/environment.yml index a75d055..d4fddd5 100644 --- a/environment.yml +++ b/environment.yml @@ -8,13 +8,13 @@ dependencies: - aiohttp - cf_pandas - h5netcdf + - intake + - intake-parquet + - intake-xarray - pandas - pip - requests - shapely - pip: - nested_lookup - - intake - - intake-parquet - - intake-xarray # - "dask[complete]" From 7cdbaa9b502c928af155620c46969f9c81e1c111 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 12:30:07 -0500 Subject: [PATCH 8/9] try intake from pypi again --- ci/environment-py3.10.yml | 6 +++--- ci/environment-py3.8.yml | 6 +++--- ci/environment-py3.9.yml | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/environment-py3.10.yml b/ci/environment-py3.10.yml index aa8df21..bf83eee 100644 --- a/ci/environment-py3.10.yml +++ b/ci/environment-py3.10.yml @@ -5,9 +5,6 @@ dependencies: - python=3.10 ############## These will have to be adjusted to your specific project - cf_pandas - - intake - - intake-parquet - - intake-xarray - pandas - requests - shapely @@ -19,4 +16,7 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] + - intake + - intake-parquet + - intake-xarray # - "dask[complete]" diff --git a/ci/environment-py3.8.yml b/ci/environment-py3.8.yml index 67022b3..9451e3f 100644 --- a/ci/environment-py3.8.yml +++ b/ci/environment-py3.8.yml @@ -5,9 +5,6 @@ dependencies: - python=3.8 ############## These will have to be adjusted to your specific project - cf_pandas - - intake - - intake-parquet - - intake-xarray - pandas - requests - shapely @@ -19,4 +16,7 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] + - intake + - intake-parquet + - intake-xarray # - "dask[complete]" diff --git a/ci/environment-py3.9.yml 
b/ci/environment-py3.9.yml index e95914c..44720f3 100644 --- a/ci/environment-py3.9.yml +++ b/ci/environment-py3.9.yml @@ -5,9 +5,6 @@ dependencies: - python=3.9 ############## These will have to be adjusted to your specific project - cf_pandas - - intake - - intake-parquet - - intake-xarray - pandas - requests - shapely @@ -19,4 +16,7 @@ dependencies: - nested_lookup - pytest-cov - coverage[toml] + - intake + - intake-parquet + - intake-xarray # - "dask[complete]" From 9f4cfcce7ab8d1b0a852fb7ce28f84dd5471912b Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 12 Sep 2023 14:01:30 -0500 Subject: [PATCH 9/9] ok finally realized intake changed --- tests/test_axds_cat.py | 3 ++- tests/test_axds_sensor.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_axds_cat.py b/tests/test_axds_cat.py index b44f1cc..05d8c8f 100644 --- a/tests/test_axds_cat.py +++ b/tests/test_axds_cat.py @@ -246,7 +246,8 @@ def json(self): def test_intake_opener(): - assert "open_axds_cat" in intake.openers + # intake.openers isn't available anymore + assert "open_axds_cat" in intake.__dir__() @mock.patch("requests.get") diff --git a/tests/test_axds_sensor.py b/tests/test_axds_sensor.py index dabc480..0352530 100644 --- a/tests/test_axds_sensor.py +++ b/tests/test_axds_sensor.py @@ -129,7 +129,8 @@ def json(self): def test_intake_opener(): - assert "open_axds_sensor" in intake.openers + # intake.openers isn't available anymore + assert "open_axds_sensor" in intake.__dir__() def test_binned():
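
A note on the shape of the main change in this series: after patches 1 and 3, AXDSSensorSource accepts either the Axiom internal_id or the station uuid and resolves whichever one is missing from the search docs endpoint. The sketch below condenses that flow; make_search_docs_url follows the diffs above, while resolve_station is an illustrative stand-in for the logic in AXDSSensorSource.__init__, not a function in the package, and a plain requests call stands in for response_from_url.

from typing import Optional

import requests


def make_search_docs_url(
    internal_id: Optional[int] = None, uuid: Optional[str] = None
) -> str:
    """Build the Axiom search docs url from whichever id is not None."""
    if internal_id is not None:
        return f"https://search.axds.co/v2/docs?verbose=false&id=sensor_station:{internal_id}"
    elif uuid is not None:
        return f"https://search.axds.co/v2/docs?verbose=false&id={uuid}"
    else:
        raise KeyError("Correct key was not input for return")


def resolve_station(
    internal_id: Optional[int] = None, uuid: Optional[str] = None
) -> dict:
    """Fetch the search doc for a station given either id.

    The docs endpoint returns a one-element JSON list, hence the [0].
    """
    if internal_id is None and uuid is None:
        raise ValueError("internal_id and uuid cannot both be None. Input one of them.")
    url = make_search_docs_url(internal_id=internal_id, uuid=uuid)
    return requests.get(url).json()[0]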
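Patch 1 also swaps the itemgetter-based metadata extraction for nested_lookup, so a single code path tolerates the differing layouts of platform2 and sensor_station docs. A minimal sketch of the time-range portion of load_metadata, assuming ISO-8601 time strings (which order correctly under string comparison); it simplifies the patch slightly by filtering out None values rather than only skipping the exact [None] case:

from nested_lookup import nested_lookup


def time_range(results: dict) -> dict:
    """Earliest start and latest end found anywhere in one search-doc dict."""
    metadata = {}
    # wild=True matches key substrings, so "start" also hits "start_date_time", etc.
    for name, pattern, agg in [("minTime", "start", min), ("maxTime", "end", max)]:
        found = nested_lookup(pattern, results, wild=True, with_keys=True)
        for key, values in found.items():
            values = [v for v in values if v is not None]
            if values:
                # later matching keys overwrite earlier ones, as in the patch
                metadata[name] = agg(values)
    return metadata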
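Finally, patch 9 tracks an upstream intake change: the openers are still registered on the package, they just no longer appear in intake.openers, hence the switch to intake.__dir__() in the tests. Usage is otherwise unchanged; a small example, with a placeholder station id and start time:

import intake

# by internal station id; uuid="..." works the same way after this series
source = intake.open_axds_sensor(internal_id=106793, start_time="2022-1-1")
df = source.read()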