diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1bea8e4..33faa56 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,12 +13,6 @@ repos:
     - id: file-contents-sorter
       files: requirements-dev.txt
 
-- repo: https://github.com/psf/black
-  rev: 24.2.0
-  hooks:
-    - id: black
-      language_version: python3
-
 - repo: https://github.com/pre-commit/mirrors-mypy
   rev: v1.8.0
   hooks:
@@ -59,6 +53,24 @@ repos:
   rev: v0.2.2
   hooks:
     - id: ruff
+      args: ["--fix", "--show-fixes"]
+    - id: ruff-format
+
+- repo: https://github.com/nbQA-dev/nbQA
+  rev: 1.7.1
+  hooks:
+    - id: nbqa-check-ast
+    - id: nbqa-black
+    - id: nbqa-ruff
+      args: [
+        --fix,
+        --config=ruff.toml,
+      ]
+
+- repo: https://github.com/bdice/nb-strip-paths
+  rev: v0.1.0
+  hooks:
+    - id: nb-strip-paths
 
 - repo: https://github.com/tox-dev/pyproject-fmt
   rev: 1.7.0
diff --git a/erddapy/core/griddap.py b/erddapy/core/griddap.py
index f5e430a..37ad908 100644
--- a/erddapy/core/griddap.py
+++ b/erddapy/core/griddap.py
@@ -32,7 +32,9 @@ def _griddap_get_constraints(
         phrase, *__ = var.split("[")
         var_name = phrase.split(" ")[-1]
         variable_names.append(var_name)
-    table = pd.DataFrame({"dimension name": [], "min": [], "max": [], "length": []})
+    table = pd.DataFrame(
+        {"dimension name": [], "min": [], "max": [], "length": []},
+    )
     for dim in dim_names:
         url = f"{dataset_url}.csvp?{dim}"
         data = pd.read_csv(url).values
@@ -62,7 +64,10 @@
     return constraints_dict, dim_names, variable_names
 
 
-def _griddap_check_constraints(user_constraints: dict, original_constraints: dict):
+def _griddap_check_constraints(
+    user_constraints: dict,
+    original_constraints: dict,
+):
     """Check that constraints changed by user match those expected by dataset."""
     if user_constraints.keys() != original_constraints.keys():
         raise ValueError(
@@ -70,7 +75,10 @@ def _griddap_check_constraints(user_constraints: dict, original_constraints: dic
         )
 
 
-def _griddap_check_variables(user_variables: ListLike, original_variables: ListLike):
+def _griddap_check_variables(
+    user_variables: ListLike,
+    original_variables: ListLike,
+):
     """Check user has not requested variables that do not exist in dataset."""
     invalid_variables = []
     for variable in user_variables:
diff --git a/erddapy/core/interfaces.py b/erddapy/core/interfaces.py
index fcf4f20..6cbabd7 100644
--- a/erddapy/core/interfaces.py
+++ b/erddapy/core/interfaces.py
@@ -33,7 +33,9 @@ def to_pandas(
     try:
         return pd.read_csv(data, **(pandas_kwargs or {}))
     except Exception as e:
-        raise ValueError(f"Could not read url {url} with Pandas.read_csv.") from e
+        raise ValueError(
+            f"Could not read url {url} with Pandas.read_csv.",
+        ) from e
 
 
 def to_ncCF(
diff --git a/erddapy/core/netcdf.py b/erddapy/core/netcdf.py
index 97d20ca..27266f1 100644
--- a/erddapy/core/netcdf.py
+++ b/erddapy/core/netcdf.py
@@ -36,7 +36,11 @@ def _tempnc(data: BinaryIO) -> Generator[str, None, None]:
 
     tmp = None
     try:
-        tmp = NamedTemporaryFile(suffix=".nc", prefix="erddapy_", delete=delete)
+        tmp = NamedTemporaryFile(
+            suffix=".nc",
+            prefix="erddapy_",
+            delete=delete,
+        )
         tmp.write(data.read())
         tmp.flush()
         yield tmp.name
diff --git a/erddapy/core/url.py b/erddapy/core/url.py
index f7d668d..005c416 100644
--- a/erddapy/core/url.py
+++ b/erddapy/core/url.py
@@ -30,7 +30,9 @@ def _sort_url(url):
     else:
         variables, constraints = parts.query.split("&", maxsplit=1)
         sorted_variables = ",".join(sorted(variables.split(",")))
-        sorted_query = OrderedDict(sorted(dict(parse.parse_qsl(constraints)).items()))
+        sorted_query = OrderedDict(
+            sorted(dict(parse.parse_qsl(constraints)).items()),
+        )
         sorted_query_str = parse.unquote(parse.urlencode(sorted_query))
         sorted_url = f"{parts.scheme}://{parts.netloc}{parts.path}?{parts.params}{sorted_variables}&{sorted_query_str}{parts.fragment}"
     else:
@@ -134,7 +136,9 @@ def _format_constraints_url(kwargs: dict) -> str:
 def _check_substrings(constraint):
     """Extend the OPeNDAP with extra strings."""
     substrings = ["now", "min", "max"]
-    return any(True for substring in substrings if substring in str(constraint))
+    return any(
+        True for substring in substrings if substring in str(constraint)
+    )
 
 
 def parse_dates(
@@ -326,7 +330,9 @@ def get_info_url(
 
     """
     if not dataset_id:
-        raise ValueError(f"You must specify a valid dataset_id, got {dataset_id}")
+        raise ValueError(
+            f"You must specify a valid dataset_id, got {dataset_id}",
+        )
     url = f"{server}/info/{dataset_id}/index.{response}"
     return url
 
@@ -393,7 +399,9 @@ def get_download_url(
 
     """
     if not dataset_id:
-        raise ValueError(f"Please specify a valid `dataset_id`, got {dataset_id}")
+        raise ValueError(
+            f"Please specify a valid `dataset_id`, got {dataset_id}",
+        )
 
     if not protocol:
         raise ValueError(f"Please specify a valid `protocol`, got {protocol}")
diff --git a/erddapy/erddapy.py b/erddapy/erddapy.py
index 6e05c4a..21f8a51 100644
--- a/erddapy/erddapy.py
+++ b/erddapy/erddapy.py
@@ -161,7 +161,9 @@ def griddap_initialize(
                 f"Method only valid using griddap protocol, got {self.protocol}",
             )
         if dataset_id is None:
-            raise ValueError(f"Must set a valid dataset_id, got {self.dataset_id}")
+            raise ValueError(
+                f"Must set a valid dataset_id, got {self.dataset_id}",
+            )
         # Return the opendap URL without any slicing so the user can do it later.
         if self.response == "opendap":
             return
@@ -315,10 +317,14 @@ def get_download_url(
         constraints = constraints if constraints else self.constraints
 
         if not dataset_id:
-            raise ValueError(f"Please specify a valid `dataset_id`, got {dataset_id}")
+            raise ValueError(
+                f"Please specify a valid `dataset_id`, got {dataset_id}",
+            )
 
         if not protocol:
-            raise ValueError(f"Please specify a valid `protocol`, got {protocol}")
+            raise ValueError(
+                f"Please specify a valid `protocol`, got {protocol}",
+            )
 
         if (
             protocol == "griddap"
@@ -362,7 +368,11 @@ def to_pandas(
         response = kw.pop("response", "csvp")
         distinct = kw.pop("distinct", False)
         url = self.get_download_url(response=response, distinct=distinct)
-        return to_pandas(url, requests_kwargs=requests_kwargs, pandas_kwargs=dict(**kw))
+        return to_pandas(
+            url,
+            requests_kwargs=requests_kwargs,
+            pandas_kwargs=dict(**kw),
+        )
 
     def to_ncCF(self, protocol: str = None, **kw):
         """Load the data request into a Climate and Forecast compliant netCDF4-python object."""
@@ -392,7 +402,12 @@
             requests_kwargs = {**{"auth": self.auth}, **requests_kwargs}
         else:
             requests_kwargs = {"auth": self.auth}
-        return to_xarray(url, response, requests_kwargs, xarray_kwargs=dict(**kw))
+        return to_xarray(
+            url,
+            response,
+            requests_kwargs,
+            xarray_kwargs=dict(**kw),
+        )
 
     def to_iris(self, **kw):
         """Load the data request into an iris.CubeList.
@@ -409,7 +424,9 @@ def _get_variables_uncached(self, dataset_id: OptionalStr = None) -> dict:
             dataset_id = self.dataset_id
 
         if dataset_id is None:
-            raise ValueError(f"You must specify a valid dataset_id, got {dataset_id}")
+            raise ValueError(
+                f"You must specify a valid dataset_id, got {dataset_id}",
+            )
 
         url = self.get_info_url(dataset_id=dataset_id, response="csv")
 
@@ -419,14 +436,21 @@ def _get_variables_uncached(self, dataset_id: OptionalStr = None) -> dict:
         self._dataset_id = dataset_id
         for variable in set(_df["Variable Name"]):
             attributes = (
-                _df.loc[_df["Variable Name"] == variable, ["Attribute Name", "Value"]]
+                _df.loc[
+                    _df["Variable Name"] == variable,
+                    ["Attribute Name", "Value"],
+                ]
                 .set_index("Attribute Name")
                 .to_dict()["Value"]
             )
             variables.update({variable: attributes})
         return variables
 
-    def get_var_by_attr(self, dataset_id: OptionalStr = None, **kwargs) -> list[str]:
+    def get_var_by_attr(
+        self,
+        dataset_id: OptionalStr = None,
+        **kwargs,
+    ) -> list[str]:
         """
         Return a variable based on its attributes.
 
diff --git a/erddapy/multiple_server_search.py b/erddapy/multiple_server_search.py
index d779b67..32a5f23 100644
--- a/erddapy/multiple_server_search.py
+++ b/erddapy/multiple_server_search.py
@@ -74,7 +74,10 @@ def search_servers(
         f"Protocol must be tabledap or griddap, got {protocol}",
     )
     if servers_list:
-        urls = {server: _format_search_string(server, query) for server in servers_list}
+        urls = {
+            server: _format_search_string(server, query)
+            for server in servers_list
+        }
     else:
         urls = {
             key: _format_search_string(server.url, query)
diff --git a/notebooks/00-quick_intro.ipynb b/notebooks/00-quick_intro.ipynb
index 86b160a..aa0636c 100644
--- a/notebooks/00-quick_intro.ipynb
+++ b/notebooks/00-quick_intro.ipynb
@@ -32,7 +32,6 @@
    "source": [
     "from erddapy import ERDDAP\n",
     "\n",
-    "\n",
     "server = \"https://standards.sensors.ioos.us/erddap\"\n",
     "e = ERDDAP(\n",
     "    server=server,\n",
@@ -63,7 +62,7 @@
     "    \"latitude\",\n",
     "    \"longitude\",\n",
     "    \"sea_water_temperature\",\n",
-    "    \"air_temperature\"\n",
+    "    \"air_temperature\",\n",
     "]\n",
     "\n",
     "e.constraints = {\n",
diff --git a/notebooks/01a-griddap.ipynb b/notebooks/01a-griddap.ipynb
index c26a50e..c1d13f8 100644
--- a/notebooks/01a-griddap.ipynb
+++ b/notebooks/01a-griddap.ipynb
@@ -24,7 +24,6 @@
     "import geopandas\n",
     "import pooch\n",
     "\n",
-    "\n",
     "url = \"https://naturalearth.s3.amazonaws.com/4.1.1/50m_physical/ne_50m_geography_marine_polys.zip\"\n",
     "fname = pooch.retrieve(\n",
     "    url,\n",
@@ -87,9 +86,7 @@
     "e.griddap_initialize()\n",
     "\n",
     "print(f\"variables in this dataset:\\n\\n{e.variables}\")\n",
-    "print(\n",
-    "    f\"\\nconstraints of this dataset:\\n\\n{json.dumps(e.constraints, indent=1)}\"\n",
-    ")"
+    "print(f\"\\nconstraints of this dataset:\\n\\n{json.dumps(e.constraints, indent=1)}\")"
    ]
   },
   {
@@ -188,7 +185,6 @@
     "import cartopy.crs as ccrs\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
-    "\n",
     "fig, ax = plt.subplots(subplot_kw={\"projection\": ccrs.PlateCarree()})\n",
     "ds[\"ROSE\"].plot(ax=ax)\n",
     "ax.coastlines();"
@@ -210,7 +206,6 @@
    "source": [
     "import regionmask\n",
     "\n",
-    "\n",
     "region = regionmask.from_geopandas(SA, name=name)\n",
     "region.plot();"
    ]
@@ -340,7 +335,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.1"
+   "version": "3.10.13"
   }
  },
 "nbformat": 4,
diff --git a/notebooks/01b-tabledap.ipynb b/notebooks/01b-tabledap.ipynb
index 6036a2d..7758f4a 100644
--- a/notebooks/01b-tabledap.ipynb
+++ b/notebooks/01b-tabledap.ipynb
@@ -26,7 +26,6 @@
    "source": [
     "from erddapy import ERDDAP\n",
     "\n",
-    "\n",
     "server = \"https://gliders.ioos.us/erddap\"\n",
     "e = ERDDAP(server=server)\n",
     "\n",
@@ -103,15 +102,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can refine our search by providing some constraints.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's narrow the search area, time span, and look for **sea_water_temperature**\n",
-    ".\n"
+    "We can refine our search by providing some constraints.\n",
+    "\n",
+    "Let's narrow the search area, time span, and look for **sea_water_temperature**."
    ]
   },
   {
@@ -129,7 +122,6 @@
    "source": [
     "from doc_helpers import show_iframe\n",
     "\n",
-    "\n",
     "kw = {\n",
     "    \"standard_name\": \"sea_water_temperature\",\n",
     "    \"min_lon\": -72.0,\n",
@@ -284,8 +276,7 @@
     "\n",
     "# First one, slow.\n",
     "e.get_var_by_attr(\n",
-    "    dataset_id=\"whoi_406-20160902T1700\",\n",
-    "    standard_name=\"sea_water_temperature\"\n",
+    "    dataset_id=\"whoi_406-20160902T1700\", standard_name=\"sea_water_temperature\"\n",
     ")"
    ]
   },
@@ -306,8 +297,7 @@
     "\n",
     "# Second one on the same glider, a little bit faster.\n",
     "e.get_var_by_attr(\n",
-    "    dataset_id=\"whoi_406-20160902T1700\",\n",
-    "    standard_name=\"sea_water_practical_salinity\"\n",
+    "    dataset_id=\"whoi_406-20160902T1700\", standard_name=\"sea_water_practical_salinity\"\n",
    ")"
    ]
   },
@@ -394,14 +384,18 @@
   },
   "outputs": [],
   "source": [
-    "from joblib import Parallel, delayed\n",
     "import multiprocessing\n",
+    "\n",
+    "from joblib import Parallel, delayed\n",
+    "\n",
     "from erddapy.core import get_download_url, to_pandas\n",
     "\n",
     "\n",
     "def request_whoi(dataset_id):\n",
     "    variables = [\"longitude\", \"latitude\", \"temperature\", \"salinity\"]\n",
-    "    url = get_download_url(server, dataset_id, protocol=\"tabledap\", variables=variables, response=\"csv\")\n",
+    "    url = get_download_url(\n",
+    "        server, dataset_id, protocol=\"tabledap\", variables=variables, response=\"csv\"\n",
+    "    )\n",
     "    # Drop units in the first line and NaNs.\n",
     "    df = to_pandas(url, pandas_kwargs={\"skiprows\": (1,)}).dropna()\n",
     "    return (dataset_id, df)\n",
@@ -412,7 +406,7 @@
     "    delayed(request_whoi)(dataset_id) for dataset_id in whoi_gliders[:5]\n",
     ")\n",
     "\n",
-    "dfs = {glider: df for (glider, df) in downloads}"
+    "dfs = dict(downloads)"
    ]
   },
   {
@@ -435,15 +429,14 @@
   },
   "outputs": [],
   "source": [
-    "import matplotlib.pyplot as plt\n",
     "import cartopy.crs as ccrs\n",
-    "from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter\n",
+    "import matplotlib.pyplot as plt\n",
+    "from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter\n",
     "\n",
     "\n",
     "def make_map():\n",
     "    fig, ax = plt.subplots(\n",
-    "        figsize=(9, 9),\n",
-    "        subplot_kw=dict(projection=ccrs.PlateCarree())\n",
+    "        figsize=(9, 9), subplot_kw={\"projection\": ccrs.PlateCarree()}\n",
     "    )\n",
     "    ax.coastlines(resolution=\"10m\")\n",
     "    lon_formatter = LongitudeFormatter(zero_direction_label=True)\n",
@@ -456,14 +449,14 @@
     "fig, ax = make_map()\n",
     "lons, lats = [], []\n",
-    "for glider, df in dfs.items():\n",
+    "for _, df in dfs.items():\n",
     "    lon, lat = df[\"longitude\"], df[\"latitude\"]\n",
     "    lons.extend(lon.array)\n",
     "    lats.extend(lat.array)\n",
     "    ax.plot(lon, lat)\n",
     "\n",
     "dx = dy = 0.25\n",
-    "extent = min(lons)-dx, max(lons)+dx, min(lats)+dy, max(lats)+dy\n",
+    "extent = min(lons) - dx, max(lons) + dx, min(lats) + dy, max(lats) + dy\n",
     "ax.set_extent(extent)\n",
     "\n",
     "ax.set_xticks([extent[0], extent[1]], crs=ccrs.PlateCarree())\n",
@@ -492,7 +485,7 @@
"ax.set_xlabel(\"temperature\")\n", "ax.grid(True)\n", "\n", - "for glider, df in dfs.items():\n", + "for _, df in dfs.items():\n", " glider_scatter(df, ax)\n", "\n", "ax.axis([5.5, 30, 30, 38])" @@ -553,7 +546,6 @@ "source": [ "import matplotlib.dates as mdates\n", "\n", - "\n", "fig, ax = plt.subplots(figsize=(17, 2))\n", "cs = ax.scatter(\n", " df.index,\n", @@ -602,7 +594,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.1" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/02-extras.ipynb b/notebooks/02-extras.ipynb index b7482b4..9baa416 100644 --- a/notebooks/02-extras.ipynb +++ b/notebooks/02-extras.ipynb @@ -17,7 +17,6 @@ "source": [ "from erddapy import ERDDAP\n", "\n", - "\n", "e = ERDDAP(server=\"https://gliders.ioos.us/erddap\")" ] }, @@ -29,7 +28,6 @@ "source": [ "from netCDF4 import Dataset\n", "\n", - "\n", "e.constraints = None\n", "e.protocol = \"tabledap\"\n", "e.dataset_id = \"whoi_406-20160902T1700\"\n", diff --git a/notebooks/03-advanced_search.ipynb b/notebooks/03-advanced_search.ipynb index 889d641..f18e4a5 100644 --- a/notebooks/03-advanced_search.ipynb +++ b/notebooks/03-advanced_search.ipynb @@ -18,7 +18,6 @@ "source": [ "from erddapy import ERDDAP\n", "\n", - "\n", "e = ERDDAP(server=\"https://pae-paha.pacioos.hawaii.edu/erddap\", protocol=\"griddap\")" ] }, @@ -198,7 +197,6 @@ "source": [ "from erddapy.multiple_server_search import advanced_search_servers\n", "\n", - "\n", "min_time = \"2017-07-01T00:00:00Z\"\n", "max_time = \"2017-09-01T00:00:00Z\"\n", "min_lon, max_lon = -127, -123.75\n", diff --git a/notebooks/04-refactor-update.ipynb b/notebooks/04-refactor-update.ipynb index fe3cf2f..7b345c1 100644 --- a/notebooks/04-refactor-update.ipynb +++ b/notebooks/04-refactor-update.ipynb @@ -23,6 +23,7 @@ "source": [ "# Methods available in the erddapy 'core' module\n", "import erddapy\n", + "\n", "[i for i in dir(erddapy.core) if not i.startswith(\"_\")]" ] }, @@ -84,7 +85,7 @@ " protocol=\"tabledap\",\n", " response=\"csv\",\n", " variables=variables,\n", - " constraints=constraints\n", + " constraints=constraints,\n", ")\n", "\n", "print(url)" diff --git a/pyproject.toml b/pyproject.toml index 9b672ff..5a07481 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,24 +44,6 @@ write_to = "erddapy/_version.py" write_to_template = "__version__ = '{version}'" tag_regex = "^(?Pv)?(?P[^\\+]+)(?P.*)?$" -[tool.ruff] -select = [ - "A", # flake8-builtins - "B", # flake8-bugbear - "C4", # flake8-comprehensions - "F", # flakes - "I", # import sorting - "T20", # flake8-print - "UP", # upgrade -] -line-length = 79 - -[tool.ruff.per-file-ignores] -"docs/source/conf.py" = [ - "E402", - "A001", -] - [tool.check-manifest] ignore = [ "*.yml", diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..c0a164c --- /dev/null +++ b/ruff.toml @@ -0,0 +1,28 @@ +line-length = 79 + +lint.select = [ + "A", # flake8-builtins + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "F", # flakes + "I", # import sorting + "T20", # flake8-print + "UP", # upgrade +] + +[lint.extend-per-file-ignores] +"docs/source/conf.py" = [ + "E402", # Module level import not at top of file + "A001", # builtin-variable-shadowing +] +# nbqa-ruff acts on converted .py so we cannot glob .ipynb :-/ +# https://github.com/nbQA-dev/nbQA/issues/823 +"notebooks/*" = [ + "D100", # Missing docstring in public module + "E402", # Module level import not at top of file + "FBT003", # Boolean positional value in function call + "T201", # 
`print` found" + "B018", # Found useless expression. Either assign it to a variable or remove it +] +[lint.pycodestyle] +max-doc-length = 180 diff --git a/tests/test_erddapy.py b/tests/test_erddapy.py index 8bc973b..a577015 100644 --- a/tests/test_erddapy.py +++ b/tests/test_erddapy.py @@ -74,7 +74,9 @@ def test__quote_string_constraints(): assert isinstance(kw["cdm_data_type"], str) assert kw["min_time"].startswith('"') and kw["min_time"].endswith('"') - assert kw["cdm_data_type"].startswith('"') and kw["cdm_data_type"].endswith('"') + assert kw["cdm_data_type"].startswith('"') and kw[ + "cdm_data_type" + ].endswith('"') for _k, v in kw.items(): if isinstance(v, str): diff --git a/tests/test_servers.py b/tests/test_servers.py index d088168..b13d8a5 100644 --- a/tests/test_servers.py +++ b/tests/test_servers.py @@ -17,4 +17,6 @@ def test_servers(): """ for server in servers.values(): # Should raise HTTPError if broken, otherwise returns the URL. - assert check_url_response(server.url, follow_redirects=True) == server.url + assert ( + check_url_response(server.url, follow_redirects=True) == server.url + ) diff --git a/tests/test_to_objects.py b/tests/test_to_objects.py index aca2f51..f233ad5 100644 --- a/tests/test_to_objects.py +++ b/tests/test_to_objects.py @@ -185,7 +185,10 @@ def test_to_iris_tabledap(dataset_tabledap): assert isinstance(cubes, iris.cube.CubeList) assert isinstance(cubes.extract_cube("Profile ID"), iris.cube.Cube) - assert isinstance(cubes.extract_cube("sea_water_temperature"), iris.cube.Cube) + assert isinstance( + cubes.extract_cube("sea_water_temperature"), + iris.cube.Cube, + ) @pytest.mark.web diff --git a/tests/test_url_builder.py b/tests/test_url_builder.py index d4dcccf..6211408 100644 --- a/tests/test_url_builder.py +++ b/tests/test_url_builder.py @@ -143,7 +143,9 @@ def test_download_url_unconstrained(e): variables = ["station", "z"] url = e.get_download_url(dataset_id=dataset_id, variables=variables) assert url == check_url_response(url, follow_redirects=True) - assert url.startswith(f"{e.server}/{e.protocol}/{dataset_id}.{e.response}?") + assert url.startswith( + f"{e.server}/{e.protocol}/{dataset_id}.{e.response}?", + ) assert sorted(url.split("?")[1].split(",")) == sorted(variables) @@ -249,7 +251,10 @@ def test_get_var_by_attr(e): ) == [] ) - assert e.get_var_by_attr(dataset_id="org_cormp_cap2", standard_name="time") == [ + assert e.get_var_by_attr( + dataset_id="org_cormp_cap2", + standard_name="time", + ) == [ "time", ] @@ -260,7 +265,10 @@ def test_download_url_distinct(e): """Check download URL results with and without the distinct option.""" dataset_id = "org_cormp_cap2" variables = ["station", "z"] - no_distinct_url = e.get_download_url(dataset_id=dataset_id, variables=variables) + no_distinct_url = e.get_download_url( + dataset_id=dataset_id, + variables=variables, + ) with_distinct_url = e.get_download_url( dataset_id=dataset_id, variables=variables, @@ -268,7 +276,10 @@ def test_download_url_distinct(e): ) assert not no_distinct_url.endswith("&distinct()") assert with_distinct_url.endswith("&distinct()") - assert no_distinct_url == check_url_response(no_distinct_url, follow_redirects=True) + assert no_distinct_url == check_url_response( + no_distinct_url, + follow_redirects=True, + ) assert with_distinct_url == check_url_response( with_distinct_url, follow_redirects=True,