From 8b88d9286358a1ab6b9882cc8e904953ea2fc954 Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Thu, 16 May 2024 16:32:05 -0400 Subject: [PATCH 1/7] CLI --- pyproject.toml | 7 ++- src/cli.py | 127 +++++++++++++++++++++++++++++++++++++++++++++ src/rashdf/geom.py | 14 ++--- tests/test_cli.py | 87 +++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+), 9 deletions(-) create mode 100644 src/cli.py create mode 100644 tests/test_cli.py diff --git a/pyproject.toml b/pyproject.toml index bb5f583..fcb9a7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,8 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] -version = "0.1.1" -dependencies = ["h5py", "geopandas"] +version = "0.2.0" +dependencies = ["h5py", "geopandas", "pyarrow"] [project.optional-dependencies] dev = ["pre-commit", "ruff", "pytest"] @@ -21,6 +21,9 @@ dev = ["pre-commit", "ruff", "pytest"] [project.urls] repository = "https://github.com/fema-ffrd/rashdf" +[project.scripts] +rashdf = "cli:main" + [tool.pytest.ini_options] pythonpath = "src" testpaths = "tests" diff --git a/src/cli.py b/src/cli.py new file mode 100644 index 0000000..b0ea3b5 --- /dev/null +++ b/src/cli.py @@ -0,0 +1,127 @@ +from rashdf import RasGeomHdf + +import fiona +from geopandas import GeoDataFrame + +import argparse +from ast import literal_eval +import sys +from typing import List + + +COMMANDS = [ + "mesh_areas", + "mesh_cell_points", + "mesh_cell_polygons", + "mesh_cell_faces", + "refinement_regions", + "bc_lines", + "breaklines", + "structures", +] + + +def docstring_to_help(docstring: str) -> str: + """Extract the first line of a docstring to use as help text for the rashdf CLI. + + Note that this function replaces 'Return' with 'Export' in the help text. + + Parameters + ---------- + docstring : str + The docstring to extract the first line from. + + Returns + ------- + str + The first line of the docstring with 'Return' replaced by 'Export'. 
+ """ + help_text = docstring.split("\n")[0] + help_text = help_text.replace("Return", "Export") + return help_text + + +def fiona_supported_drivers() -> List[str]: + """Return a list of drivers supported by Fiona for writing output files. + + Returns + ------- + list + A list of drivers supported by Fiona for writing output files. + """ + drivers = [d for d, s in fiona.supported_drivers.items() if "w" in s] + return drivers + + +def parse_args(args: str) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Extract data from HEC-RAS HDF files.") + parser.add_argument( + "--fiona-drivers", + action="store_true", + help="List the drivers supported by Fiona for writing output files.", + ) + subparsers = parser.add_subparsers(help="Sub-command help") + for command in COMMANDS: + f = getattr(RasGeomHdf, command) + subparser = subparsers.add_parser( + command, description=docstring_to_help(f.__doc__) + ) + subparser.set_defaults(func=command) + subparser.add_argument("hdf_file", type=str, help="Path to HEC-RAS HDF file.") + subparser.add_argument("output_file", type=str, help="Path to output file.") + subparser.add_argument("--to-crs", type=str, help="Output CRS.") + output_group = subparser.add_mutually_exclusive_group() + output_group.add_argument( + "--parquet", action="store_true", help="Output as Parquet." + ) + output_group.add_argument( + "--feather", action="store_true", help="Output as Feather." + ) + output_group.add_argument( + "--json", action="store_true", help="Output as GeoJSON." + ) + subparser.add_argument( + "--kwargs", + type=str, + help=( + "Keyword arguments as a Python dictionary literal" + " passed to the corresponding GeoPandas output method." 
+ ), + ) + args = parser.parse_args(args) + return args + + +def export(args: argparse.Namespace): + if args.fiona_drivers: + for driver in fiona_supported_drivers(): + print(driver) + return + if "://" in args.hdf_file: + geom_hdf = RasGeomHdf.open_uri(args.hdf_file) + else: + geom_hdf = RasGeomHdf(args.hdf_file) + func = getattr(geom_hdf, args.func) + gdf: GeoDataFrame = func() + kwargs = literal_eval(args.kwargs) if args.kwargs else {} + if args.to_crs: + gdf = gdf.to_crs(args.to_crs) + if args.json: + gdf.to_json(args.output_file, **kwargs) + return + elif args.parquet: + gdf.to_parquet(args.output_file, **kwargs) + return + elif args.feather: + gdf.to_feather(args.output_file, **kwargs) + return + gdf.to_file(args.output_file, **kwargs) + + +def main(): + args = parse_args(sys.argv[1:]) + export(args) + + +if __name__ == "__main__": + main() diff --git a/src/rashdf/geom.py b/src/rashdf/geom.py index 8defbc9..27e38a1 100644 --- a/src/rashdf/geom.py +++ b/src/rashdf/geom.py @@ -85,7 +85,7 @@ def mesh_areas(self) -> GeoDataFrame: ) def mesh_cell_polygons(self) -> GeoDataFrame: - """Return the 2D flow mesh cell polygons. + """Return 2D flow mesh cell polygons. Returns ------- @@ -139,7 +139,7 @@ def mesh_cell_polygons(self) -> GeoDataFrame: return GeoDataFrame(cell_dict, geometry="geometry", crs=self.projection()) def mesh_cell_points(self) -> GeoDataFrame: - """Return the 2D flow mesh cell points. + """Return 2D flow mesh cell points. Returns ------- @@ -165,7 +165,7 @@ def mesh_cell_points(self) -> GeoDataFrame: return GeoDataFrame(pnt_dict, geometry="geometry", crs=self.projection()) def mesh_cell_faces(self) -> GeoDataFrame: - """Return the 2D flow mesh cell faces. + """Return 2D flow mesh cell faces. Returns ------- @@ -245,7 +245,7 @@ def get_geom_2d_flow_area_attrs(self): return d2_flow_area_attrs def bc_lines(self) -> GeoDataFrame: - """Return the 2D mesh area boundary condition lines. + """Return 2D mesh area boundary condition lines. 
Returns ------- @@ -294,7 +294,7 @@ def bc_lines(self) -> GeoDataFrame: ) def breaklines(self) -> GeoDataFrame: - """Return the 2D mesh area breaklines. + """Return 2D mesh area breaklines. Returns ------- @@ -336,7 +336,7 @@ def breaklines(self) -> GeoDataFrame: ) def refinement_regions(self) -> GeoDataFrame: - """Return the 2D mesh area refinement regions. + """Return 2D mesh area refinement regions. Returns ------- @@ -370,7 +370,7 @@ def refinement_regions(self) -> GeoDataFrame: ) def structures(self) -> GeoDataFrame: - """Return the model structures. + """Return model structures. Returns ------- diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..630a167 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,87 @@ +from src.cli import parse_args, export, docstring_to_help, fiona_supported_drivers + +import geopandas as gpd +from pyproj import CRS + +from pathlib import Path + +TEST_DATA = Path("./tests/data") +MUNCIE_G05 = TEST_DATA / "ras/Muncie.g05.hdf" + + +def test_docstring_to_help(): + docstring = """This is a test docstring. + This is not part of the help message. + """ + assert docstring_to_help(docstring) == "This is a test docstring." + + docstring = """Return the something or other. + Blah blah blah.""" + assert docstring_to_help(docstring) == "Export the something or other." 
+ + +def test_fiona_supported_drivers(): + drivers = fiona_supported_drivers() + assert "ESRI Shapefile" in drivers + assert "GeoJSON" in drivers + assert "GPKG" in drivers + + +def test_parse_args(): + args = parse_args(["mesh_areas", "test.hdf", "test.json"]) + assert args.func == "mesh_areas" + assert args.hdf_file == "test.hdf" + assert args.output_file == "test.json" + assert args.to_crs is None + assert not args.parquet + assert not args.feather + assert not args.json + assert args.kwargs is None + + args = parse_args( + [ + "mesh_areas", + "test.hdf", + "test.json", + "--to-crs", + "EPSG:4326", + "--parquet", + "--kwargs", + '{"compression": "gzip"}', + ] + ) + assert args.func == "mesh_areas" + assert args.hdf_file == "test.hdf" + assert args.output_file == "test.json" + assert args.to_crs == "EPSG:4326" + assert args.parquet + assert not args.feather + assert not args.json + assert args.kwargs == '{"compression": "gzip"}' + + args = parse_args(["--fiona-drivers"]) + assert args.fiona_drivers + + +def test_export(tmp_path: Path): + test_json_path = tmp_path / "test.json" + args = parse_args(["mesh_areas", str(MUNCIE_G05), str(test_json_path)]) + export(args) + gdf = gpd.read_file(test_json_path) + assert len(gdf) == 2 + + test_parquet_path = tmp_path / "test.parquet" + args = parse_args( + [ + "mesh_cell_points", + str(MUNCIE_G05), + str(test_parquet_path), + "--parquet", + "--to-crs", + "EPSG:4326", + ] + ) + export(args) + gdf = gpd.read_parquet(test_parquet_path) + assert len(gdf) == 5790 + assert gdf.crs == CRS.from_epsg(4326) From d3c99d374090b67cc0dac3d44e9c4b6c0ab171e6 Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Fri, 17 May 2024 10:58:55 -0400 Subject: [PATCH 2/7] tweak help text --- src/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cli.py b/src/cli.py index b0ea3b5..33152f8 100644 --- a/src/cli.py +++ b/src/cli.py @@ -69,7 +69,9 @@ def parse_args(args: str) -> argparse.Namespace: 
subparser.set_defaults(func=command) subparser.add_argument("hdf_file", type=str, help="Path to HEC-RAS HDF file.") subparser.add_argument("output_file", type=str, help="Path to output file.") - subparser.add_argument("--to-crs", type=str, help="Output CRS.") + subparser.add_argument( + "--to-crs", type=str, help='Output CRS. (e.g., "EPSG:4326")' + ) output_group = subparser.add_mutually_exclusive_group() output_group.add_argument( "--parquet", action="store_true", help="Output as Parquet." From 29aca3b99e2a6a08c52096efe15bdb57178f1fee Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Tue, 21 May 2024 12:11:22 -0400 Subject: [PATCH 3/7] address case of null docstring --- src/cli.py | 9 ++++++--- tests/test_cli.py | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/cli.py b/src/cli.py index 33152f8..d2332d3 100644 --- a/src/cli.py +++ b/src/cli.py @@ -6,7 +6,7 @@ import argparse from ast import literal_eval import sys -from typing import List +from typing import List, Optional COMMANDS = [ @@ -21,21 +21,24 @@ ] -def docstring_to_help(docstring: str) -> str: +def docstring_to_help(docstring: Optional[str]) -> str: """Extract the first line of a docstring to use as help text for the rashdf CLI. Note that this function replaces 'Return' with 'Export' in the help text. Parameters ---------- - docstring : str + docstring : Optional[str] The docstring to extract the first line from. Returns ------- str The first line of the docstring with 'Return' replaced by 'Export'. + If the docstring is None, an empty string is returned. """ + if docstring is None: + return "" help_text = docstring.split("\n")[0] help_text = help_text.replace("Return", "Export") return help_text diff --git a/tests/test_cli.py b/tests/test_cli.py index 630a167..204eb96 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -19,6 +19,9 @@ def test_docstring_to_help(): Blah blah blah.""" assert docstring_to_help(docstring) == "Export the something or other." 
+ docstring = None + assert docstring_to_help(docstring) == "" + def test_fiona_supported_drivers(): drivers = fiona_supported_drivers() From 1a7c88f5122317d4a03b02d2ad51dac28b86df24 Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Tue, 21 May 2024 13:44:14 -0400 Subject: [PATCH 4/7] write geojson to stdout --- src/cli.py | 34 ++++++++++++++++++++++++++-------- tests/test_cli.py | 17 +++++++++++++++-- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/cli.py b/src/cli.py index d2332d3..e359aa4 100644 --- a/src/cli.py +++ b/src/cli.py @@ -2,11 +2,13 @@ import fiona from geopandas import GeoDataFrame +import pandas as pd import argparse from ast import literal_eval import sys from typing import List, Optional +import warnings COMMANDS = [ @@ -71,7 +73,9 @@ def parse_args(args: str) -> argparse.Namespace: ) subparser.set_defaults(func=command) subparser.add_argument("hdf_file", type=str, help="Path to HEC-RAS HDF file.") - subparser.add_argument("output_file", type=str, help="Path to output file.") + subparser.add_argument( + "output_file", type=str, help="Path to output file.", nargs="?" + ) subparser.add_argument( "--to-crs", type=str, help='Output CRS. (e.g., "EPSG:4326")' ) @@ -82,9 +86,6 @@ def parse_args(args: str) -> argparse.Namespace: output_group.add_argument( "--feather", action="store_true", help="Output as Feather." ) - output_group.add_argument( - "--json", action="store_true", help="Output as GeoJSON." 
- ) subparser.add_argument( "--kwargs", type=str, @@ -97,7 +98,7 @@ def parse_args(args: str) -> argparse.Namespace: return args -def export(args: argparse.Namespace): +def export(args: argparse.Namespace) -> Optional[str]: if args.fiona_drivers: for driver in fiona_supported_drivers(): print(driver) @@ -111,9 +112,26 @@ def export(args: argparse.Namespace): kwargs = literal_eval(args.kwargs) if args.kwargs else {} if args.to_crs: gdf = gdf.to_crs(args.to_crs) - if args.json: - gdf.to_json(args.output_file, **kwargs) - return + if not args.output_file: + # convert any datetime64 columns to ISO strings + for col in gdf.select_dtypes(include=["datetime64"]).columns: + gdf[col] = gdf[col].apply( + lambda x: pd.Timestamp(x).isoformat() if pd.notnull(x) else None + ) + with warnings.catch_warnings(): + # Squash warnings about converting the CRS to OGC URN format. + # Likely to come up since USACE's Albers projection is a custom CRS. + # A warning written to stdout might cause issues with downstream processing. + warnings.filterwarnings( + "ignore", + ( + "GeoDataFrame's CRS is not representable in URN OGC format." + " Resulting JSON will contain no CRS information." 
+ ), + ) + result = gdf.to_json(**kwargs) + print(result) + return result elif args.parquet: gdf.to_parquet(args.output_file, **kwargs) return diff --git a/tests/test_cli.py b/tests/test_cli.py index 204eb96..208c6e6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,6 +3,7 @@ import geopandas as gpd from pyproj import CRS +import json from pathlib import Path TEST_DATA = Path("./tests/data") @@ -31,6 +32,15 @@ def test_fiona_supported_drivers(): def test_parse_args(): + args = parse_args(["structures", "test.hdf"]) + assert args.func == "structures" + assert args.hdf_file == "test.hdf" + assert args.output_file is None + assert args.to_crs is None + assert not args.parquet + assert not args.feather + assert args.kwargs is None + args = parse_args(["mesh_areas", "test.hdf", "test.json"]) assert args.func == "mesh_areas" assert args.hdf_file == "test.hdf" @@ -38,7 +48,6 @@ def test_parse_args(): assert args.to_crs is None assert not args.parquet assert not args.feather - assert not args.json assert args.kwargs is None args = parse_args( @@ -59,7 +68,6 @@ def test_parse_args(): assert args.to_crs == "EPSG:4326" assert args.parquet assert not args.feather - assert not args.json assert args.kwargs == '{"compression": "gzip"}' args = parse_args(["--fiona-drivers"]) @@ -67,6 +75,11 @@ def test_parse_args(): def test_export(tmp_path: Path): + args = parse_args(["structures", str(MUNCIE_G05)]) + exported = json.loads(export(args)) + gdf = gpd.GeoDataFrame.from_features(exported) + assert len(gdf) == 3 + test_json_path = tmp_path / "test.json" args = parse_args(["mesh_areas", str(MUNCIE_G05), str(test_json_path)]) export(args) From 7a356fdad4e60d07fdd58447dc0a6aa3342c8e17 Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Tue, 21 May 2024 13:44:34 -0400 Subject: [PATCH 5/7] basic CLI instructions in README --- README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/README.md b/README.md index 19fdacb..a577d17 100644 --- 
a/README.md +++ b/README.md @@ -55,6 +55,43 @@ datetime.datetime(2024, 3, 27, 9, 32, 15)], 'Time Stamp Solution Went Unstable': 'Not Applicable'} ``` +## CLI +The `rashdf` command-line interface allows export directly to a variety of formats, enabled +by GeoPandas. +``` +$ rashdf <subcommand> <hdf_file> [<output_file>] [<options>] +``` + +CLI help: +``` +$ rashdf --help +``` + +Print the output formats supported by Fiona: +``` +$ rashdf --fiona-drivers +``` + +Help for a specific subcommand: +``` +$ rashdf mesh_cell_polygons --help +``` + +Example: export mesh cell faces to an ESRI Shapefile +``` +$ rashdf mesh_cell_faces BigRiver.g01.hdf big-river-mesh-cell-faces.shp +``` + +Example: export mesh cell points to GeoParquet +``` +$ rashdf mesh_cell_points LittleCreek.g01.hdf --parquet little-creek-mesh-cell-points.parquet +``` + +Example: write structures GeoJSON to `stdout`: +``` +$ rashdf structures Potomac.p01.hdf +``` + ## Documentation Coming soon. From a5fa6cef1248a1463653e29d51126287810f477c Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Tue, 21 May 2024 13:54:44 -0400 Subject: [PATCH 6/7] include --to-crs usage example in README --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index a577d17..2a94a63 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,11 @@ Example: export mesh cell points to GeoParquet $ rashdf mesh_cell_points LittleCreek.g01.hdf --parquet little-creek-mesh-cell-points.parquet ``` +Example: export breaklines to OGC GeoPackage and reproject to a different CRS +``` +$ rashdf breaklines Whitemarsh.p01.hdf whitemarsh-breaklines.gpkg --to-crs EPSG:4326 +``` + Example: write structures GeoJSON to `stdout`: ``` $ rashdf structures Potomac.p01.hdf From c3ba9581cda090155a2a8f770fcfb76ac4a212fa Mon Sep 17 00:00:00 2001 From: Thomas Williams Date: Wed, 22 May 2024 09:44:21 -0400 Subject: [PATCH 7/7] convert datetime fields to string for most export formats --- src/cli.py | 17 +++++++++++------ src/rashdf/utils.py | 26 ++++++++++++++++++++++++-- 
tests/test_cli.py | 5 +++++ tests/test_utils.py | 21 +++++++++++++++++++++ 4 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/cli.py b/src/cli.py index e359aa4..0ce239a 100644 --- a/src/cli.py +++ b/src/cli.py @@ -1,11 +1,12 @@ from rashdf import RasGeomHdf +from rashdf.utils import df_datetimes_to_str import fiona from geopandas import GeoDataFrame -import pandas as pd import argparse from ast import literal_eval +from pathlib import Path import sys from typing import List, Optional import warnings @@ -113,11 +114,8 @@ def export(args: argparse.Namespace) -> Optional[str]: if args.to_crs: gdf = gdf.to_crs(args.to_crs) if not args.output_file: - # convert any datetime64 columns to ISO strings - for col in gdf.select_dtypes(include=["datetime64"]).columns: - gdf[col] = gdf[col].apply( - lambda x: pd.Timestamp(x).isoformat() if pd.notnull(x) else None - ) + # convert any datetime columns to strings + gdf = df_datetimes_to_str(gdf) with warnings.catch_warnings(): # Squash warnings about converting the CRS to OGC URN format. # Likely to come up since USACE's Albers projection is a custom CRS. @@ -138,6 +136,13 @@ def export(args: argparse.Namespace) -> Optional[str]: elif args.feather: gdf.to_feather(args.output_file, **kwargs) return + output_file_path = Path(args.output_file) + output_file_ext = output_file_path.suffix + if output_file_ext not in [".gpkg"]: + # unless the user specifies a format that supports datetime, + # convert any datetime columns to string + # TODO: besides Geopackage, which of the standard Fiona formats allow datetime? 
+ gdf = df_datetimes_to_str(gdf) gdf.to_file(args.output_file, **kwargs) diff --git a/src/rashdf/utils.py b/src/rashdf/utils.py index 4fca703..ac36d31 100644 --- a/src/rashdf/utils.py +++ b/src/rashdf/utils.py @@ -1,9 +1,10 @@ -import numpy as np import h5py -from typing import Any, List, Tuple, Union, Optional +import numpy as np +import pandas as pd from datetime import datetime, timedelta import re +from typing import Any, List, Tuple, Union, Optional def parse_ras_datetime(datetime_str: str) -> datetime: @@ -221,3 +222,24 @@ def get_first_hdf_group(parent_group: h5py.Group) -> Optional[h5py.Group]: if isinstance(item, h5py.Group): return item return None + + +def df_datetimes_to_str(df: pd.DataFrame) -> pd.DataFrame: + """Convert any datetime64 columns in a DataFrame to strings. + + Parameters + ---------- + df : DataFrame + The DataFrame to convert. + + Returns + ------- + DataFrame + The DataFrame with any datetime64 columns converted to strings. + """ + df_result = df.copy() + for col in df.select_dtypes(include=["datetime64"]).columns: + df_result[col] = df[col].apply( + lambda x: pd.Timestamp(x).isoformat() if pd.notnull(x) else None + ) + return df_result diff --git a/tests/test_cli.py b/tests/test_cli.py index 208c6e6..6132a68 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -79,6 +79,11 @@ def test_export(tmp_path: Path): exported = json.loads(export(args)) gdf = gpd.GeoDataFrame.from_features(exported) assert len(gdf) == 3 + assert gdf["Last Edited"].to_list() == [ + "2024-04-15T15:21:34", + "2024-04-15T15:21:48", + "2024-04-15T15:26:15", + ] test_json_path = tmp_path / "test.json" args = parse_args(["mesh_areas", str(MUNCIE_G05), str(test_json_path)]) diff --git a/tests/test_utils.py b/tests/test_utils.py index d42171f..a80558d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,7 @@ from src.rashdf import utils import numpy as np +import pandas as pd import pytest from datetime import datetime, timedelta @@ -21,3 +22,23 @@ 
def test_convert_ras_hdf_value(): assert utils.convert_ras_hdf_value(b"01:23:45") == timedelta( hours=1, minutes=23, seconds=45 ) + + +def test_df_datetimes_to_str(): + df = pd.DataFrame( + { + "datetime": [ + datetime(2024, 3, 15, 16, 39, 1), + datetime(2024, 3, 16, 16, 39, 1), + ], + "asdf": [ + 0.123, + 0.456, + ], + } + ) + assert df["datetime"].dtype.name == "datetime64[ns]" + df = utils.df_datetimes_to_str(df) + assert df["datetime"].dtype.name == "object" + assert df["datetime"].tolist() == ["2024-03-15T16:39:01", "2024-03-16T16:39:01"] + assert df["asdf"].tolist() == [0.123, 0.456]