diff --git a/README.md b/README.md index 19fdacb..2a94a63 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,48 @@ datetime.datetime(2024, 3, 27, 9, 32, 15)], 'Time Stamp Solution Went Unstable': 'Not Applicable'} ``` +## CLI +The `rashdf` command-line interface allows export directly to a variety of formats, enabled +by GeoPandas. +``` +$ rashdf <subcommand> <hdf-file> [<output-path>] [<options>] +``` + +CLI help: +``` +$ rashdf --help +``` + +Print the output formats supported by Fiona: +``` +$ rashdf --fiona-drivers +``` + +Help for a specific subcommand: +``` +$ rashdf mesh_cell_polygons --help +``` + +Example: export mesh cell faces to an ESRI Shapefile +``` +$ rashdf mesh_cell_faces BigRiver.g01.hdf big-river-mesh-cell-faces.shp +``` + +Example: export mesh cell points to GeoParquet +``` +$ rashdf mesh_cell_points LittleCreek.g01.hdf --parquet little-creek-mesh-cell-points.parquet +``` + +Example: export breaklines to OGC GeoPackage and reproject to a different CRS +``` +$ rashdf breaklines Whitemarsh.p01.hdf whitemarsh-breaklines.gpkg --to-crs EPSG:4326 +``` + +Example: write structures GeoJSON to `stdout`: +``` +$ rashdf structures Potomac.p01.hdf +``` + ## Documentation Coming soon. 
diff --git a/pyproject.toml b/pyproject.toml index bb5f583..fcb9a7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,8 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] -version = "0.1.1" -dependencies = ["h5py", "geopandas"] +version = "0.2.0" +dependencies = ["h5py", "geopandas", "pyarrow"] [project.optional-dependencies] dev = ["pre-commit", "ruff", "pytest"] @@ -21,6 +21,9 @@ dev = ["pre-commit", "ruff", "pytest"] [project.urls] repository = "https://github.com/fema-ffrd/rashdf" +[project.scripts] +rashdf = "cli:main" + [tool.pytest.ini_options] pythonpath = "src" testpaths = "tests" diff --git a/src/cli.py b/src/cli.py new file mode 100644 index 0000000..0ce239a --- /dev/null +++ b/src/cli.py @@ -0,0 +1,155 @@ +from rashdf import RasGeomHdf +from rashdf.utils import df_datetimes_to_str + +import fiona +from geopandas import GeoDataFrame + +import argparse +from ast import literal_eval +from pathlib import Path +import sys +from typing import List, Optional +import warnings + + +COMMANDS = [ + "mesh_areas", + "mesh_cell_points", + "mesh_cell_polygons", + "mesh_cell_faces", + "refinement_regions", + "bc_lines", + "breaklines", + "structures", +] + + +def docstring_to_help(docstring: Optional[str]) -> str: + """Extract the first line of a docstring to use as help text for the rashdf CLI. + + Note that this function replaces 'Return' with 'Export' in the help text. + + Parameters + ---------- + docstring : Optional[str] + The docstring to extract the first line from. + + Returns + ------- + str + The first line of the docstring with 'Return' replaced by 'Export'. + If the docstring is None, an empty string is returned. + """ + if docstring is None: + return "" + help_text = docstring.split("\n")[0] + help_text = help_text.replace("Return", "Export") + return help_text + + +def fiona_supported_drivers() -> List[str]: + """Return a list of drivers supported by Fiona for writing output files. 
+ + Returns + ------- + list + A list of drivers supported by Fiona for writing output files. + """ + drivers = [d for d, s in fiona.supported_drivers.items() if "w" in s] + return drivers + + +def parse_args(args: str) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Extract data from HEC-RAS HDF files.") + parser.add_argument( + "--fiona-drivers", + action="store_true", + help="List the drivers supported by Fiona for writing output files.", + ) + subparsers = parser.add_subparsers(help="Sub-command help") + for command in COMMANDS: + f = getattr(RasGeomHdf, command) + subparser = subparsers.add_parser( + command, description=docstring_to_help(f.__doc__) + ) + subparser.set_defaults(func=command) + subparser.add_argument("hdf_file", type=str, help="Path to HEC-RAS HDF file.") + subparser.add_argument( + "output_file", type=str, help="Path to output file.", nargs="?" + ) + subparser.add_argument( + "--to-crs", type=str, help='Output CRS. (e.g., "EPSG:4326")' + ) + output_group = subparser.add_mutually_exclusive_group() + output_group.add_argument( + "--parquet", action="store_true", help="Output as Parquet." + ) + output_group.add_argument( + "--feather", action="store_true", help="Output as Feather." + ) + subparser.add_argument( + "--kwargs", + type=str, + help=( + "Keyword arguments as a Python dictionary literal" + " passed to the corresponding GeoPandas output method." 
+ ), + ) + args = parser.parse_args(args) + return args + + +def export(args: argparse.Namespace) -> Optional[str]: + if args.fiona_drivers: + for driver in fiona_supported_drivers(): + print(driver) + return + if "://" in args.hdf_file: + geom_hdf = RasGeomHdf.open_uri(args.hdf_file) + else: + geom_hdf = RasGeomHdf(args.hdf_file) + func = getattr(geom_hdf, args.func) + gdf: GeoDataFrame = func() + kwargs = literal_eval(args.kwargs) if args.kwargs else {} + if args.to_crs: + gdf = gdf.to_crs(args.to_crs) + if not args.output_file: + # convert any datetime columns to strings + gdf = df_datetimes_to_str(gdf) + with warnings.catch_warnings(): + # Squash warnings about converting the CRS to OGC URN format. + # Likely to come up since USACE's Albers projection is a custom CRS. + # A warning written to stdout might cause issues with downstream processing. + warnings.filterwarnings( + "ignore", + ( + "GeoDataFrame's CRS is not representable in URN OGC format." + " Resulting JSON will contain no CRS information." + ), + ) + result = gdf.to_json(**kwargs) + print(result) + return result + elif args.parquet: + gdf.to_parquet(args.output_file, **kwargs) + return + elif args.feather: + gdf.to_feather(args.output_file, **kwargs) + return + output_file_path = Path(args.output_file) + output_file_ext = output_file_path.suffix + if output_file_ext not in [".gpkg"]: + # unless the user specifies a format that supports datetime, + # convert any datetime columns to string + # TODO: besides Geopackage, which of the standard Fiona formats allow datetime? 
+ gdf = df_datetimes_to_str(gdf) + gdf.to_file(args.output_file, **kwargs) + + +def main(): + args = parse_args(sys.argv[1:]) + export(args) + + +if __name__ == "__main__": + main() diff --git a/src/rashdf/geom.py b/src/rashdf/geom.py index 6e04944..d4d0968 100644 --- a/src/rashdf/geom.py +++ b/src/rashdf/geom.py @@ -86,7 +86,7 @@ def mesh_areas(self) -> GeoDataFrame: ) def mesh_cell_polygons(self) -> GeoDataFrame: - """Return the 2D flow mesh cell polygons. + """Return 2D flow mesh cell polygons. Returns ------- @@ -140,7 +140,7 @@ def mesh_cell_polygons(self) -> GeoDataFrame: return GeoDataFrame(cell_dict, geometry="geometry", crs=self.projection()) def mesh_cell_points(self) -> GeoDataFrame: - """Return the 2D flow mesh cell points. + """Return 2D flow mesh cell points. Returns ------- @@ -166,7 +166,7 @@ def mesh_cell_points(self) -> GeoDataFrame: return GeoDataFrame(pnt_dict, geometry="geometry", crs=self.projection()) def mesh_cell_faces(self) -> GeoDataFrame: - """Return the 2D flow mesh cell faces. + """Return 2D flow mesh cell faces. Returns ------- @@ -246,7 +246,7 @@ def get_geom_2d_flow_area_attrs(self): return d2_flow_area_attrs def bc_lines(self) -> GeoDataFrame: - """Return the 2D mesh area boundary condition lines. + """Return 2D mesh area boundary condition lines. Returns ------- @@ -295,7 +295,7 @@ def bc_lines(self) -> GeoDataFrame: ) def breaklines(self) -> GeoDataFrame: - """Return the 2D mesh area breaklines. + """Return 2D mesh area breaklines. Returns ------- @@ -337,7 +337,7 @@ def breaklines(self) -> GeoDataFrame: ) def refinement_regions(self) -> GeoDataFrame: - """Return the 2D mesh area refinement regions. + """Return 2D mesh area refinement regions. 
Returns ------- diff --git a/src/rashdf/utils.py b/src/rashdf/utils.py index 4fca703..ac36d31 100644 --- a/src/rashdf/utils.py +++ b/src/rashdf/utils.py @@ -1,9 +1,10 @@ -import numpy as np import h5py -from typing import Any, List, Tuple, Union, Optional +import numpy as np +import pandas as pd from datetime import datetime, timedelta import re +from typing import Any, List, Tuple, Union, Optional def parse_ras_datetime(datetime_str: str) -> datetime: @@ -221,3 +222,24 @@ def get_first_hdf_group(parent_group: h5py.Group) -> Optional[h5py.Group]: if isinstance(item, h5py.Group): return item return None + + +def df_datetimes_to_str(df: pd.DataFrame) -> pd.DataFrame: + """Convert any datetime64 columns in a DataFrame to strings. + + Parameters + ---------- + df : DataFrame + The DataFrame to convert. + + Returns + ------- + DataFrame + The DataFrame with any datetime64 columns converted to strings. + """ + df_result = df.copy() + for col in df.select_dtypes(include=["datetime64"]).columns: + df_result[col] = df[col].apply( + lambda x: pd.Timestamp(x).isoformat() if pd.notnull(x) else None + ) + return df_result diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..6132a68 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,108 @@ +from src.cli import parse_args, export, docstring_to_help, fiona_supported_drivers + +import geopandas as gpd +from pyproj import CRS + +import json +from pathlib import Path + +TEST_DATA = Path("./tests/data") +MUNCIE_G05 = TEST_DATA / "ras/Muncie.g05.hdf" + + +def test_docstring_to_help(): + docstring = """This is a test docstring. + This is not part of the help message. + """ + assert docstring_to_help(docstring) == "This is a test docstring." + + docstring = """Return the something or other. + Blah blah blah.""" + assert docstring_to_help(docstring) == "Export the something or other." 
+ + docstring = None + assert docstring_to_help(docstring) == "" + + +def test_fiona_supported_drivers(): + drivers = fiona_supported_drivers() + assert "ESRI Shapefile" in drivers + assert "GeoJSON" in drivers + assert "GPKG" in drivers + + +def test_parse_args(): + args = parse_args(["structures", "test.hdf"]) + assert args.func == "structures" + assert args.hdf_file == "test.hdf" + assert args.output_file is None + assert args.to_crs is None + assert not args.parquet + assert not args.feather + assert args.kwargs is None + + args = parse_args(["mesh_areas", "test.hdf", "test.json"]) + assert args.func == "mesh_areas" + assert args.hdf_file == "test.hdf" + assert args.output_file == "test.json" + assert args.to_crs is None + assert not args.parquet + assert not args.feather + assert args.kwargs is None + + args = parse_args( + [ + "mesh_areas", + "test.hdf", + "test.json", + "--to-crs", + "EPSG:4326", + "--parquet", + "--kwargs", + '{"compression": "gzip"}', + ] + ) + assert args.func == "mesh_areas" + assert args.hdf_file == "test.hdf" + assert args.output_file == "test.json" + assert args.to_crs == "EPSG:4326" + assert args.parquet + assert not args.feather + assert args.kwargs == '{"compression": "gzip"}' + + args = parse_args(["--fiona-drivers"]) + assert args.fiona_drivers + + +def test_export(tmp_path: Path): + args = parse_args(["structures", str(MUNCIE_G05)]) + exported = json.loads(export(args)) + gdf = gpd.GeoDataFrame.from_features(exported) + assert len(gdf) == 3 + assert gdf["Last Edited"].to_list() == [ + "2024-04-15T15:21:34", + "2024-04-15T15:21:48", + "2024-04-15T15:26:15", + ] + + test_json_path = tmp_path / "test.json" + args = parse_args(["mesh_areas", str(MUNCIE_G05), str(test_json_path)]) + export(args) + gdf = gpd.read_file(test_json_path) + assert len(gdf) == 2 + + test_parquet_path = tmp_path / "test.parquet" + args = parse_args( + [ + "mesh_cell_points", + str(MUNCIE_G05), + str(test_parquet_path), + "--parquet", + "--to-crs", + 
"EPSG:4326", + ] + ) + export(args) + gdf = gpd.read_parquet(test_parquet_path) + assert len(gdf) == 5790 + assert gdf.crs == CRS.from_epsg(4326) diff --git a/tests/test_utils.py b/tests/test_utils.py index d42171f..a80558d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,7 @@ from src.rashdf import utils import numpy as np +import pandas as pd import pytest from datetime import datetime, timedelta @@ -21,3 +22,23 @@ def test_convert_ras_hdf_value(): assert utils.convert_ras_hdf_value(b"01:23:45") == timedelta( hours=1, minutes=23, seconds=45 ) + + +def test_df_datetimes_to_str(): + df = pd.DataFrame( + { + "datetime": [ + datetime(2024, 3, 15, 16, 39, 1), + datetime(2024, 3, 16, 16, 39, 1), + ], + "asdf": [ + 0.123, + 0.456, + ], + } + ) + assert df["datetime"].dtype.name == "datetime64[ns]" + df = utils.df_datetimes_to_str(df) + assert df["datetime"].dtype.name == "object" + assert df["datetime"].tolist() == ["2024-03-15T16:39:01", "2024-03-16T16:39:01"] + assert df["asdf"].tolist() == [0.123, 0.456]