Skip to content

Commit

Permalink
Merge branch 'main' into update_datetime_parse_funcs
Browse files Browse the repository at this point in the history
  • Loading branch information
thwllms authored May 23, 2024
2 parents 374b608 + 883ef6a commit deb0fd1
Show file tree
Hide file tree
Showing 7 changed files with 360 additions and 9 deletions.
42 changes: 42 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,48 @@ datetime.datetime(2024, 3, 27, 9, 32, 15)],
'Time Stamp Solution Went Unstable': 'Not Applicable'}
```

## CLI
The `rashdf` command-line interface allows export directly to a variety of formats, enabled
by GeoPandas.
```
$ rashdf <sub-command> <hdf-file> [<output-path>] [<options>]
```

CLI help:
```
$ rashdf --help
```

Print the output formats supported by Fiona:
```
$ rashdf --fiona-drivers
```

Help for a specific subcommand:
```
$ rashdf mesh_cell_polygons --help
```

Example: export mesh cell faces to an ESRI Shapefile
```
$ rashdf mesh_cell_faces BigRiver.g01.hdf big-river-mesh-cell-faces.shp
```

Example: export mesh cell points to GeoParquet
```
$ rashdf mesh_cell_points LittleCreek.g01.hdf --parquet little-creek-mesh-cell-points.parquet
```

Example: export breaklines to OGC GeoPackage and reproject to a different CRS
```
$ rashdf breaklines Whitemarsh.p01.hdf whitemarsh-breaklines.gpkg --to-crs EPSG:4326
```

Example: write structures GeoJSON to `stdout`:
```
$ rashdf structures Potomac.p01.hdf
```

## Documentation
Coming soon.

Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
version = "0.2.1"
dependencies = ["h5py", "geopandas"]
dependencies = ["h5py", "geopandas", "pyarrow"]

[project.optional-dependencies]
dev = ["pre-commit", "ruff", "pytest"]

[project.urls]
repository = "https://github.com/fema-ffrd/rashdf"

[project.scripts]
rashdf = "cli:main"

[tool.pytest.ini_options]
pythonpath = "src"
testpaths = "tests"
Expand Down
155 changes: 155 additions & 0 deletions src/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from rashdf import RasGeomHdf
from rashdf.utils import df_datetimes_to_str

import fiona
from geopandas import GeoDataFrame

import argparse
from ast import literal_eval
from pathlib import Path
import sys
from typing import List, Optional
import warnings


# Names of the RasGeomHdf methods exposed as CLI sub-commands.
# parse_args() registers one sub-parser per entry (looking the attribute up on
# RasGeomHdf to build the sub-command description), and export() calls the
# attribute of the same name on the opened HDF object.
COMMANDS = [
    "mesh_areas",
    "mesh_cell_points",
    "mesh_cell_polygons",
    "mesh_cell_faces",
    "refinement_regions",
    "bc_lines",
    "breaklines",
    "structures",
]


def docstring_to_help(docstring: Optional[str]) -> str:
    """Extract the summary line of a docstring to use as help text for the rashdf CLI.

    The summary line is the first non-blank line of the docstring with
    surrounding whitespace removed, so docstrings whose text starts on the
    line after the opening quotes still produce help text. Every occurrence
    of 'Return' in that line is replaced with 'Export'.

    Parameters
    ----------
    docstring : Optional[str]
        The docstring to extract the summary line from.

    Returns
    -------
    str
        The summary line with 'Return' replaced by 'Export'.
        If the docstring is None or contains no text, an empty string
        is returned.
    """
    if not docstring:
        return ""
    for line in docstring.splitlines():
        summary = line.strip()
        if summary:
            # Only the first line that carries text is used as the help message.
            return summary.replace("Return", "Export")
    return ""


def fiona_supported_drivers() -> List[str]:
    """List the Fiona drivers that can be used for writing output files.

    Returns
    -------
    list
        Names of the entries in ``fiona.supported_drivers`` whose mode
        value contains ``"w"``.
    """
    writable = []
    for name, modes in fiona.supported_drivers.items():
        if "w" in modes:
            writable.append(name)
    return writable


def parse_args(args: List[str]) -> argparse.Namespace:
    """Parse command-line arguments for the rashdf CLI.

    One sub-command is registered for each name in ``COMMANDS``. The
    sub-command's description and its entry in the top-level ``--help``
    listing are both taken from the first line of the docstring of the
    ``RasGeomHdf`` attribute with the same name.

    Parameters
    ----------
    args : List[str]
        Command-line arguments, excluding the program name
        (e.g., ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        The parsed arguments. When a sub-command was given, ``func`` holds
        the sub-command name.

    Raises
    ------
    SystemExit
        Raised by argparse on invalid arguments, on ``--help``, or when
        neither a sub-command nor ``--fiona-drivers`` is given.
    """
    parser = argparse.ArgumentParser(description="Extract data from HEC-RAS HDF files.")
    parser.add_argument(
        "--fiona-drivers",
        action="store_true",
        help="List the drivers supported by Fiona for writing output files.",
    )
    subparsers = parser.add_subparsers(help="Sub-command help")
    for command in COMMANDS:
        summary = docstring_to_help(getattr(RasGeomHdf, command).__doc__)
        subparser = subparsers.add_parser(
            command,
            description=summary,
            # Also show the summary in the top-level --help listing. argparse
            # %-formats help strings, so literal percent signs are escaped.
            help=summary.replace("%", "%%"),
        )
        subparser.set_defaults(func=command)
        subparser.add_argument("hdf_file", type=str, help="Path to HEC-RAS HDF file.")
        subparser.add_argument(
            "output_file", type=str, help="Path to output file.", nargs="?"
        )
        subparser.add_argument(
            "--to-crs", type=str, help='Output CRS. (e.g., "EPSG:4326")'
        )
        output_group = subparser.add_mutually_exclusive_group()
        output_group.add_argument(
            "--parquet", action="store_true", help="Output as Parquet."
        )
        output_group.add_argument(
            "--feather", action="store_true", help="Output as Feather."
        )
        subparser.add_argument(
            "--kwargs",
            type=str,
            help=(
                "Keyword arguments as a Python dictionary literal"
                " passed to the corresponding GeoPandas output method."
            ),
        )
    parsed = parser.parse_args(args)
    # Sub-commands cannot simply be marked required because --fiona-drivers is
    # valid on its own; without this check a bare invocation would only fail
    # later with an AttributeError on the missing sub-command attributes.
    if not parsed.fiona_drivers and not hasattr(parsed, "func"):
        parser.error("a sub-command is required unless --fiona-drivers is given")
    return parsed


def export(args: argparse.Namespace) -> Optional[str]:
    """Act on parsed CLI arguments to export data from a HEC-RAS HDF file.

    If ``args.fiona_drivers`` is set, the writable Fiona drivers are printed
    and nothing else is done. Otherwise the HDF file is opened (via
    ``RasGeomHdf.open_uri`` when the path contains ``"://"``), the attribute
    named by ``args.func`` is called to obtain a GeoDataFrame, and the result
    is optionally reprojected and then written as follows:

    - no ``output_file``: GeoJSON is printed to stdout and returned;
    - ``--parquet`` / ``--feather``: written with ``to_parquet`` / ``to_feather``;
    - otherwise: written with ``to_file``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments as produced by ``parse_args``.

    Returns
    -------
    Optional[str]
        The GeoJSON string when output goes to stdout, otherwise None.

    Raises
    ------
    TypeError
        If ``--kwargs`` is a valid Python literal but not a dictionary.
    ValueError, SyntaxError
        If ``--kwargs`` is not a valid Python literal (raised by
        ``ast.literal_eval``).
    """
    if args.fiona_drivers:
        for driver in fiona_supported_drivers():
            print(driver)
        return None

    # Parse --kwargs before opening the HDF file so a malformed value fails
    # fast instead of after the data has already been read.
    kwargs = literal_eval(args.kwargs) if args.kwargs else {}
    if not isinstance(kwargs, dict):
        raise TypeError(
            "--kwargs must be a Python dictionary literal,"
            f" got {type(kwargs).__name__}"
        )

    # NOTE(review): geom_hdf is never explicitly closed here; presumably fine
    # for a short-lived CLI process -- confirm.
    if "://" in args.hdf_file:
        geom_hdf = RasGeomHdf.open_uri(args.hdf_file)
    else:
        geom_hdf = RasGeomHdf(args.hdf_file)
    gdf: GeoDataFrame = getattr(geom_hdf, args.func)()
    if args.to_crs:
        gdf = gdf.to_crs(args.to_crs)

    if not args.output_file:
        # convert any datetime columns to strings
        gdf = df_datetimes_to_str(gdf)
        with warnings.catch_warnings():
            # Squash warnings about converting the CRS to OGC URN format.
            # Likely to come up since USACE's Albers projection is a custom CRS.
            # Stray warning text alongside the JSON output might cause issues
            # with downstream processing.
            warnings.filterwarnings(
                "ignore",
                (
                    "GeoDataFrame's CRS is not representable in URN OGC format."
                    " Resulting JSON will contain no CRS information."
                ),
            )
            result = gdf.to_json(**kwargs)
        print(result)
        return result
    if args.parquet:
        gdf.to_parquet(args.output_file, **kwargs)
        return None
    if args.feather:
        gdf.to_feather(args.output_file, **kwargs)
        return None

    # Compare the extension case-insensitively so e.g. "out.GPKG" is also
    # recognized as a GeoPackage.
    output_file_ext = Path(args.output_file).suffix.lower()
    if output_file_ext not in [".gpkg"]:
        # unless the user specifies a format that supports datetime,
        # convert any datetime columns to string
        # TODO: besides Geopackage, which of the standard Fiona formats allow datetime?
        gdf = df_datetimes_to_str(gdf)
    gdf.to_file(args.output_file, **kwargs)
    return None


def main():
    """Entry point for the rashdf CLI: parse ``sys.argv`` and run the export."""
    cli_args = sys.argv[1:]
    export(parse_args(cli_args))


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
12 changes: 6 additions & 6 deletions src/rashdf/geom.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def mesh_areas(self) -> GeoDataFrame:
)

def mesh_cell_polygons(self) -> GeoDataFrame:
"""Return the 2D flow mesh cell polygons.
"""Return 2D flow mesh cell polygons.
Returns
-------
Expand Down Expand Up @@ -140,7 +140,7 @@ def mesh_cell_polygons(self) -> GeoDataFrame:
return GeoDataFrame(cell_dict, geometry="geometry", crs=self.projection())

def mesh_cell_points(self) -> GeoDataFrame:
"""Return the 2D flow mesh cell points.
"""Return 2D flow mesh cell points.
Returns
-------
Expand All @@ -166,7 +166,7 @@ def mesh_cell_points(self) -> GeoDataFrame:
return GeoDataFrame(pnt_dict, geometry="geometry", crs=self.projection())

def mesh_cell_faces(self) -> GeoDataFrame:
"""Return the 2D flow mesh cell faces.
"""Return 2D flow mesh cell faces.
Returns
-------
Expand Down Expand Up @@ -246,7 +246,7 @@ def get_geom_2d_flow_area_attrs(self):
return d2_flow_area_attrs

def bc_lines(self) -> GeoDataFrame:
"""Return the 2D mesh area boundary condition lines.
"""Return 2D mesh area boundary condition lines.
Returns
-------
Expand Down Expand Up @@ -295,7 +295,7 @@ def bc_lines(self) -> GeoDataFrame:
)

def breaklines(self) -> GeoDataFrame:
"""Return the 2D mesh area breaklines.
"""Return 2D mesh area breaklines.
Returns
-------
Expand Down Expand Up @@ -337,7 +337,7 @@ def breaklines(self) -> GeoDataFrame:
)

def refinement_regions(self) -> GeoDataFrame:
"""Return the 2D mesh area refinement regions.
"""Return 2D mesh area refinement regions.
Returns
-------
Expand Down
26 changes: 24 additions & 2 deletions src/rashdf/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import numpy as np
import h5py
from typing import Any, List, Tuple, Union, Optional
import numpy as np
import pandas as pd

from datetime import datetime, timedelta
import re
from typing import Any, List, Tuple, Union, Optional


def parse_ras_datetime(datetime_str: str) -> datetime:
Expand Down Expand Up @@ -240,3 +241,24 @@ def get_first_hdf_group(parent_group: h5py.Group) -> Optional[h5py.Group]:
if isinstance(item, h5py.Group):
return item
return None


def df_datetimes_to_str(df: pd.DataFrame) -> pd.DataFrame:
    """Convert any datetime columns in a DataFrame to ISO 8601 strings.

    Both timezone-naive (``datetime64``) and timezone-aware (``datetimetz``)
    columns are converted. Null values in those columns become None. The
    input DataFrame is not modified.

    Parameters
    ----------
    df : DataFrame
        The DataFrame to convert.

    Returns
    -------
    DataFrame
        A copy of the DataFrame with any datetime columns converted to strings.
    """

    def _to_iso(value):
        # Null timestamps (NaT) have no ISO representation; map them to None.
        return pd.Timestamp(value).isoformat() if pd.notnull(value) else None

    df_result = df.copy()
    # "datetime64" alone does not match timezone-aware columns, so
    # "datetimetz" is requested explicitly as well.
    datetime_cols = df.select_dtypes(include=["datetime64", "datetimetz"]).columns
    for col in datetime_cols:
        df_result[col] = df[col].apply(_to_iso)
    return df_result
Loading

0 comments on commit deb0fd1

Please sign in to comment.