Skip to content

Commit

Permalink
Refactor model handling, support all models
Browse files Browse the repository at this point in the history
  • Loading branch information
robbibt committed Nov 12, 2024
1 parent 56beb4c commit f9f970b
Show file tree
Hide file tree
Showing 4 changed files with 465 additions and 136 deletions.
114 changes: 61 additions & 53 deletions eo_tides/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pyTMD
from tqdm import tqdm

from .utils import DatetimeLike, _set_directory, _standardise_time, idw, list_models
from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw, list_models


def _ensemble_model(
Expand Down Expand Up @@ -276,6 +276,7 @@ def _model_tides(
)

# TODO: Return constituents
print(model, amp.shape)
# print(amp.shape, ph.shape, c)
# print(pd.DataFrame({"amplitude": amp}))

Expand Down Expand Up @@ -511,7 +512,7 @@ def model_tides(
"""
# Turn inputs into arrays for consistent handling
models_requested = list(np.atleast_1d(model))
# models_requested = list(np.atleast_1d(model))
x = np.atleast_1d(x)
y = np.atleast_1d(y)
time = _standardise_time(time)
Expand Down Expand Up @@ -540,58 +541,65 @@ def model_tides(
# provided, try global environment variable.
directory = _set_directory(directory)

# Get full list of supported models from pyTMD database;
# add ensemble option to list of models
available_models, valid_models = list_models(
directory, show_available=False, show_supported=False, raise_error=True
# Standardise model list, handling "all" and "ensemble" functionality
models_to_process, models_requested, ensemble_models = _standardise_models(
model=model,
directory=directory,
ensemble_models=ensemble_models,
)
# TODO: This is hacky, find a better way. Perhaps a kwarg that
# turns ensemble functionality on, and checks that supplied
# models match models expected for ensemble?
available_models = available_models + ["ensemble"]
valid_models = valid_models + ["ensemble"]

# Error if any models are not supported
if not all(m in valid_models for m in models_requested):
error_text = (
f"One or more of the requested models are not valid:\n"
f"{models_requested}\n\n"
"The following models are supported:\n"
f"{valid_models}"
)
raise ValueError(error_text)

# Error if any models are not available in `directory`
if not all(m in available_models for m in models_requested):
error_text = (
f"One or more of the requested models are valid, but not available in `{directory}`:\n"
f"{models_requested}\n\n"
f"The following models are available in `{directory}`:\n"
f"{available_models}"
)
raise ValueError(error_text)

# If ensemble modelling is requested, use a custom list of models
# for subsequent processing
if "ensemble" in models_requested:
print("Running ensemble tide modelling")
models_to_process = (
ensemble_models
if ensemble_models is not None
else [
"FES2014",
"TPXO9-atlas-v5",
"EOT20",
"HAMTIDE11",
"GOT4.10",
"FES2012",
"TPXO8-atlas-v1",
]
)

# Otherwise, models to process are the same as those requested
else:
models_to_process = models_requested
# # Get full list of supported models from pyTMD database;
# # add ensemble option to list of models
# available_models, valid_models = list_models(
# directory, show_available=False, show_supported=False, raise_error=True
# )
# # TODO: This is hacky, find a better way. Perhaps a kwarg that
# # turns ensemble functionality on, and checks that supplied
# # models match models expected for ensemble?
# available_models = available_models + ["ensemble"]
# valid_models = valid_models + ["ensemble"]

# # Error if any models are not supported
# if not all(m in valid_models for m in models_requested):
# error_text = (
# f"One or more of the requested models are not valid:\n"
# f"{models_requested}\n\n"
# "The following models are supported:\n"
# f"{valid_models}"
# )
# raise ValueError(error_text)

# # Error if any models are not available in `directory`
# if not all(m in available_models for m in models_requested):
# error_text = (
# f"One or more of the requested models are valid, but not available in `{directory}`:\n"
# f"{models_requested}\n\n"
# f"The following models are available in `{directory}`:\n"
# f"{available_models}"
# )
# raise ValueError(error_text)

# # If ensemble modelling is requested, use a custom list of models
# # for subsequent processing
# if "ensemble" in models_requested:
# print("Running ensemble tide modelling")
# models_to_process = (
# ensemble_models
# if ensemble_models is not None
# else [
# "FES2014",
# "TPXO9-atlas-v5",
# "EOT20",
# "HAMTIDE11",
# "GOT4.10",
# "FES2012",
# "TPXO8-atlas-v1",
# ]
# )

# # Otherwise, models to process are the same as those requested
# else:
# models_to_process = models_requested

# Update tide modelling func to add default keyword arguments that
# are used for every iteration during parallel processing
Expand Down Expand Up @@ -685,7 +693,7 @@ def model_tides(

# Optionally compute ensemble model and add to dataframe
if "ensemble" in models_requested:
ensemble_df = _ensemble_model(tide_df, crs, models_to_process, **ensemble_kwargs)
ensemble_df = _ensemble_model(tide_df, crs, ensemble_models, **ensemble_kwargs)

# Update requested models with any custom ensemble models, then
# filter the dataframe to keep only models originally requested
Expand Down
95 changes: 94 additions & 1 deletion eo_tides/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,92 @@ def _standardise_time(
return np.atleast_1d(time)


def _standardise_models(
    model: str | list[str],
    directory: str,
    ensemble_models: list[str] | None = None,
) -> tuple[list[str], list[str], list[str] | None]:
    """
    Standardise requested tide model names for `model_tides`.

    Takes an input model name or list of names, and returns a list
    of models to process, requested models, and ensemble models.

    Handles two special values passed to `model`: "all", which
    will model tides for all models available in `directory`, and
    "ensemble", which will model tides for all models in a list
    of custom ensemble models.

    Parameters
    ----------
    model : str or list of str
        One or more model names, optionally including the special
        values "all" and/or "ensemble".
    directory : str
        Tide model directory, passed through to `list_models` to
        determine which models are available.
    ensemble_models : list of str, optional
        Models used to build the ensemble when "ensemble" is
        requested. If None, a built-in default list is used.

    Returns
    -------
    models_to_process : list of str
        Models that need to be modelled. When "ensemble" is
        requested this is the sorted, de-duplicated union of the
        ensemble models and any other requested models; otherwise
        it is the same list as `models_requested`.
    models_requested : list of str
        Requested models with "all" expanded to the available
        models ("ensemble" is kept as an entry if requested).
    ensemble_models : list of str or None
        Ensemble models actually used if "ensemble" was requested;
        otherwise the input value unchanged (possibly None).

    Raises
    ------
    ValueError
        If a requested model is not supported, is not available in
        `directory`, or if any ensemble model is not available in
        `directory`.
    """

    # Turn input into a list of plain Python strings. `np.atleast_1d`
    # produces `np.str_` items, which would otherwise render as
    # `np.str_('...')` in the error messages below on NumPy >= 2
    models_requested = [str(m) for m in np.atleast_1d(model)]

    # Get lists of available and supported models
    available_models, valid_models = list_models(
        directory, show_available=False, show_supported=False, raise_error=True
    )
    custom_options = ["ensemble", "all"]

    # Build the accepted name lists once instead of once per element
    valid_options = valid_models + custom_options
    available_options = available_models + custom_options

    # Error if any models are not supported
    if not all(m in valid_options for m in models_requested):
        error_text = (
            f"One or more of the requested models are not valid:\n"
            f"{models_requested}\n\n"
            "The following models are supported:\n"
            f"{valid_models}"
        )
        raise ValueError(error_text)

    # Error if any models are not available in `directory`
    if not all(m in available_options for m in models_requested):
        error_text = (
            f"One or more of the requested models are valid, but not available in `{directory}`:\n"
            f"{models_requested}\n\n"
            f"The following models are available in `{directory}`:\n"
            f"{available_models}"
        )
        raise ValueError(error_text)

    # If "all" models are requested, expand to every available model.
    # Explicitly named models are already covered by `available_models`
    # (validated above), so skip them to avoid processing a model twice
    if "all" in models_requested:
        extras = [m for m in models_requested if m != "all" and m not in available_models]
        models_requested = available_models + extras

    # Without "ensemble", models to process are the same as those requested
    if "ensemble" not in models_requested:
        return models_requested, models_requested, ensemble_models

    # If "ensemble" modelling is requested, use custom list of ensemble
    # models, falling back to the default set if none were supplied
    print("Running ensemble tide modelling")
    if ensemble_models is None:
        ensemble_models = [
            "FES2014",
            "TPXO9-atlas-v5",
            "EOT20",
            "HAMTIDE11",
            "GOT4.10",
            "FES2012",
            "TPXO8-atlas-v1",
        ]
    else:
        # Accept any sequence (e.g. a tuple) without mutating the caller's object
        ensemble_models = list(ensemble_models)

    # Error if any ensemble models are not available in `directory`
    if not all(m in available_models for m in ensemble_models):
        error_text = (
            f"One or more of the requested ensemble models are not available in `{directory}`:\n"
            f"{ensemble_models}\n\n"
            f"The following models are available in `{directory}`:\n"
            f"{available_models}"
        )
        raise ValueError(error_text)

    # Sorted set of all ensemble models plus any other requested models
    other_models = (m for m in models_requested if m != "ensemble")
    models_to_process = sorted({*ensemble_models, *other_models})

    return models_to_process, models_requested, ensemble_models


def _clip_model_file(
nc: xr.Dataset,
bbox: BoundingBox,
Expand Down Expand Up @@ -393,7 +479,14 @@ def list_models(
expected_paths = {}
for m in supported_models:
model_file = model_database[m]["model_file"]
model_file = model_file[0] if isinstance(model_file, list) else model_file

# Handle GOT5.6 differently to ensure we test for presence of GOT5.6 constituents
if m in ("GOT5.6", "GOT5.6_extrapolated"):
model_file = [file for file in model_file if "GOT5.6" in file][0]
else:
model_file = model_file[0] if isinstance(model_file, list) else model_file

# Add path to dict
expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)

# Define column widths
Expand Down
45 changes: 44 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,50 @@
import pandas as pd
import pytest

from eo_tides.utils import _standardise_time, clip_models, idw, list_models
from eo_tides.utils import _standardise_models, _standardise_time, clip_models, idw, list_models


@pytest.mark.parametrize(
    "model, ensemble_models, exp_process, exp_request, exp_ensemble",
    [
        # Case 1, 2: Specific model in str and list format
        pytest.param("EOT20", None, ["EOT20"], ["EOT20"], None),
        pytest.param(["EOT20"], None, ["EOT20"], ["EOT20"], None),
        # Case 3, 4: Using "all" to request all available models
        pytest.param(
            "all",
            None,
            ["EOT20", "GOT5.5", "HAMTIDE11"],
            ["EOT20", "GOT5.5", "HAMTIDE11"],
            None,
        ),
        pytest.param(
            ["all"],
            None,
            ["EOT20", "GOT5.5", "HAMTIDE11"],
            ["EOT20", "GOT5.5", "HAMTIDE11"],
            None,
        ),
        # Case 5, 6: Using "ensemble" to model tides for specific set of ensemble models
        pytest.param(
            "ensemble",
            ["EOT20", "HAMTIDE11"],
            ["EOT20", "HAMTIDE11"],
            ["ensemble"],
            ["EOT20", "HAMTIDE11"],
        ),
        pytest.param(
            ["ensemble"],
            ["EOT20", "HAMTIDE11"],
            ["EOT20", "HAMTIDE11"],
            ["ensemble"],
            ["EOT20", "HAMTIDE11"],
        ),
        # Case 7: Modelling tides using ensemble set and an additional model
        pytest.param(
            ["ensemble", "GOT5.5"],
            ["EOT20", "HAMTIDE11"],
            ["EOT20", "GOT5.5", "HAMTIDE11"],
            ["ensemble", "GOT5.5"],
            ["EOT20", "HAMTIDE11"],
        ),
        # Case 8: Modelling tides for all available models, AND ensemble set
        pytest.param(
            ["all", "ensemble"],
            ["EOT20", "HAMTIDE11"],
            ["EOT20", "GOT5.5", "HAMTIDE11"],
            ["EOT20", "GOT5.5", "HAMTIDE11", "ensemble"],
            ["EOT20", "HAMTIDE11"],
        ),
    ],
)
def test_standardise_models(model, ensemble_models, exp_process, exp_request, exp_ensemble):
    """
    Check that `_standardise_models` returns the expected models to
    process, requested models and ensemble models for plain model
    names and for the special "all" and "ensemble" values.
    """
    # Resolve the model lists against the test tide model directory
    processed, requested, ensemble_used = _standardise_models(
        model=model,
        directory="tests/data/tide_models",
        ensemble_models=ensemble_models,
    )

    # Processed and requested lists must match exactly, including order
    assert processed == exp_process
    assert requested == exp_request

    # Ensemble models are compared order-insensitively; an empty or
    # missing list on either side is treated as None
    observed_ensemble = sorted(ensemble_used) if ensemble_used else None
    expected_ensemble = sorted(exp_ensemble) if exp_ensemble else None
    assert observed_ensemble == expected_ensemble


def test_clip_models():
Expand Down
Loading

0 comments on commit f9f970b

Please sign in to comment.