Refactor model handling, support all models

GeoscienceAustralia · Nov 12, 2024 · f9f970b · f9f970b
1 parent 56beb4c
commit f9f970b
Show file tree

Hide file tree

Showing 4 changed files with 465 additions and 136 deletions.
diff --git a/eo_tides/model.py b/eo_tides/model.py
@@ -21,7 +21,7 @@
 import pyTMD
 from tqdm import tqdm
 
-from .utils import DatetimeLike, _set_directory, _standardise_time, idw, list_models
+from .utils import DatetimeLike, _set_directory, _standardise_models, _standardise_time, idw, list_models
 
 
 def _ensemble_model(
@@ -276,6 +276,7 @@ def _model_tides(
         )
 
         # TODO: Return constituents
+        print(model, amp.shape)
         # print(amp.shape, ph.shape, c)
         # print(pd.DataFrame({"amplitude": amp}))
 
@@ -511,7 +512,7 @@ def model_tides(
 
     """
     # Turn inputs into arrays for consistent handling
-    models_requested = list(np.atleast_1d(model))
+    # models_requested = list(np.atleast_1d(model))
     x = np.atleast_1d(x)
     y = np.atleast_1d(y)
     time = _standardise_time(time)
@@ -540,58 +541,65 @@ def model_tides(
     # provided, try global environment variable.
     directory = _set_directory(directory)
 
-    # Get full list of supported models from pyTMD database;
-    # add ensemble option to list of models
-    available_models, valid_models = list_models(
-        directory, show_available=False, show_supported=False, raise_error=True
+    # Standardise model list, handling "all" and "ensemble" functionality
+    models_to_process, models_requested, ensemble_models = _standardise_models(
+        model=model,
+        directory=directory,
+        ensemble_models=ensemble_models,
     )
-    # TODO: This is hacky, find a better way. Perhaps a kwarg that
-    # turns ensemble functionality on, and checks that supplied
-    # models match models expected for ensemble?
-    available_models = available_models + ["ensemble"]
-    valid_models = valid_models + ["ensemble"]
-
-    # Error if any models are not supported
-    if not all(m in valid_models for m in models_requested):
-        error_text = (
-            f"One or more of the requested models are not valid:\n"
-            f"{models_requested}\n\n"
-            "The following models are supported:\n"
-            f"{valid_models}"
-        )
-        raise ValueError(error_text)
-
-    # Error if any models are not available in `directory`
-    if not all(m in available_models for m in models_requested):
-        error_text = (
-            f"One or more of the requested models are valid, but not available in `{directory}`:\n"
-            f"{models_requested}\n\n"
-            f"The following models are available in `{directory}`:\n"
-            f"{available_models}"
-        )
-        raise ValueError(error_text)
 
-    # If ensemble modelling is requested, use a custom list of models
-    # for subsequent processing
-    if "ensemble" in models_requested:
-        print("Running ensemble tide modelling")
-        models_to_process = (
-            ensemble_models
-            if ensemble_models is not None
-            else [
-                "FES2014",
-                "TPXO9-atlas-v5",
-                "EOT20",
-                "HAMTIDE11",
-                "GOT4.10",
-                "FES2012",
-                "TPXO8-atlas-v1",
-            ]
-        )
-
-    # Otherwise, models to process are the same as those requested
-    else:
-        models_to_process = models_requested
+    # # Get full list of supported models from pyTMD database;
+    # # add ensemble option to list of models
+    # available_models, valid_models = list_models(
+    #     directory, show_available=False, show_supported=False, raise_error=True
+    # )
+    # # TODO: This is hacky, find a better way. Perhaps a kwarg that
+    # # turns ensemble functionality on, and checks that supplied
+    # # models match models expected for ensemble?
+    # available_models = available_models + ["ensemble"]
+    # valid_models = valid_models + ["ensemble"]
+
+    # # Error if any models are not supported
+    # if not all(m in valid_models for m in models_requested):
+    #     error_text = (
+    #         f"One or more of the requested models are not valid:\n"
+    #         f"{models_requested}\n\n"
+    #         "The following models are supported:\n"
+    #         f"{valid_models}"
+    #     )
+    #     raise ValueError(error_text)
+
+    # # Error if any models are not available in `directory`
+    # if not all(m in available_models for m in models_requested):
+    #     error_text = (
+    #         f"One or more of the requested models are valid, but not available in `{directory}`:\n"
+    #         f"{models_requested}\n\n"
+    #         f"The following models are available in `{directory}`:\n"
+    #         f"{available_models}"
+    #     )
+    #     raise ValueError(error_text)
+
+    # # If ensemble modelling is requested, use a custom list of models
+    # # for subsequent processing
+    # if "ensemble" in models_requested:
+    #     print("Running ensemble tide modelling")
+    #     models_to_process = (
+    #         ensemble_models
+    #         if ensemble_models is not None
+    #         else [
+    #             "FES2014",
+    #             "TPXO9-atlas-v5",
+    #             "EOT20",
+    #             "HAMTIDE11",
+    #             "GOT4.10",
+    #             "FES2012",
+    #             "TPXO8-atlas-v1",
+    #         ]
+    #     )
+
+    # # Otherwise, models to process are the same as those requested
+    # else:
+    #     models_to_process = models_requested
 
     # Update tide modelling func to add default keyword arguments that
     # are used for every iteration during parallel processing
@@ -685,7 +693,7 @@ def model_tides(
 
     # Optionally compute ensemble model and add to dataframe
     if "ensemble" in models_requested:
-        ensemble_df = _ensemble_model(tide_df, crs, models_to_process, **ensemble_kwargs)
+        ensemble_df = _ensemble_model(tide_df, crs, ensemble_models, **ensemble_kwargs)
 
         # Update requested models with any custom ensemble models, then
         # filter the dataframe to keep only models originally requested

diff --git a/eo_tides/utils.py b/eo_tides/utils.py
@@ -68,6 +68,92 @@ def _standardise_time(
     return np.atleast_1d(time)
 
 
+def _standardise_models(
+    model: str | list[str],
+    directory: str,
+    ensemble_models: list[str] | None = None,
+) -> tuple[list[str], list[str], list[str] | None]:
+    """
+    Take an input model name or list of names, and return a list
+    of models to process, requested models, and ensemble models,
+    as required by the `model_tides` function.
+
+    Handles two special values passed to `model`: "all", which
+    will model tides for all models available in `directory`, and
+    "ensemble", which will model tides for all models in a list
+    of custom ensemble models.
+
+    Parameters
+    ----------
+    model : str or list of str
+        A single model name or a list of model names. May include
+        the special values "all" and/or "ensemble".
+    directory : str
+        Tide modelling directory, passed to `list_models` to look up
+        which models are supported and which are available.
+    ensemble_models : list of str, optional
+        Models to combine when "ensemble" is requested. If None, a
+        built-in default list is used. Ignored (and returned
+        unchanged) when "ensemble" is not requested.
+
+    Returns
+    -------
+    models_to_process : list of str
+        Models that need to be modelled. When "ensemble" is
+        requested this is the sorted, de-duplicated union of the
+        ensemble models and any other requested models; otherwise
+        it is the same list as `models_requested`.
+    models_requested : list of str
+        The requested models with "all" expanded to the available
+        models (duplicates removed, order preserved). Still
+        contains "ensemble" if it was requested.
+    ensemble_models : list of str or None
+        The ensemble models used, or the unmodified input value
+        (possibly None) if "ensemble" was not requested.
+
+    Raises
+    ------
+    ValueError
+        If a requested model is not supported, if a requested model
+        is not available in `directory`, or if "ensemble" is
+        requested and any ensemble model is not available in
+        `directory`.
+    """
+
+    # Turn inputs into arrays for consistent handling
+    models_requested = list(np.atleast_1d(model))
+
+    # Get full list of supported models from pyTMD database
+    available_models, valid_models = list_models(
+        directory, show_available=False, show_supported=False, raise_error=True
+    )
+    custom_options = ["ensemble", "all"]
+
+    # Build the accepted-name lists once rather than once per requested model
+    accepted_valid = valid_models + custom_options
+    accepted_available = available_models + custom_options
+
+    # Error if any models are not supported
+    if not all(m in accepted_valid for m in models_requested):
+        error_text = (
+            f"One or more of the requested models are not valid:\n"
+            f"{models_requested}\n\n"
+            "The following models are supported:\n"
+            f"{valid_models}"
+        )
+        raise ValueError(error_text)
+
+    # Error if any models are not available in `directory`
+    if not all(m in accepted_available for m in models_requested):
+        error_text = (
+            f"One or more of the requested models are valid, but not available in `{directory}`:\n"
+            f"{models_requested}\n\n"
+            f"The following models are available in `{directory}`:\n"
+            f"{available_models}"
+        )
+        raise ValueError(error_text)
+
+    # If "all" models are requested, update requested list to include
+    # available models. `dict.fromkeys` drops duplicates while keeping
+    # order, so e.g. ["all", "EOT20"] does not list "EOT20" twice.
+    if "all" in models_requested:
+        other_requested = [m for m in models_requested if m != "all"]
+        models_requested = list(dict.fromkeys(available_models + other_requested))
+
+    # Without "ensemble", models to process are the same as those
+    # requested, and `ensemble_models` is passed through untouched
+    if "ensemble" not in models_requested:
+        return models_requested, models_requested, ensemble_models
+
+    # If "ensemble" modeling is requested, use custom list of ensemble models
+    print("Running ensemble tide modelling")
+    if ensemble_models is None:
+        ensemble_models = [
+            "FES2014",
+            "TPXO9-atlas-v5",
+            "EOT20",
+            "HAMTIDE11",
+            "GOT4.10",
+            "FES2012",
+            "TPXO8-atlas-v1",
+        ]
+
+    # Error if any ensemble models are not available in `directory`
+    if not all(m in available_models for m in ensemble_models):
+        error_text = (
+            f"One or more of the requested ensemble models are not available in `{directory}`:\n"
+            f"{ensemble_models}\n\n"
+            f"The following models are available in `{directory}`:\n"
+            f"{available_models}"
+        )
+        raise ValueError(error_text)
+
+    # Return set of all ensemble plus any other requested models; the set
+    # union accepts any iterable of names, not only lists
+    non_ensemble = (m for m in models_requested if m != "ensemble")
+    models_to_process = sorted({*ensemble_models, *non_ensemble})
+
+    return models_to_process, models_requested, ensemble_models
+
+
 def _clip_model_file(
     nc: xr.Dataset,
     bbox: BoundingBox,
@@ -393,7 +479,14 @@ def list_models(
     expected_paths = {}
     for m in supported_models:
         model_file = model_database[m]["model_file"]
-        model_file = model_file[0] if isinstance(model_file, list) else model_file
+
+        # Handle GOT5.6 differently to ensure we test for presence of GOT5.6 constituents
+        if m in ("GOT5.6", "GOT5.6_extrapolated"):
+            model_file = [file for file in model_file if "GOT5.6" in file][0]
+        else:
+            model_file = model_file[0] if isinstance(model_file, list) else model_file
+
+        # Add path to dict
         expected_paths[m] = str(directory / pathlib.Path(model_file).expanduser().parent)
 
     # Define column widths

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -6,7 +6,50 @@
 import pandas as pd
 import pytest
 
-from eo_tides.utils import _standardise_time, clip_models, idw, list_models
+from eo_tides.utils import _standardise_models, _standardise_time, clip_models, idw, list_models
+
+
+@pytest.mark.parametrize(
+    "model, ensemble_models, exp_process, exp_request, exp_ensemble",
+    [
+        # Case 1: a single named model passed as a string
+        ("EOT20", None, ["EOT20"], ["EOT20"], None),
+        # Case 2: the same single model passed as a list
+        (["EOT20"], None, ["EOT20"], ["EOT20"], None),
+        # Case 3: "all" as a string expands to every available model
+        (
+            "all",
+            None,
+            ["EOT20", "GOT5.5", "HAMTIDE11"],
+            ["EOT20", "GOT5.5", "HAMTIDE11"],
+            None,
+        ),
+        # Case 4: "all" as a list behaves the same way
+        (
+            ["all"],
+            None,
+            ["EOT20", "GOT5.5", "HAMTIDE11"],
+            ["EOT20", "GOT5.5", "HAMTIDE11"],
+            None,
+        ),
+        # Case 5: "ensemble" as a string with a custom ensemble set
+        (
+            "ensemble",
+            ["EOT20", "HAMTIDE11"],
+            ["EOT20", "HAMTIDE11"],
+            ["ensemble"],
+            ["EOT20", "HAMTIDE11"],
+        ),
+        # Case 6: "ensemble" as a list with a custom ensemble set
+        (
+            ["ensemble"],
+            ["EOT20", "HAMTIDE11"],
+            ["EOT20", "HAMTIDE11"],
+            ["ensemble"],
+            ["EOT20", "HAMTIDE11"],
+        ),
+        # Case 7: ensemble set plus one additional named model
+        (
+            ["ensemble", "GOT5.5"],
+            ["EOT20", "HAMTIDE11"],
+            ["EOT20", "GOT5.5", "HAMTIDE11"],
+            ["ensemble", "GOT5.5"],
+            ["EOT20", "HAMTIDE11"],
+        ),
+        # Case 8: every available model AND the ensemble set
+        (
+            ["all", "ensemble"],
+            ["EOT20", "HAMTIDE11"],
+            ["EOT20", "GOT5.5", "HAMTIDE11"],
+            ["EOT20", "GOT5.5", "HAMTIDE11", "ensemble"],
+            ["EOT20", "HAMTIDE11"],
+        ),
+    ],
+)
+def test_standardise_models(model, ensemble_models, exp_process, exp_request, exp_ensemble):
+    """Check the three lists returned by `_standardise_models` for each case."""
+
+    def _sorted_or_none(models):
+        # Compare ensemble lists order-insensitively; None/empty map to None
+        return sorted(models) if models else None
+
+    # Run against the tide model directory bundled with the tests
+    processed, requested, ensemble = _standardise_models(
+        model=model,
+        directory="tests/data/tide_models",
+        ensemble_models=ensemble_models,
+    )
+
+    assert processed == exp_process
+    assert requested == exp_request
+    assert _sorted_or_none(ensemble) == _sorted_or_none(exp_ensemble)
 
 
 def test_clip_models():