diff --git a/doc/examples/example_cli_famos.ipynb b/doc/examples/example_cli_famos.ipynb index 5956c66..a1d32a1 100644 --- a/doc/examples/example_cli_famos.ipynb +++ b/doc/examples/example_cli_famos.ipynb @@ -33,6 +33,7 @@ "\n", "from example_cli_famos_helpers import (\n", " parse_summary_to_progress_list,\n", + " petab_select_problem_yaml, # noqa: F401\n", ")\n", "\n", "output_path = Path().resolve() / \"output_famos\"\n", @@ -141,8 +142,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/dilan/Documents/future_annex/model_selection/packages/petab_select/petab_select/candidate_space.py:376: RuntimeWarning: Model `model_subspace_1-0001011010010010` has been previously excluded from the candidate space so is skipped here.\n", - " warnings.warn(\n" + "petab_select/candidate_space.py:1137: RuntimeWarning: Model `model_subspace_1-0001011010010010` has been previously excluded from the candidate space so is skipped here.\n", + " return_value = self.inner_candidate_space.consider(model)\n" ] }, { @@ -173,7 +174,6 @@ ], "source": [ "%%bash -s \"$petab_select_problem_yaml\" \"$output_path_str\"\n", - "\n", "petab_select_problem_yaml=$1\n", "output_path_str=$2\n", "\n", diff --git a/doc/examples/example_cli_famos_calibration_tool.py b/doc/examples/example_cli_famos_calibration_tool.py index c78cabb..f5b58c2 100644 --- a/doc/examples/example_cli_famos_calibration_tool.py +++ b/doc/examples/example_cli_famos_calibration_tool.py @@ -7,14 +7,12 @@ models_yaml = sys.argv[1] calibrated_models_yaml = sys.argv[2] -models = petab_select.model.models_from_yaml_list(models_yaml) +models = petab_select.Models.from_yaml(models_yaml) predecessor_model_hashes = set() for model in models: calibrate(model=model) predecessor_model_hashes |= {model.predecessor_model_hash} -petab_select.model.models_to_yaml_list( - models=models, output_yaml=calibrated_models_yaml -) +models.to_yaml(output_yaml=calibrated_models_yaml) if len(predecessor_model_hashes) == 0: pass diff --git 
a/doc/examples/workflow_cli.ipynb b/doc/examples/workflow_cli.ipynb index 46c5a51..6f4cf83 100644 --- a/doc/examples/workflow_cli.ipynb +++ b/doc/examples/workflow_cli.ipynb @@ -177,7 +177,7 @@ "output_path_str=$1\n", "\n", "petab_select end_iteration \\\n", - "--state=output/state.dill \\\n", + "--state=$output_path_str/state.dill \\\n", "--calibrated-models=model_selection/calibrated_models_1.yaml \\\n", "--output-models=$output_path_str/models_1.yaml \\\n", "--output-metadata=$output_path_str/metadata.yaml \\\n", @@ -289,7 +289,7 @@ "petab_select get_best \\\n", "--problem model_selection/petab_select_problem.yaml \\\n", "--models model_selection/calibrated_models_1.yaml \\\n", - "--output output_cli/predecessor_model.yaml\n", + "--output $output_path_str/predecessor_model.yaml\n", "# create a copy of the original PEtab select problem and update its paths\n", "cp model_selection/petab_select_problem.yaml $output_path_str/custom_problem.yaml\n", "sed -i 's|- model_space.tsv|- ../model_selection/model_space.tsv|' $output_path_str/custom_problem.yaml\n", @@ -470,7 +470,7 @@ "id": "889dedc1", "metadata": {}, "source": [ - "As we are performing a forward search from `M1_4`, which has two parameters, then all models in this iteration with have 3+ parameters. This model space contains only one model with 3 or more estimated parameters. We finalize the iteration with its calibration results." + "As we are performing a forward search from `M1_4`, which has two parameters, then all models in this iteration will have 3+ parameters. This model space contains only one model with 3 or more estimated parameters. We finalize the iteration with its calibration results." ] }, { @@ -531,7 +531,7 @@ "metadata": {}, "source": [ "## Fourth iteration\n", - "As there are no models in the model space with 4+ parameters, subsequent forward searches will return no candidate models. This can be used by tools to detect when model selection terminates." 
+ "As there are no models in the model space with 4+ parameters, subsequent forward searches will return no candidate models. Tools can detect when to terminate by inspecting the metadata produced by `end_iteration`, as demonstrated at the end of this iteration." ] }, { @@ -600,8 +600,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "null\n", - "...\n", + "[]\n", "\n" ] } @@ -611,6 +610,43 @@ " print(f.read())" ] }, + { + "cell_type": "code", + "execution_count": 16, + "id": "02df7ed9-422d-4f28-9b01-8670be873933", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash -s \"$output_path_str\"\n", + "output_path_str=$1\n", + "\n", + "petab_select end_iteration \\\n", + "--state=$output_path_str/state.dill \\\n", + "--output-models=$output_path_str/models_4.yaml \\\n", + "--output-metadata=$output_path_str/metadata.yaml \\\n", + "--relative-paths" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "57e483fd-5ffa-48a4-8c2a-359f6ebd1422", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "terminate: true\n", + "\n" + ] + } + ], + "source": [ + "with open(\"output_cli/metadata.yaml\") as f:\n", + " print(f.read())" + ] + }, { "cell_type": "markdown", "id": "7b0b1123", @@ -622,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "d5b5087d", "metadata": {}, "outputs": [], @@ -643,7 +679,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "30721bfa", "metadata": {}, "outputs": [ @@ -716,7 +752,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "73d54111", "metadata": {}, "outputs": [], @@ -736,7 +772,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "c36564f1", "metadata": {}, "outputs": [ @@ -781,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "d5d03cd6", "metadata": {}, "outputs": [ diff --git 
a/doc/examples/workflow_python.ipynb b/doc/examples/workflow_python.ipynb index 2a20398..170c767 100644 --- a/doc/examples/workflow_python.ipynb +++ b/doc/examples/workflow_python.ipynb @@ -35,7 +35,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Information about the model selection problem.\n", + "Information about the model selection problem:\n", "YAML: model_selection/petab_select_problem.yaml\n", "Method: forward\n", "Criterion: Criterion.AIC\n", @@ -306,7 +306,7 @@ "Model ID: M1_2-000\n", "Criterion.AIC: 140\n", "\n", - "\u001B[1mBEST MODEL OF CURRENT ITERATION\u001B[0m\n", + "\u001b[1mBEST MODEL OF CURRENT ITERATION\u001b[0m\n", "Model subspace ID: M1_3\n", "PEtab YAML location: model_selection/petab_problem.yaml\n", "Custom model parameters: {'k1': 'estimate', 'k2': 0.1, 'k3': 0}\n", @@ -356,7 +356,7 @@ "Model ID: M1_5-000\n", "Criterion.AIC: -70\n", "\n", - "\u001B[1mBEST MODEL OF CURRENT ITERATION\u001B[0m\n", + "\u001b[1mBEST MODEL OF CURRENT ITERATION\u001b[0m\n", "Model subspace ID: M1_6\n", "PEtab YAML location: model_selection/petab_problem.yaml\n", "Custom model parameters: {'k1': 'estimate', 'k2': 'estimate', 'k3': 0}\n", @@ -399,7 +399,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[1mBEST MODEL OF CURRENT ITERATION\u001B[0m\n", + "\u001b[1mBEST MODEL OF CURRENT ITERATION\u001b[0m\n", "Model subspace ID: M1_7\n", "PEtab YAML location: model_selection/petab_problem.yaml\n", "Custom model parameters: {'k1': 'estimate', 'k2': 'estimate', 'k3': 'estimate'}\n", diff --git a/petab_select/__init__.py b/petab_select/__init__.py index 233d233..665c410 100644 --- a/petab_select/__init__.py +++ b/petab_select/__init__.py @@ -9,6 +9,7 @@ from .model import * from .model_space import * from .model_subspace import * +from .models import * from .problem import * from .ui import * diff --git a/petab_select/candidate_space.py b/petab_select/candidate_space.py index 865bebf..fad615e 100644 --- a/petab_select/candidate_space.py +++ 
b/petab_select/candidate_space.py @@ -27,6 +27,7 @@ ) from .handlers import TYPE_LIMIT, LimitHandler from .model import Model, ModelHash, default_compare +from .models import Models __all__ = [ "BackwardCandidateSpace", @@ -102,7 +103,7 @@ def __init__( limit: TYPE_LIMIT = np.inf, summary_tsv: TYPE_PATH = None, previous_predecessor_model: Model | None = None, - calibrated_models: dict[ModelHash, Model] = None, + calibrated_models: Models | None = None, ): """See class attributes for arguments.""" self.method = method @@ -125,15 +126,13 @@ def __init__( if self.previous_predecessor_model is None: self.previous_predecessor_model = self.predecessor_model - self.set_iteration_user_calibrated_models({}) + self.set_iteration_user_calibrated_models(Models()) self.criterion = criterion - self.calibrated_models = calibrated_models - if self.calibrated_models is None: - self.calibrated_models = {} - self.latest_iteration_calibrated_models = {} + self.calibrated_models = calibrated_models or Models() + self.latest_iteration_calibrated_models = Models() def set_iteration_user_calibrated_models( - self, user_calibrated_models: dict[str, Model] | None + self, user_calibrated_models: Models | None ) -> None: """Hide previously-calibrated models from the calibration tool. @@ -146,18 +145,17 @@ def set_iteration_user_calibrated_models( Args: user_calibrated_models: - The previously-calibrated models. Keys are model hashes, values - are models. + The previously-calibrated models. 
""" if not user_calibrated_models: - self.iteration_user_calibrated_models = {} + self.iteration_user_calibrated_models = Models() return - iteration_uncalibrated_models = [] - iteration_user_calibrated_models = {} + iteration_uncalibrated_models = Models() + iteration_user_calibrated_models = Models() for model in self.models: if ( - (user_model := user_calibrated_models.get(model.get_hash())) + (user_model := user_calibrated_models[model.get_hash()]) is not None ) and ( user_model.get_criterion( @@ -209,11 +207,11 @@ def get_iteration_calibrated_models( The full list of calibrated models. """ combined_calibrated_models = ( - self.iteration_user_calibrated_models | calibrated_models + self.iteration_user_calibrated_models + calibrated_models ) if reset: self.set_iteration_user_calibrated_models( - user_calibrated_models={} + user_calibrated_models=Models() ) return combined_calibrated_models @@ -418,7 +416,7 @@ def consider(self, model: Model | None) -> bool: def reset_accepted(self) -> None: """Reset the accepted models.""" - self.models = [] + self.models = Models() self.distances = [] def set_predecessor_model(self, predecessor_model: Model | str | None): @@ -452,6 +450,7 @@ def set_excluded_hashes( extend: Whether to replace or extend the current excluded hashes. """ + # FIXME refactor to use `Models` and rename `set_excluded_models`? if isinstance(hashes, Model | ModelHash): hashes = [hashes] excluded_hashes = set() @@ -642,7 +641,7 @@ def distances_in_estimated_parameters( def update_after_calibration( self, *args, - iteration_calibrated_models: dict[ModelHash, Model], + iteration_calibrated_models: Models, **kwargs, ): """Do work in the candidate space after calibration. @@ -654,7 +653,7 @@ def update_after_calibration( are here, to ensure candidate spaces can be switched easily and still receive sufficient arguments. 
""" - self.calibrated_models |= iteration_calibrated_models + self.calibrated_models += iteration_calibrated_models self.latest_iteration_calibrated_models = iteration_calibrated_models self.set_excluded_hashes( self.latest_iteration_calibrated_models, @@ -999,7 +998,7 @@ def __init__( else: self.most_distant_max_number = 1 - self.best_models = [] + self.best_models = Models() self.best_model_of_current_run = predecessor_model self.jumped_to_most_distant = False @@ -1030,7 +1029,7 @@ def read_arguments_from_yaml_dict(cls, yaml_dict) -> dict: def update_after_calibration( self, *args, - iteration_calibrated_models: dict[str, Model], + iteration_calibrated_models: Models, **kwargs, ) -> None: """See `CandidateSpace.update_after_calibration`.""" @@ -1045,7 +1044,7 @@ def update_after_calibration( # to False and continue to candidate generation if self.jumped_to_most_distant: self.jumped_to_most_distant = False - jumped_to_model = one(iteration_calibrated_models.values()) + jumped_to_model = one(iteration_calibrated_models) self.set_predecessor_model(jumped_to_model) self.previous_predecessor_model = jumped_to_model self.best_model_of_current_run = jumped_to_model @@ -1057,7 +1056,7 @@ def update_after_calibration( logging.info("Switching method") self.switch_method() self.switch_inner_candidate_space( - excluded_hashes=list(self.calibrated_models), + excluded_hashes=self.calibrated_models, ) logging.info( "Method switched to ", self.inner_candidate_space.method @@ -1067,14 +1066,14 @@ def update_after_calibration( def update_from_iteration_calibrated_models( self, - iteration_calibrated_models: dict[str, Model], + iteration_calibrated_models: Models, ) -> bool: """Update ``self.best_models`` with the latest ``iteration_calibrated_models`` and determine if there was a new best model. If so, return ``False``. ``True`` otherwise. 
""" go_into_switch_method = True - for model in iteration_calibrated_models.values(): + for model in iteration_calibrated_models: if ( self.best_model_of_current_run == VIRTUAL_INITIAL_MODEL or default_compare( @@ -1319,6 +1318,7 @@ def get_most_distant( most_distance = 0 most_distant_indices = [] + # FIXME for multiple PEtab problems? parameter_ids = self.best_models[0].petab_parameters for model in self.best_models: @@ -1334,7 +1334,7 @@ def get_most_distant( # initialize the least distance to the maximal possible value of it complement_least_distance = len(complement_parameters) # get the complement least distance - for calibrated_model in self.calibrated_models.values(): + for calibrated_model in self.calibrated_models: calibrated_model_estimated_parameters = np.array( [ p == ESTIMATE diff --git a/petab_select/cli.py b/petab_select/cli.py index f318205..37f8355 100644 --- a/petab_select/cli.py +++ b/petab_select/cli.py @@ -12,8 +12,9 @@ from . import ui from .candidate_space import CandidateSpace -from .constants import CANDIDATE_SPACE, MODELS, PETAB_YAML, TERMINATE -from .model import ModelHash, models_from_yaml_list, models_to_yaml_list +from .constants import CANDIDATE_SPACE, MODELS, PETAB_YAML, PROBLEM, TERMINATE +from .model import ModelHash +from .models import Models, models_to_yaml_list from .problem import Problem @@ -21,8 +22,8 @@ def read_state(filename: str) -> dict[str, Any]: with open(filename, "rb") as f: state = dill.load(f) - state["problem"] = dill.loads(state["problem"]) - state["candidate_space"] = dill.loads(state["candidate_space"]) + state[PROBLEM] = dill.loads(state[PROBLEM]) + state[CANDIDATE_SPACE] = dill.loads(state[CANDIDATE_SPACE]) return state @@ -40,8 +41,8 @@ def get_state( candidate_space: CandidateSpace, ) -> dict[str, Any]: state = { - "problem": dill.dumps(problem), - "candidate_space": dill.dumps(candidate_space), + PROBLEM: dill.dumps(problem), + CANDIDATE_SPACE: dill.dumps(candidate_space), } return state @@ -80,34 +81,6 
@@ def cli(): default=None, help="The method used to identify the candidate models. Defaults to the method in the problem YAML.", ) -# @click.option( -# '--previous-predecessor-model', -# '-P', -# 'previous_predecessor_model_yaml', -# type=str, -# default=None, -# help='(Optional) The predecessor model used in the previous iteration of model selection.', -# ) -# @click.option( -# '--calibrated-models', -# '-C', -# 'calibrated_models_yamls', -# type=str, -# multiple=True, -# default=None, -# help='(Optional) Models that have been calibrated.', -# ) -# @click.option( -# '--newly-calibrated-models', -# '-N', -# 'newly_calibrated_models_yamls', -# type=str, -# multiple=True, -# default=None, -# help=( -# '(Optional) Models that were calibrated in the most recent iteration.' -# ), -# ) @click.option( "--limit", "-l", @@ -157,10 +130,6 @@ def start_iteration( state_dill: str, uncalibrated_models_yaml: str, method: str = None, - # previous_predecessor_model_yaml: str = None, - # best: str = None, - # calibrated_models_yamls: List[str] = None, - # newly_calibrated_models_yamls: List[str] = None, limit: float = np.inf, limit_sent: float = np.inf, relative_paths: bool = False, @@ -194,11 +163,11 @@ def start_iteration( problem = state["problem"] candidate_space = state["candidate_space"] - excluded_models = [] + excluded_models = Models() # TODO seems like default is `()`, not `None`... 
if excluded_model_files is not None: - for model_yaml_list in excluded_model_files: - excluded_models.extend(models_from_yaml_list(model_yaml_list)) + for models_yaml in excluded_model_files: + excluded_models.extend(Models.from_yaml(models_yaml)) # TODO test excluded_model_hashes = [] @@ -214,49 +183,12 @@ def start_iteration( ModelHash.from_hash(hash_str) for hash_str in excluded_model_hashes ] - # previous_predecessor_model = candidate_space.predecessor_model - # if previous_predecessor_model_yaml is not None: - # previous_predecessor_model = Model.from_yaml( - # previous_predecessor_model_yaml - # ) - - # # FIXME write single methods to take all models from lists of lists of - # # models recursively - # calibrated_models = None - # if calibrated_models_yamls: - # calibrated_models = {} - # for calibrated_models_yaml in calibrated_models_yamls: - # calibrated_models.update( - # { - # model.get_hash(): model - # for model in models_from_yaml_list(calibrated_models_yaml) - # } - # ) - - # newly_calibrated_models = None - # if newly_calibrated_models_yamls: - # newly_calibrated_models = {} - # for newly_calibrated_models_yaml in newly_calibrated_models_yamls: - # newly_calibrated_models.update( - # { - # model.get_hash(): model - # for model in models_from_yaml_list( - # newly_calibrated_models_yaml - # ) - # } - # ) - ui.start_iteration( problem=problem, candidate_space=candidate_space, - # previous_predecessor_model=previous_predecessor_model, - # calibrated_models=calibrated_models, - # newly_calibrated_models=newly_calibrated_models, limit=limit, limit_sent=limit_sent, excluded_hashes=excluded_hashes, - # excluded_models=excluded_models, - # excluded_model_hashes=excluded_model_hashes, ) # Save state @@ -332,15 +264,10 @@ def end_iteration( problem = state["problem"] candidate_space = state["candidate_space"] - calibrated_models = {} + calibrated_models = Models() if calibrated_models_yamls: for calibrated_models_yaml in calibrated_models_yamls: - 
calibrated_models.update( - { - model.get_hash(): model - for model in models_from_yaml_list(calibrated_models_yaml) - } - ) + calibrated_models.extend(Models.from_yaml(calibrated_models_yaml)) # Finalize iteration results iteration_results = ui.end_iteration( @@ -409,9 +336,9 @@ def model_to_petab( Documentation for arguments can be viewed with `petab_select model_to_petab --help`. """ - models = [] + models = Models() for models_yaml in models_yamls: - models.extend(models_from_yaml_list(models_yaml)) + models.extend(Models.from_yaml(models_yaml)) model0 = None try: @@ -468,9 +395,9 @@ def models_to_petab( Documentation for arguments can be viewed with `petab_select models_to_petab --help`. """ - models = [] + models = Models() for models_yaml in models_yamls: - models.extend(models_from_yaml_list(models_yaml)) + models.extend(Models.from_yaml(models_yaml)) model_ids = pd.Series([model.model_id for model in models]) duplicates = "\n".join(set(model_ids[model_ids.duplicated()])) @@ -559,9 +486,9 @@ def get_best( problem = Problem.from_yaml(problem_yaml) - models = [] + models = Models() for models_yaml in models_yamls: - models.extend(models_from_yaml_list(models_yaml)) + models.extend(Models.from_yaml(models_yaml)) best_model = ui.get_best( problem=problem, diff --git a/petab_select/constants.py b/petab_select/constants.py index b56e8a4..9afc1cb 100644 --- a/petab_select/constants.py +++ b/petab_select/constants.py @@ -1,5 +1,7 @@ """Constants for the PEtab Select package.""" +from __future__ import annotations + import string import sys from enum import Enum @@ -84,6 +86,7 @@ VERSION = "version" MODEL_SPACE_FILES = "model_space_files" PROBLEM_ID = "problem_id" +PROBLEM = "problem" CANDIDATE_SPACE = "candidate_space" CANDIDATE_SPACE_ARGUMENTS = "candidate_space_arguments" diff --git a/petab_select/model.py b/petab_select/model.py index 6c7602f..fbb040d 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -45,8 +45,6 @@ __all__ = [ "Model", 
"default_compare", - "models_from_yaml_list", - "models_to_yaml_list", "ModelHash", ] @@ -56,7 +54,7 @@ class Model(PetabMixin): NB: some of these attribute names correspond to constants defined in the `constants.py` file, to facilitate loading models from/saving models to - disk (see the `saved_attributes` attribute). + disk (see the `Model.saved_attributes` class attribute). Attributes: converters_load: @@ -371,9 +369,8 @@ def from_yaml(model_yaml: TYPE_PATH) -> Model: raise raise ValueError( "The provided YAML file contains a list with greater than " - "one element. Use the `models_from_yaml_list` method or " - "provide a PEtab Select model YAML file with only one " - "model specified." + "one element. Use the `Models.from_yaml` or provide a " + "YAML file with only one model specified." ) return Model.from_dict(model_dict, base_path=Path(model_yaml).parent) @@ -656,10 +653,10 @@ def default_compare( model1: The new model. criterion: - The criterion by which models will be compared. + The criterion. criterion_threshold: The value by which the new model must improve on the original - model. Should be non-negative. + model. Should be non-negative, regardless of the criterion. Returns: ``True` if ``model1`` has a better criterion value than ``model0``, else @@ -704,97 +701,6 @@ def default_compare( raise NotImplementedError(f"Unknown criterion: {criterion}.") -def models_from_yaml_list( - model_list_yaml: TYPE_PATH, - petab_problem: petab.Problem = None, - allow_single_model: bool = True, -) -> list[Model]: - """Generate a model from a PEtab Select list of model YAML file. - - Args: - model_list_yaml: - The path to the PEtab Select list of model YAML file. - petab_problem: - See :meth:`Model.from_dict`. - allow_single_model: - Given a YAML file that contains a single model directly (not in - a 1-element list), if ``True`` then the single model will be read in, - else a ``ValueError`` will be raised. 
- - Returns: - A list of model instances, initialized with the provided - attributes. - """ - with open(str(model_list_yaml)) as f: - model_dict_list = yaml.safe_load(f) - if not model_dict_list: - return [] - - if not isinstance(model_dict_list, list): - if allow_single_model: - return [ - Model.from_dict( - model_dict_list, - base_path=Path(model_list_yaml).parent, - petab_problem=petab_problem, - ) - ] - raise ValueError("The YAML file does not contain a list of models.") - - return [ - Model.from_dict( - model_dict, - base_path=Path(model_list_yaml).parent, - petab_problem=petab_problem, - ) - for model_dict in model_dict_list - ] - - -def models_to_yaml_list( - models: list[Model | str] | dict[ModelHash, Model | str], - output_yaml: TYPE_PATH, - relative_paths: bool = True, -) -> None: - """Generate a YAML listing of models. - - Args: - models: - The models. - output_yaml: - The location where the YAML will be saved. - relative_paths: - Whether to rewrite the paths in each model (e.g. the path to the - model's PEtab problem) relative to the `output_yaml` location. - """ - if isinstance(models, dict): - models = list(models.values()) - - skipped_indices = [] - for index, model in enumerate(models): - if isinstance(model, Model): - continue - if model == VIRTUAL_INITIAL_MODEL: - continue - warnings.warn(f"Unexpected model, skipping: {model}.", stacklevel=2) - skipped_indices.append(index) - models = [ - model - for index, model in enumerate(models) - if index not in skipped_indices - ] - - paths_relative_to = None - if relative_paths: - paths_relative_to = Path(output_yaml).parent - model_dicts = [ - model.to_dict(paths_relative_to=paths_relative_to) for model in models - ] - model_dicts = None if not model_dicts else model_dicts - with open(output_yaml, "w") as f: - yaml.dump(model_dicts, f) - - class ModelHash(str): """A class to handle model hash functionality. 
diff --git a/petab_select/models.py b/petab_select/models.py new file mode 100644 index 0000000..f712add --- /dev/null +++ b/petab_select/models.py @@ -0,0 +1,393 @@ +from __future__ import annotations + +import warnings +from collections import Counter +from collections.abc import Iterable, MutableSequence +from pathlib import Path +from typing import TYPE_CHECKING, TypeAlias + +import yaml + +from .constants import TYPE_PATH +from .model import ( + Model, + ModelHash, +) + +if TYPE_CHECKING: + import petab + + from .problem import Problem + + # `Models` can be constructed from actual `Model`s, + # or `ModelHash`s, or the `str` of a model hash. + ModelLike: TypeAlias = Model | ModelHash | str + ModelsLike: TypeAlias = "Models" | Iterable[Model | ModelHash | str] + # Access a model by list index, model hash, slice of indices, model hash + # string, or an iterable of these things. + ModelIndex: TypeAlias = int | ModelHash | slice | str | Iterable + +__all__ = [ + "Models", + "models_from_yaml_list", + "models_to_yaml_list", +] + + +class Models(MutableSequence): + """A collection of models. + + Behaves like a list of models, but also supports operations + involving objects that can be mapped to model(s). For example, model hashes + can be used to add or access models. + + Some list methods are not yet implemented -- feel free to request anything + that feels intuitive. + + Provide a PEtab Select ``problem`` to the constructor or via + ``set_problem``, to add models by hashes. This means that all models + must belong to the same PEtab Select problem. + """ + + def set_problem(self, problem: Problem) -> None: + """Set the PEtab Select problem for this set of models.""" + self._problem = problem + + def lint(self): + """Lint the models, e.g. check all hashes are unique. + + Currently raises an exception when invalid. 
+ """ + duplicates = [ + model_hash + for model_hash, count in Counter(self._hashes).items() + if count > 1 + ] + if duplicates: + raise ValueError( + "Multiple models exist with the same hash. " + f"Model hashes: `{duplicates}`." + ) + + @staticmethod + def from_yaml( + models_yaml: TYPE_PATH, + petab_problem: petab.Problem = None, + problem: Problem = None, + ) -> Models: + """Generate models from a PEtab Select list of model YAML file. + + Args: + models_yaml: + The path to the PEtab Select list of model YAML file. + petab_problem: + See :meth:`Model.from_dict`. + problem: + The PEtab Select problem. + + Returns: + The models. + """ + with open(str(models_yaml)) as f: + model_dict_list = yaml.safe_load(f) + if not model_dict_list: + # Empty file + models = [] + elif not isinstance(model_dict_list, list): + # File contains a single model + models = [ + Model.from_dict( + model_dict_list, + base_path=Path(models_yaml).parent, + petab_problem=petab_problem, + ) + ] + else: + # File contains a list of models + models = [ + Model.from_dict( + model_dict, + base_path=Path(models_yaml).parent, + petab_problem=petab_problem, + ) + for model_dict in model_dict_list + ] + + return Models(models=models, problem=problem) + + def to_yaml( + self, + output_yaml: TYPE_PATH, + relative_paths: bool = True, + ) -> None: + """Generate a YAML listing of models. + + Args: + output_yaml: + The location where the YAML will be saved. + relative_paths: + Whether to rewrite the paths in each model (e.g. the path to the + model's PEtab problem) relative to the `output_yaml` location. + """ + paths_relative_to = None + if relative_paths: + paths_relative_to = Path(output_yaml).parent + model_dicts = [ + model.to_dict(paths_relative_to=paths_relative_to) + for model in self + ] + with open(output_yaml, "w") as f: + yaml.safe_dump(model_dicts, f) + + # `list` methods. Compared to `UserList`, some methods are skipped. 
+ # https://github.com/python/cpython/blob/main/Lib/collections/__init__.py + + def __init__( + self, models: Iterable[ModelLike] = None, problem: Problem = None + ) -> Models: + self._models = [] + self._hashes = [] + self._problem = problem + + if models is None: + models = [] + self.extend(models) + + def __repr__(self) -> str: + """Get the model hashes that can regenerate these models. + + N.B.: some information, e.g. criteria, will be lost if the hashes are + used to reproduce the set of models. + """ + return repr(self._hashes) + + # skipped __lt__, __le__ + + def __eq__(self, other) -> bool: + other_hashes = Models(other)._hashes + same_length = len(self._hashes) == len(other_hashes) + same_hashes = set(self._hashes) == set(other_hashes) + return same_length and same_hashes + + # skipped __gt__, __ge__, __cast + + def __contains__(self, item: ModelLike) -> bool: + match item: + case Model(): + return item in self._models + case ModelHash() | str(): + return item in self._hashes + case _: + raise TypeError(f"Unexpected type: `{type(item)}`.") + + def __len__(self) -> int: + return len(self._models) + + def __getitem__( + self, item: ModelIndex | Iterable[ModelIndex] + ) -> Model | Models: + match item: + case int(): + return self._models[item] + case ModelHash() | str(): + return self._models[self._hashes.index(item)] + case slice(): + return self.__class__(self._models[item]) + case Iterable(): + # TODO sensible to yield here? 
+ return [self[item_] for item_ in item] + case _: + raise TypeError(f"Unexpected type: `{type(item)}`.") + + def __setitem__(self, key: ModelIndex, item: ModelLike) -> None: + match key: + case int(): + pass + case ModelHash() | str(): + key = self._hashes.index(key) + case slice(): + for key_, item_ in zip( + range(*key.indices(len(self))), item, strict=True + ): + self[key_] = item_ + case Iterable(): + for key_, item_ in zip(key, item, strict=True): + self[key_] = item_ + case _: + raise TypeError(f"Unexpected type: `{type(key)}`.") + + match item: + case Model(): + pass + case ModelHash() | str(): + item = self._problem.model_hash_to_model(item) + case _: + raise TypeError(f"Unexpected type: `{type(item)}`.") + + if key < len(self._models): + self._models[key] = item + self._hashes[key] = item.get_hash() + else: + # Key doesn't exist, e.g., instead of + # models[1] = model1 + # the user did something like + # models[model1_hash] = model1 + # to add a new model. + self.append(item) + + def __delitem__(self, key: ModelIndex) -> None: + match key: + case ModelHash() | str(): + key = self._hashes.index(key) + case slice(): + for key_ in range(*key.indices(len(self))): + del self[key_] + case Iterable(): + for key_ in key: + del self[key_] + case _: + raise TypeError(f"Unexpected type: `{type(key)}`.") + + del self._models[key] + del self._hashes[key] + + def __add__( + self, other: ModelLike | ModelsLike, left: bool = True + ) -> Models: + match other: + case Models(): + new_models = other._models + case Model(): + new_models = [other] + case ModelHash() | str(): + # Assumes the models belong to the same PEtab Select problem. + new_models = [self._problem.model_hash_to_model(other)] + case Iterable(): + # Assumes the models belong to the same PEtab Select problem. 
+ new_models = Models(other, problem=self._problem)._models + case _: + raise TypeError(f"Unexpected type: `{type(other)}`.") + + models = self._models + new_models + if not left: + models = new_models + self._models + return Models(models=models, problem=self._problem) + + def __radd__(self, other: ModelLike | ModelsLike) -> Models: + return self.__add__(other=other, left=False) + + def __iadd__(self, other: ModelLike | ModelsLike) -> Models: + return self.__add__(other=other) + + # skipped __mul__, __rmul__, __imul__ + + def __copy__(self) -> Models: + return Models(models=self._models, problem=self._problem) + + def append(self, item: ModelLike) -> None: + # Re-use __setitem__ logic + self._models.append(None) + self._hashes.append(None) + self[-1] = item + + def insert(self, index: int, item: ModelLike): + # Re-use __setitem__ logic + self._models.insert(index, None) + self._hashes.insert(index, None) + self[index] = item + + # def pop(self, index: int = -1): + # model = self._models[index] + + # # Re-use __delitem__ logic + # del self[index] + + # return model + + # def remove(self, item: ModelLike): + # # Re-use __delitem__ logic + # if isinstance(item, Model): + # item = item.get_hash() + # del self[item] + + # skipped clear, copy, count + + def index(self, item: ModelLike, *args) -> int: + if isinstance(item, Model): + item = item.get_hash() + return self._hashes.index(item, *args) + + # skipped reverse, sort + + def extend(self, other: Iterable[ModelLike]) -> None: + # Re-use append and therein __setitem__ logic + for model_like in other: + self.append(model_like) + + +def models_from_yaml_list( + model_list_yaml: TYPE_PATH, + petab_problem: petab.Problem = None, + allow_single_model: bool = True, + problem: Problem = None, +) -> Models: + """Generate a model from a PEtab Select list of model YAML file. + + Deprecated. Use `petab_select.Models.from_yaml` instead. + + Args: + model_list_yaml: + The path to the PEtab Select list of model YAML file. 
+ petab_problem: + See :meth:`Model.from_dict`. + allow_single_model: + Given a YAML file that contains a single model directly (not in + a 1-element list), if ``True`` then the single model will be read in, + else a ``ValueError`` will be raised. + problem: + The PEtab Select problem. + + Returns: + The models. + """ + warnings.warn( + ( + "Use `petab_select.Models.from_yaml` instead. " + "The `allow_single_model` argument is fixed to `True` now." + ), + DeprecationWarning, + stacklevel=2, + ) + return Models.from_yaml( + models_yaml=model_list_yaml, + petab_problem=petab_problem, + problem=problem, + ) + + +def models_to_yaml_list( + models: Models, + output_yaml: TYPE_PATH, + relative_paths: bool = True, +) -> None: + """Generate a YAML listing of models. + + Deprecated. Use `petab_select.Models.to_yaml` instead. + + Args: + models: + The models. + output_yaml: + The location where the YAML will be saved. + relative_paths: + Whether to rewrite the paths in each model (e.g. the path to the + model's PEtab problem) relative to the `output_yaml` location. + """ + warnings.warn( + "Use `petab_select.Models.to_yaml` instead.", + DeprecationWarning, + stacklevel=2, + ) + Models(models=models).to_yaml( + output_yaml=output_yaml, relative_paths=relative_paths + ) diff --git a/petab_select/problem.py b/petab_select/problem.py index c7e2014..b0a763b 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -1,5 +1,6 @@ """The model selection problem class.""" +import warnings from collections.abc import Callable, Iterable from functools import partial from pathlib import Path @@ -21,6 +22,7 @@ ) from .model import Model, ModelHash, default_compare from .model_space import ModelSpace +from .models import Models __all__ = [ "Problem", @@ -122,7 +124,7 @@ def get_path(self, relative_path: str | Path) -> Path: def exclude_models( self, - models: Iterable[Model], + models: Models, ) -> None: """Exclude models from the model space. 
@@ -142,7 +144,13 @@ def exclude_model_hashes( model_hashes: The model hashes. """ - self.model_space.exclude_model_hashes(model_hashes) + # FIXME think about design here -- should we have exclude_models here? + warnings.warn( + "Use `exclude_models` instead. It also accepts hashes.", + DeprecationWarning, + stacklevel=2, + ) + self.exclude_models(models=Models(models=model_hashes, problem=self)) @staticmethod def from_yaml( @@ -212,7 +220,8 @@ def from_yaml( def get_best( self, - models: list[Model] | dict[ModelHash, Model] | None, + models: Models, + # models: list[Model] | dict[ModelHash, Model] | None, criterion: str | None | None = None, compute_criterion: bool = False, ) -> Model: @@ -222,11 +231,9 @@ def get_best( Args: models: - The best model will be taken from these models. + The models. criterion: - The criterion by which models will be compared. Defaults to - ``self.criterion`` (e.g. as defined in the PEtab Select problem YAML - file). + The criterion. Defaults to the problem criterion. compute_criterion: Whether to try computing criterion values, if sufficient information is available (e.g., likelihood and number of @@ -235,8 +242,6 @@ def get_best( Returns: The best model. """ - if isinstance(models, dict): - models = list(models.values()) if criterion is None: criterion = self.criterion diff --git a/petab_select/ui.py b/petab_select/ui.py index f5ed1f1..d2dd3f1 100644 --- a/petab_select/ui.py +++ b/petab_select/ui.py @@ -18,6 +18,7 @@ Method, ) from .model import Model, ModelHash, default_compare +from .models import Models from .problem import Problem __all__ = [ @@ -45,7 +46,7 @@ def start_iteration( limit_sent: float | int = np.inf, excluded_hashes: list[ModelHash] | None = None, criterion: Criterion | None = None, - user_calibrated_models: list[Model] | dict[ModelHash, Model] | None = None, + user_calibrated_models: Models | None = None, ) -> CandidateSpace: """Search the model space for candidate models. 
@@ -71,8 +72,7 @@ def start_iteration( The criterion by which models will be compared. Defaults to the criterion defined in the PEtab Select problem. user_calibrated_models: - Models that were already calibrated by the user. When supplied as a - `dict`, the keys are model hashes. If a model in the + Models that were already calibrated by the user. If a model in the candidates has the same hash as a model in `user_calibrated_models`, then the candidate will be replaced with the calibrated version. Calibration tools will only receive uncalibrated @@ -124,7 +124,7 @@ def start_iteration( ) is None ): - candidate_space.models = [copy.deepcopy(predecessor_model)] + candidate_space.models = Models([copy.deepcopy(predecessor_model)]) # Dummy zero likelihood, which the predecessor model will # improve on after it's actually calibrated. predecessor_model.set_criterion(Criterion.LH, 0.0) @@ -145,7 +145,7 @@ def start_iteration( # this is not the first step of the search. if candidate_space.latest_iteration_calibrated_models: predecessor_model = problem.get_best( - candidate_space.latest_iteration_calibrated_models.values(), + candidate_space.latest_iteration_calibrated_models, criterion=criterion, ) # If the new predecessor model isn't better than the previous one, @@ -194,7 +194,7 @@ def start_iteration( if isinstance(candidate_space, FamosCandidateSpace): try: candidate_space.update_after_calibration( - iteration_calibrated_models={}, + iteration_calibrated_models=Models(), ) continue except StopIteration: @@ -214,8 +214,8 @@ def start_iteration( def end_iteration( candidate_space: CandidateSpace, - calibrated_models: list[Model] | dict[str, Model], -) -> dict[str, dict[ModelHash, Model] | bool | CandidateSpace]: + calibrated_models: Models, +) -> dict[str, Models | bool | CandidateSpace]: """Finalize model selection iteration. All models from the current iteration are provided to the calibration tool. 
@@ -234,17 +234,11 @@ def end_iteration( Returns: A dictionary, with the following items: :const:`petab_select.constants.MODELS`: - All calibrated models for the current iteration as a - dictionary, where keys are model hashes, and values are models. + All calibrated models for the current iteration. :const:`petab_select.constants.TERMINATE`: Whether PEtab Select has decided to end the model selection, as a boolean. """ - if isinstance(calibrated_models, list): - calibrated_models = { - model.get_hash(): model for model in calibrated_models - } - iteration_results = { MODELS: candidate_space.get_iteration_calibrated_models( calibrated_models=calibrated_models, @@ -288,7 +282,7 @@ def model_to_petab( def models_to_petab( - models: list[Model], + models: Models, output_path_prefix: list[TYPE_PATH] | None = None, ) -> list[dict[str, petab.Problem | TYPE_PATH]]: """Generate the PEtab problems for a list of models.