diff --git a/pyproject.toml b/pyproject.toml index 817721b..8306449 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "modrover" -version = "0.1.0" +version = "0.1.1" description = "Model space explorer for inference and prediction" readme = "README.rst" requires-python = ">=3.10" @@ -12,7 +12,13 @@ license = { file = "LICENSE" } authors = [ { name = "IHME Math Sciences", email = "ihme.math.sciences@gmail.com" }, ] -dependencies = ["regmod==0.1.0", "pplkit", "matplotlib"] +dependencies = [ + "jax[cpu]==0.4.5", + "jaxlib==0.4.4", + "regmod==0.1.0", + "pplkit", + "matplotlib", +] [project.optional-dependencies] test = ["pytest"] @@ -25,4 +31,4 @@ github = "https://github.com/ihmeuw-msca/modrover" project = "modrover" author = "IHME Math Sciences" copyright = "2023, IHME Math Sciences" -version = "0.1.0" +version = "0.1.1" diff --git a/src/modrover/learner.py b/src/modrover/learner.py index 782268c..36a1aac 100644 --- a/src/modrover/learner.py +++ b/src/modrover/learner.py @@ -65,7 +65,7 @@ def __init__( # initialize null model self.model = self._get_model() - self.score: Optional[float] = None + self.score = np.nan self.status = ModelStatus.NOT_FITTED # initialize cross validation model diff --git a/src/modrover/rover.py b/src/modrover/rover.py index e8891ba..0a40382 100644 --- a/src/modrover/rover.py +++ b/src/modrover/rover.py @@ -227,8 +227,8 @@ def plot(self, bins: Optional[int] = None) -> plt.Figure: bins = np.linspace(cmin, cmax, bins + 1) summary = self.summary - score = learner_info["score"].to_numpy() - vmin, vmax = score.min(), score.max() + score_scaled = learner_info["score_scaled"].to_numpy() + vmin, vmax = score_scaled.min(), score_scaled.max() highlight_index = { "final": learner_info["weight"] > 0, "invalid": ~learner_info["valid"], @@ -262,7 +262,7 @@ def plot(self, bins: Optional[int] = None) -> plt.Figure: coef, coef_jitter, alpha=0.2, - c=score, + c=score_scaled, edgecolors="none", vmin=vmin, vmax=vmax, @@ -441,7 +441,7 @@ def _get_super_learner( learner_ids, weights = df["learner_id"], df["weight"] coefs = df[list(self.variables)].to_numpy() super_coef = coefs.T.dot(weights) - super_vcov = self._get_super_vcov(learner_ids, weights) + super_vcov = self._get_super_vcov(learner_ids, weights, super_coef) super_learner = self._get_learner( learner_id=self.super_learner_id, use_cache=False @@ -486,6 +486,8 @@ def _get_learner_info( df.loc[df["valid"], "weight"] = self._get_super_weights( df.loc[df["valid"], "learner_id"], top_pct_score, top_pct_learner ) + + df["score_scaled"] = df["score"] / df["score"].dropna().max() self._learner_info = df return df @@ -503,14 +505,19 @@ def _get_super_coef( return super_coef def _get_super_vcov( - self, learner_ids: list[LearnerID], weights: NDArray + self, + learner_ids: list[LearnerID], + weights: NDArray, + super_coef: NDArray, ) -> NDArray: super_vcov = np.zeros((self.num_vars, self.num_vars)) for learner_id, weight in zip(learner_ids, weights): + learner = self.learners[learner_id] coef_index = self._get_coef_index(learner_id) - super_vcov[np.ix_(coef_index, coef_index)] += ( - weight * self.learners[learner_id].vcov + super_vcov[np.ix_(coef_index, coef_index)] += weight * ( + learner.vcov + np.outer(learner.coef, learner.coef) ) + super_vcov -= np.outer(super_coef, super_coef) return super_vcov def _get_coef_index(self, learner_id: LearnerID) -> list[int]: diff --git a/tests/integration/test_ensembles.py b/tests/integration/test_ensembles.py index bd80425..f8f0a02 100644 --- a/tests/integration/test_ensembles.py +++ b/tests/integration/test_ensembles.py @@ -121,5 +121,6 @@ def test_get_super_learner(mock_rover): super_learner.coef, mock_rover._get_super_coef(learner_ids, weights) ) assert np.allclose( - super_learner.vcov, mock_rover._get_super_vcov(learner_ids, weights) + super_learner.vcov, + mock_rover._get_super_vcov(learner_ids, weights, super_learner.coef), ) diff --git a/tests/test_learner.py b/tests/test_learner.py index 75cf234..3033eaa 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd import pytest - from modrover.globals import model_type_dict from modrover.learner import Learner, ModelStatus @@ -36,7 +35,7 @@ def test_model_init(model_specs): # Check that model is "new" assert learner.status == ModelStatus.NOT_FITTED assert learner.coef is None - assert learner.score is None + assert np.isnan(learner.score) def test_model_fit(dataset, model_specs):