Skip to content

Commit

Permalink
Correct the way we compute the variance-covariance matrix (#26)
Browse files Browse the repository at this point in the history
* add law of total covariance to super_vcov

* add scaled score

* make score_scaled a post-processing step of explore rather than an attribute of the learner

* change the version to 0.1.1

* change the version of regmod to 0.1.0

* temporarily pin the versions of jax and jaxlib
  • Loading branch information
zhengp0 authored Aug 16, 2023
1 parent da83047 commit f5c75d9
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 14 deletions.
12 changes: 9 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@ build-backend = "setuptools.build_meta"

[project]
name = "modrover"
version = "0.1.0"
version = "0.1.1"
description = "Model space explorer for inference and prediction"
readme = "README.rst"
requires-python = ">=3.10"
license = { file = "LICENSE" }
authors = [
{ name = "IHME Math Sciences", email = "[email protected]" },
]
dependencies = ["regmod==0.1.0", "pplkit", "matplotlib"]
dependencies = [
"jax[cpu]==0.4.5",
"jaxlib==0.4.4",
"regmod==0.1.0",
"pplkit",
"matplotlib",
]

[project.optional-dependencies]
test = ["pytest"]
Expand All @@ -25,4 +31,4 @@ github = "https://github.com/ihmeuw-msca/modrover"
project = "modrover"
author = "IHME Math Sciences"
copyright = "2023, IHME Math Sciences"
version = "0.1.0"
version = "0.1.1"
2 changes: 1 addition & 1 deletion src/modrover/learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(

# initialize null model
self.model = self._get_model()
self.score: Optional[float] = None
self.score = np.nan
self.status = ModelStatus.NOT_FITTED

# initialize cross validation model
Expand Down
21 changes: 14 additions & 7 deletions src/modrover/rover.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,8 @@ def plot(self, bins: Optional[int] = None) -> plt.Figure:
bins = np.linspace(cmin, cmax, bins + 1)

summary = self.summary
score = learner_info["score"].to_numpy()
vmin, vmax = score.min(), score.max()
score_scaled = learner_info["score_scaled"].to_numpy()
vmin, vmax = score_scaled.min(), score_scaled.max()
highlight_index = {
"final": learner_info["weight"] > 0,
"invalid": ~learner_info["valid"],
Expand Down Expand Up @@ -262,7 +262,7 @@ def plot(self, bins: Optional[int] = None) -> plt.Figure:
coef,
coef_jitter,
alpha=0.2,
c=score,
c=score_scaled,
edgecolors="none",
vmin=vmin,
vmax=vmax,
Expand Down Expand Up @@ -441,7 +441,7 @@ def _get_super_learner(
learner_ids, weights = df["learner_id"], df["weight"]
coefs = df[list(self.variables)].to_numpy()
super_coef = coefs.T.dot(weights)
super_vcov = self._get_super_vcov(learner_ids, weights)
super_vcov = self._get_super_vcov(learner_ids, weights, super_coef)

super_learner = self._get_learner(
learner_id=self.super_learner_id, use_cache=False
Expand Down Expand Up @@ -486,6 +486,8 @@ def _get_learner_info(
df.loc[df["valid"], "weight"] = self._get_super_weights(
df.loc[df["valid"], "learner_id"], top_pct_score, top_pct_learner
)

df["score_scaled"] = df["score"] / df["score"].dropna().max()
self._learner_info = df
return df

Expand All @@ -503,14 +505,19 @@ def _get_super_coef(
return super_coef

def _get_super_vcov(
self, learner_ids: list[LearnerID], weights: NDArray
self,
learner_ids: list[LearnerID],
weights: NDArray,
super_coef: NDArray,
) -> NDArray:
super_vcov = np.zeros((self.num_vars, self.num_vars))
for learner_id, weight in zip(learner_ids, weights):
learner = self.learners[learner_id]
coef_index = self._get_coef_index(learner_id)
super_vcov[np.ix_(coef_index, coef_index)] += (
weight * self.learners[learner_id].vcov
super_vcov[np.ix_(coef_index, coef_index)] += weight * (
learner.vcov + np.outer(learner.coef, learner.coef)
)
super_vcov -= np.outer(super_coef, super_coef)
return super_vcov

def _get_coef_index(self, learner_id: LearnerID) -> list[int]:
Expand Down
3 changes: 2 additions & 1 deletion tests/integration/test_ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,6 @@ def test_get_super_learner(mock_rover):
super_learner.coef, mock_rover._get_super_coef(learner_ids, weights)
)
assert np.allclose(
super_learner.vcov, mock_rover._get_super_vcov(learner_ids, weights)
super_learner.vcov,
mock_rover._get_super_vcov(learner_ids, weights, super_learner.coef),
)
3 changes: 1 addition & 2 deletions tests/test_learner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pandas as pd
import pytest

from modrover.globals import model_type_dict
from modrover.learner import Learner, ModelStatus

Expand Down Expand Up @@ -36,7 +35,7 @@ def test_model_init(model_specs):
# Check that model is "new"
assert learner.status == ModelStatus.NOT_FITTED
assert learner.coef is None
assert learner.score is None
assert np.isnan(learner.score)


def test_model_fit(dataset, model_specs):
Expand Down

0 comments on commit f5c75d9

Please sign in to comment.