Skip to content

Commit

Permalink
Add _from_state methods to MultivariateGaussian and EmpiricalCovarian…
Browse files Browse the repository at this point in the history
…ce (#1327)

* ADD: _from_state method for EmpiricalCovariance

* ADD: _from_state method to MultivariateGaussian

* ADD: docstring to _from_state method

* UPDATE: add _from_state info to release notes

* ADD: doctest on _from_state method

* FIX: end of file

* FIX: resolve comments from smastelini
  • Loading branch information
MarekWadinger authored Oct 3, 2023
1 parent d7bd65d commit 1a32e0d
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 15 deletions.
8 changes: 8 additions & 0 deletions docs/releases/unreleased.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ River's mini-batch methods now support pandas v2. In particular, River conforms
## forest

- Simplify the inner structures of `forest.ARFClassifier` and `forest.ARFRegressor` by removing redundant class hierarchy. Simplify how concept drift logging can be accessed in individual trees and in the forest as a whole.

## covariance

- Added `_from_state` method to `covariance.EmpiricalCovariance` to warm start from previous knowledge.

## proba

- Added `_from_state` method to `proba.MultivariateGaussian` to warm start from previous knowledge.
72 changes: 58 additions & 14 deletions river/covariance/emp.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,22 @@ class EmpiricalCovariance(SymmetricMatrix):
>>> cov["blue", "blue"]
Var: 0.076119
Start from a state:
>>> n = 8
>>> mean = {'red': 0.416, 'green': 0.387, 'blue': 0.518}
>>> cov_ = {('red', 'red'): 0.079,
... ('red', 'green'): -0.053,
... ('red', 'blue'): -0.010,
... ('green', 'green'): 0.113,
... ('green', 'blue'): 0.020,
... ('blue', 'blue'): 0.076}
>>> cov = covariance.EmpiricalCovariance._from_state(
... n=n, mean=mean, cov=cov_, ddof=1)
>>> cov
blue green red
blue 0.076 0.020 -0.010
green 0.020 0.113 -0.053
red -0.010 -0.053 0.079
"""

def __init__(self, ddof=1):
Expand Down Expand Up @@ -178,35 +194,63 @@ def update_many(self, X: pd.DataFrame):
mean_arr = X_arr.mean(axis=0)
cov_arr = np.cov(X_arr.T, ddof=self.ddof)

n = len(X)
mean = dict(zip(X.columns, mean_arr))
cov = {
(i, j): cov_arr[r, c]
for (r, i), (c, j) in itertools.combinations_with_replacement(enumerate(X.columns), r=2)
}

for i, j in itertools.combinations(sorted(X.columns), r=2):
self = self._from_state(n=n, mean=mean, cov=cov, ddof=self.ddof)

return self

# Alternate constructor: builds a fresh instance with `cls(ddof=ddof)` and fills
# its `_cov` mapping from the supplied summary statistics (n, mean, cov).
# NOTE(review): this listing appears to be a diff view without +/- markers, so
# statements that reference `self` or `X` inside this classmethod are presumably
# the removed pre-change lines and the `new`/`mean` lines their replacements.
# Confirm against the repository before relying on this span.
@classmethod
def _from_state(cls, n: int, mean: dict, cov: dict, *, ddof=1):
"""Create a new instance from state information.
Parameters
----------
cls
The class type.
n
The number of data points.
mean
A dictionary of variable means.
cov
A dictionary of covariance or variance values.
ddof
Degrees of freedom for covariance calculation. Defaults to 1.
Returns
----------
cls: A new instance of the class with updated covariance matrix.
Raises
----------
KeyError: If an element in `mean` or `cov` is missing.
"""
new = cls(ddof=ddof)
# Off-diagonal entries: one per unordered pair of keys in `mean`.
# The pair's covariance is looked up as cov[(i, j)] with a fallback to
# cov[(j, i)]; both lookups use dict.get, so a pair missing under both
# orderings is passed on as None instead of raising KeyError here.
for i, j in itertools.combinations(mean.keys(), r=2):
try:
self[i, j]
new[i, j]
except KeyError:
self._cov[i, j] = stats.Cov(self.ddof)
self._cov[i, j] += stats.Cov._from_state(
n=len(X),
new._cov[i, j] = stats.Cov(new.ddof)
new._cov[i, j] += stats.Cov._from_state(
n=n,
mean_x=mean[i],
mean_y=mean[j],
cov=cov.get((i, j), cov.get((j, i))),
ddof=self.ddof,
ddof=new.ddof,
)

# Diagonal entries: one per key in `mean`. The variance is read with
# cov[i, i] (plain indexing), so a missing diagonal key raises KeyError.
for i in X.columns:
for i in mean.keys():
try:
self[i, i]
new[i, i]
except KeyError:
self._cov[i, i] = stats.Var(self.ddof)
self._cov[i, i] += stats.Var._from_state(
n=len(X), m=mean[i], sig=cov[i, i], ddof=self.ddof
)

return self
new._cov[i, i] = stats.Var(new.ddof)
new._cov[i, i] += stats.Var._from_state(n=n, m=mean[i], sig=cov[i, i], ddof=new.ddof)
return new


class EmpiricalPrecision(SymmetricMatrix):
Expand Down
6 changes: 5 additions & 1 deletion river/proba/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,11 @@ def __init__(self, seed=None):
super().__init__(seed)
self._var = covariance.EmpiricalCovariance(ddof=1)

# TODO: add method _from_state to initialize model (for warm starting)
# Alternate constructor for warm starting: creates an instance with `cls(seed)`
# and then replaces its `_var` attribute with an EmpiricalCovariance built from
# the given state. `n`, `mean` and `cov` are forwarded positionally and `ddof`
# by keyword to covariance.EmpiricalCovariance._from_state; the new instance is
# returned.
@classmethod
def _from_state(cls, n, mean, cov, ddof, seed=None):
new = cls(seed)
# Overwrites whatever `_var` the constructor assigned with the state-derived matrix.
new._var = covariance.EmpiricalCovariance._from_state(n, mean, cov, ddof=ddof)
return new

@property
def n_samples(self) -> float:
Expand Down

0 comments on commit 1a32e0d

Please sign in to comment.