Skip to content

Commit

Permalink
FIX: make PCA models usable in pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
ogrisel committed Mar 2, 2011
1 parent 09940a9 commit ea51378
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 7 deletions.
6 changes: 3 additions & 3 deletions scikits/learn/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def __init__(self, n_components=None, copy=True, whiten=False):
self.copy = copy
self.whiten = whiten

def fit(self, X, **params):
def fit(self, X, y=None, **params):
"""Fit the model from data in X.
Parameters
Expand Down Expand Up @@ -238,7 +238,7 @@ class ProbabilisticPCA(PCA):
""" + PCA.__doc__

def fit(self, X, homoscedastic=True):
def fit(self, X, y=None, homoscedastic=True):
"""Additionally to PCA.fit, learns a covariance model
Parameters
Expand Down Expand Up @@ -369,7 +369,7 @@ def __init__(self, n_components, copy=True, iterated_power=3,
self.whiten = whiten
self.mean_ = None

def fit(self, X, **params):
def fit(self, X, y=None, **params):
"""Fit the model to the data X.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion scikits/learn/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_probabilistic_pca_3():
ppca = ProbabilisticPCA(n_components=2)
ppca.fit(X)
ll1 = ppca.score(X)
ppca.fit(X, False)
ppca.fit(X, homoscedastic=False)
ll2 = ppca.score(X)
assert_true(ll1.mean() < ll2.mean())

Expand Down
39 changes: 36 additions & 3 deletions scikits/learn/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..svm import SVC
from ..linear_model import LogisticRegression
from ..feature_selection import SelectKBest, f_classif
from ..pca import PCA, RandomizedPCA
from ..datasets import load_iris


Expand Down Expand Up @@ -78,16 +79,48 @@ def test_pipeline_init():
assert_equal(params, params2)


def test_pipeline_methods():
def test_pipeline_methods_anova():
""" Test the various methods of the pipeline.
"""
iris = load_iris()
X = iris.data
y = iris.target
# Test with Anova+SVC
# Test with Anova + LogisticRegression
clf = LogisticRegression()
filter1 = SelectKBest(f_classif, k=2)
pipe = Pipeline([('anova', filter1), ('svc', clf)])
pipe = Pipeline([('anova', filter1), ('logistic', clf)])
pipe.fit(X, y)
pipe.predict(X)
pipe.predict_proba(X)
pipe.predict_log_proba(X)
pipe.score(X, y)


def test_pipeline_methods_pca_svm():
    """Smoke-test the pipeline API with a PCA step followed by an SVC.

    Fits the two-step pipeline on the iris data, then calls each
    prediction method and ``score`` once. No return value is checked.
    """
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    # PCA + SVC. The SVC is built with probability=True (presumably
    # needed for the predict_proba / predict_log_proba calls below).
    classifier = SVC(probability=True)
    reducer = PCA(n_components='mle', whiten=True)
    pipe = Pipeline([('pca', reducer), ('svc', classifier)])

    pipe.fit(features, labels)
    # Same calls, in the same order, each on the training features.
    for method_name in ('predict', 'predict_proba', 'predict_log_proba'):
        getattr(pipe, method_name)(features)
    pipe.score(features, labels)


def test_pipeline_methods_randomized_pca_svm():
"""Test the various methods of the pipeline."""
iris = load_iris()
X = iris.data
y = iris.target
# Test with RandomizedPCA + SVC
clf = SVC(probability=True)
pca = RandomizedPCA(n_components=2, whiten=True)
pipe = Pipeline([('pca', pca), ('svc', clf)])
pipe.fit(X, y)
pipe.predict(X)
pipe.predict_proba(X)
Expand Down

0 comments on commit ea51378

Please sign in to comment.