FIX: make PCA models usable in pipelines

sergiogaiotto · Mar 2, 2011 · ea51378 · ea51378
1 parent 09940a9
commit ea51378
Show file tree

Hide file tree

Showing 3 changed files with 40 additions and 7 deletions.
diff --git a/scikits/learn/pca.py b/scikits/learn/pca.py
@@ -165,7 +165,7 @@ def __init__(self, n_components=None, copy=True, whiten=False):
         self.copy = copy
         self.whiten = whiten
 
-    def fit(self, X, **params):
+    def fit(self, X, y=None, **params):
         """Fit the model from data in X.
 
         Parameters
@@ -238,7 +238,7 @@ class ProbabilisticPCA(PCA):
 
     """ + PCA.__doc__
 
-    def fit(self, X, homoscedastic=True):
+    def fit(self, X, y=None, homoscedastic=True):
         """Additionally to PCA.fit, learns a covariance model
 
         Parameters
@@ -369,7 +369,7 @@ def __init__(self, n_components, copy=True, iterated_power=3,
         self.whiten = whiten
         self.mean_ = None
 
-    def fit(self, X, **params):
+    def fit(self, X, y=None, **params):
         """Fit the model to the data X.
 
         Parameters

diff --git a/scikits/learn/tests/test_pca.py b/scikits/learn/tests/test_pca.py
@@ -281,7 +281,7 @@ def test_probabilistic_pca_3():
     ppca = ProbabilisticPCA(n_components=2)
     ppca.fit(X)
     ll1 = ppca.score(X)
-    ppca.fit(X, False)
+    ppca.fit(X, homoscedastic=False)
     ll2 = ppca.score(X)
     assert_true(ll1.mean() < ll2.mean())
 

diff --git a/scikits/learn/tests/test_pipeline.py b/scikits/learn/tests/test_pipeline.py
@@ -11,6 +11,7 @@
 from ..svm import SVC
 from ..linear_model import LogisticRegression
 from ..feature_selection import SelectKBest, f_classif
+from ..pca import PCA, RandomizedPCA
 from ..datasets import load_iris
 
 
@@ -78,16 +79,48 @@ def test_pipeline_init():
     assert_equal(params, params2)
 
 
-def test_pipeline_methods():
+def test_pipeline_methods_anova():
     """ Test the various methods of the pipeline.
     """
     iris = load_iris()
     X = iris.data
     y = iris.target
-    # Test with Anova+SVC
+    # Test with Anova + LogisticRegression
     clf = LogisticRegression()
     filter1 = SelectKBest(f_classif, k=2)
-    pipe = Pipeline([('anova', filter1), ('svc', clf)])
+    pipe = Pipeline([('anova', filter1), ('logistic', clf)])
+    pipe.fit(X, y)
+    pipe.predict(X)
+    pipe.predict_proba(X)
+    pipe.predict_log_proba(X)
+    pipe.score(X, y)
+
+
+def test_pipeline_methods_pca_svm():
+    """Test the various methods of the pipeline."""
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+    # Test with PCA + SVC
+    clf = SVC(probability=True)
+    pca = PCA(n_components='mle', whiten=True)
+    pipe = Pipeline([('pca', pca), ('svc', clf)])
+    pipe.fit(X, y)
+    pipe.predict(X)
+    pipe.predict_proba(X)
+    pipe.predict_log_proba(X)
+    pipe.score(X, y)
+
+
+def test_pipeline_methods_randomized_pca_svm():
+    """Test the various methods of the pipeline."""
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+    # Test with RandomizedPCA + SVC
+    clf = SVC(probability=True)
+    pca = RandomizedPCA(n_components=2, whiten=True)
+    pipe = Pipeline([('pca', pca), ('svc', clf)])
     pipe.fit(X, y)
     pipe.predict(X)
     pipe.predict_proba(X)