From 5210f810f78d7aa102cb56e6b586880a771980b4 Mon Sep 17 00:00:00 2001
From: Sarah Brown
Date: Mon, 6 Mar 2017 22:39:45 -0800
Subject: [PATCH] [MRG+2] addresses #8509 improvements to f_regression
 documentation (#8548)

* clarify role of the function and streamline introduction

* added feature selection methods to see also

* completed see also

* fixed pep related formatting for flake8 checks.

* fixed extra whitespace flake8 problems, remaining failure is a copied see
  also line from another function, the line is over by a period, does not
  make sense to newline that.

* one more whitespace

* FIX small pep8 error.
---
 .../feature_selection/univariate_selection.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
index f1d6047f0b55e..bdeda48a556a9 100644
--- a/sklearn/feature_selection/univariate_selection.py
+++ b/sklearn/feature_selection/univariate_selection.py
@@ -230,17 +230,18 @@ def chi2(X, y):
 def f_regression(X, y, center=True):
     """Univariate linear regression tests.
 
-    Quick linear model for testing the effect of a single regressor,
-    sequentially for many regressors.
+    Linear model for testing the individual effect of each of many regressors.
+    This is a scoring function to be used in a feature selection procedure, not
+    a free-standing feature selection procedure.
 
     This is done in 2 steps:
 
-    1. The cross correlation between each regressor and the target is computed,
+    1. The correlation between each regressor and the target is computed,
        that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *
        std(y)).
     2. It is converted to an F score then to a p-value.
 
-    Read more in the :ref:`User Guide <univariate_feature_selection>`.
+    For more on usage see the :ref:`User Guide <univariate_feature_selection>`.
 
     Parameters
     ----------
@@ -261,10 +262,18 @@ def f_regression(X, y, center=True):
     pval : array, shape=(n_features,)
         p-values of F-scores.
 
+
     See also
     --------
+    mutual_info_regression: Mutual information for a continuous target.
     f_classif: ANOVA F-value between label/feature for classification tasks.
     chi2: Chi-squared stats of non-negative features for classification tasks.
+    SelectKBest: Select features based on the k highest scores.
+    SelectFpr: Select features based on a false positive rate test.
+    SelectFdr: Select features based on an estimated false discovery rate.
+    SelectFwe: Select features based on family-wise error rate.
+    SelectPercentile: Select features based on percentile of the highest
+        scores.
     """
     X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64)
     n_samples = X.shape[0]
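To illustrate the distinction the revised docstring draws, here is a minimal usage sketch (not part of the patch itself). It assumes a scikit-learn release where `f_regression`, `SelectKBest`, and `make_regression` are importable as shown, and uses a small synthetic dataset: `f_regression` is passed as the scoring function to a selector rather than used as a free-standing selection procedure, and it is also called directly to inspect the F-scores and p-values it returns.

    # Sketch: f_regression as a scoring function inside a feature selection
    # procedure, plus a direct call to inspect its scores.
    from sklearn.datasets import make_regression
    from sklearn.feature_selection import SelectKBest, f_regression

    # Synthetic regression data: 100 samples, 10 features, 3 informative.
    X, y = make_regression(n_samples=100, n_features=10, n_informative=3,
                           random_state=0)

    # SelectKBest calls f_regression to obtain (F-scores, p-values)
    # and keeps the k highest-scoring features.
    selector = SelectKBest(score_func=f_regression, k=3)
    X_new = selector.fit_transform(X, y)
    print(X_new.shape)           # (100, 3)

    # The scoring function can also be called on its own.
    F, pval = f_regression(X, y)
    print(F.shape, pval.shape)   # (10,) (10,)

Keeping the scorer separate from the selector is what lets the same selection strategies listed in the new "See also" entries (SelectKBest, SelectFpr, SelectFdr, SelectFwe, SelectPercentile) be combined with any univariate scoring function, not just f_regression.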