Skip to content

Commit

Permalink
Merge pull request #43 from escherba/master
Browse files Browse the repository at this point in the history
Use pandas default method to generate correlation matrices
  • Loading branch information
parrt authored Nov 25, 2020
2 parents 4fb873d + 305a156 commit 777fb8c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.egg-info/
__pycache__/
20 changes: 17 additions & 3 deletions src/rfpimp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from sklearn.metrics import r2_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from scipy.stats import spearmanr
from scipy import stats
from pandas.api.types import is_numeric_dtype
from matplotlib.colors import ListedColormap
from matplotlib.ticker import FormatStrFormatter
Expand Down Expand Up @@ -859,6 +859,20 @@ def plot_dependence_heatmap(D,
return PimpViz()


def get_feature_corr(df, method="spearman"):
    """
    Compute the pairwise correlation between the feature columns of df.

    Columns are always treated as the variables (features) and rows as the
    observations, regardless of which branch below handles the input.

    :param df: a pandas DataFrame, or an array-like laid out as
               (observations, features).
    :param method: "spearman" (default), "pearson", or a callable.
                   For a DataFrame the value is passed straight through to
                   DataFrame.corr(method=...). For any other input a callable
                   is invoked as method(df) and its result returned as is.
    :return: the correlation values as a numpy array for the DataFrame
             branch; otherwise whatever the selected routine returns.
             NOTE: scipy's spearmanr yields a scalar rather than a 2x2
             matrix when the input has exactly two columns, so callers
             may need to handle a 0-dimensional result.
    :raises ValueError: if df is not a DataFrame and method is neither
                        callable nor one of "spearman" / "pearson".
    """
    if isinstance(df, pd.DataFrame):
        # pandas correlates columns and accepts named methods or callables.
        return df.corr(method=method).values
    if callable(method):
        return method(df)
    if method == "spearman":
        # spearmanr defaults to axis=0: each column is a variable.
        return stats.spearmanr(df).correlation
    if method == "pearson":
        # np.corrcoef defaults to rowvar=True (rows are variables); pass
        # rowvar=False so columns are the variables, matching the
        # DataFrame and spearman branches.
        return np.corrcoef(df, rowvar=False)
    raise ValueError("unsupported correlation method")


def feature_corr_matrix(df):
"""
Return the Spearman's rank-order correlation between all pairs
Expand All @@ -875,7 +889,7 @@ def feature_corr_matrix(df):
without the target variable.
:return: a data frame with the correlation matrix
"""
corr = np.round(spearmanr(df).correlation, 4)
corr = np.round(get_feature_corr(df), 4)
df_corr = pd.DataFrame(data=corr, index=df.columns, columns=df.columns)
return df_corr

Expand Down Expand Up @@ -905,7 +919,7 @@ def plot_corr_heatmap(df,
figsize=(7,5), label_fontsize=13, value_fontsize=11)
viz.view() # or just viz in notebook
"""
corr = spearmanr(df).correlation
corr = get_feature_corr(df)
if len(corr.shape) == 0:
corr = np.array([[1.0, corr],
[corr, 1.0]])
Expand Down
2 changes: 1 addition & 1 deletion src/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
python_requires='>=3.6',
author='Terence Parr, Kerem Turgutlu',
author_email='[email protected], [email protected]',
install_requires=['stratx>=0.2','numpy','pandas','scikit-learn','matplotlib'],
install_requires=['numpy','pandas','scikit-learn','matplotlib'],
description='Permutation and drop-column importance for scikit-learn random forests and other models',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 777fb8c

Please sign in to comment.