Skip to content

Commit

Permalink
Add a `method` argument to the correlation functions (`feature_corr_matrix`, `plot_corr_heatmap`).
Browse files Browse the repository at this point in the history
  • Loading branch information
parrt committed Nov 25, 2020
1 parent 777fb8c commit cdbc7d1
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 13 deletions.
15 changes: 9 additions & 6 deletions src/play_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

df = df_all[num_features + [target]]


def test1():
# compute median per num bedrooms
df_median_price_per_bedrooms = df.groupby(by='bedrooms')['price'].median().reset_index()
Expand Down Expand Up @@ -77,11 +78,11 @@ def test3():
return I


viz = plot_importances(test1())
viz.save(filename='/tmp/t.svg')
I = test2()
viz = plot_importances(I)
viz.save(filename='/tmp/t2.svg')
# viz = plot_importances(test1())
# viz.save(filename='/tmp/t.svg')
# I = test2()
# viz = plot_importances(I)
# viz.save(filename='/tmp/t2.svg')

# I = test3()
# viz = plot_importances(I)
Expand All @@ -94,4 +95,6 @@ def test3():

# D = feature_dependence_matrix(df, n_samples=5000)
# viz = plot_dependence_heatmap(D, figsize=(4,4))
# viz.view()
# viz.view()

print(feature_dependence_matrix(df))
15 changes: 8 additions & 7 deletions src/rfpimp.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,9 +873,9 @@ def get_feature_corr(df, method="spearman"):
return result


def feature_corr_matrix(df):
def feature_corr_matrix(df, method="spearman"):
"""
Return the Spearman's rank-order correlation between all pairs
Return the Spearman's rank-order correlation (or another method) between all pairs
of features as a matrix with feature names as index and column names.
The diagonal will be all 1.0 as features are self correlated.
Expand All @@ -885,11 +885,11 @@ def feature_corr_matrix(df):
assume a linear relationship between the variables; it looks for
monotonic relationships.
:param df_train: dataframe containing features as columns, and
without the target variable.
:param df: dataframe containing features as columns, and without the target variable.
:param method: A string ("spearman", "pearson") or a callable function.
:return: a data frame with the correlation matrix
"""
corr = np.round(get_feature_corr(df), 4)
corr = np.round(get_feature_corr(df, method=method), 4)
df_corr = pd.DataFrame(data=corr, index=df.columns, columns=df.columns)
return df_corr

Expand All @@ -901,7 +901,8 @@ def plot_corr_heatmap(df,
value_fontsize=8,
label_fontsize=9,
precision=2,
xrot=80):
xrot=80,
method="spearman"):
"""
Display the feature spearman's correlation matrix as a heatmap with
any abs(value)>color_threshold appearing with background color.
Expand All @@ -919,7 +920,7 @@ def plot_corr_heatmap(df,
figsize=(7,5), label_fontsize=13, value_fontsize=11)
viz.view() # or just viz in notebook
"""
corr = get_feature_corr(df)
corr = get_feature_corr(df, method=method)
if len(corr.shape) == 0:
corr = np.array([[1.0, corr],
[corr, 1.0]])
Expand Down

0 comments on commit cdbc7d1

Please sign in to comment.