diff --git a/src/play_plot.py b/src/play_plot.py index 18190af..b55ba1d 100644 --- a/src/play_plot.py +++ b/src/play_plot.py @@ -20,6 +20,7 @@ df = df_all[num_features + [target]] + def test1(): # compute median per num bedrooms df_median_price_per_bedrooms = df.groupby(by='bedrooms')['price'].median().reset_index() @@ -77,11 +78,11 @@ def test3(): return I -viz = plot_importances(test1()) -viz.save(filename='/tmp/t.svg') -I = test2() -viz = plot_importances(I) -viz.save(filename='/tmp/t2.svg') +# viz = plot_importances(test1()) +# viz.save(filename='/tmp/t.svg') +# I = test2() +# viz = plot_importances(I) +# viz.save(filename='/tmp/t2.svg') # I = test3() # viz = plot_importances(I) @@ -94,4 +95,6 @@ def test3(): # D = feature_dependence_matrix(df, n_samples=5000) # viz = plot_dependence_heatmap(D, figsize=(4,4)) -# viz.view() \ No newline at end of file +# viz.view() + +print(feature_dependence_matrix(df)) \ No newline at end of file diff --git a/src/rfpimp.py b/src/rfpimp.py index e140294..930daeb 100644 --- a/src/rfpimp.py +++ b/src/rfpimp.py @@ -873,9 +873,9 @@ def get_feature_corr(df, method="spearman"): return result -def feature_corr_matrix(df): +def feature_corr_matrix(df, method="spearman"): """ - Return the Spearman's rank-order correlation between all pairs + Return the Spearman's rank-order correlation (or another method) between all pairs of features as a matrix with feature names as index and column names. The diagonal will be all 1.0 as features are self correlated. @@ -885,11 +885,11 @@ def feature_corr_matrix(df): assume a linear relationship between the variables; it looks for monotonic relationships. - :param df_train: dataframe containing features as columns, and - without the target variable. + :param df: dataframe containing features as columns, and without the target variable. + :param method: A string ("spearman", "pearson") or a callable function. :return: a data frame with the correlation matrix """ - corr = np.round(get_feature_corr(df), 4) + corr = np.round(get_feature_corr(df, method=method), 4) df_corr = pd.DataFrame(data=corr, index=df.columns, columns=df.columns) return df_corr @@ -901,7 +901,8 @@ def plot_corr_heatmap(df, value_fontsize=8, label_fontsize=9, precision=2, - xrot=80): + xrot=80, + method="spearman"): """ Display the feature spearman's correlation matrix as a heatmap with any abs(value)>color_threshold appearing with background color. @@ -919,7 +920,7 @@ def plot_corr_heatmap(df, figsize=(7,5), label_fontsize=13, value_fontsize=11) viz.view() # or just viz in notebook """ - corr = get_feature_corr(df) + corr = get_feature_corr(df, method=method) if len(corr.shape) == 0: corr = np.array([[1.0, corr], [corr, 1.0]])