From 23c290b04cbaa91765a4848192bf5fcffb41ecf4 Mon Sep 17 00:00:00 2001 From: FBurkhardt Date: Tue, 5 Dec 2023 17:14:45 +0100 Subject: [PATCH] 0.71.4 --- CHANGELOG.md | 5 ++++ nkululeko/constants.py | 2 +- nkululeko/modelrunner.py | 6 ++++- nkululeko/models/model.py | 32 ++++++++++++------------ nkululeko/models/model_cnn.py | 1 + nkululeko/models/model_mlp.py | 1 + nkululeko/models/model_mlp_regression.py | 1 + nkululeko/plots.py | 12 ++++----- 8 files changed, 36 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a45d68f..834a43cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ Changelog ========= +Version 0.71.4 +-------------- +* automatic epoch reset if not ANN +* scatter plots now show a regression line + Version 0.71.3 -------------- * enabled scatter plots for all variables diff --git a/nkululeko/constants.py b/nkululeko/constants.py index 741d0bf3..8c50c5c6 100644 --- a/nkululeko/constants.py +++ b/nkululeko/constants.py @@ -1,2 +1,2 @@ -VERSION="0.71.3" +VERSION="0.71.4" SAMPLING_RATE = 16000 diff --git a/nkululeko/modelrunner.py b/nkululeko/modelrunner.py index b6119185..e47f1f0a 100644 --- a/nkululeko/modelrunner.py +++ b/nkululeko/modelrunner.py @@ -38,8 +38,12 @@ def do_epochs(self): reports = [] plot_epochs = self.util.config_val("PLOT", "epochs", False) only_test = self.util.config_val("MODEL", "only_test", False) + epoch_num = int(self.util.config_val("EXP", "epochs", 1)) + if not self.model.is_ANN() and epoch_num > 1: + self.util.warn(f"setting epoch num to 1 (was {epoch_num}) if model not ANN") + epoch_num = 1 # for all epochs - for epoch in range(int(self.util.config_val("EXP", "epochs", 1))): + for epoch in range(epoch_num): if only_test: self.model.load(self.run, epoch) self.util.debug(f"reusing model: {self.model.store_path}") diff --git a/nkululeko/models/model.py b/nkululeko/models/model.py index c350e281..413ef971 100644 --- a/nkululeko/models/model.py +++ b/nkululeko/models/model.py @@ -24,6 +24,7 @@ def __init__(self, df_train, df_test, feats_train, feats_test): feats_train, feats_test, ) + self.model_type = "classic" self.util = Util("model") self.target = self.util.config_val("DATA", "target", "emotion") self.run = 0 @@ -31,6 +32,15 @@ def __init__(self, df_train, df_test, feats_train, feats_test): self.logo = self.util.config_val("MODEL", "logo", False) self.xfoldx = self.util.config_val("MODEL", "k_fold_cross", False) + def set_model_type(self, type): + self.model_type = type + + def is_ANN(self): + if self.model_type == "ann": + return True + else: + return False + def set_testdata(self, data_df, feats_df): self.df_test, self.feats_test = data_df, feats_df @@ -66,9 +76,7 @@ def _x_fold_cross(self): truth_x = feats.iloc[test_index].to_numpy() truth_y = targets[test_index] predict_y = self.clf.predict(truth_x) - report = Reporter( - truth_y.astype(float), predict_y, self.run, self.epoch - ) + report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch) self.util.debug( f"result for fold {g_index}:" f" {report.get_result().get_test_result()} " @@ -121,9 +129,7 @@ def _do_logo(self): fold_count = annos["fold"].nunique() self.util.debug(f"using existing folds for {fold_count} groups") g_index = 0 - self.util.debug( - f"ignoring splits and doing LOGO with {fold_count} groups" - ) + self.util.debug(f"ignoring splits and doing LOGO with {fold_count} groups") # leave-one-group loop for train_index, test_index in _logo.split( feats, @@ -137,9 +143,7 @@ def _do_logo(self): truth_x = feats.iloc[test_index].to_numpy() truth_y = targets[test_index] predict_y = self.clf.predict(truth_x) - report = Reporter( - truth_y.astype(float), predict_y, self.run, self.epoch - ) + report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch) result = report.get_result().get_test_result() self.util.debug(f"result for speaker group {g_index}: {result} ") results.append(float(report.get_result().test)) @@ -192,10 +196,8 @@ def train(self): feats = self.feats_train.to_numpy() # compute class weights if self.util.config_val("MODEL", "class_weight", False): - self.classes_weights = ( - sklearn.utils.class_weight.compute_sample_weight( - class_weight="balanced", y=self.df_train[self.target] - ) + self.classes_weights = sklearn.utils.class_weight.compute_sample_weight( + class_weight="balanced", y=self.df_train[self.target] ) tuning_params = self.util.config_val("MODEL", "tuning_params", False) @@ -215,9 +217,7 @@ def train(self): self.clf, tuned_params, refit=True, verbose=3, scoring=scoring ) try: - class_weight = self.util.config_val( - "MODEL", "class_weight", False - ) + class_weight = self.util.config_val("MODEL", "class_weight", False) if class_weight: self.util.debug("using class weight") self.clf.fit( diff --git a/nkululeko/models/model_cnn.py b/nkululeko/models/model_cnn.py index 73366724..8e38fe39 100644 --- a/nkululeko/models/model_cnn.py +++ b/nkululeko/models/model_cnn.py @@ -31,6 +31,7 @@ class CNN_model(Model): def __init__(self, df_train, df_test, feats_train, feats_test): """Constructor taking the configuration and all dataframes""" super().__init__(df_train, df_test, feats_train, feats_test) + super().set_model_type("ann") self.target = glob_conf.config["DATA"]["target"] labels = glob_conf.labels self.class_num = len(labels) diff --git a/nkululeko/models/model_mlp.py b/nkululeko/models/model_mlp.py index 9f74854f..e9eabfda 100644 --- a/nkululeko/models/model_mlp.py +++ b/nkululeko/models/model_mlp.py @@ -19,6 +19,7 @@ class MLP_model(Model): def __init__(self, df_train, df_test, feats_train, feats_test): """Constructor taking the configuration and all dataframes""" super().__init__(df_train, df_test, feats_train, feats_test) + super().set_model_type("ann") self.target = glob_conf.config["DATA"]["target"] labels = glob_conf.labels self.class_num = len(labels) diff --git a/nkululeko/models/model_mlp_regression.py b/nkululeko/models/model_mlp_regression.py index 71fa6efb..c55c2037 100644 --- a/nkululeko/models/model_mlp_regression.py +++ b/nkululeko/models/model_mlp_regression.py @@ -20,6 +20,7 @@ class MLP_Reg_model(Model): def __init__(self, df_train, df_test, feats_train, feats_test): """Constructor taking the configuration and all dataframes""" super().__init__(df_train, df_test, feats_train, feats_test) + super().set_model_type("ann") self.target = glob_conf.config["DATA"]["target"] labels = glob_conf.labels self.class_num = len(labels) diff --git a/nkululeko/plots.py b/nkululeko/plots.py index ba196f4e..15ac17c8 100644 --- a/nkululeko/plots.py +++ b/nkululeko/plots.py @@ -163,16 +163,16 @@ def plot_distributions(self, df, type_s="samples"): filename = f"{self.target}-{filename}" pearson_string = "" if self.util.is_categorical(df[att1]): - ax = sns.scatterplot(data=df, x=self.target, y=att2, hue=att1) + ax = sns.lmplot(data=df, x=self.target, y=att2, hue=att1) elif self.util.is_categorical(df[att2]): - ax = sns.scatterplot(data=df, x=self.target, y=att1, hue=att2) + ax = sns.lmplot(data=df, x=self.target, y=att1, hue=att2) else: pearson = stats.pearsonr(df[att1], df[att2]) pearson = int(pearson[0] * 1000) / 1000 pearson_string = f"PCC: {pearson}" - ax = sns.scatterplot(data=df, x=att1, y=att2, hue="class_label") + ax = sns.lmplot(data=df, x=att1, y=att2, hue="class_label") fig = ax.figure - ax.set_title(f"{type_s} {df.shape[0]}. {pearson_string}") + ax.fig.suptitle(f"{type_s} {df.shape[0]}. {pearson_string}") plt.tight_layout() plt.savefig(f"{fig_dir}{filename}_{type}.{self.format}") plt.close(fig) @@ -191,9 +191,9 @@ def _plot2cont(self, df, col1, col2, xlab, ylab): # trunc to three digits pearson = int(pearson[0] * 1000) / 1000 pearson_string = f"PCC: {pearson}" - ax = sns.scatterplot(data=df, x=col1, y=col2) + ax = sns.lmplot(data=df, x=col1, y=col2) caption = f"{ylab} {df.shape[0]}. {pearson_string}" - ax.set_title(caption) + ax.fig.suptitle(caption) return ax, caption def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):