From 23c290b04cbaa91765a4848192bf5fcffb41ecf4 Mon Sep 17 00:00:00 2001
From: FBurkhardt <fburkhardt@audeering.com>
Date: Tue, 5 Dec 2023 17:14:45 +0100
Subject: [PATCH] 0.71.4

---
 CHANGELOG.md                             |  5 ++++
 nkululeko/constants.py                   |  2 +-
 nkululeko/modelrunner.py                 |  6 ++++-
 nkululeko/models/model.py                | 32 ++++++++++++------------
 nkululeko/models/model_cnn.py            |  1 +
 nkululeko/models/model_mlp.py            |  1 +
 nkululeko/models/model_mlp_regression.py |  1 +
 nkululeko/plots.py                       | 12 ++++-----
 8 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a45d68f..834a43cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+Version 0.71.4
+--------------
+* epochs are automatically reset to 1 if the model is not an ANN
+* scatter plots now show a regression line
+
 Version 0.71.3
 --------------
 * enabled scatter plots for all variables
diff --git a/nkululeko/constants.py b/nkululeko/constants.py
index 741d0bf3..8c50c5c6 100644
--- a/nkululeko/constants.py
+++ b/nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.71.3"
+VERSION="0.71.4"
 SAMPLING_RATE = 16000
diff --git a/nkululeko/modelrunner.py b/nkululeko/modelrunner.py
index b6119185..e47f1f0a 100644
--- a/nkululeko/modelrunner.py
+++ b/nkululeko/modelrunner.py
@@ -38,8 +38,12 @@ def do_epochs(self):
         reports = []
         plot_epochs = self.util.config_val("PLOT", "epochs", False)
         only_test = self.util.config_val("MODEL", "only_test", False)
+        epoch_num = int(self.util.config_val("EXP", "epochs", 1))
+        if not self.model.is_ANN() and epoch_num > 1:
+            self.util.warn(f"setting epoch num to 1 (was {epoch_num}) if model not ANN")
+            epoch_num = 1
         # for all epochs
-        for epoch in range(int(self.util.config_val("EXP", "epochs", 1))):
+        for epoch in range(epoch_num):
             if only_test:
                 self.model.load(self.run, epoch)
                 self.util.debug(f"reusing model: {self.model.store_path}")
diff --git a/nkululeko/models/model.py b/nkululeko/models/model.py
index c350e281..413ef971 100644
--- a/nkululeko/models/model.py
+++ b/nkululeko/models/model.py
@@ -24,6 +24,7 @@ def __init__(self, df_train, df_test, feats_train, feats_test):
             feats_train,
             feats_test,
         )
+        self.model_type = "classic"
         self.util = Util("model")
         self.target = self.util.config_val("DATA", "target", "emotion")
         self.run = 0
@@ -31,6 +32,15 @@ def __init__(self, df_train, df_test, feats_train, feats_test):
         self.logo = self.util.config_val("MODEL", "logo", False)
         self.xfoldx = self.util.config_val("MODEL", "k_fold_cross", False)
 
+    def set_model_type(self, type):  # NOTE(review): `type` shadows the builtin
+        self.model_type = type  # "classic" by default, "ann" for ANN models
+
+    def is_ANN(self):
+        """Return True if this model is an artificial neural network."""
+        # model_type is "classic" by default; ANN subclasses set it to "ann"
+        # via set_model_type(), so a plain comparison is the whole check.
+        return self.model_type == "ann"
+
     def set_testdata(self, data_df, feats_df):
         self.df_test, self.feats_test = data_df, feats_df
 
@@ -66,9 +76,7 @@ def _x_fold_cross(self):
             truth_x = feats.iloc[test_index].to_numpy()
             truth_y = targets[test_index]
             predict_y = self.clf.predict(truth_x)
-            report = Reporter(
-                truth_y.astype(float), predict_y, self.run, self.epoch
-            )
+            report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch)
             self.util.debug(
                 f"result for fold {g_index}:"
                 f" {report.get_result().get_test_result()} "
@@ -121,9 +129,7 @@ def _do_logo(self):
             fold_count = annos["fold"].nunique()
             self.util.debug(f"using existing folds for {fold_count} groups")
         g_index = 0
-        self.util.debug(
-            f"ignoring splits and doing LOGO with {fold_count} groups"
-        )
+        self.util.debug(f"ignoring splits and doing LOGO with {fold_count} groups")
         # leave-one-group loop
         for train_index, test_index in _logo.split(
             feats,
@@ -137,9 +143,7 @@ def _do_logo(self):
             truth_x = feats.iloc[test_index].to_numpy()
             truth_y = targets[test_index]
             predict_y = self.clf.predict(truth_x)
-            report = Reporter(
-                truth_y.astype(float), predict_y, self.run, self.epoch
-            )
+            report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch)
             result = report.get_result().get_test_result()
             self.util.debug(f"result for speaker group {g_index}: {result} ")
             results.append(float(report.get_result().test))
@@ -192,10 +196,8 @@ def train(self):
         feats = self.feats_train.to_numpy()
         # compute class weights
         if self.util.config_val("MODEL", "class_weight", False):
-            self.classes_weights = (
-                sklearn.utils.class_weight.compute_sample_weight(
-                    class_weight="balanced", y=self.df_train[self.target]
-                )
+            self.classes_weights = sklearn.utils.class_weight.compute_sample_weight(
+                class_weight="balanced", y=self.df_train[self.target]
             )
 
         tuning_params = self.util.config_val("MODEL", "tuning_params", False)
@@ -215,9 +217,7 @@ def train(self):
                 self.clf, tuned_params, refit=True, verbose=3, scoring=scoring
             )
             try:
-                class_weight = self.util.config_val(
-                    "MODEL", "class_weight", False
-                )
+                class_weight = self.util.config_val("MODEL", "class_weight", False)
                 if class_weight:
                     self.util.debug("using class weight")
                     self.clf.fit(
diff --git a/nkululeko/models/model_cnn.py b/nkululeko/models/model_cnn.py
index 73366724..8e38fe39 100644
--- a/nkululeko/models/model_cnn.py
+++ b/nkululeko/models/model_cnn.py
@@ -31,6 +31,7 @@ class CNN_model(Model):
     def __init__(self, df_train, df_test, feats_train, feats_test):
         """Constructor taking the configuration and all dataframes"""
         super().__init__(df_train, df_test, feats_train, feats_test)
+        super().set_model_type("ann")
         self.target = glob_conf.config["DATA"]["target"]
         labels = glob_conf.labels
         self.class_num = len(labels)
diff --git a/nkululeko/models/model_mlp.py b/nkululeko/models/model_mlp.py
index 9f74854f..e9eabfda 100644
--- a/nkululeko/models/model_mlp.py
+++ b/nkululeko/models/model_mlp.py
@@ -19,6 +19,7 @@ class MLP_model(Model):
     def __init__(self, df_train, df_test, feats_train, feats_test):
         """Constructor taking the configuration and all dataframes"""
         super().__init__(df_train, df_test, feats_train, feats_test)
+        super().set_model_type("ann")
         self.target = glob_conf.config["DATA"]["target"]
         labels = glob_conf.labels
         self.class_num = len(labels)
diff --git a/nkululeko/models/model_mlp_regression.py b/nkululeko/models/model_mlp_regression.py
index 71fa6efb..c55c2037 100644
--- a/nkululeko/models/model_mlp_regression.py
+++ b/nkululeko/models/model_mlp_regression.py
@@ -20,6 +20,7 @@ class MLP_Reg_model(Model):
     def __init__(self, df_train, df_test, feats_train, feats_test):
         """Constructor taking the configuration and all dataframes"""
         super().__init__(df_train, df_test, feats_train, feats_test)
+        super().set_model_type("ann")
         self.target = glob_conf.config["DATA"]["target"]
         labels = glob_conf.labels
         self.class_num = len(labels)
diff --git a/nkululeko/plots.py b/nkululeko/plots.py
index ba196f4e..15ac17c8 100644
--- a/nkululeko/plots.py
+++ b/nkululeko/plots.py
@@ -163,16 +163,16 @@ def plot_distributions(self, df, type_s="samples"):
                 filename = f"{self.target}-{filename}"
                 pearson_string = ""
                 if self.util.is_categorical(df[att1]):
-                    ax = sns.scatterplot(data=df, x=self.target, y=att2, hue=att1)
+                    ax = sns.lmplot(data=df, x=self.target, y=att2, hue=att1)
                 elif self.util.is_categorical(df[att2]):
-                    ax = sns.scatterplot(data=df, x=self.target, y=att1, hue=att2)
+                    ax = sns.lmplot(data=df, x=self.target, y=att1, hue=att2)
                 else:
                     pearson = stats.pearsonr(df[att1], df[att2])
                     pearson = int(pearson[0] * 1000) / 1000
                     pearson_string = f"PCC: {pearson}"
-                    ax = sns.scatterplot(data=df, x=att1, y=att2, hue="class_label")
+                    ax = sns.lmplot(data=df, x=att1, y=att2, hue="class_label")
                 fig = ax.figure
-                ax.set_title(f"{type_s} {df.shape[0]}. {pearson_string}")
+                ax.fig.suptitle(f"{type_s} {df.shape[0]}. {pearson_string}")
                 plt.tight_layout()
                 plt.savefig(f"{fig_dir}{filename}_{type}.{self.format}")
                 plt.close(fig)
@@ -191,9 +191,9 @@ def _plot2cont(self, df, col1, col2, xlab, ylab):
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
-        ax = sns.scatterplot(data=df, x=col1, y=col2)
+        ax = sns.lmplot(data=df, x=col1, y=col2)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.set_title(caption)
+        ax.fig.suptitle(caption)
         return ax, caption
 
     def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):