Skip to content

Commit

Permalink
0.71.4
Browse files Browse the repository at this point in the history
  • Loading branch information
FBurkhardt committed Dec 5, 2023
1 parent 83073b4 commit 23c290b
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 24 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Changelog
=========

Version 0.71.4
--------------
* the number of epochs is automatically reset to 1 if the model is not an ANN
* scatter plots now show a regression line

Version 0.71.3
--------------
* enabled scatter plots for all variables
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION="0.71.3"
VERSION="0.71.4"
SAMPLING_RATE = 16000
6 changes: 5 additions & 1 deletion nkululeko/modelrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,12 @@ def do_epochs(self):
reports = []
plot_epochs = self.util.config_val("PLOT", "epochs", False)
only_test = self.util.config_val("MODEL", "only_test", False)
epoch_num = int(self.util.config_val("EXP", "epochs", 1))
if not self.model.is_ANN() and epoch_num > 1:
self.util.warn(f"setting epoch num to 1 (was {epoch_num}) if model not ANN")
epoch_num = 1
# for all epochs
for epoch in range(int(self.util.config_val("EXP", "epochs", 1))):
for epoch in range(epoch_num):
if only_test:
self.model.load(self.run, epoch)
self.util.debug(f"reusing model: {self.model.store_path}")
Expand Down
32 changes: 16 additions & 16 deletions nkululeko/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,23 @@ def __init__(self, df_train, df_test, feats_train, feats_test):
feats_train,
feats_test,
)
self.model_type = "classic"
self.util = Util("model")
self.target = self.util.config_val("DATA", "target", "emotion")
self.run = 0
self.epoch = 0
self.logo = self.util.config_val("MODEL", "logo", False)
self.xfoldx = self.util.config_val("MODEL", "k_fold_cross", False)

def set_model_type(self, type):
    """Store the model family tag on this instance.

    The constructor initialises the tag to "classic"; the neural-network
    subclasses call this with "ann" right after the base constructor.

    Args:
        type: the tag to store in ``self.model_type``. The parameter name
            shadows the builtin but is kept so existing callers still work.
    """
    self.model_type = type

def is_ANN(self):
    """Tell whether this model is tagged as an artificial neural network.

    Returns:
        True if ``self.model_type`` equals "ann", False otherwise.
    """
    # The comparison already yields the boolean; no if/else wrapper needed.
    return self.model_type == "ann"

def set_testdata(self, data_df, feats_df):
    """Replace the test split held by this model.

    Args:
        data_df: stored as ``self.df_test``.
        feats_df: stored as ``self.feats_test``.
    """
    self.df_test = data_df
    self.feats_test = feats_df

Expand Down Expand Up @@ -66,9 +76,7 @@ def _x_fold_cross(self):
truth_x = feats.iloc[test_index].to_numpy()
truth_y = targets[test_index]
predict_y = self.clf.predict(truth_x)
report = Reporter(
truth_y.astype(float), predict_y, self.run, self.epoch
)
report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch)
self.util.debug(
f"result for fold {g_index}:"
f" {report.get_result().get_test_result()} "
Expand Down Expand Up @@ -121,9 +129,7 @@ def _do_logo(self):
fold_count = annos["fold"].nunique()
self.util.debug(f"using existing folds for {fold_count} groups")
g_index = 0
self.util.debug(
f"ignoring splits and doing LOGO with {fold_count} groups"
)
self.util.debug(f"ignoring splits and doing LOGO with {fold_count} groups")
# leave-one-group loop
for train_index, test_index in _logo.split(
feats,
Expand All @@ -137,9 +143,7 @@ def _do_logo(self):
truth_x = feats.iloc[test_index].to_numpy()
truth_y = targets[test_index]
predict_y = self.clf.predict(truth_x)
report = Reporter(
truth_y.astype(float), predict_y, self.run, self.epoch
)
report = Reporter(truth_y.astype(float), predict_y, self.run, self.epoch)
result = report.get_result().get_test_result()
self.util.debug(f"result for speaker group {g_index}: {result} ")
results.append(float(report.get_result().test))
Expand Down Expand Up @@ -192,10 +196,8 @@ def train(self):
feats = self.feats_train.to_numpy()
# compute class weights
if self.util.config_val("MODEL", "class_weight", False):
self.classes_weights = (
sklearn.utils.class_weight.compute_sample_weight(
class_weight="balanced", y=self.df_train[self.target]
)
self.classes_weights = sklearn.utils.class_weight.compute_sample_weight(
class_weight="balanced", y=self.df_train[self.target]
)

tuning_params = self.util.config_val("MODEL", "tuning_params", False)
Expand All @@ -215,9 +217,7 @@ def train(self):
self.clf, tuned_params, refit=True, verbose=3, scoring=scoring
)
try:
class_weight = self.util.config_val(
"MODEL", "class_weight", False
)
class_weight = self.util.config_val("MODEL", "class_weight", False)
if class_weight:
self.util.debug("using class weight")
self.clf.fit(
Expand Down
1 change: 1 addition & 0 deletions nkululeko/models/model_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class CNN_model(Model):
def __init__(self, df_train, df_test, feats_train, feats_test):
"""Constructor taking the configuration and all dataframes"""
super().__init__(df_train, df_test, feats_train, feats_test)
super().set_model_type("ann")
self.target = glob_conf.config["DATA"]["target"]
labels = glob_conf.labels
self.class_num = len(labels)
Expand Down
1 change: 1 addition & 0 deletions nkululeko/models/model_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class MLP_model(Model):
def __init__(self, df_train, df_test, feats_train, feats_test):
"""Constructor taking the configuration and all dataframes"""
super().__init__(df_train, df_test, feats_train, feats_test)
super().set_model_type("ann")
self.target = glob_conf.config["DATA"]["target"]
labels = glob_conf.labels
self.class_num = len(labels)
Expand Down
1 change: 1 addition & 0 deletions nkululeko/models/model_mlp_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class MLP_Reg_model(Model):
def __init__(self, df_train, df_test, feats_train, feats_test):
"""Constructor taking the configuration and all dataframes"""
super().__init__(df_train, df_test, feats_train, feats_test)
super().set_model_type("ann")
self.target = glob_conf.config["DATA"]["target"]
labels = glob_conf.labels
self.class_num = len(labels)
Expand Down
12 changes: 6 additions & 6 deletions nkululeko/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,16 +163,16 @@ def plot_distributions(self, df, type_s="samples"):
filename = f"{self.target}-{filename}"
pearson_string = ""
if self.util.is_categorical(df[att1]):
ax = sns.scatterplot(data=df, x=self.target, y=att2, hue=att1)
ax = sns.lmplot(data=df, x=self.target, y=att2, hue=att1)
elif self.util.is_categorical(df[att2]):
ax = sns.scatterplot(data=df, x=self.target, y=att1, hue=att2)
ax = sns.lmplot(data=df, x=self.target, y=att1, hue=att2)
else:
pearson = stats.pearsonr(df[att1], df[att2])
pearson = int(pearson[0] * 1000) / 1000
pearson_string = f"PCC: {pearson}"
ax = sns.scatterplot(data=df, x=att1, y=att2, hue="class_label")
ax = sns.lmplot(data=df, x=att1, y=att2, hue="class_label")
fig = ax.figure
ax.set_title(f"{type_s} {df.shape[0]}. {pearson_string}")
ax.fig.suptitle(f"{type_s} {df.shape[0]}. {pearson_string}")
plt.tight_layout()
plt.savefig(f"{fig_dir}{filename}_{type}.{self.format}")
plt.close(fig)
Expand All @@ -191,9 +191,9 @@ def _plot2cont(self, df, col1, col2, xlab, ylab):
# trunc to three digits
pearson = int(pearson[0] * 1000) / 1000
pearson_string = f"PCC: {pearson}"
ax = sns.scatterplot(data=df, x=col1, y=col2)
ax = sns.lmplot(data=df, x=col1, y=col2)
caption = f"{ylab} {df.shape[0]}. {pearson_string}"
ax.set_title(caption)
ax.fig.suptitle(caption)
return ax, caption

def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
Expand Down

0 comments on commit 23c290b

Please sign in to comment.