diff --git a/CHANGELOG.md b/CHANGELOG.md index e316a447..314ada02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ Changelog ========= +Version 0.28.0 +------------- +* made "label_data" configuration automatic and added "label_result" + + Version 0.27.0 ------------- * added "label_data" configuration to label data with trained model (so now there can be train, dev and test set) diff --git a/ini_file.md b/ini_file.md index ad9437ba..85b2ebeb 100644 --- a/ini_file.md +++ b/ini_file.md @@ -40,8 +40,10 @@ * strategy = traintest * **trains**: if *strategy* = cross_data, denote the train databases * **tests**: if *strategy* = cross_data, denote the test databases -* **label_data**: a dataset to be used by the *experiment.predict_test_and_save()* function as test data for the given best model +* **label_data**: a dataset to be used as test data for the given best model * label_data = emovo +* **label_result**: a file path for the result of the step above + * label_result = ./labelresult.csv * **root_folders**: specify an additional configuration specifically for all entries starting with a dataset name, acting as global defaults. 
* root_folders = data_roots.ini * **db_name**: path with audformatted repository for each database listed in 'databases* diff --git a/src/constants.py b/src/constants.py index e993878b..1cf7360d 100644 --- a/src/constants.py +++ b/src/constants.py @@ -1 +1 @@ -VERSION = '0.27.0' \ No newline at end of file +VERSION = '0.28.0' \ No newline at end of file diff --git a/src/experiment.py b/src/experiment.py index 95164a12..80d4dee9 100644 --- a/src/experiment.py +++ b/src/experiment.py @@ -339,6 +339,13 @@ def run(self): self.plot_confmat_per_speaker(conf_mat_per_speaker_function) used_time = time.process_time() - self.start self.util.debug(f'Done, used {used_time:.3f} seconds') + + # check if a test set should be labeled by the model: + label_data = self.util.config_val('DATA', 'label_data', False) + label_result = self.util.config_val('DATA', 'label_result', False) + if label_data and label_result: + self.predict_test_and_save(label_result) + return self.reports def plot_confmat_per_speaker(self, function): diff --git a/src/model.py b/src/model.py index 962874eb..cd42df18 100644 --- a/src/model.py +++ b/src/model.py @@ -27,6 +27,9 @@ def __init__(self, df_train, df_test, feats_train, feats_test): self.loso = self.util.config_val('MODEL', 'loso', False) self.logo = self.util.config_val('MODEL', 'logo', False) self.xfoldx = self.util.config_val('MODEL', 'k_fold_cross', False) + + def set_testdata(self, data_df, feats_df): + self.df_test, self.feats_test = data_df, feats_df def reset_test(self, df_test, feats_test): self.df_test, self.feats_test = df_test, feats_test diff --git a/src/model_mlp_regression.py b/src/model_mlp_regression.py index e4234148..77cf9d6c 100644 --- a/src/model_mlp_regression.py +++ b/src/model_mlp_regression.py @@ -57,6 +57,9 @@ def __init__(self, df_train, df_test, feats_train, feats_test): self.trainloader = self.get_loader(feats_train, df_train, True) self.testloader = self.get_loader(feats_test, df_test, False) + def set_testdata(self, 
data_df, feats_df): + self.testloader = self.get_loader(feats_df, data_df, False) + def train(self): loss = self.train_epoch(self.model, self.trainloader, self.device, self.optimizer, self.criterion) return loss diff --git a/src/test_predictor.py b/src/test_predictor.py index 5ec0152b..3c0b2cdb 100644 --- a/src/test_predictor.py +++ b/src/test_predictor.py @@ -5,6 +5,9 @@ from dataset import Dataset from feature_extractor import FeatureExtractor from scaler import Scaler +import numpy as np +from sklearn.preprocessing import LabelEncoder + class Test_predictor(): @@ -29,16 +32,18 @@ def predict_and_store(self): featextractor = FeatureExtractor(data_df, label_data, '') feats_df = featextractor.extract() scale = self.util.config_val('FEATS', 'scale', False) - data_df[self.target] = self.label_encoder.fit_transform(data_df[self.target]) + labelenc = LabelEncoder() + data_df[self.target] = labelenc.fit_transform(data_df[self.target]) +# data_df[self.target] = self.label_encoder.fit_transform(data_df[self.target]) if scale: self.scaler = Scaler(data_df, None, feats_df, None, scale) feats_df, _ = self.scaler.scale() self.model.set_testdata(data_df, feats_df) - predictions = self.model.get_predictions().tolist() + predictions = self.model.get_predictions() df = pd.DataFrame(index = data_df.index) df['speaker'] = data_df['speaker'] df['gender'] = data_df['gender'] - df[self.target] = self.label_encoder.inverse_transform(predictions) + df[self.target] = labelenc.inverse_transform(predictions.tolist()) df.to_csv(self.name) else: predictions = self.model.get_predictions()