diff --git a/CHANGELOG.md b/CHANGELOG.md index 76aba91..2fcce4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ Changelog ========= +Version 0.90.2 +-------------- +* added probability output to finetuning classification models +* switched path to prob. output from "store" to "results" + +Version 0.90.1 +-------------- +* Add balancing for finetune and update data README + Version 0.90.0 -------------- * augmentation can now be done without target diff --git a/make_package.sh b/make_package.sh index a4e1303..83a375f 100644 --- a/make_package.sh +++ b/make_package.sh @@ -1,31 +1,24 @@ #!/bin/bash - git add nkululeko/*py - for value in augmenting autopredict data feat_extract losses models reporting segmenting utils - do - git add nkululeko/$value/*.py - done - for data in aesdd androids androids_orig androids_test ased asvp-esd baved cafe clac cmu-mosei crema-d demos ekorpus emns emodb emofilm EmoFilm emorynlp emov-db emovo emozionalmente enterface esd gerparas iemocap jl jtes laughter-types meld mesd mess mlendsnd msp-improv msp-podcast oreau2 portuguese ravdess savee shemo subesco syntact tess thorsten-emotional urdu vivae - do - git add data/$data/*.py - git add data/$data/*.md - done - git add README.md - git add requirements.txt - git add make_package.sh - git add test_runs.sh - git add data/README.md - git add tests/*.ini - git add CHANGELOG.md ini_file.md setup.cfg -if [ "$1" == "--notag" ]; then - git commit -m update -else - source nkululeko/constants.py - git commit -m $VERSION - git tag $VERSION -fi +git add nkululeko/*py +for value in augmenting autopredict data feat_extract losses models reporting segmenting utils +do + git add nkululeko/$value/*.py +done +for data in aesdd androids androids_orig androids_test ased asvp-esd baved cafe clac cmu-mosei crema-d demos ekorpus emns emodb emofilm EmoFilm emorynlp emov-db emovo emozionalmente enterface esd gerparas iemocap jl jtes laughter-types meld mesd mess mlendsnd msp-improv msp-podcast oreau2 portuguese ravdess savee shemo subesco syntact tess thorsten-emotional urdu vivae +do + git add data/$data/*.py + git add data/$data/*.md +done +git add README.md +git add requirements.txt +git add make_package.sh +git add test_runs.sh +git add data/README.md +git add tests/*.ini +git add CHANGELOG.md ini_file.md setup.cfg +source nkululeko/constants.py +git commit -m $VERSION +git tag $VERSION git push - -if ! [ "$1" == "--notag" ]; then - git push --tags -fi +git push --tags diff --git a/nkululeko/constants.py b/nkululeko/constants.py index 47579ca..936acfe 100644 --- a/nkululeko/constants.py +++ b/nkululeko/constants.py @@ -1,2 +1,2 @@ -VERSION = "0.90.0" +VERSION="0.90.2" SAMPLING_RATE = 16000 diff --git a/nkululeko/models/model_tuned.py b/nkululeko/models/model_tuned.py index 40d45be..9db9e3e 100644 --- a/nkululeko/models/model_tuned.py +++ b/nkululeko/models/model_tuned.py @@ -30,10 +30,16 @@ def __init__(self, df_train, df_test, feats_train, feats_test): """Constructor taking the configuration and all dataframes.""" super().__init__(df_train, df_test, feats_train, feats_test) super().set_model_type("finetuned") + self.df_test, self.df_train, self.feats_test, self.feats_train = ( + df_test, + df_train, + feats_test, + feats_train, + ) self.name = "finetuned_wav2vec2" self.target = glob_conf.config["DATA"]["target"] - labels = glob_conf.labels - self.class_num = len(labels) + self.labels = glob_conf.labels + self.class_num = len(self.labels) device = self.util.config_val("MODEL", "device", False) if not device: self.device = "cuda" if torch.cuda.is_available() else "cpu" @@ -304,7 +310,7 @@ def train(self): else: self.util.error(f"criterion {criterion} not supported for classifier") else: - self.criterion = self.util.config_val("MODEL", "loss", "ccc") + criterion = self.util.config_val("MODEL", "loss", "1-ccc") if criterion == "1-ccc": criterion = ConcordanceCorCoeff() elif criterion == "mse": @@ -402,7 +408,7 @@ def compute_loss( self.load(self.run, self.epoch) def get_predictions(self): - results = [] + results = [[]].pop(0) for (file, start, end), _ in audeer.progress_bar( self.df_test.iterrows(), total=len(self.df_test), @@ -415,18 +421,37 @@ def get_predictions(self): file, duration=end - start, offset=start, always_2d=True ) assert sr == self.sampling_rate - predictions = self.model.predict(signal) - results.append(predictions.argmax()) - return results + prediction = self.model.predict(signal) + results.append(prediction) + # results.append(predictions.argmax()) + predictions = np.asarray(results) + if self.util.exp_is_classification(): + # make a dataframe for the class probabilities + proba_d = {} + for c in range(self.class_num): + proba_d[c] = [] + # get the class probabilities + # predictions = self.clf.predict_proba(self.feats_test.to_numpy()) + # pred = self.clf.predict(features) + for i in range(self.class_num): + proba_d[i] = list(predictions.T[i]) + probas = pd.DataFrame(proba_d) + probas = probas.set_index(self.df_test.index) + predictions = probas.idxmax(axis=1).values + else: + predictions = predictions.flatten() + probas = None + return predictions, probas def predict(self): """Predict the whole eval feature set""" - predictions = self.get_predictions() + predictions, probas = self.get_predictions() report = Reporter( self.df_test[self.target].to_numpy().astype(float), predictions, self.run, self.epoch_num, + probas=probas, ) self._plot_epoch_progression(report) return report @@ -438,6 +463,7 @@ def _plot_epoch_progression(self, report): ) with open(log_file, "r") as file: data = file.read() + data = data.strip().replace("nan", "0") list = ast.literal_eval(data) epochs, vals, loss = [], [], [] for index, tp in enumerate(list): diff --git a/nkululeko/utils/util.py b/nkululeko/utils/util.py index aa584a3..c0a7084 100644 --- a/nkululeko/utils/util.py +++ b/nkululeko/utils/util.py @@ -155,10 +155,10 @@ def get_save_name(self): return f"{store}/{self.get_exp_name()}.pkl" def get_pred_name(self): - store = self.get_path("store") + results_dir = self.get_path("res_dir") target = self.get_target_name() pred_name = self.get_model_description() - return f"{store}/pred_{target}_{pred_name}.csv" + return f"{results_dir}/pred_{target}_{pred_name}.csv" def is_categorical(self, pd_series): """Check if a dataframe column is categorical.""" diff --git a/tests/exp_agedb_os_mlp.ini b/tests/exp_agedb_os_mlp.ini index 007852b..b971e3d 100644 --- a/tests/exp_agedb_os_mlp.ini +++ b/tests/exp_agedb_os_mlp.ini @@ -23,7 +23,7 @@ layers = {'l1':1024, 'l2':128} drop = .4 loss = 1-ccc measure = ccc -patience = 5 +patience = 10 [PLOT] best_model = True epoch_progression = True