visualize_eval_results.py
"""
This script can be used to print tables with the evaluation results organized according to vocals categories
"""
import pandas as pd
import museval
import musdb
import os
import json
import numpy as np
# choose which target to visualize
target = 'vocals' # vocals, accompaniment, all
# which statistic to compute on the data
statistic = 'median' # mean or median
# choose which tags to visualize
tags = ['SEQ', 'SEQ-BL1', 'SEQ-BL2']
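# Note: the filters below always select the 'vocals' target regardless of the
# `target` setting above; PES/EPS are averaged with a mean, and the per-category
# PES/EPS columns are only filled when statistic is 'mean'.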
# ----------------------------------------------------------------------------------------------------------------
# statistics per track
results_over_tracks = pd.DataFrame({'tag': [], 'SDR': [], 'SIR': [], 'SAR': []})
# statistics over ALL frames
results_over_frames = pd.DataFrame({'tag': [],
                                    'SDR_n': [], 'SIR_n': [], 'SAR_n': [], 'PES_n': [], 'EPS_n': [],
                                    'SDR_s': [], 'SIR_s': [], 'SAR_s': [], 'PES_s': [], 'EPS_s': [],
                                    'SDR_d': [], 'SIR_d': [], 'SAR_d': [], 'PES_d': [], 'EPS_d': [],
                                    'SDR_x': [], 'SIR_x': [], 'SAR_x': [], 'PES_x': [], 'EPS_x': [],
                                    'SDR_nsd': [], 'SIR_nsd': [], 'SAR_nsd': [], 'PES_nsd': [], 'EPS_nsd': []})
# silence results over all frames
results_silence_over_frames = pd.DataFrame({'tag': [], 'PES': [], 'EPS': []})
track_names = set() # all test tracks in shortened form
museval_data_path = os.path.join('evaluation', tags[0], 'bss_eval_results.pandas')
museval_data = pd.read_pickle(museval_data_path)
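# track names appear to have the form '<category>/<name>' (category is one of n/s/d/x),
# so track[2:10] keeps a shortened identifier without the category prefix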
for track in museval_data['track'].values:
    track_names.add(track[2:10])
for tag in tags:
    tag_dict = {'tag': tag,
                'SDR_n': [], 'SIR_n': [], 'SAR_n': [], 'PES_n': [], 'EPS_n': [],
                'SDR_s': [], 'SIR_s': [], 'SAR_s': [], 'PES_s': [], 'EPS_s': [],
                'SDR_d': [], 'SIR_d': [], 'SAR_d': [], 'PES_d': [], 'EPS_d': [],
                'SDR_x': [], 'SIR_x': [], 'SAR_x': [], 'PES_x': [], 'EPS_x': [],
                'SDR_nsd': [], 'SIR_nsd': [], 'SAR_nsd': [], 'PES_nsd': [], 'EPS_nsd': []}
    silence_over_frames_dict = {'tag': tag, 'PES': [], 'EPS': []}
    # load museval summary
    museval_data_path = os.path.join('evaluation', tag, 'bss_eval_results.pandas')
    museval_data = pd.read_pickle(museval_data_path)
    # remove nan
    museval_data = museval_data.dropna(axis=0)
    # load silent frames results
    silent_frames_data_path = os.path.join('evaluation', tag, 'silent_frames_results.json')
    silent_frames_data = pd.read_json(silent_frames_data_path, orient='records')
    # mean PES/EPS over all vocals frames (numeric_only restricts the mean to the metric columns)
    silence_over_frames_dict['PES'] = silent_frames_data[(silent_frames_data['target'] == 'vocals')].mean(axis=0, skipna=True, numeric_only=True)['PES']
    silence_over_frames_dict['EPS'] = silent_frames_data[(silent_frames_data['target'] == 'vocals')].mean(axis=0, skipna=True, numeric_only=True)['EPS']
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    results_silence_over_frames = pd.concat([results_silence_over_frames, pd.DataFrame([silence_over_frames_dict])],
                                            ignore_index=True, sort=False)
    # statistics over all frames within a vocals type
    for vocals_type in ['n', 's', 'd', 'x', 'nsd']:
        if statistic == 'mean':
            if vocals_type == 'nsd':
                vocals_mean = silent_frames_data[(silent_frames_data['target'] == 'vocals') &
                                                 (silent_frames_data['track'].str.contains('n/') |
                                                  silent_frames_data['track'].str.contains('s/') |
                                                  silent_frames_data['track'].str.contains('d/'))] \
                    .mean(axis=0, skipna=True, numeric_only=True)
            else:
                vocals_mean = silent_frames_data[(silent_frames_data['target'] == 'vocals') &
                                                 (silent_frames_data['track'].str.contains('{}/'.format(vocals_type)))] \
                    .mean(axis=0, skipna=True, numeric_only=True)
            # add silent frames results to method results dict
            tag_dict['PES_{}'.format(vocals_type)] = vocals_mean['PES']
            tag_dict['EPS_{}'.format(vocals_type)] = vocals_mean['EPS']
        for metric in ['SDR', 'SIR', 'SAR']:
            if vocals_type == 'nsd':
                values = museval_data[(museval_data['metric'] == metric) &
                                      (museval_data['track'].str.contains('n/') |
                                       museval_data['track'].str.contains('s/') |
                                       museval_data['track'].str.contains('d/')) &
                                      (museval_data['target'] == 'vocals')]['score'].values
            else:
                values = museval_data[(museval_data['metric'] == metric) &
                                      (museval_data['track'].str.contains('{}/'.format(vocals_type))) &
                                      (museval_data['target'] == 'vocals')]['score'].values
            if statistic == 'mean':
                summary_statistic = np.mean(values)
            elif statistic == 'median':
                summary_statistic = np.median(values)
            tag_dict['{}_{}'.format(metric, vocals_type)] = summary_statistic
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    results_over_frames = pd.concat([results_over_frames, pd.DataFrame([tag_dict])], ignore_index=True, sort=False)
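# print the summary tables, first as plain DataFrames and then as LaTeX source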
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print('Results over frames:')
    print(results_over_frames)
    print('Results on silence over all frames:')
    print(results_silence_over_frames)
    print('Results over frames (LaTeX):')
    print(results_over_frames.to_latex(
        float_format="{:0.2f}".format, index=False))
    print('Results on silence over all frames (LaTeX):')
    print(results_silence_over_frames.to_latex(
        float_format="{:0.2f}".format, index=False))