-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_eval.sh
executable file
·262 lines (242 loc) · 15.9 KB
/
run_eval.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
################# ALL DATA
# # Wav2Vec 2.0 English
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=save/jonatasgrosman/wav2vec2-large-xlsr-53-english/checkpoint-25155 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache/jonatasgrosman/wav2vec2-large-xlsr-53-english \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/eng \
# --lang="cs-eng"
# # Wav2Vec 2.0 Cantonese
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=./save_cs-yue_cs-eng/./pretrain/save/facebook/wav2vec2-large-xlsr-53/checkpoint-63424/checkpoint-20335 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/pretrain/save/facebook/wav2vec2-large-xlsr-53/checkpoint-63424 \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/yue \
# --lang="yue,eng,cs-yue,cs-eng"
# Wav2Vec 2.0 Cantonese baseline (scottykwok/wav2vec2-large-xlsr-cantonese).
# Invokes eval.py on checkpoint-112230 with CUDA_VISIBLE_DEVICES=1,
# --lang="cs-eng" and --output_dir=./baselines/eval/scotty.
# The model and cache locations share the same "<author>/<model>/<checkpoint>"
# suffix, and all three manifests live in one data directory, so both prefixes
# are factored into variables below.
# NOTE(review): the training-side flags (train batch size, epochs, learning
# rate) are passed through unchanged; whether eval.py reads them is not
# visible from this script - confirm before removing.
baseline_model=scottykwok/wav2vec2-large-xlsr-cantonese/checkpoint-112230
manifest_dir=data/ubc_cantonese_english_asr
CUDA_VISIBLE_DEVICES=1 python eval.py \
  --model_name_or_path="baselines/save/${baseline_model}" \
  --train_manifest_path="${manifest_dir}/preprocessed_train_metadata.csv" \
  --valid_manifest_path="${manifest_dir}/preprocessed_valid_metadata.csv" \
  --test_manifest_path="${manifest_dir}/preprocessed_test_metadata.csv" \
  --cache_dir_name "baselines/cache/${baseline_model}" \
  --preprocessing_num_workers=16 \
  --audio_column_name=audio_path \
  --text_column_name=text_path \
  --eval_accumulation_steps=50 \
  --per_device_train_batch_size=8 \
  --per_device_eval_batch_size=8 \
  --dataloader_num_workers=8 \
  --dataloader_pin_memory \
  --group_by_length \
  --seed=14045 \
  --num_train_epochs=5 \
  --learning_rate=5e-5 \
  --output_dir=./baselines/eval/scotty \
  --lang="cs-eng"
# # Wav2Vec 2.0 Chinese
# CUDA_VISIBLE_DEVICES=1 python eval.py \
# --model_name_or_path=save/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn/checkpoint-32895 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/zh \
# --lang="cs-eng"
# # Wav2Vec 2.0 Multilingual
# CUDA_VISIBLE_DEVICES=1 python eval.py \
# --model_name_or_path=save/facebook/wav2vec2-large-xlsr-53/checkpoint-38700 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache/facebook/wav2vec2-large-xlsr-53 \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/multilingual \
# --lang="yue,eng,cs-yue,cs-eng"
################# CANTONESE AND CODE-SWITCHING ONLY
# # Wav2Vec 2.0 Cantonese
# CUDA_VISIBLE_DEVICES=1 python eval.py \
# --model_name_or_path=save_yue_cs-yue/ctl/wav2vec2-large-xlsr-cantonese/checkpoint-24384 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_yue_cs-yue/ctl/wav2vec2-large-xlsr-cantonese \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_yue_cs-yue/yue \
# --lang="yue,cs-yue"
# # Wav2Vec 2.0 English
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=save_yue_cs-yue/jonatasgrosman/wav2vec2-large-xlsr-53-english/checkpoint-39624 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_yue_cs-yue/jonatasgrosman/wav2vec2-large-xlsr-53-english \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_yue_cs-yue/eng \
# --lang="yue,cs-yue"
# # Wav2Vec 2.0 Multilingual
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=save_yue_cs-yue/facebook/wav2vec2-large-xlsr-53/checkpoint-31496 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_yue_cs-yue/facebook/wav2vec2-large-xlsr-53 \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_yue_cs-yue/multilingual \
# --lang="yue,cs-yue"
# # Wav2Vec 2.0 Chinese
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=save_yue_cs-yue/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn/checkpoint-49784 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_yue_cs-yue/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_yue_cs-yue/zh \
# --lang="yue,cs-yue"
################# CANTONESE CODE-SWITCHING AND ENGLISH CODE-SWITCHING ONLY
# # Wav2Vec 2.0 Cantonese
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=save_cs-yue_cs-eng/ctl/wav2vec2-large-xlsr-cantonese/checkpoint-10790 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/ctl/wav2vec2-large-xlsr-cantonese/checkpoint-10790 \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_cs-yue_cs-eng/yue \
# --lang="cs-eng"
# # Wav2Vec 2.0 Cantonese
# CUDA_VISIBLE_DEVICES=7 python eval.py \
# --model_name_or_path=baselines/save_cs-yue_cs-eng/scottykwok/wav2vec2-large-xlsr-cantonese/checkpoint-22383 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name baselines/cache_cs-yue_cs-eng/scottykwok/wav2vec2-large-xlsr-cantonese/checkpoint-22383 \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=30 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=8 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./baselines/eval_cs-yue_cs-eng/scotty \
# --lang="cs-eng"
# # Wav2Vec 2.0 English
# CUDA_VISIBLE_DEVICES=1 python eval.py \
# --model_name_or_path=save_cs-yue_cs-eng/jonatasgrosman/wav2vec2-large-xlsr-53-english/checkpoint-15770 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/jonatasgrosman/wav2vec2-large-xlsr-53-english \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_cs-yue_cs-eng/eng \
# --lang="cs-eng"
# # Wav2Vec 2.0 Multilingual
# CUDA_VISIBLE_DEVICES=1 python eval.py \
# --model_name_or_path=save_cs-yue_cs-eng/facebook/wav2vec2-large-xlsr-53/checkpoint-20750 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/facebook/wav2vec2-large-xlsr-53 \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_cs-yue_cs-eng/multilingual \
# --lang="cs-eng"
# # Wav2Vec 2.0 Chinese
# CUDA_VISIBLE_DEVICES=1 python eval.py \
# --model_name_or_path=save_cs-yue_cs-eng/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn/checkpoint-18260 \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval_cs-yue_cs-eng/zh \
# --lang="cs-eng"
############# ZERO-SHOT
# # Wav2Vec 2.0 English
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=jonatasgrosman/wav2vec2-large-xlsr-53-english \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache/jonatasgrosman/wav2vec2-large-xlsr-53-english \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/zero-shot/eng \
# --lang="cs-eng"
# # Wav2Vec 2.0 Cantonese
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=CAiRE/wav2vec2-large-xlsr-53-cantonese \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/zero-shot/CAiRE/wav2vec2-large-xlsr-53-cantonese \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/zero-shot/yue \
# --lang="cs-eng"
# # Wav2Vec 2.0 Chinese
# CUDA_VISIBLE_DEVICES=0 python eval.py \
# --model_name_or_path=jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn \
# --train_manifest_path=data/ubc_cantonese_english_asr/preprocessed_train_metadata.csv \
# --valid_manifest_path=data/ubc_cantonese_english_asr/preprocessed_valid_metadata.csv \
# --test_manifest_path=data/ubc_cantonese_english_asr/preprocessed_test_metadata.csv \
# --cache_dir_name cache_cs-yue_cs-eng/zero-shot/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn \
# --preprocessing_num_workers=16 \
# --audio_column_name=audio_path --text_column_name=text_path \
# --eval_accumulation_steps=10 \
# --per_device_train_batch_size=8 --per_device_eval_batch_size=16 \
# --dataloader_num_workers=8 --dataloader_pin_memory --group_by_length \
# --seed=14045 --num_train_epochs=5 --learning_rate=5e-5 --output_dir=./eval/zero-shot/zh \
# --lang="cs-eng"