-
Notifications
You must be signed in to change notification settings - Fork 3
/
evaluate.sh
129 lines (108 loc) · 5.91 KB
/
evaluate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env bash
#############################################################################
# #
# #
# Detecting Fine-Grained Cross-Lingual Semantic Divergences #
# without Supervision by Learning To Rank #
# #
# eleftheria #
# #
# ==== Step 4 ==== #
# #
# Evaluation on REFreSD #
# #
# #
#############################################################################
##############################################################################
# Append the (site-specific) anaconda library directory to the dynamic linker
# search path. The ${var:+...} form only emits the existing value and its ':'
# separator when LD_LIBRARY_PATH is non-empty; a plain "$LD_LIBRARY_PATH:..."
# would start with ':' when the variable is unset, and the dynamic linker
# treats such an empty entry as the current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/fs/clip-scratch/ebriakou/anaconda3/lib"

corpus=WikiMatrix                               # Corpus from which seed equivalents are extracted
sampling_method=contrastive_divergence_ranking  # Sampling method for extracting divergent examples from seeds
size=50000                                      # Number of seeds sampled from original corpus
src=en                                          # Source language (language code)
tgt=fr                                          # Target language (language code)
divergent_list=rdpg                             # List of divergences (e.g, 'rd' if divergences include
                                                # phrase replacement and subtree deletion)
#############################################################################

# Absolute path of the directory containing this script; every other path is
# derived from it. Abort early if it cannot be resolved, otherwise all derived
# paths would silently be rooted at '/'. cd's stdout is discarded because cd
# prints the target directory when it is found through CDPATH.
root_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)" || {
  printf 'error: cannot resolve the directory containing %s\n' "${BASH_SOURCE[0]}" >&2
  exit 1
}
scripts_dir="$root_dir/source"

# Relative path identifying one experiment (corpus, language pair, sample size,
# sampling method and divergence list); shared by the data and model directories.
exp_identifier="from_${corpus}.${src}-${tgt}.tsv.filtered_sample_${size}.moses.seed/${sampling_method}/${divergent_list}"
data_dir="$root_dir/for_divergentmBERT/${exp_identifier}"
output_dir="$root_dir/trained_bert/${exp_identifier}"
REFreSD_dir="$root_dir/REFreSD_no_normal/REFreSD_for_huggingface"
model=bert-base-multilingual-cased
#######################################
# Run run_div_margin.py in evaluation mode (--do_eval --best_checkpoint)
# on a single evaluation set.
# Globals:
#   scripts_dir, model, data_dir, output_dir (read)
#   SLURM_NODELIST (read; may be unset when not running under SLURM)
# Arguments:
#   $1 - evaluation set name, forwarded as --evaluation_set
#   $2 - directory forwarded as --synth_data_dir
# Returns:
#   the exit status of the python process
#######################################
run_evaluation() {
  local eval_set=$1
  local synth_data_dir=$2

  # Every expansion is quoted so that paths containing whitespace stay single
  # arguments. "${SLURM_NODELIST:-}" always yields exactly one (possibly
  # empty) argument, so --node can never swallow the following option.
  # NOTE(review): assumes run_div_margin.py accepts an empty --node value;
  # confirm against its argument parser.
  python "$scripts_dir/run_div_margin.py" \
    --node "${SLURM_NODELIST:-}" \
    --model_type bert_margin \
    --model_name_or_path "$model" \
    --task_name SemDiv \
    --do_eval \
    --best_checkpoint \
    --evaluation_set "$eval_set" \
    --data_dir "$data_dir/" \
    --output_dir "$output_dir" \
    --synth_data_dir "$synth_data_dir" \
    --overwrite_cache
}

################################################################################
# Synthetic test evaluation #
################################################################################
run_evaluation test_synthetic "$data_dir/"

#################################################################################
# REFreSD evaluation --- Divergence VS Equivalence #
#################################################################################
run_evaluation test "$REFreSD_dir/"

#####################################################################################
# REFreSD evaluation --- Unrelated VS No meaning difference #
#####################################################################################
run_evaluation unrelated "$REFreSD_dir/"

########################################################################################
# REFreSD evaluation -- Some meaning difference VS No meaning difference #
########################################################################################
run_evaluation some_meaning_difference "$REFreSD_dir/"
########################################################################################
# Print results #
########################################################################################
#######################################
# Print a heading for one evaluation set, then run sentence_evaluation.py
# on that set.
# Globals:
#   scripts_dir, output_dir (read)
# Arguments:
#   $1 - human-readable label printed as "> <label>:"
#   $2 - evaluation set name, forwarded as --set_
# Outputs:
#   the heading line on stdout, followed by whatever the python script prints
# Returns:
#   the exit status of the python process
#######################################
report_results() {
  local label=$1
  local eval_set=$2

  printf '> %s:\n' "$label"
  # Paths are quoted so a checkout location containing whitespace is passed
  # to python as a single argument.
  python "$scripts_dir/sentence_evaluation.py" --dict_dir "$output_dir/" --set_ "$eval_set"
}

report_results 'Test synthetic' test_synthetic
report_results 'REFreSD (Divergence vs Equivalence)' test
report_results 'REFreSD (Unrelated vs No meaning difference)' unrelated
report_results 'REFreSD (Some meaning difference vs No meaning difference)' some_meaning_difference