forked from Helsinki-NLP/Tatoeba-Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
opus2m-2020-08-01.yml
204 lines (204 loc) · 5.54 KB
/
opus2m-2020-08-01.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Top-level scalar metadata for this model release.
release: roa-eng/opus2m-2020-08-01.zip
# NOTE(review): unquoted ISO date. YAML 1.1 loaders type this as a date,
# while 1.2 core-schema loaders keep it a string; confirm what consumers
# expect before quoting it.
release-date: 2020-08-01
dataset-name: opus2m
modeltype: transformer
# The two values in parentheses presumably name the source and target
# subword vocabularies (they match the entries under `subwords`) - confirm.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword vocabulary identifier for each side of the model.
# The entries must be nested under `subwords`: left at column 0 they become
# top-level keys, `subwords` itself parses as null, and `source`/`target`
# collide with the identically named keys belonging to `subword-models`.
subwords:
  source: spm32k
  target: spm32k
# Subword model file name for each side of the model.
# The entries must be nested under `subword-models`: left at column 0 they
# become top-level keys, `subword-models` itself parses as null, and
# `source`/`target` collide with the identically named keys of `subwords`.
subword-models:
  source: source.spm
  target: target.spm
# Source-side language codes listed for this model (three-letter codes).
source-languages:
  - arg
  - ast
  - cat
  - cos
  - egl
  - ext
  - fra
  - frm
  - gcf
  - glg
  - hat
  - ind
  - ita
  - lad
  - lij
  - lld
  - lmo
  - max
  - mfe
  - min
  - mwl
  - oci
  - pap
  - pms
  - por
  - roh
  - ron
  - scn
  - spa
  - tmw
  - vec
  - wln
  - zlm
  - zsm
# Target-side language codes listed for this model.
target-languages:
  - eng
# Size of each evaluation set, keyed by `<test set>.<source>-<target>`.
# Values have the form `<a>/<b>`; presumably sentence count / token count -
# TODO confirm against the tool that generates this file.
# The entries must be nested under `test-data`: left at column 0 they become
# top-level keys, `test-data` parses as null, and every key is duplicated by
# the identically named entries of `BLEU-scores` and `chr-F-scores`.
test-data:
  Tatoeba-test.arg-eng: 105/343
  Tatoeba-test.ast-eng: 99/682
  Tatoeba-test.cat-eng: 1631/10625
  Tatoeba-test.cos-eng: 5/28
  Tatoeba-test.egl-eng: 84/348
  Tatoeba-test.ext-eng: 69/279
  Tatoeba-test.fra-eng: 10000/65688
  Tatoeba-test.frm-eng: 18/183
  Tatoeba-test.gcf-eng: 99/455
  Tatoeba-test.glg-eng: 1008/7171
  Tatoeba-test.hat-eng: 64/281
  Tatoeba-test.ita-eng: 10000/56443
  Tatoeba-test.lad-eng: 629/2774
  Tatoeba-test.lij-eng: 94/586
  Tatoeba-test.lld-eng: 21/189
  Tatoeba-test.lmo-eng: 17/104
  Tatoeba-test.mfe-eng: 7/28
  Tatoeba-test.msa-eng: 5000/31183
  Tatoeba-test.multi-eng: 10000/63328
  Tatoeba-test.mwl-eng: 4/19
  Tatoeba-test.oci-eng: 841/4376
  Tatoeba-test.pap-eng: 70/275
  Tatoeba-test.pms-eng: 268/1708
  Tatoeba-test.por-eng: 10000/63761
  Tatoeba-test.roh-eng: 16/186
  Tatoeba-test.ron-eng: 5000/31418
  Tatoeba-test.scn-eng: 4/31
  Tatoeba-test.spa-eng: 10000/67744
  Tatoeba-test.vec-eng: 19/90
  Tatoeba-test.wln-eng: 89/363
  news-test2008.fra-eng: 2051/43455
  news-test2008.spa-eng: 2051/43455
  newsdev2016-enro.ron-eng: 1999/43535
  newsdiscussdev2015-enfr.fra-eng: 1500/24201
  newsdiscusstest2015-enfr.fra-eng: 1500/23668
  newssyscomb2009.fra-eng: 502/10424
  newssyscomb2009.ita-eng: 502/10424
  newssyscomb2009.spa-eng: 502/10424
  newstest2009.fra-eng: 2525/57625
  newstest2009.ita-eng: 2525/57625
  newstest2009.spa-eng: 2525/57625
  newstest2010.fra-eng: 2489/54021
  newstest2010.spa-eng: 2489/54021
  newstest2011.fra-eng: 3003/65829
  newstest2011.spa-eng: 3003/65829
  newstest2012.fra-eng: 3003/63778
  newstest2012.spa-eng: 3003/63778
  newstest2013.fra-eng: 3000/56089
  newstest2013.spa-eng: 3000/56089
  newstest2014-fren.fra-eng: 3003/62338
  newstest2016-enro.ron-eng: 1999/42055
# BLEU score per evaluation set, keyed by `<test set>.<source>-<target>`.
# The entries must be nested under `BLEU-scores`: left at column 0 they become
# top-level keys, `BLEU-scores` parses as null, and every key duplicates the
# identically named entries of `test-data` and `chr-F-scores` (most loaders
# then silently keep only the last value).
BLEU-scores:
  Tatoeba-test.arg-eng: 38.7
  Tatoeba-test.ast-eng: 35.2
  Tatoeba-test.cat-eng: 54.9
  Tatoeba-test.cos-eng: 68.1
  Tatoeba-test.egl-eng: 6.7
  Tatoeba-test.ext-eng: 24.2
  Tatoeba-test.fra-eng: 53.9
  Tatoeba-test.frm-eng: 25.7
  Tatoeba-test.gcf-eng: 14.8
  Tatoeba-test.glg-eng: 54.6
  Tatoeba-test.hat-eng: 37.0
  Tatoeba-test.ita-eng: 64.8
  Tatoeba-test.lad-eng: 21.7
  Tatoeba-test.lij-eng: 11.2
  Tatoeba-test.lld-eng: 10.8
  Tatoeba-test.lmo-eng: 5.8
  Tatoeba-test.mfe-eng: 63.1
  Tatoeba-test.msa-eng: 40.9
  Tatoeba-test.multi-eng: 54.9
  Tatoeba-test.mwl-eng: 44.6
  Tatoeba-test.oci-eng: 20.5
  Tatoeba-test.pap-eng: 56.2
  Tatoeba-test.pms-eng: 10.3
  Tatoeba-test.por-eng: 59.7
  Tatoeba-test.roh-eng: 14.8
  Tatoeba-test.ron-eng: 55.2
  Tatoeba-test.scn-eng: 10.2
  Tatoeba-test.spa-eng: 56.2
  Tatoeba-test.vec-eng: 13.8
  Tatoeba-test.wln-eng: 17.3
  news-test2008.fra-eng: 25.2
  news-test2008.spa-eng: 26.6
  newsdev2016-enro.ron-eng: 37.1
  newsdiscussdev2015-enfr.fra-eng: 31.6
  newsdiscusstest2015-enfr.fra-eng: 36.1
  newssyscomb2009.fra-eng: 29.3
  newssyscomb2009.ita-eng: 33.1
  newssyscomb2009.spa-eng: 29.2
  newstest2009.fra-eng: 28.6
  newstest2009.ita-eng: 32.0
  newstest2009.spa-eng: 28.9
  newstest2010.fra-eng: 29.9
  newstest2010.spa-eng: 33.3
  newstest2011.fra-eng: 31.2
  newstest2011.spa-eng: 32.3
  newstest2012.fra-eng: 31.3
  newstest2012.spa-eng: 35.3
  newstest2013.fra-eng: 31.9
  newstest2013.spa-eng: 32.8
  newstest2014-fren.fra-eng: 34.6
  newstest2016-enro.ron-eng: 35.8
# chr-F score per evaluation set, keyed by `<test set>.<source>-<target>`.
# The entries must be nested under `chr-F-scores`: left at column 0 they
# become top-level keys, `chr-F-scores` parses as null, and every key
# duplicates the identically named entries of `test-data` and `BLEU-scores`
# (most loaders then silently keep only the last value).
chr-F-scores:
  Tatoeba-test.arg-eng: 0.512
  Tatoeba-test.ast-eng: 0.520
  Tatoeba-test.cat-eng: 0.703
  Tatoeba-test.cos-eng: 0.666
  Tatoeba-test.egl-eng: 0.209
  Tatoeba-test.ext-eng: 0.427
  Tatoeba-test.fra-eng: 0.691
  Tatoeba-test.frm-eng: 0.423
  Tatoeba-test.gcf-eng: 0.288
  Tatoeba-test.glg-eng: 0.703
  Tatoeba-test.hat-eng: 0.540
  Tatoeba-test.ita-eng: 0.768
  Tatoeba-test.lad-eng: 0.452
  Tatoeba-test.lij-eng: 0.299
  Tatoeba-test.lld-eng: 0.273
  Tatoeba-test.lmo-eng: 0.260
  Tatoeba-test.mfe-eng: 0.819
  Tatoeba-test.msa-eng: 0.592
  Tatoeba-test.multi-eng: 0.697
  Tatoeba-test.mwl-eng: 0.674
  Tatoeba-test.oci-eng: 0.404
  Tatoeba-test.pap-eng: 0.669
  Tatoeba-test.pms-eng: 0.324
  Tatoeba-test.por-eng: 0.738
  Tatoeba-test.roh-eng: 0.378
  Tatoeba-test.ron-eng: 0.703
  Tatoeba-test.scn-eng: 0.259
  Tatoeba-test.spa-eng: 0.714
  Tatoeba-test.vec-eng: 0.317
  Tatoeba-test.wln-eng: 0.323
  news-test2008.fra-eng: 0.533
  news-test2008.spa-eng: 0.542
  newsdev2016-enro.ron-eng: 0.631
  newsdiscussdev2015-enfr.fra-eng: 0.564
  newsdiscusstest2015-enfr.fra-eng: 0.592
  newssyscomb2009.fra-eng: 0.563
  newssyscomb2009.ita-eng: 0.589
  newssyscomb2009.spa-eng: 0.562
  newstest2009.fra-eng: 0.557
  newstest2009.ita-eng: 0.580
  newstest2009.spa-eng: 0.559
  newstest2010.fra-eng: 0.573
  newstest2010.spa-eng: 0.596
  newstest2011.fra-eng: 0.585
  newstest2011.spa-eng: 0.584
  newstest2012.fra-eng: 0.580
  newstest2012.spa-eng: 0.606
  newstest2013.fra-eng: 0.575
  newstest2013.spa-eng: 0.592
  newstest2014-fren.fra-eng: 0.611
  newstest2016-enro.ron-eng: 0.614