# opus-2020-07-14.yml (109 lines, 2.49 KB)
# Repository: forked from Helsinki-NLP/Tatoeba-Challenge
# Release metadata for the eng-trk model package opus-2020-07-14.
# Child entries are nested under their parent key; at column 0 they would
# collide as duplicate top-level keys (source/target, Tatoeba-test.*).

release: eng-trk/opus-2020-07-14.zip
# NOTE(review): unquoted, so YAML 1.1 loaders read this as a date rather than
# a string — confirm what the consumer expects before quoting it.
release-date: 2020-07-14
dataset-name: opus
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword segmentation used on each side of the model.
subwords:
  source: spm32k
  target: spm32k

# Subword model files for each side.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - eng

target-languages:
  - aze
  - bak
  - chv
  - crh
  - kaz
  - kir
  - kjh
  - kum
  - ota
  - sah
  - tat
  - tuk
  - tur
  - tyv
  - uig
  - uzb

# Target-language label tokens. They must stay quoted: a plain scalar that
# starts with ">" would be read as a folded block scalar indicator.
use-target-labels:
  - ">>aze_Latn<<"
  - ">>bak<<"
  - ">>chv<<"
  - ">>crh<<"
  - ">>crh_Latn<<"
  - ">>kaz_Cyrl<<"
  - ">>kaz_Latn<<"
  - ">>kir_Cyrl<<"
  - ">>kjh<<"
  - ">>kum<<"
  - ">>ota_Arab<<"
  - ">>ota_Latn<<"
  - ">>sah<<"
  - ">>tat<<"
  - ">>tat_Arab<<"
  - ">>tat_Latn<<"
  - ">>tuk<<"
  - ">>tuk_Latn<<"
  - ">>tur<<"
  - ">>tyv<<"
  - ">>uig_Arab<<"
  - ">>uig_Cyrl<<"
  - ">>uzb_Cyrl<<"
  - ">>uzb_Latn<<"

# Size of each test set, written as "A/B".
# NOTE(review): presumably sentences/words — TODO confirm against the
# generating script.
test-data:
  Tatoeba-test.eng-aze: 2659/10046
  Tatoeba-test.eng-bak: 39/140
  Tatoeba-test.eng-chv: 333/1358
  Tatoeba-test.eng-crh: 22/81
  Tatoeba-test.eng-kaz: 397/1668
  Tatoeba-test.eng-kir: 118/428
  Tatoeba-test.eng-kjh: 17/48
  Tatoeba-test.eng-kum: 8/25
  Tatoeba-test.eng-multi: 10000/46183
  Tatoeba-test.eng-ota: 678/3328
  Tatoeba-test.eng-sah: 39/131
  Tatoeba-test.eng-tat: 1451/6996
  Tatoeba-test.eng-tuk: 2500/12809
  Tatoeba-test.eng-tur: 10000/49076
  Tatoeba-test.eng-tyv: 5/19
  Tatoeba-test.eng-uig: 3024/13084
  Tatoeba-test.eng-uzb: 457/1514

# BLEU score per test set (same keys as test-data).
BLEU-scores:
  Tatoeba-test.eng-aze: 25.7
  Tatoeba-test.eng-bak: 5.2
  Tatoeba-test.eng-chv: 3.7
  Tatoeba-test.eng-crh: 7.4
  Tatoeba-test.eng-kaz: 11.4
  Tatoeba-test.eng-kir: 25.4
  Tatoeba-test.eng-kjh: 1.3
  Tatoeba-test.eng-kum: 2.2
  Tatoeba-test.eng-multi: 18.0
  Tatoeba-test.eng-ota: 0.2
  Tatoeba-test.eng-sah: 0.5
  Tatoeba-test.eng-tat: 9.7
  Tatoeba-test.eng-tuk: 5.6
  Tatoeba-test.eng-tur: 32.1
  Tatoeba-test.eng-tyv: 4.8
  Tatoeba-test.eng-uig: 0.1
  Tatoeba-test.eng-uzb: 3.3

# chr-F score per test set (same keys as test-data).
chr-F-scores:
  Tatoeba-test.eng-aze: 0.560
  Tatoeba-test.eng-bak: 0.267
  Tatoeba-test.eng-chv: 0.264
  Tatoeba-test.eng-crh: 0.301
  Tatoeba-test.eng-kaz: 0.353
  Tatoeba-test.eng-kir: 0.496
  Tatoeba-test.eng-kjh: 0.035
  Tatoeba-test.eng-kum: 0.046
  Tatoeba-test.eng-multi: 0.436
  Tatoeba-test.eng-ota: 0.059
  Tatoeba-test.eng-sah: 0.021
  Tatoeba-test.eng-tat: 0.304
  Tatoeba-test.eng-tuk: 0.305
  Tatoeba-test.eng-tur: 0.602
  Tatoeba-test.eng-tyv: 0.224
  Tatoeba-test.eng-uig: 0.150
  Tatoeba-test.eng-uzb: 0.264