# File: opus-2021-02-18.yml
# Origin: fork of Helsinki-NLP/Tatoeba-Challenge
# Release metadata for the fra -> ces "opus" transformer model
# (see source-languages / target-languages below).
#
# NOTE(review): the nesting below was reconstructed from a copy in which all
# indentation had been stripped. Left flat, the child keys collide at the top
# level (source/target, ces-fra, and every test-set name repeat), so most
# parsers would silently keep only the last value. Confirm the structure
# against the upstream file.
release: fra-ces/opus-2021-02-18.zip
# Unquoted on purpose: value kept exactly as in the original. YAML 1.1 loaders
# parse it as a date rather than a string.
release-date: 2021-02-18
dataset-name: opus
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword segmentation label for each side of the model.
subwords:
  source: spm32k
  target: spm32k

# SentencePiece model files shipped for each side.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - fra
target-languages:
  - ces

# Parenthesised number is presumably the training-set size; verify upstream.
training-data:
  ces-fra: Tatoeba-train (35981979)

# NOTE(review): total-size-shuffled and devset-selected are assumed to belong
# to validation-data because they describe the dev set; confirm.
validation-data:
  ces-fra: Tatoeba-dev, 1000
  total-size-shuffled: 1000
  devset-selected: top 1000 lines of Tatoeba-dev.src.shuffled!

# Values are "<a>/<b>" strings; presumably sentence count / word count per
# test set (TODO confirm).
test-data:
  newssyscomb2009.fra-ces: 502/10032
  news-test2008.fra-ces: 2051/42484
  newstest2009.fra-ces: 2525/55533
  newstest2010.fra-ces: 2489/52958
  newstest2011.fra-ces: 3003/65653
  newstest2012.fra-ces: 3003/65456
  newstest2013.fra-ces: 3000/57250
  Tatoeba-test.fra-ces: 425/2625

# BLEU score per test set (same keys as test-data).
BLEU-scores:
  newssyscomb2009.fra-ces: 22.0
  news-test2008.fra-ces: 20.1
  newstest2009.fra-ces: 20.6
  newstest2010.fra-ces: 20.4
  newstest2011.fra-ces: 21.8
  newstest2012.fra-ces: 20.6
  newstest2013.fra-ces: 22.0
  Tatoeba-test.fra-ces: 46.7

# chr-F score per test set (same keys as test-data).
chr-F-scores:
  newssyscomb2009.fra-ces: 0.501
  news-test2008.fra-ces: 0.484
  newstest2009.fra-ces: 0.486
  newstest2010.fra-ces: 0.489
  newstest2011.fra-ces: 0.495
  newstest2012.fra-ces: 0.478
  newstest2013.fra-ces: 0.488
  Tatoeba-test.fra-ces: 0.672