-
Notifications
You must be signed in to change notification settings - Fork 2
/
test_match.py
130 lines (117 loc) · 5.53 KB
/
test_match.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from translate.search import match
from translate.storage import csvl10n
class TestMatch:
    """Test the matching classes (``match.matcher`` and ``match.terminologymatcher``)."""

    def candidatestrings(self, units):
        """Return the ``source`` string of every unit in *units*, in order.

        :param units: iterable of objects exposing a ``source`` attribute
            (in these tests, whatever a matcher's ``matches()`` returns).
        :return: list of the source strings, in the order they were given.
        """
        return [unit.source for unit in units]

    def buildcsv(self, sources, targets=None):
        """Build a csvfile store with one unit per source/target pair.

        :param sources: source strings, one per unit to create.
        :param targets: target strings paired positionally with *sources*;
            when ``None``, each unit's target is set to its own source.
        :return: the populated ``csvl10n.csvfile`` store.
        :raises AssertionError: if *targets* is given but its length differs
            from that of *sources*.
        """
        if targets is None:
            targets = sources
        else:
            assert len(sources) == len(targets)
        csvfile = csvl10n.csvfile()
        for source, target in zip(sources, targets):
            unit = csvfile.addsourceunit(source)
            unit.target = target
        return csvfile

    def test_matching(self):
        """Test basic matching against a single store."""
        csvfile = self.buildcsv(["hand", "asdf", "fdas", "haas", "pond"])
        matcher = match.matcher(csvfile)
        candidates = self.candidatestrings(matcher.matches("hond"))
        # Only the set of matches matters here, so compare in sorted order.
        assert sorted(candidates) == ["hand", "pond"]

        message = "Ek skop die bal"
        csvfile = self.buildcsv(
            [
                "Hy skop die bal",
                message,
                "Jannie skop die bal",
                "Ek skop die balle",
                "Niemand skop die bal nie",
            ]
        )
        matcher = match.matcher(csvfile)
        candidates = self.candidatestrings(matcher.matches(message))
        assert len(candidates) == 3
        # Test that the 100% match is indeed first:
        assert candidates[0] == message
        # The exact match also sorts first alphabetically, so skipping the
        # first sorted entry leaves the two remaining candidates.
        assert sorted(candidates)[1:] == ["Ek skop die balle", "Hy skop die bal"]

    def test_multiple_store(self):
        """Test using multiple datastores.

        Uses the same strings and expectations as ``test_matching``, but with
        the strings split over two stores passed to the matcher as a list.
        """
        csvfile1 = self.buildcsv(["hand", "asdf", "fdas"])
        csvfile2 = self.buildcsv(["haas", "pond"])
        matcher = match.matcher([csvfile1, csvfile2])
        candidates = self.candidatestrings(matcher.matches("hond"))
        assert sorted(candidates) == ["hand", "pond"]

        message = "Ek skop die bal"
        csvfile1 = self.buildcsv(
            [
                "Hy skop die bal",
                message,
                "Jannie skop die bal",
            ]
        )
        csvfile2 = self.buildcsv(
            [
                "Ek skop die balle",
                "Niemand skop die bal nie",
            ]
        )
        matcher = match.matcher([csvfile1, csvfile2])
        candidates = self.candidatestrings(matcher.matches(message))
        assert len(candidates) == 3
        # Test that the 100% match is indeed first:
        assert candidates[0] == message
        assert sorted(candidates)[1:] == ["Ek skop die balle", "Hy skop die bal"]

    def test_extendtm(self):
        """Test that we can extend the TM after creation."""
        message = "Open file..."
        csvfile1 = self.buildcsv(["Close application", "Do something"])
        matcher = match.matcher([csvfile1])
        candidates = self.candidatestrings(matcher.matches(message))
        assert candidates == []

        # After adding the units of a second store, the new string is found.
        csvfile2 = self.buildcsv(["Open file"])
        matcher.extendtm(csvfile2.units, store=csvfile2)
        candidates = self.candidatestrings(matcher.matches(message))
        assert candidates == ["Open file"]

    def test_terminology(self):
        """Test that terms occurring in a longer sentence are found."""
        csvfile = self.buildcsv(["file", "computer", "directory"])
        matcher = match.terminologymatcher(csvfile)
        candidates = self.candidatestrings(
            matcher.matches("Copy the files from your computer")
        )
        assert sorted(candidates) == ["computer", "file"]

    def test_brackets(self):
        """Tests that brackets at the end of a term are ignored"""
        csvfile = self.buildcsv(["file (noun)", "ISP (Internet Service Provider)"])
        matcher = match.terminologymatcher(csvfile)
        candidates = self.candidatestrings(matcher.matches("Open File"))
        assert candidates == ["file"]
        candidates = self.candidatestrings(matcher.matches("Contact your ISP"))
        # We lowercase everything - that is why we get it back differently.
        # We don't change the target text, though.
        assert candidates == ["isp"]

    def test_past_tences(self):
        """Tests matching of some past tenses"""
        csvfile = self.buildcsv(["submit", "certify"])
        matcher = match.terminologymatcher(csvfile)
        candidates = self.candidatestrings(matcher.matches("The bug was submitted"))
        assert candidates == ["submit"]
        # NOTE(review): the original computed this result but never asserted
        # on it, so it only verifies that matching does not raise. The
        # expected candidate list for "certified" should be confirmed against
        # the matcher and asserted here.
        self.candidatestrings(matcher.matches("The site is certified"))

    def test_space_mismatch(self):
        """Tests that we can match with some spacing mismatch"""
        csvfile = self.buildcsv(["down time"])
        matcher = match.terminologymatcher(csvfile)
        candidates = self.candidatestrings(matcher.matches("%d minutes downtime"))
        assert candidates == ["downtime"]

    def test_hyphen_mismatch(self):
        """Tests that we can match with some hyphen mismatch"""
        # A hyphenated term is expected to match the joined and spaced forms.
        csvfile = self.buildcsv(["pre-order"])
        matcher = match.terminologymatcher(csvfile)
        candidates = self.candidatestrings(matcher.matches("You can preorder"))
        assert candidates == ["preorder"]
        candidates = self.candidatestrings(matcher.matches("You can pre order"))
        assert candidates == ["pre order"]

        # A space-separated term is expected to behave the same way.
        csvfile = self.buildcsv(["pre order"])
        matcher = match.terminologymatcher(csvfile)
        candidates = self.candidatestrings(matcher.matches("You can preorder"))
        assert candidates == ["preorder"]
        candidates = self.candidatestrings(matcher.matches("You can pre order"))
        assert candidates == ["pre order"]