-
Notifications
You must be signed in to change notification settings - Fork 17
/
ngram_test.go
125 lines (119 loc) · 2.33 KB
/
ngram_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package ngram
import (
"fmt"
"testing"
)
// TestIndexBasics checks the add/read round trip on a zero-value NGramIndex:
// the id returned by Add must resolve back to the same string via GetString.
func TestIndexBasics(t *testing.T) {
	index := new(NGramIndex)

	id, addErr := index.Add("hello")
	if addErr != nil {
		t.Error(addErr)
	}

	got, getErr := index.GetString(id)
	if getErr != nil {
		t.Error(getErr)
	}
	if got != "hello" {
		t.Error("Can't read string from index")
	}
}
// TestSearching adds "hello" and "world" to a zero-value NGramIndex and then
// exercises the lookup API:
//   - Search for an indexed string must return exactly one result with
//     similarity 1.0 and token id 0;
//   - Search for an unrelated string must return no results;
//   - BestMatch for the prefix "hel" must resolve to token id 0.
func TestSearching(t *testing.T) {
	index := new(NGramIndex)
	for _, s := range []string{"hello", "world"} {
		if _, err := index.Add(s); err != nil {
			// Every later assertion depends on both strings being indexed.
			t.Fatal(err)
		}
	}

	results, err := index.Search("hello", 0.0)
	if err != nil {
		t.Fatal(err)
	}
	// Fatal, not Error: results[0] below would panic on an empty slice.
	if len(results) != 1 {
		t.Fatal("len(results) != 1")
	}
	// The result is bad if EITHER field is wrong (the previous && only
	// reported a failure when both were wrong at the same time).
	if results[0].Similarity != 1.0 || results[0].TokenID != 0 {
		t.Error("Bad result")
	}

	// The error is intentionally not asserted here, matching the original
	// test. NOTE(review): confirm whether Search reports an error for a query
	// with no matches; if it never does, assert err == nil here.
	results, _ = index.Search("12345")
	if len(results) != 0 {
		t.Error("Invalid value found")
	}

	result, err := index.BestMatch("hel")
	if err != nil {
		// result carries no meaningful value when BestMatch fails.
		t.Fatal(err)
	}
	if result.TokenID != 0 {
		t.Error("BestMatch doesn't work as expected")
	}
}
// TestIndexInitialization covers NewNGramIndex and its options:
//   - with no options, n and pad equal defaultN and defaultPad;
//   - SetN(4) and SetPad('@') override the respective field;
//   - SetN(1) and SetN(maxN + 1) must be rejected with an error.
func TestIndexInitialization(t *testing.T) {
	index, err := NewNGramIndex()
	if err != nil {
		// Fatal, not Error: the field reads below need a usable index.
		t.Fatal(err)
	}
	if index.n != defaultN {
		t.Error("n is not set to default value")
	}
	if index.pad != defaultPad {
		t.Error("pad is not set to default value")
	}

	index, err = NewNGramIndex(SetN(4))
	if err != nil {
		t.Fatal(err)
	}
	if index.n != 4 {
		t.Error("n is not set to 4")
	}

	index, err = NewNGramIndex(SetPad('@'))
	if err != nil {
		t.Fatal(err)
	}
	if index.pad != "@" {
		t.Error("pad is not set to @")
	}

	// check off limits: only the error matters, the returned index is unused.
	if _, err = NewNGramIndex(SetN(1)); err == nil {
		t.Error("Error not set (1)")
	}
	if _, err = NewNGramIndex(SetN(maxN + 1)); err == nil {
		t.Error("Error not set (2)")
	}
}
// BenchmarkAdd measures inserting a batch of 10000 short hex strings into a
// freshly created index. One benchmark operation is one full batch; the batch
// is repeated b.N times so the testing package can compute a meaningful
// per-operation figure (previously the body ran once regardless of b.N).
func BenchmarkAdd(b *testing.B) {
	const batchSize = 10000

	// Build the inputs once, outside the timed region.
	inputs := make([]string, 0, batchSize)
	for i := 0; i < batchSize; i++ {
		inputs = append(inputs, fmt.Sprintf("%x", i))
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Index construction is setup, not the operation under test.
		b.StopTimer()
		index, err := NewNGramIndex()
		if err != nil {
			b.Fatal(err)
		}
		b.StartTimer()

		for _, hexstr := range inputs {
			// Errors are deliberately ignored: this loop only measures the
			// cost of Add; correctness is covered by the tests.
			_, _ = index.Add(hexstr)
		}
	}
}
// BenchmarkSearch measures Search with a 0.5 threshold against an index
// pre-filled with 10000 hex strings. One benchmark operation is one Search
// call; queries walk every 4th indexed string (as the original fixed loop
// did) and wrap around, so the timed loop is driven by b.N.
func BenchmarkSearch(b *testing.B) {
	const corpusSize = 10000

	// Setup: build and fill the index outside the timed region.
	index, err := NewNGramIndex()
	if err != nil {
		b.Fatal(err)
	}
	corpus := make([]string, 0, corpusSize)
	for i := 0; i < corpusSize; i++ {
		// NOTE(review): the repeated '0' flags carry no width, so this appears
		// to format exactly like "%x" — confirm whether zero padding
		// (e.g. "%04x") was intended.
		corpus = append(corpus, fmt.Sprintf("%000x", i))
	}
	for _, hexstr := range corpus {
		// Errors are deliberately ignored while filling the benchmark corpus.
		_, _ = index.Add(hexstr)
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Results and errors are discarded: only the call cost is measured.
		_, _ = index.Search(corpus[(n*4)%corpusSize], 0.5)
	}
}