-
Notifications
You must be signed in to change notification settings - Fork 3
/
nlp.go
80 lines (69 loc) · 1.65 KB
/
nlp.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
package main
import (
"strings"
"github.com/jdkato/prose/v2"
)
// replacecommits rewrites each of the punctuation characters listed below to
// a single space. It is applied to commit messages before they are tokenized.
var replacecommits = strings.NewReplacer(
	".", " ",
	"(", " ",
	")", " ",
	":", " ",
	",", " ",
	"[", " ",
	"]", " ",
	"\\", " ",
	`"`, " ",
	"'", " ",
	"!", " ",
	";", " ",
	"?", " ",
	"/", " ",
	"<", " ",
	">", " ",
)
// tokenizeCommits joins every commit message into a single text and runs it
// through prose once, returning the resulting token stream.
//
// Each message is first passed through replacecommits (punctuation becomes
// spaces) and is then followed by a " . " separator. walkCommits relies on
// the tokens produced by those separators to tell where one commit's tokens
// end and the next one's begin.
//
// It panics if commits is empty. If prose.NewDocument reports an error, that
// error is returned together with nil tokens.
func tokenizeCommits(commits []commit) ([]prose.Token, error) {
	if len(commits) == 0 {
		panic("expected non-nil/non-zero number of commits")
	}

	var allCommits strings.Builder
	// Rough pre-size of 20 bytes per commit to avoid the first few regrowths.
	allCommits.Grow(len(commits) * 20)
	for i := range commits {
		// Two writes instead of building a temporary "message + separator"
		// string for every commit.
		allCommits.WriteString(replacecommits.Replace(commits[i].Message))
		allCommits.WriteString(" . ")
	}

	doc, err := prose.NewDocument(allCommits.String(),
		prose.WithExtraction(false), prose.WithSegmentation(false), prose.WithTokenization(false))
	if err != nil {
		// Do not touch doc on failure: it may be nil, and a result paired
		// with a non-nil error is meaningless to the caller anyway.
		return nil, err
	}
	return doc.Tokens(), nil
}
// walkCommits calls f once per commit, passing a pointer to the commit and
// the tokens that belong to its message.
//
// walkCommits is SLOW. This is because it processes all commit messages into
// one document (see tokenizeCommits) and tags that whole document before f is
// called for the first time.
//
// A token tagged "." is treated as the end of the current commit;
// tokenizeCommits appends one " . " separator after every message. The
// boundary token itself is not passed to f, and any tokens after the last
// boundary are ignored.
//
// If tokenizeCommits fails, its error is returned and f is never called.
func walkCommits(commits []commit, f func(*commit, []prose.Token)) error {
	tokens, err := tokenizeCommits(commits)
	if err != nil {
		return err
	}

	next := 0  // index of the commit the upcoming boundary closes
	start := 0 // index of the first token of that commit
	for i := range tokens {
		if tokens[i].Tag != "." {
			continue
		}
		if next == len(commits) {
			// More "."-tagged tokens than commits. Stop here rather than
			// index past the end of commits.
			// NOTE(review): presumably this only happens if the tagger labels
			// a token inside a message as "."; in that case earlier commits
			// may already be misaligned. Confirm whether this should be
			// surfaced as an error instead.
			break
		}
		f(&commits[next], tokens[start:i])
		start = i + 1
		next++
	}
	return nil
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// spaces returns a string made of exactly n ASCII space characters.
//
// For n <= 0 it returns the empty string.
func spaces(n int) string {
	if n <= 0 {
		return ""
	}
	// strings.Repeat allocates the result once and does not depend on a
	// hand-counted run of spaces in a literal.
	return strings.Repeat(" ", n)
}