package grammar

import (
	"errors"
	"fmt"
	"io"
	"regexp"
)

// SimpleToken is a simple implementation of both the Token and the Parser
// interfaces, so it can be used in rules to match a token field. It is also
// the concrete type of the Tokens returned by SimpleTokenStream.
type SimpleToken struct {
	TokType  string
	TokValue string
}

var _ Token = SimpleToken{}
var _ Parser = &SimpleToken{}

// EOF is the token returned when trying to consume a token from an exhausted
// token stream.
var EOF = SimpleToken{
	TokType:  "EOF",
	TokValue: "EOF",
}

// Type returns the token type.
func (t SimpleToken) Type() string {
	return t.TokType
}

// Value returns the value of the token.
func (t SimpleToken) Value() string {
	return t.TokValue
}

// Parse tries to match the next token in the parser state's token stream
// against the given TokenOptions, using opts.MatchNextToken. If they match,
// the receiver is loaded with the token data; if not, a non-nil *ParseError
// is returned. In either case the next token in the token stream has been
// consumed.
func (t *SimpleToken) Parse(_ interface{}, s *ParserState, opts TokenOptions) *ParseError {
	tok, err := opts.MatchNextToken(s)
	if err != nil {
		return err
	}
	t.TokType = tok.Type()
	t.TokValue = tok.Value()
	// log.Printf("+++ parsed tok: (%s, %q)", t.TokType, t.TokValue)
	return nil
}

// SimpleTokenStream is a very simple implementation of the TokenStream
// interface, which the Parse function requires.
type SimpleTokenStream struct {
	tokens     []Token
	currentPos int
}

// NewSimpleTokenStream returns a SimpleTokenStream that will produce the given
// tokens, starting from the first one.
func NewSimpleTokenStream(toks []Token) *SimpleTokenStream {
	return &SimpleTokenStream{
		tokens: toks,
	}
}

var _ TokenStream = (*SimpleTokenStream)(nil)

// Next consumes the next token in the token stream and returns it. If the
// stream is exhausted, EOF is returned.
func (s *SimpleTokenStream) Next() Token {
	if s.currentPos >= len(s.tokens) {
		// log.Printf("Next token %d: EOF", s.currentPos)
		return EOF
	}
	tok := s.tokens[s.currentPos]
	s.currentPos++
	// log.Printf("Next token: %d %q", s.currentPos-1, tok.Value())
	return tok
}

// Save returns the current position in the token stream.
func (s *SimpleTokenStream) Save() int {
	// log.Print("Saving pos: ", s.currentPos)
	return s.currentPos
}

// Restore rewinds the token stream to the given position (which should have
// been obtained from s.Save()). In general this operation may panic, but in
// this implementation it is always possible to rewind.
func (s *SimpleTokenStream) Restore(pos int) {
	// log.Print("Restoring pos: ", pos)
	s.currentPos = pos
}
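
// What follows is a minimal sketch of how Save and Restore support
// backtracking: a parser can save the stream position, consume tokens
// speculatively, and rewind on failure. The token types and values below are
// made up for illustration.
func exampleSaveRestore() {
	stream := NewSimpleTokenStream([]Token{
		SimpleToken{TokType: "ident", TokValue: "x"},
		SimpleToken{TokType: "op", TokValue: "+"},
		SimpleToken{TokType: "ident", TokValue: "y"},
	})
	pos := stream.Save()               // remember the current position
	fmt.Println(stream.Next().Value()) // x
	fmt.Println(stream.Next().Value()) // +
	stream.Restore(pos)                // rewind the stream
	fmt.Println(stream.Next().Value()) // x again
}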

// Dump writes the tokens in the stream to w, one token per line, marking the
// current position with a "*".
func (s *SimpleTokenStream) Dump(w io.Writer) {
	for i, tok := range s.tokens {
		currentMarker := " "
		if i == s.currentPos {
			currentMarker = "*"
		}
		fmt.Fprintf(w, "%3d%s %s %q\n", i, currentMarker, tok.Type(), tok.Value())
	}
}

// A TokenDef defines a type of token and the pattern that matches it. It is
// used by SimpleTokeniser to build simple tokenisers.
type TokenDef struct {
	Ptn      string              // The regular expression the token should match
	Name     string              // The name given to this token type (if empty, matches produce no token)
	Special  func(string) string // If defined, it is given the remaining input and takes over producing the token value for this pattern
	Mode     string              // The mode in which this definition applies; the first TokenDef's Mode is the initial mode
	PushMode string              // If non-empty, matching this token saves the current mode and enters this one
	PopMode  bool                // If true, matching this token returns to the previously saved mode
}

type Mode struct {
}

// SimpleTokeniser takes a list of TokenDefs and returns a function that can
// tokenise a string. It is designed for simple use cases. Note that the
// TokenDef patterns must not contain capturing groups of their own, as
// submatch indices are used to work out which TokenDef matched. A TokenDef
// with an empty Name is matched but produces no token, which is useful e.g.
// for skipping whitespace.
func SimpleTokeniser(tokenDefs []TokenDef) func(string) (*SimpleTokenStream, error) {
	modeTokenDefs := make(map[string][]TokenDef)
	ptnStrings := make(map[string]string)
	// Build one big alternation per mode, wrapping each pattern in a capturing
	// group so that the matching TokenDef can be identified by submatch index.
	for _, tokenDef := range tokenDefs {
		ptn := fmt.Sprintf(`(%s)`, tokenDef.Ptn)
		if _, ok := ptnStrings[tokenDef.Mode]; ok {
			ptnStrings[tokenDef.Mode] += "|" + ptn
		} else {
			ptnStrings[tokenDef.Mode] = `^(?:` + ptn
		}
		modeTokenDefs[tokenDef.Mode] = append(modeTokenDefs[tokenDef.Mode], tokenDef)
	}
	ptns := make(map[string]*regexp.Regexp)
	for m, s := range ptnStrings {
		ptns[m] = regexp.MustCompile(s + ")")
	}
	initialMode := tokenDefs[0].Mode
	return func(s string) (*SimpleTokenStream, error) {
		mode := initialMode
		var prevModes []string // stack of modes saved by PushMode
		var toks []Token
		for len(s) > 0 {
			matches := ptns[mode].FindStringSubmatch(s)
			if matches == nil {
				return nil, fmt.Errorf("invalid input string at %q", s)
			}
			tokType := ""
			tokValue := matches[0]
			if tokValue == "" {
				// A pattern matched the empty string; failing fast avoids an
				// infinite loop, as the input would never advance.
				return nil, errors.New("token pattern matched an empty string")
			}
			for i, match := range matches[1:] {
				if match != "" {
					tokDef := modeTokenDefs[mode][i]
					if tokDef.Special != nil {
						tokValue = tokDef.Special(s)
					}
					tokType = tokDef.Name
					switch {
					case tokDef.PushMode != "":
						prevModes = append(prevModes, mode)
						mode = tokDef.PushMode
					case tokDef.PopMode:
						last := len(prevModes) - 1
						if last < 0 {
							return nil, errors.New("no mode to pop")
						}
						mode = prevModes[last]
						prevModes = prevModes[:last]
					}
					break
				}
			}
			if tokType != "" {
				toks = append(toks, SimpleToken{TokType: tokType, TokValue: tokValue})
			}
			s = s[len(tokValue):]
		}
		return &SimpleTokenStream{tokens: toks}, nil
	}
}
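
// What follows is a minimal sketch of how SimpleTokeniser can be used; the
// token names and patterns are made up for illustration. The first TokenDef
// has an empty Mode, so "" is the initial mode; the opening quote pushes the
// "str" mode and the closing quote pops back out of it.
func exampleSimpleTokeniser() {
	tokenise := SimpleTokeniser([]TokenDef{
		{Ptn: `\s+`}, // empty Name: whitespace is skipped
		{Ptn: `[a-z]+`, Name: "ident"},
		{Ptn: `[0-9]+`, Name: "number"},
		{Ptn: `"`, Name: "quote", PushMode: "str"},
		{Ptn: `[^"]+`, Name: "text", Mode: "str"},
		{Ptn: `"`, Name: "quote", Mode: "str", PopMode: true},
	})
	stream, err := tokenise(`say "hi there" 42`)
	if err != nil {
		panic(err)
	}
	// Prints: ident "say", quote, text "hi there", quote, number "42".
	for tok := stream.Next(); tok != EOF; tok = stream.Next() {
		fmt.Printf("%s %q\n", tok.Type(), tok.Value())
	}
}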