-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizers.py
49 lines (37 loc) · 1.3 KB
/
tokenizers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from easyparse import Tokenizer, Token
def eq(value):
    """Return a one-argument predicate that compares its input to *value*.

    The returned callable evaluates ``x == value`` and returns the result
    of that comparison unchanged.
    """
    return lambda x: x == value
def neq(value):
    """Return a one-argument predicate that tests inequality with *value*.

    The returned callable evaluates ``x != value`` and returns the result
    of that comparison unchanged.
    """
    return lambda x: x != value
def concatenate(list_object) -> str:
    """Join the string items of *list_object* into one string.

    Items are joined with no separator via ``str.join``, so every item
    must already be a string; an empty iterable yields ``""``.
    """
    return "".join(list_object)
class WhitespaceTokenizer(Tokenizer):
    """Tokenizer that emits a ``"Whitespace"`` token for spaces and tabs.

    Args:
        auto_discard: Value forwarded as the ``auto_discard`` keyword of
            every ``Token`` this tokenizer creates.
    """

    def __init__(self, auto_discard):
        self.auto_discard = auto_discard

    def tokenize(self, view):
        """Consume space/tab items from *view* and report them as a token.

        Calls ``view.consume_while`` with a predicate that accepts a
        single space or a single tab character.

        Returns:
            A ``Token`` of type ``"Whitespace"`` when the consumed buffer
            is truthy, otherwise ``None``.
        """
        buffer = view.consume_while(lambda x: x in (" ", "\t"))
        if buffer:
            # The consumed text is not passed to the token; only the type
            # and the auto-discard setting are.
            return Token("Whitespace", auto_discard=self.auto_discard)
        return None
class IdentityTokenizer(Tokenizer):
    """Tokenizer that passes the next item of the view through unchanged."""

    def tokenize(self, view):
        """Return whatever ``view.pop()`` yields, without wrapping it."""
        return view.pop()
class SingleTokenizer(Tokenizer):
    """Tokenizer that turns one popped item into a token of a fixed type.

    Args:
        TYPE: Token type, passed as the first argument to ``Token``.
        match_f: Predicate applied to the popped item. When omitted, an
            equality test against ``TYPE`` is used.
        T: Callable applied to a matching item; its result is passed as
            the second argument to ``Token``. Defaults to the identity.
    """

    def __init__(self, TYPE, match_f=None, T=lambda x: x):
        self.TYPE = TYPE
        self.match_f = match_f if match_f is not None else eq(self.TYPE)
        self.T = T

    def tokenize(self, view):
        """Pop one item from *view* and wrap it in a token if it matches.

        Returns:
            ``Token(self.TYPE, self.T(value))`` when ``match_f`` accepts
            the popped value, otherwise ``None``. The item is popped
            before the predicate runs, so it is taken from the view in
            both cases.
        """
        value = view.pop()
        if self.match_f(value):
            return Token(self.TYPE, self.T(value))
        return None
class CharTokenizer(SingleTokenizer):
    """``SingleTokenizer`` whose token type is also the item it matches.

    An item matches when it compares equal to ``TYPE``; the resulting
    token carries ``None`` as its second argument because the transform
    discards the matched item.

    Args:
        TYPE: Both the token type and the value an item must equal.
    """

    def __init__(self, TYPE):
        super().__init__(TYPE, eq(TYPE), lambda x: None)
class DiscardTokens(Tokenizer):
    """Tokenizer that flags selected tokens for automatic discarding.

    Args:
        discard_f: Predicate called with each popped token; a truthy
            result causes the token to be marked as auto-discard.
    """

    def __init__(self, discard_f):
        self.discard_f = discard_f

    def tokenize(self, view):
        """Pop one token from *view*, mark it if selected, and return it.

        Tokens accepted by ``discard_f`` get ``set_auto_discard(True)``
        called on them; other tokens are returned untouched.
        """
        token = view.pop()
        if self.discard_f(token):
            token.set_auto_discard(True)
        # NOTE(review): the return is assumed to be unconditional, so that
        # non-matching tokens pass through - confirm against the original
        # layout.
        return token