"""
These are all the possible type for the interpreter to interpret
We can add more as we go on
"""
import sys
import re
from typing import List, Optional, Tuple, Union


class Token:
    """
    This is the token class, with the current types being
    - LETTER
    - DIGIT
    - SEMICOLON
    - INSTRUCTION
    - GATE
    - VARIABLE
    - LABEL
    - TYPE
    - INDEX

    Whitespace, comments (starting with `#`), and semicolons are matched
    by the lexer but skipped rather than emitted as tokens.
    """

    (LETTER, DIGIT, SEMICOLON, INSTRUCTION, GATE, VARIABLE, LABEL, TYPE, INDEX) = [
        "LETTER",
        "DIGIT",
        "SEMI-COLON",
        "INSTRUCTION",
        "GATE",
        "VARIABLE",
        "LABEL",
        "TYPE",
        "INDEX",
    ]

    def __init__(self, token_type: Optional[str], value: Union[str, int]):
        """
        For now just gates, qubits, variables, labels, and jumps
        """
        self.token_type = token_type
        self.value = value

    def __repr__(self) -> str:
        return f"<{self.token_type} :: {self.value}>"
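
# A quick illustration (not in the original file): with the __repr__ above,
# Token(Token.GATE, "H") renders as "<GATE :: H>".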


class LexicalAnalyzer:
    """
    Lexes input to `Token` types
    """

    # Patterns are tried in order and the first match wins. Keyword
    # patterns end in \b so that, e.g., "Hello" lexes as one VARIABLE
    # rather than the gate H followed by the variable "ello".
    token_expressions: List[Tuple[str, Optional[str]]] = [
        (r"[ \n\t]+", None),  # Whitespace (skipped)
        (r"#[^\n]*", None),  # Comments (skipped)
        (r";", None),  # Semicolons (skipped)
        (r"DEFGATE\b", Token.INSTRUCTION),
        (r"DEFCIRCUIT\b", Token.INSTRUCTION),
        (r"DECLARE\b", Token.INSTRUCTION),
        (r"LABEL\b", Token.INSTRUCTION),
        (r"JUMP-WHEN\b", Token.INSTRUCTION),
        (r"JUMP-UNLESS\b", Token.INSTRUCTION),
        (r"(BIT|INTEGER)\b", Token.TYPE),
        (r"MEASURE\b", Token.GATE),
        (r"CNOT\b", Token.GATE),
        (r"NOP\b", Token.GATE),
        (r"X\b", Token.GATE),
        (r"H\b", Token.GATE),
        (r"I\b", Token.GATE),
        (r"Y\b", Token.GATE),
        (r"Z\b", Token.GATE),
        (r"[0-9]+", Token.DIGIT),
        (r"[a-zA-Z]\w*", Token.VARIABLE),
        (r"\[\d+\]", Token.INDEX),
        (r"@\w+", Token.LABEL),
    ]

    def __init__(self, text: str):
        self.text = text

    def lex(self) -> List[Token]:
        """
        Returns a list of tokens.
        Exits with an error if the input contains an unrecognised sequence.
        """
        pos = 0
        out: List[Token] = []
        while pos < len(self.text):
            match = None
            for pattern, type_value in self.token_expressions:
                match = re.compile(pattern).match(self.text, pos)
                if match:
                    token_value: str = match.group(0)
                    # Expressions mapped to None (whitespace, comments,
                    # semicolons) are consumed without emitting a token.
                    if type_value is not None:
                        out.append(Token(type_value, token_value))
                    break
            if match is None:
                sys.stderr.write(f"Illegal sequence: {self.text[pos]}\n")
                sys.exit(1)
            pos = match.end(0)
        return out
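

if __name__ == "__main__":
    # A minimal usage sketch (not part of the original module): lex a small
    # Quil-like program and print the resulting tokens. The sample program
    # below is illustrative input, not something defined elsewhere in this
    # repository.
    sample = """
    DECLARE ro BIT  # a classical bit
    H 0
    CNOT 0 1
    MEASURE 0 ro[0]
    """
    for token in LexicalAnalyzer(sample).lex():
        print(token)
    # Expected output:
    #   <INSTRUCTION :: DECLARE>
    #   <VARIABLE :: ro>
    #   <TYPE :: BIT>
    #   <GATE :: H>
    #   <DIGIT :: 0>
    #   <GATE :: CNOT>
    #   <DIGIT :: 0>
    #   <DIGIT :: 1>
    #   <GATE :: MEASURE>
    #   <DIGIT :: 0>
    #   <VARIABLE :: ro>
    #   <INDEX :: [0]>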