-
Notifications
You must be signed in to change notification settings - Fork 7
/
script.c
198 lines (181 loc) · 3.77 KB
/
script.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <macro.h>
#include <lib.h>
#include <nfa.h>
#include <dfa.h>
#include <set.h>
#include <text.h>
/* output stream that parse_cheader() and parse_ccode() copy C code to; defined elsewhere */
extern FILE *fout;
static int part = 0; /* script part currently being handled (set to 1 or 2 by the parsers below) */
/*
 * Report a parse error with part context: print the current part number
 * to stderr, then pass the message `str` on to text_errx().
 */
void parse_errx(char *str)
{
fprintf(stderr, "\nPART %d:\n", part);
text_errx(str);
}
/*
 * Fetch the next significant line of the script.
 *
 * Lines whose first character is whitespace are consumed and skipped.  If
 * such a line is not whitespace-only (according to isspaceline()), it is
 * reported through parse_errx("whitespace is redundant").  When
 * text_getline() reports a zero length, the script is reported as empty.
 *
 * linp: if non-NULL, receives the pointer to the line that was read.
 * Returns the length text_getline() reported for that line.
 */
int parse_getline(char **linp)
{
	char *line;
	int len;

	/*
	 * The <ctype.h> classifiers require an argument representable as
	 * unsigned char (or EOF).  Plain char may be signed, so a byte
	 * >= 0x80 would be passed as a negative value; convert first.
	 */
	while ((len = text_getline(&line)) &&
	       isspace((unsigned char)line[0])) {
		/* leading whitespace followed by other text is an error */
		if (!isspaceline(line))
			parse_errx("whitespace is redundant");
	}
	if (!len)
		parse_errx("invalid script: empty script");
	if (linp)
		*linp = line;
	return len;
}
/*
 * Part 1, optional C code header:
 *
 *	%{
 *	<body>
 *	%}
 *
 * Every body line is copied verbatim to fout.  Only the first two
 * characters of the opening and closing marker lines are examined; the
 * rest of those lines is ignored.
 *
 * If the first significant line is not `%{`:
 *   - a part-end line (ispartend()) sets part to 2, which makes
 *     parse_macro() skip the macro definitions;
 *   - any other line is handed back with text_backline() so the macro
 *     phase can read it.
 */
void parse_cheader(void)
{
	char *text;

	/* we are now handling part 1 */
	part = 1;
	parse_getline(&text);

	/* no `%{` opener: either part 1 is already over, or macros follow */
	if (text[0] != '%' || text[1] != '{') {
		if (ispartend(text))
			part = 2;
		else
			text_backline(text);
		return;
	}

	for (;;) {
		/* input ended before the closing `%}` was seen */
		if (!text_getline(&text)) {
			parse_errx("no header end code: %} ");
			break;
		}
		/* closing marker: `%}` */
		if (text[0] == '%' && text[1] == '}')
			break;
		/* header body line (FIXME from original: needs a better output interface) */
		fprintf(fout, "%s", text);
	}
}
/*
 * Part 1, macro definitions.  Each line has the form
 *
 *	macroname macrotext
 *
 * Lines are passed to add_macro() together with their length until a
 * part-end line (ispartend()) is read.  Reaching the end of input first
 * is reported through parse_errx().
 */
void parse_macro(void)
{
	char *line;
	int len;

	/* parse_cheader() sets part to 2 when it has already read the part end */
	if (part == 2)
		return;

	init_macro();
	while ((len = text_getline(&line)) != 0) {
		if (ispartend(line))
			break;
		add_macro(line, len);
	}
	/*
	 * len == 0 means input ran out before the separator was found.
	 * The message is spelled with plain '%' characters: "\%" is not an
	 * escape sequence defined by C (compilers that accept it yield '%',
	 * so the resulting bytes are the same).
	 */
	if (!len)
		parse_errx("no part end: %% ");
}
/*
 * Sanity-check the start of part 2 before the regular expressions are
 * parsed: skip leading whitespace, then peek at the upcoming input and
 * report an error if part 2 is missing, too short, or empty.
 */
void parse_prepare_regexp(void)
{
	char *ahead;

	/* nothing but whitespace left in the script */
	if (skip_whitespace() == EOF)
		text_errx("part2 && part3 is empty");

	/* request a lookahead of 2, enough to recognise a `%%` separator */
	ahead = text_lookahead(2);
	if (ahead == NULL) {
		/* no lookahead available, so there is nothing further to test */
		text_err("small part2");
		return;
	}

	/* part 2 begins directly with the part-end separator */
	if (ispartend(ahead))
		text_errx("part2 is empty");
}
/*
 * Part 2: turn the regular expressions into a compressed DFA table.
 *
 * Pipeline: machine() builds the NFA, construct_dfa() builds the DFA
 * table, minimize_dfa() / minimize_dfatable2() minimize it, and
 * compress_dfatable() compresses the minimized table.  With DEBUG
 * defined, the intermediate structures are dumped after each stage.
 */
void parse_regexp(void)
{
	struct nfa *start;		/* NFA returned by machine() */
	struct set *dfa_accept;		/* accept set from construct_dfa() */
	struct set *min_accept;		/* accept set from minimize_dfa() */
	int (*dfa)[MAX_CHARS];		/* DFA table */
	int (*min_dfa)[MAX_CHARS];	/* minimized DFA table */
	int nstates;			/* size of the DFA table */
	int min_nstates;		/* size of the minimized DFA table */

	part = 2;

	/* make sure there is something to parse in part 2 */
	parse_prepare_regexp();

	/* regular expressions -> NFA */
	init_nfa_buffer();
	start = machine();
#ifdef DEBUG
	traverse_nfa(start);
#endif

	/* NFA -> DFA table */
	nstates = construct_dfa(start, &dfa, &dfa_accept);
#ifdef DEBUG
	traverse_dfatable(dfa, nstates, dfa_accept);
#endif

	/* minimize; per the original note, dfa_accept is freed by this step */
	minimize_dfa(dfa, dfa_accept, &min_accept);
	min_nstates = minimize_dfatable2(dfa, &min_dfa);
#ifdef DEBUG
	traverse_dfatable(min_dfa, min_nstates, min_accept);
#endif

	/* the original author was unsure whether the table should be freed here */
	free(dfa);
	freeset(min_accept);

	/* compress the minimized table, then release it */
	compress_dfatable(min_dfa, min_nstates, MAX_CHARS);
	free(min_dfa);
}
/*
 * Part 3: copy the trailing C code of the script to fout unchanged.
 */
void parse_ccode(void)
{
	char *text;

	/*
	 * Discard the `%%` separator line: read one character, and unless
	 * that character is the newline, read (and drop) one line.
	 */
	if (text_getchar() != '\n')
		text_getline(&text);

	/* every remaining line belongs to part 3 */
	while (text_getline(&text))
		fprintf(fout, "%s", text);
}
/*
 * Parse the whole script text.
 * The script format is as follows:
 *
 * <part1: C file header, pattern macro definitions >
 * %%
 * <part2: regular expressions, associated rules >
 * %%
 * <part3: C code >
 *
 * The phases run in script order; gen_part_code() is called between
 * part 2 and part 3.
 */
void parse_script(void)
{
/* prehandle */
/* part1: optional C header, then macro definitions */
/* TODO: option handling */
parse_cheader();
parse_macro();
/* part2: regular expressions */
parse_regexp();
/* generate part code into lex.yy.c */
gen_part_code();
/* part3: trailing C code */
parse_ccode();
}
/*
 * Open the script called `name` for parsing by delegating to text_open().
 */
void open_script(const char *name)
{
text_open(name);
}