1 | %{ |
---|
2 | /* |
---|
3 | * Bill's ABNF parser. |
---|
4 | * Copyright 2002-2007 William C. Fenner <fenner@fenron.com> |
---|
5 | * All rights reserved. |
---|
6 | * |
---|
7 | * Redistribution and use in source and binary forms, with or without |
---|
8 | * modification, are permitted provided that the following conditions |
---|
9 | * are met: |
---|
10 | * 1. Redistributions of source code must retain the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer. |
---|
12 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
13 | * notice, this list of conditions and the following disclaimer in the |
---|
14 | * documentation and/or other materials provided with the distribution. |
---|
15 | * 3. Neither the name of the author nor the names of contributors |
---|
16 | * may be used to endorse or promote products derived from this software |
---|
17 | * without specific prior written permission. |
---|
18 | * |
---|
19 | * THIS SOFTWARE IS PROVIDED BY WILLIAM C. FENNER ``AS IS'' AND |
---|
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
---|
21 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
---|
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WILLIAM C. FENNER OR HIS |
---|
23 | * BROTHER B1FF BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
---|
24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
---|
25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
---|
26 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
---|
27 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
---|
28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
---|
29 | * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
---|
30 | * DAMAGE. |
---|
31 | */ |
---|
32 | |
---|
33 | #include "config.h" |
---|
34 | #include <stdlib.h> |
---|
35 | #include <string.h> |
---|
36 | #include "common.h" |
---|
37 | #include "y.tab.h" |
---|
38 | |
---|
/* RCS identification string embedded in the object file. */
static const char rcsid[] =
    "$Id: scanner.l,v 1.1 2008-05-28 12:01:58 jre Exp $";

/* Column position on the current line; the rules advance it by the length
 * of each token and gotcr() resets it to 0. */
int yycolumn = 0;
/* Error counter; defined here but not modified anywhere in this file. */
int yyerrors = 0;
/* Defined elsewhere.  When zero, "|" and ".." ranges switch the scanner
 * into the SKIP state. */
extern int permissive;
/* Column of the first rule name seen; -1 until one has been seen. */
int indent = -1;

/* Offending character recorded just before entering the SKIP state; the
 * SKIP rule prints it in its diagnostic. */
char badchar;

/* Helpers defined after the rules section. */
static void scanrange(char *, int, struct range *);
static char *scanstr(char *, int);
static void gotcr(void);
---|
52 | |
---|
53 | %} |
---|
54 | |
---|
/* Digit classes for the %b, %d and %x numeric forms. */
bit		[01]
digit		[0-9]
hexdig		[0-9A-Fa-f]

/* A rule name is a letter followed by letters, digits or hyphens. */
rulename	[A-Za-z][-0-9A-Za-z]*
/* Horizontal whitespace: space or tab. */
wsp		[ \t]

/* *(%x20-21 / %x23-7E) */
charval		[ !#-~]*

/* *(%x20-3D / %x3F-7E) */
proseval	[ -=?-~]*

/* Any line ending: LF CR, CR LF, a bare CR or a bare LF. */
mycrlf		(\n\r|\r\n|\r|\n)

/* %x may be a flex feature; %s works but sometimes results in
 * confusing error messages */
/* SKIP is entered after an illegal character; its rule discards the rest
 * of the line and reports the character saved in badchar. */
%x SKIP
---|
73 | |
---|
74 | /* line number isn't quite being updated properly. |
---|
75 | suspect unterminated charval and prosevals. */ |
---|
76 | %% |
---|
<SKIP>.*{mycrlf}	{
		/* Discard the rest of a line that contained an illegal
		 * character, report the character, and resume normal
		 * scanning.  The character is quoted with whichever quote
		 * mark it is not. */
		char q = (badchar == '\'') ? '"' : '\'';
		mywarn(MYERROR, "Illegal character %c%c%c - skipping to end of line", q, badchar, q);
		gotcr();
		BEGIN(INITIAL);
		return CRLF;
	}
<SKIP>.+	{
		/* Same situation, but the input ends without a line
		 * terminator, so the rule above cannot match.  Without this
		 * rule the diagnostic would be lost and the leftover text
		 * echoed by the default rule.  No token is returned, so the
		 * parser still just sees end of input. */
		char q = (badchar == '\'') ? '"' : '\'';
		mywarn(MYERROR, "Illegal character %c%c%c - skipping to end of line", q, badchar, q);
		BEGIN(INITIAL);
	}
<SKIP><<EOF>>	{
		/* The illegal character was the very last byte of input:
		 * still report it, and leave SKIP so that a later scan does
		 * not start in the wrong state. */
		char q = (badchar == '\'') ? '"' : '\'';
		mywarn(MYERROR, "Illegal character %c%c%c - skipping to end of line", q, badchar, q);
		BEGIN(INITIAL);
		yyterminate();
	}
---|
\"{charval}["\r\n]	{
		/* Quoted string.  The pattern also accepts a CR or LF in
		 * place of the closing quote so that an unterminated string
		 * can be diagnosed here. */
		char *last;

		yycolumn += yyleng;
		/* Copy everything after the opening quote; the final
		 * character is the terminator that was matched. */
		yylval.string = strdup(yytext + 1);
		last = yylval.string + strlen(yylval.string) - 1;
		if (*last != '"') {
			mywarn(MYERROR, "unterminated char-val");
			unput(*last);	/* hand the CR or LF back to the scanner */
		}
		*last = '\0';
		if (yylval.string[0] == '\0')
			mywarn(MYWARNING, "zero-length char-val");
		return CHARVAL;
	}
---|
\<{proseval}[>\r\n]	{
		/* Angle-bracketed prose.  As with char-val, a CR or LF is
		 * accepted in place of the closing '>' so the missing
		 * terminator can be reported. */
		char *last;

		yycolumn += yyleng;
		/* Copy everything after the '<'; the final character is the
		 * terminator that was matched. */
		yylval.string = strdup(yytext + 1);
		last = yylval.string + strlen(yylval.string) - 1;
		if (*last != '>') {
			mywarn(MYERROR, "unterminated prose-val");
			unput(*last);	/* hand the CR or LF back to the scanner */
		}
		*last = '\0';
		return PROSEVAL;
	}
---|
{rulename}		{
		/* Hand the parser its own copy of the name. */
		yylval.string = strdup(yytext);
		/* The column of the very first rule name becomes the
		 * reference indentation. */
		if (indent == -1)
			indent = yycolumn;
		yycolumn += yyleng;
		return RULENAME;
	}
---|
%[Bb]{bit}+(-|\.\.){bit}+	{
		/* Binary range; the digits start after the "%b" prefix. */
		yycolumn += yyleng;
		scanrange(&yytext[2], 2, &yylval.range);
		return BINVALRANGE;
	}
%[Bb]{bit}+(\.{bit}+)*	{
		/* One binary value or a dot-separated series of them. */
		yycolumn += yyleng;
		yylval.string = scanstr(&yytext[2], 2);
		return BINVAL;
	}
%[Bb].			{
		/* "%b" followed by something that is not a bit. */
		mywarn(MYERROR, "bad bit value");
		badchar = yytext[2];
		BEGIN(SKIP);
	}
%[Dd]{digit}+(-|\.\.){digit}+	{
		/* Decimal range. */
		yycolumn += yyleng;
		scanrange(&yytext[2], 10, &yylval.range);
		return DECVALRANGE;
	}
%[Dd]{digit}+(\.{digit}+)*	{
		/* One decimal value or a dot-separated series of them. */
		yycolumn += yyleng;
		yylval.string = scanstr(&yytext[2], 10);
		return DECVAL;
	}
%[Dd].			{
		/* "%d" followed by something that is not a digit. */
		mywarn(MYERROR, "bad decimal value");
		badchar = yytext[2];
		BEGIN(SKIP);
	}
%[Xx]{hexdig}+(-|\.\.){hexdig}+	{
		/* Hexadecimal range. */
		yycolumn += yyleng;
		scanrange(&yytext[2], 16, &yylval.range);
		return HEXVALRANGE;
	}
%[Xx]{hexdig}+(\.{hexdig}+)*	{
		/* One hexadecimal value or a dot-separated series of them. */
		yycolumn += yyleng;
		yylval.string = scanstr(&yytext[2], 16);
		return HEXVAL;
	}
%[Xx].			{
		/* "%x" followed by something that is not a hex digit. */
		mywarn(MYERROR, "bad hex value");
		badchar = yytext[2];
		BEGIN(SKIP);
	}
---|
{digit}*\*{digit}*	{
		/* Repetition "lo*hi"; either bound may be omitted.  An upper
		 * bound of -1 is stored when it is omitted.  Because an
		 * omitted bound parses as 0, an explicit 0 is stored as -1
		 * too. */
		char *end;

		yycolumn += yyleng;
		yylval.range.lo = strtoul(yytext, &end, 10);
		if (*end == '*') {
			yylval.range.hi = strtoul(end + 1, &end, 10);
			if (*end != '\0') {
				mywarn(MYERROR, "internal scanner error 2");
				yylval.range.hi = -1;
			} else if (yylval.range.hi == 0) {
				yylval.range.hi = -1;
			}
		} else {
			mywarn(MYERROR, "internal scanner error 1");
			yylval.range.hi = -1;
		}
		return REPEAT;
	}
{digit}*#{digit}*	{
		/* List form "lo#hi"; bounds are handled exactly as for the
		 * '*' form above. */
		char *end;

		yycolumn += yyleng;
		yylval.range.lo = strtoul(yytext, &end, 10);
		if (*end == '#') {
			yylval.range.hi = strtoul(end + 1, &end, 10);
			if (*end != '\0') {
				mywarn(MYERROR, "internal scanner error 2");
				yylval.range.hi = -1;
			} else if (yylval.range.hi == 0) {
				yylval.range.hi = -1;
			}
		} else {
			mywarn(MYERROR, "internal scanner error 1");
			yylval.range.hi = -1;
		}
		return LIST;
	}
{digit}+		{
		/* A bare count: both bounds are set to the same value. */
		char *end;

		yycolumn += yyleng;
		yylval.range.lo = strtoul(yytext, &end, 10);
		yylval.range.hi = yylval.range.lo;
		if (*end != '\0') {
			mywarn(MYERROR, "internal scanner error 3");
			yylval.range.lo = 42;
			yylval.range.hi = yylval.range.lo;
		}
		return REPEAT;
	}
---|
=\/			{
		/* Incremental-alternatives operator; always two characters. */
		yycolumn += yyleng;
		return EQSLASH;
	}
---|
({wsp}+|(;[^\r\n]*)|{mycrlf}{wsp}+)+	{
		/* Whitespace, comments and continuation lines.  Walk the
		 * matched text to keep the line and column counters right,
		 * then decide whether the parser sees CWSP, CRLF or
		 * nothing. */
		const char *cur;

		for (cur = yytext; *cur != '\0'; ) {
			char first = *cur++;

			/* TO DO:
			 * deal with indent if we
			 * have one set - if a blank
			 * line or a comment is indented
			 * less than enough, we warn
			 * about it. */
			if (first != '\r' && first != '\n') {
				yycolumn++;
				continue;
			}
			gotcr();
			/* A CR LF or LF CR pair counts as one line ending. */
			if (*cur == (first == '\r' ? '\n' : '\r'))
				cur++;
		}
		/* Until the first rule name has fixed the indent, leading
		   whitespace is simply dropped (this resumes scanning). */
		if (indent == -1)
			continue;
		/* Past the reference indent: report it to the parser as
		   whitespace. */
		if (yycolumn > indent)
			return CWSP;
		if (yycolumn < indent) {
			indent = yycolumn;
			mywarn(MYERROR, "adjusting indentation");
		}
		/* Not indented beyond the reference column, so the parser
		   is told this was just a line ending. */
		return CRLF;
	}
---|
{mycrlf}		{
		/* A line ending not followed by whitespace. */
		gotcr();
		return CRLF;
	}
[][()=/]		{
		/* Single-character punctuation is its own token value. */
		yycolumn += 1;
		return *yytext;
	}
\|			{
		/* '|' is passed to the parser as-is; unless running
		 * permissively, the rest of the line is skipped as well. */
		yycolumn += 1;
		if (!permissive) {
			badchar = *yytext;
			BEGIN(SKIP);
		}
		return *yytext;
	}
.			{
		/* Anything else is illegal: remember it and skip the rest
		 * of the line. */
		yycolumn += 1;
		badchar = *yytext;
		BEGIN(SKIP);
	}
---|
249 | %% |
---|
250 | |
---|
/*
 * Parse "<lo>-<hi>" (or the non-standard "<lo>..<hi>") at p, with both
 * numbers written in the given base, and store the bounds in *r.
 *
 * The ".." form is always reported as an error; unless running
 * permissively it also switches the scanner into the SKIP state.
 * An inverted range (hi < lo) is reported but stored unchanged.
 */
static void
scanrange(char *p, int base, struct range *r)
{
	char *end;

	r->lo = strtoul(p, &end, base);
	switch (*end) {
	case '-':
		break;
	case '.':
		if (!permissive) {
			badchar = '.';
			BEGIN(SKIP);
		}
		mywarn(MYERROR, "Ranges use \"-\", not \"..\".");
		end++;		/* step over the first of the two dots */
		break;
	default:
		/* The calling patterns should make this unreachable. */
		mywarn(MYERROR, "internal scanner error 4");
		r->hi = r->lo;
		return;
	}

	/* end now points at the last separator character. */
	r->hi = strtoul(end + 1, &end, base);
	if (*end != '\0')
		mywarn(MYERROR, "internal scanner error 5");
	if (r->hi < r->lo)
		mywarn(MYERROR, "inverted range");
}
---|
279 | |
---|
/*
 * Convert a dot-separated list of numbers in the given base (for example
 * "48.65.6C" with base 16) into a newly allocated NUL-terminated string
 * holding one byte per number.
 *
 * Values above 255 are clamped to 255 with a warning.  A value of 0 is
 * stored as-is and reported, since it ends the C string early.
 *
 * Returns a malloc()ed string, or NULL if memory could not be allocated.
 */
static char *
scanstr(char *p, int base)
{
	char *ep;
	char *buf;
	char *b;
	unsigned long v;

	/*
	 * Each pass of the loop consumes at least one input character (the
	 * separating '.') unless it is the last pass, so at most
	 * strlen(p) + 1 bytes are produced, plus the terminating NUL.
	 * Sizing from the input replaces the old fixed 512-byte stack
	 * buffer, which a long value list could overflow.
	 */
	buf = malloc(strlen(p) + 2);
	if (buf == NULL)
		return NULL;
	b = buf;

	do {
		/* Keep the full unsigned long so that huge values cannot
		 * wrap past the range check below. */
		v = strtoul(p, &ep, base);
		if (v > 255) {	/* XXX */
			mywarn(MYWARNING, "I can't handle this legal ABNF char value");
			v = 255;
		}
		if (v == 0) {
			mywarn(MYERROR, "This parser will truncate strings at %%x00");
		}
		*b++ = (char)v;
		p = ep + 1;
	} while (*ep == '.');
	if (*ep)
		mywarn(MYERROR, "internal scanner error 6");
	*b = '\0';
	return buf;
}
---|
305 | |
---|
/*
 * Account for one line ending: move to the next line and return to
 * column zero.
 */
static void
gotcr(void)
{
	yycolumn = 0;
	yylineno += 1;
}
---|
312 | |
---|
/*
 * Put the scanner's bookkeeping back to its start-of-input values:
 * indentation not yet known, column 0, line 0.  The start condition and
 * yyerrors are left untouched.
 */
void
scanreset(void)
{
	indent = -1;
	yycolumn = 0;
	yylineno = 0;
}
---|