summaryrefslogtreecommitdiffabout
path: root/lib/sitecing_parser.ll
Unidiff
Diffstat (limited to 'lib/sitecing_parser.ll') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/sitecing_parser.ll594
1 files changed, 594 insertions, 0 deletions
diff --git a/lib/sitecing_parser.ll b/lib/sitecing_parser.ll
new file mode 100644
index 0000000..6cb78f3
--- a/dev/null
+++ b/lib/sitecing_parser.ll
@@ -0,0 +1,594 @@
1%{
2 /*
3 * XXX: I have a strong feeling that this parser should be completely rewritten.
4 */
5#include <iostream>
6#include <fstream>
7#include <cassert>
8#include <stdexcept>
9using namespace std;
10#include "sitecing/sitecing_util.h"
11#include "sitecing/sitecing_exception.h"
12using namespace sitecing;
13#define sitecing_parser_flexlexer_once
14#include "sitecing/sitecing_parser.h"
15#include "sitecing/sitecing_enflesher.h"
16#undef yyFlexLexer
17#define yyFlexLexer sitecing_parserFlexLexer
18%}
/* Exclusive start conditions, grouped by the construct they scan. */
%x SLASHSTAR_COMMENT SLASHSLASH_COMMENT STRING
%x CODELINE CLASSLINE DECLLINE IMPLLINE
%x DECLBLOCK IMPLBLOCK
%x VARLINE VARINIT
%x IMPORTLINE IMPORTCOMPONENT
%x IMPORTTYPELINE IMPORTTYPECOMPONENT
%x DERIVELINE DERIVECOMPONENT
%x CONSTRUCTOR DESTRUCTOR
%x CODEMETHODLINE CODEMETHODARGS CODEMETHODBLOCK
%x INLINE
%x METHODLINE METHODARGS METHODBLOCK
%x CODEBLOCK OUTPUTBLOCK
%option 8bit c++ verbose noyywrap stack yylineno
%option yyclass="sitecing_parser" prefix="sitecing_parser"

/* Named sub-patterns used by the rules. */
WHITESPACE  [ \t]
ID          [A-Za-z_][A-Za-z0-9_]*
NOIDCHAR    [^A-Za-z0-9_]
31
32%%
33
<INITIAL>{
    /* Directives and tags that open a construct from top-level text. */
    /* Each rule pushes a fresh modus_operandi and switches state. */
    ^\%\%class{WHITESPACE}+ {
        // TODO: signal error if we already have class name acquired from source.
        modi.push_front(
            modus_operandi(
                modus_operandi::flag_devour_comments
                | modus_operandi::flag_devour_whitespace));
        BEGIN(CLASSLINE);
    }
    ^\%\%decl{WHITESPACE}+ {
        modi.push_front(modus_operandi(0));
        anchor();
        BEGIN(DECLLINE);
    }
    ^\%\%impl{WHITESPACE}+ {
        modi.push_front(modus_operandi(0));
        anchor();
        BEGIN(IMPLLINE);
    }
    \<\%decl\> {
        modi.push_front(modus_operandi(0));
        anchor();
        BEGIN(DECLBLOCK);
    }
    \<\%impl\> {
        modi.push_front(modus_operandi(0));
        anchor();
        BEGIN(IMPLBLOCK);
    }
    ^\%\%var{WHITESPACE}+ {
        modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
        anchor();
        BEGIN(VARLINE);
    }
    ^\%\%import{WHITESPACE}+ {
        // No anchor() here: the import directives record data only.
        modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
        BEGIN(IMPORTLINE);
    }
    ^\%\%import_type{WHITESPACE}+ {
        modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
        BEGIN(IMPORTTYPELINE);
    }
    ^\%\%derive{WHITESPACE}+ {
        modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
        BEGIN(DERIVELINE);
    }
    \<\%constructor\> {
        modi.push_front(modus_operandi());
        anchor();
        BEGIN(CONSTRUCTOR);
    }
    \<\%destructor\> {
        modi.push_front(modus_operandi());
        anchor();
        BEGIN(DESTRUCTOR);
    }
    \<\%codemethod{WHITESPACE}+ {
        modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
        anchor();
        BEGIN(CODEMETHODLINE);
    }
    \<\%method{WHITESPACE}+ {
        modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
        anchor();
        BEGIN(METHODLINE);
    }
    <<EOF>> {
        // End of input at top level: only the base modus pushed by
        // preprocess() may remain; terminate the pending statement.
        assert(modi.size()==1);
        M().modify(modus_operandi::modus_preop);
        LexerOutput(";",1);
        return 0;
    }
}
<<EOF>> {
    // End of input inside any construct that has no EOF rule of its own.
    throw preprocessor_error(CODEPOINT,"unexpected end of file",lineno());
}
105
<CODEBLOCK,CODEMETHODBLOCK>{
    /* An output tag nested in code switches to the OUTPUTBLOCK state. */
    "<%output>" {
        anchor();
        yy_push_state(OUTPUTBLOCK);
    }
}
112
<METHODLINE>{
    /* Header of a method tag: tokens collect in output and are shifted */
    /* through _lastid into _type until the opening parenthesis. */
    {WHITESPACE}+ {
        modus_operandi& cur = modi.front();
        if(!cur.output.empty()) {
            // A token just ended: the previous one joins the type.
            if(!cur._lastid.empty()) {
                if(!cur._type.empty())
                    cur._type += ' ';
                cur._type += cur._lastid;
            }
            cur._lastid = cur.output;
            cur.output.clear();
        }
    }
    \* {
        modus_operandi& cur = modi.front();
        ECHO;
        // The asterisk is echoed first, so it ends up in _lastid below.
        if(!cur._lastid.empty()) {
            if(!cur._type.empty())
                cur._type += ' ';
            cur._type += cur._lastid;
        }
        cur._lastid = cur.output;
        cur.output.clear();
    }
    \( {
        modus_operandi& cur = modi.front();
        if(!cur.output.empty()) {
            if(!cur._lastid.empty()) { // XXX: lastid, I believe, should never be empty...
                if(!cur._type.empty())
                    cur._type += ' ';
                cur._type += cur._lastid;
            }
            cur._name = cur.output;
            cur.output.clear();
        }else{
            // Blanks separated the name from the parenthesis.
            cur._name = cur._lastid;
        }
        ECHO;
        BEGIN(METHODARGS);
    }
}
<METHODARGS>{
    /* The tag terminator ends the argument list and starts the body. */
    \%\> {
        modus_operandi& cur = modi.front();
        cur._args = cur.output;
        cur.output.clear();
        anchor();
        BEGIN(METHODBLOCK);
    }
}
160
<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
    /* Constructs that may interrupt literal template text. */
    \<\%{WHITESPACE}+ {
        // Inline expression: opens a parenthesis that the INLINE state closes.
        M().modify(modus_operandi::modus_postop);
        anchor();
        LexerOutput("(",1);
        yy_push_state(INLINE);
    }
    ^\%{WHITESPACE} {
        // A single line of code.
        M().modify(modus_operandi::modus_code);
        anchor();
        yy_push_state(CODELINE);
    }
    \<\%code\> {
        M().modify(modus_operandi::modus_code);
        anchor();
        yy_push_state(CODEBLOCK);
    }
    "</%output>" {
        // Only legal while inside an output block.
        if(YY_START!=OUTPUTBLOCK)
            throw preprocessor_error(CODEPOINT,"unexpected tag",lineno());
        M().modify(modus_operandi::modus_code);
        anchor();
        yy_pop_state();
    }
}
185
<INLINE>\%\> {
    // Close the parenthesis opened when the inline expression started.
    LexerOutput(")",1);
    M().modus = modus_operandi::modus_preop;
    yy_pop_state();
}
<CODELINE>\n {
    // A code line ends at the newline.
    yy_pop_state();
}
188
<CODEMETHODLINE>{
    /* Header of a codemethod tag: tokens collect in output and are shifted */
    /* through _lastid into _type until the opening parenthesis. */
    {WHITESPACE}+ {
        modus_operandi& cur = modi.front();
        if(!cur.output.empty()) {
            // A token just ended: the previous one joins the type.
            if(!cur._lastid.empty()) {
                if(!cur._type.empty())
                    cur._type += ' ';
                cur._type += cur._lastid;
            }
            cur._lastid = cur.output;
            cur.output.clear();
        }
    }
    \* {
        modus_operandi& cur = modi.front();
        ECHO;
        // The asterisk is echoed first, so it ends up in _lastid below.
        if(!cur._lastid.empty()) {
            if(!cur._type.empty())
                cur._type += ' ';
            cur._type += cur._lastid;
        }
        cur._lastid = cur.output;
        cur.output.clear();
    }
    \( {
        modus_operandi& cur = modi.front();
        if(!cur.output.empty()) {
            if(!cur._lastid.empty()) { // XXX: lastid, I believe, should never be empty...
                if(!cur._type.empty())
                    cur._type += ' ';
                cur._type += cur._lastid;
            }
            cur._name = cur.output;
            cur.output.clear();
        }else{
            // Blanks separated the name from the parenthesis.
            cur._name = cur._lastid;
        }
        ECHO;
        BEGIN(CODEMETHODARGS);
    }
}
<CODEMETHODARGS>{
    /* The tag terminator ends the argument list and starts the body. */
    \%\> {
        modus_operandi& cur = modi.front();
        cur._args = cur.output;
        cur.output.clear();
        // Drop the header flags so comments in the body are kept.
        cur.flags = 0;
        anchor();
        BEGIN(CODEMETHODBLOCK);
    }
}
237
<IMPORTLINE>{
    /* Import directive: one identifier, then the equals sign. */
    {WHITESPACE}+ { /* blanks are insignificant here */ }
    {ID} {
        modus_operandi& imp = modi.front();
        // A second identifier before the equals sign is an error.
        if(!imp._name.empty())
            throw preprocessor_error(CODEPOINT,"syntax error",lineno());
        imp._name = yytext;
    }
    \= {
        modi.front().output.clear();
        BEGIN(IMPORTCOMPONENT);
    }
}
<IMPORTCOMPONENT>{
    /* Right-hand side of an import directive, finished by the newline. */
    {WHITESPACE}+ { /* blanks are insignificant here */ }
    \n {
        modus_operandi& m = M();
        // Trim leading blanks and trailing blanks/semicolons.
        string::size_type t = m.output.find_first_not_of(" \t");
        if(t!=string::npos)
            m.output.erase(0,t);
        t = m.output.find_last_not_of(" \t;");
        if(t!=string::npos)
            m.output.erase(t+1);
        // Strip one pair of enclosing quotes. The length guard keeps an
        // empty string from being indexed and keeps a lone quote from
        // reaching erase(npos), which throws std::out_of_range.
        if(m.output.length()>=2
                && m.output[0]=='"'
                && m.output[m.output.length()-1]=='"') {
            m.output.erase(0,1);
            m.output.erase(m.output.length()-1);
        }
        // Resolve the component path against component_basename and
        // record the import as a member variable.
        string c = combine_path(component_basename,m.output);
        member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true));
        modi.pop_front();
        BEGIN(INITIAL);
    }
}
270
<IMPORTTYPELINE>{
    /* Import_type directive: one identifier, then the equals sign. */
    {WHITESPACE}+ { /* blanks are insignificant here */ }
    {ID} {
        modus_operandi& imp = modi.front();
        // A second identifier before the equals sign is an error.
        if(!imp._name.empty())
            throw preprocessor_error(CODEPOINT,"syntax error",lineno());
        imp._name = yytext;
    }
    \= {
        modi.front().output.clear();
        BEGIN(IMPORTTYPECOMPONENT);
    }
}
<IMPORTTYPECOMPONENT>{
    /* Right-hand side of an import_type directive, finished by the newline. */
    {WHITESPACE}+ { /* blanks are insignificant here */ }
    \n {
        modus_operandi& m = M();
        // Trim leading blanks and trailing blanks/semicolons.
        string::size_type t = m.output.find_first_not_of(" \t");
        if(t!=string::npos)
            m.output.erase(0,t);
        t = m.output.find_last_not_of(" \t;");
        if(t!=string::npos)
            m.output.erase(t+1);
        // Strip one pair of enclosing quotes. The length guard keeps an
        // empty string from being indexed and keeps a lone quote from
        // reaching erase(npos), which throws std::out_of_range.
        if(m.output.length()>=2
                && m.output[0]=='"'
                && m.output[m.output.length()-1]=='"') {
            m.output.erase(0,1);
            m.output.erase(m.output.length()-1);
        }
        // Same as a plain import except for the extra trailing true
        // passed to member_variable.
        string c = combine_path(component_basename,m.output);
        member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true,true));
        modi.pop_front();
        BEGIN(INITIAL);
    }
}
303
<DERIVELINE>{
    /* Derive directive: one identifier, then the equals sign. */
    {WHITESPACE}+ { /* blanks are insignificant here */ }
    {ID} {
        // A second identifier before the equals sign is an error; the
        // message now matches the import and import_type directives.
        if(!modi.front()._name.empty())
            throw preprocessor_error(CODEPOINT,"syntax error",lineno());
        modi.front()._name = yytext;
    }
    \= {
        modi.front().output.clear();
        BEGIN(DERIVECOMPONENT);
    }
}
<DERIVECOMPONENT>{
    /* Right-hand side of a derive directive, finished by the newline. */
    {WHITESPACE}+ { /* blanks are insignificant here */ }
    \n {
        modus_operandi& m = M();
        // Trim leading blanks and trailing blanks/semicolons.
        string::size_type t = m.output.find_first_not_of(" \t");
        if(t!=string::npos)
            m.output.erase(0,t);
        t = m.output.find_last_not_of(" \t;");
        if(t!=string::npos)
            m.output.erase(t+1);
        // Strip one pair of enclosing quotes. The length guard keeps an
        // empty string from being indexed and keeps a lone quote from
        // reaching erase(npos), which throws std::out_of_range.
        if(m.output.length()>=2
                && m.output[0]=='"'
                && m.output[m.output.length()-1]=='"') {
            m.output.erase(0,1);
            m.output.erase(m.output.length()-1);
        }
        // Resolve the component path against component_basename and
        // record it as an ancestor class.
        string c = combine_path(component_basename,m.output);
        ancestor_classes.push_back(ancestor_class(m._name,normalize_path(c,strip_leading_slash)));
        modi.pop_front();
        BEGIN(INITIAL);
    }
}
336
<VARLINE>{
    /* Var directive: tokens collect in output and are shifted through */
    /* _lastid into _type until a semicolon, newline or equals sign. */
    {WHITESPACE}+ {
        modus_operandi& var = modi.front();
        if(!var.output.empty()) {
            // A token just ended: the previous one joins the type.
            if(!var._lastid.empty()) {
                if(!var._type.empty())
                    var._type += ' ';
                var._type += var._lastid;
            }
            var._lastid = var.output;
            var.output.clear();
        }
    }
    \* {
        modus_operandi& var = modi.front();
        ECHO;
        // The asterisk is echoed first, so it ends up in _lastid below.
        if(!var._lastid.empty()) {
            if(!var._type.empty())
                var._type += ' ';
            var._type += var._lastid;
        }
        var._lastid = var.output;
        var.output.clear();
    }
    \;|\n|\= {
        // Read yytext before unput() below can disturb it.
        const bool initializer_follows = (*yytext=='=');
        modus_operandi& var = modi.front();
        if(!var.output.empty()) {
            if(!var._lastid.empty()) { // XXX: lastid should never be empty, I believe?
                if(!var._type.empty())
                    var._type += ' ';
                var._type += var._lastid;
            }
            var._name = var.output;
            var.output.clear();
        }else{
            var._name = var._lastid;
        }
        BEGIN(VARINIT);
        // Without an initializer, feed VARINIT a newline so it finishes at once.
        if(!initializer_follows)
            unput('\n');
    }
}
<VARINIT>{
    /* Initializer of a var directive, finished by the newline. */
    \n {
        modus_operandi& m = modi.front();
        // Trim leading blanks and trailing blanks/semicolons. When nothing
        // else is left the initializer is treated as absent; previously
        // such text was kept verbatim and counted as an initializer.
        string::size_type t = m.output.find_first_not_of(" \t");
        if(t==string::npos)
            m.output.clear();
        else
            m.output.erase(0,t);
        t = m.output.find_last_not_of(" \t;");
        if(t==string::npos)
            m.output.clear();
        else
            m.output.erase(t+1);
        member_variables.push_back(member_variable(m._type,m._name,m.output));
        // preprocess() reads this flag to add a constructor when none was given.
        if(!m.output.empty())
            have_initializers=true;
        modi.pop_front();
        BEGIN(INITIAL);
    }
}
<DECLLINE>\n {
    // End of a decl line: keep the newline and append the text to decl.
    ECHO;
    modus_operandi& line = modi.front();
    decl += line.output;
    modi.pop_front();
    BEGIN(INITIAL);
}
<IMPLLINE>\n {
    // End of an impl line: keep the newline and append the text to impl.
    ECHO;
    modus_operandi& line = modi.front();
    impl += line.output;
    modi.pop_front();
    BEGIN(INITIAL);
}
<CLASSLINE>\n {
    // End of a class line: the collected text is the class name.
    modus_operandi& line = modi.front();
    class_name = line.output;
    modi.pop_front();
    BEGIN(INITIAL);
}
<CLASSLINE,DECLLINE,IMPLLINE,VARLINE,VARINIT,IMPORTLINE,IMPORTCOMPONENT,IMPORTTYPELINE,IMPORTTYPECOMPONENT,DERIVELINE,DERIVECOMPONENT,CODEMETHODLINE,CODEMETHODARGS,INLINE,METHODLINE,METHODARGS,DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,CODELINE,CODEBLOCK>{
    /* Comment, string and character literal handling for code states. */
    /* The import_type and derive states are listed as well: their modi */
    /* are created with flag_devour_comments, which only has an effect */
    /* when these rules are active there. */
    "/*" {
        yy_push_state(SLASHSTAR_COMMENT);
        if(!M().devour_comments()) {
            ECHO;
        }
    }
    "//" {
        yy_push_state(SLASHSLASH_COMMENT);
        if(!M().devour_comments()) {
            ECHO;
        }
    }
    \" {
        yy_push_state(STRING);
        ECHO;
    }
    \'\\.\' {
        // Escaped character literal: copied whole so that a quote
        // character inside it does not open a string.
        ECHO;
    }
}
430
<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
    /* Literal template text, emitted in text modus with the characters */
    /* below written as escape sequences. */
    \" {
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\"",2);
    }
    \\ {
        // A backslash must be doubled, otherwise it would combine with the
        // next character into an escape sequence in the generated literal.
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\\",2);
    }
    \n {
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\n",2);
    }
    \r {
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\r",2);
    }
    \t {
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\t",2);
    }
    \b {
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\b",2);
    }
    \a {
        soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\a",2);
    }
    . {
        soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
    }
    {WHITESPACE}+ {
        soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
    }
}
441
<DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,METHODBLOCK,CODEBLOCK>{
    /* Closing tags. Each one must match the block being scanned. */
    \<\/\%decl\> {
        if(YY_START!=DECLBLOCK)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        decl += modi.front().output;
        modi.pop_front();
        BEGIN(INITIAL);
    }
    \<\/\%impl\> {
        if(YY_START!=IMPLBLOCK)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        impl += modi.front().output;
        modi.pop_front();
        BEGIN(INITIAL);
    }
    \<\/\%constructor\> {
        if(YY_START!=CONSTRUCTOR)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        // Recorded as a member function with empty type, name and arguments.
        member_functions.push_back(member_function("","","",modi.front().output));
        have_constructor = true;
        modi.pop_front();
        BEGIN(INITIAL);
    }
    \<\/\%destructor\> {
        if(YY_START!=DESTRUCTOR)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        // Recorded as a member function named with a tilde.
        member_functions.push_back(member_function("","~","",modi.front().output));
        modi.pop_front();
        BEGIN(INITIAL);
    }
    \<\/\%codemethod\> {
        if(YY_START!=CODEMETHODBLOCK)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        modus_operandi& fn = modi.front();
        member_functions.push_back(member_function(fn._type,fn._name,fn._args,fn.output));
        modi.pop_front();
        BEGIN(INITIAL);
    }
    \<\/%method\> {
        if(YY_START!=METHODBLOCK)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        modus_operandi& fn = modi.front();
        // Return to code modus before the body is stored.
        fn.modify(modus_operandi::modus_code);
        member_functions.push_back(member_function(fn._type,fn._name,fn._args,fn.output));
        modi.pop_front();
        BEGIN(INITIAL);
    }
    \<\/%code\> {
        if(YY_START!=CODEBLOCK)
            throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
        // CODEBLOCK was entered with yy_push_state, so pop back.
        yy_pop_state();
    }
    \n {
        ECHO;
    }
}
489
<SLASHSTAR_COMMENT>{
    /* Inside a block comment. */
    "*/" {
        const bool keep = !M().devour_comments();
        if(keep) {
            ECHO;
        }
        yy_pop_state();
        // Hand a blank back to the enclosing state in place of the comment.
        unput(' ');
    }
    \n {
        const bool keep = !M().devour_comments();
        if(keep) {
            ECHO;
        }
    }
}
<SLASHSLASH_COMMENT>{
    /* Inside a line comment, which the newline ends. */
    \n {
        const bool keep = !M().devour_comments();
        if(keep) {
            ECHO;
        }
        yy_pop_state();
        // Every state except the four block states below gets the newline
        // handed back so that its own newline rule can run.
        switch(YY_START) {
            case CODEBLOCK:
            case CODEMETHODBLOCK:
            case IMPLBLOCK:
            case DECLBLOCK:
                break;
            default:
                unput('\n');
                break;
        }
    }
}
<SLASHSTAR_COMMENT,SLASHSLASH_COMMENT>. {
    // Comment text is copied only when comments are not being devoured.
    const bool keep = !M().devour_comments();
    if(keep) {
        ECHO;
    }
}
<STRING>{
    /* Inside a string literal: everything is copied verbatim. */
    \\. {
        // An escaped character, so an escaped quote does not end the string.
        ECHO;
    }
    \" {
        ECHO;
        yy_pop_state();
    }
    . {
        ECHO;
    }
}
524
<CLASSLINE>{WHITESPACE}+ {
    // This rule used to have no start condition. Every state in this file
    // is exclusive, so it was active in INITIAL only, where the same
    // pattern in an earlier rule always wins; it could never match.
    // CLASSLINE is the only state whose modus sets flag_devour_whitespace,
    // so the rule is scoped to it.
    if(!(M().flags&modus_operandi::flag_devour_whitespace)) {
        ECHO;
    }
}
530
531%%
532
533sitecing_parser::sitecing_parser(component_factory& f)
534 : factory(f), have_initializers(false), have_constructor(false),
535 base_class("sitecing::cgi_component"),
536 base_header("sitecing/cgi_component.h"),
537 skeleton(__SC_DEFAULT_SKELETON) {
538 }
539
540void sitecing_parser::preprocess(const string& in) {
541 ifstream ifs(in.c_str(),ios::in);
542 if(!ifs.good())
543 throw preprocessor_error(CODEPOINT,"failed to open input file");
544 input_file = in;
545 modi.push_front(modus_operandi(0));
546 switch_streams(&ifs,NULL);
547 if(yylex())
548 throw preprocessor_error(CODEPOINT,"unknown error");
549 member_functions.push_back(member_function("void","main","(int _magic,va_list _args)",M().output));
550 if(have_initializers && !have_constructor)
551 member_functions.push_back(member_function("","","",""));
552 sitecing_enflesher enflesher(*this);
553 enflesher.enflesh();
554}
555
556void sitecing_parser::LexerOutput(const char* buf,int size) {
557 assert(modi.size());
558 M().output.append(buf,size);
559}
560
561static const char *modus_transitions
562 [sitecing_parser::modus_operandi::modi]
563 [sitecing_parser::modus_operandi::modi] = {
564// To:
565// code preop postop text From:
566 { "", "(*(__SCIF->out))", "(*(__SCIF->out))<<", "(*(__SCIF->out))<<\"" }, // code
567 { ";", "", "<<", "<<\"" }, // preop
568 { NULL, NULL, "", "\"" }, // postop
569 { "\";", "\"", "\"<<", "" } // text
570};
571
572void sitecing_parser::modus_operandi::modify(modus_t m) {
573 const char * x = modus_transitions[modus][m];
574 assert(x);
575 output += x;
576 modus = m;
577}
578
579void sitecing_parser::soft_anchor() {
580 if(M().modus!=modus_operandi::modus_text)
581 anchor();
582}
583void sitecing_parser::anchor() {
584 if(M().modus==modus_operandi::modus_text)
585 M().modify(modus_operandi::modus_preop);
586 M().output += "\n#line ";
587 char tmp[7];
588 snprintf(tmp,sizeof(tmp),"%d",lineno());
589 M().output += tmp;
590 M().output += " \"";
591 M().output += input_file;
592 M().output += "\"\n";
593}
594/* vim:set ft=lex: */