-rw-r--r-- | lib/sitecing_parser.ll | 47 |
1 files changed, 46 insertions, 1 deletions
%{
	/*
	 * Scanner for site-C-ing component sources.
	 *
	 * The scanner walks a component source and accumulates generated C++ text
	 * in the `output` of the front element of `modi` (see LexerOutput() below).
	 * Directives ("%%class", "<%method ...%>", ...) switch start conditions and
	 * push a fresh modus_operandi; when the directive ends its accumulated text
	 * is stored (decl, impl, member_variables, member_functions, ...) and the
	 * element is popped.
	 *
	 * XXX: I have a strong feeling that this parser should be completely rewritten.
	 */
#include <cstdio>
#include <iostream>
#include <fstream>
#include <cassert>
#include <stdexcept>
#include <string>
using namespace std;
#include "sitecing/sitecing_util.h"
#include "sitecing/sitecing_exception.h"
using namespace sitecing;
#define sitecing_parser_flexlexer_once
#include "sitecing/sitecing_parser.h"
#include "sitecing/sitecing_enflesher.h"
#undef yyFlexLexer
#define yyFlexLexer sitecing_parserFlexLexer

/*
 * File-local helpers shared by the rules below. They are templates where they
 * touch modus_operandi / the pragma container so that they only rely on the
 * members the rules themselves already use.
 */

/* Append the pending identifier (_lastid), if any, to the collected type
 * (_type), separating the words with a single space. */
template<typename modus_type>
static void parser_fold_lastid(modus_type& m) {
	if(m._lastid.empty())
		return;
	if(!m._type.empty())
		m._type += ' ';
	m._type += m._lastid;
}

/* The text gathered so far becomes the new pending identifier; the previous
 * pending identifier is folded into the type first. */
template<typename modus_type>
static void parser_shift_lastid(modus_type& m) {
	parser_fold_lastid(m);
	m._lastid = m.output;
	m.output.clear();
}

/* Decide on the declared name once the declarator ends: it is the text
 * gathered so far or, when nothing was gathered, the pending identifier. */
template<typename modus_type>
static void parser_settle_name(modus_type& m) {
	if(m.output.empty()) {
		m._name = m._lastid;
	}else{
		// XXX: lastid, I believe, should never be empty here...
		parser_fold_lastid(m);
		m._name = m.output;
		m.output.clear();
	}
}

/* Strip leading blanks/tabs and trailing blanks/tabs/semicolons. A value
 * consisting solely of such characters is left untouched. */
static void parser_trim_value(string& v) {
	string::size_type t = v.find_first_not_of(" \t");
	if(t!=string::npos)
		v.erase(0,t);
	t = v.find_last_not_of(" \t;");
	if(t!=string::npos)
		v.erase(t+1);
}

/* Remove one pair of enclosing double quotes. The length check keeps an empty
 * value or a lone quote character from being indexed/erased out of range. */
static void parser_unquote_value(string& v) {
	const string::size_type n = v.length();
	if(n>=2 && v[0]=='"' && v[n-1]=='"') {
		v.erase(n-1);
		v.erase(0,1);
	}
}

/* Store one "%%pragma" token: "name=value" maps name to value, a bare "name"
 * maps to itself. The token is cleared afterwards; empty tokens are ignored. */
template<typename pragmas_type>
static void parser_store_pragma(pragmas_type& pragmas,string& token) {
	if(token.empty())
		return;
	const string::size_type eq = token.find('=');
	if(eq==string::npos) {
		pragmas[token] = token;
	}else{
		pragmas[token.substr(0,eq)] = token.substr(eq+1);
	}
	token.erase();
}
%}

	/* All start conditions are exclusive: only rules explicitly qualified with
	 * a condition are active while the scanner is in it. */
%x SLASHSTAR_COMMENT SLASHSLASH_COMMENT STRING
%x CODELINE CLASSLINE DECLLINE IMPLLINE DECLBLOCK IMPLBLOCK VARLINE VARINIT
%x IMPORTLINE IMPORTCOMPONENT
%x IMPORTTYPELINE IMPORTTYPECOMPONENT
%x DERIVELINE DERIVECOMPONENT
%x CONSTRUCTOR DESTRUCTOR CODEMETHODLINE CODEMETHODARGS
%x CODEMETHODBLOCK INLINE METHODLINE METHODARGS METHODBLOCK CODEBLOCK OUTPUTBLOCK
%x PRAGMALINE

%option 8bit c++ verbose noyywrap yyclass="sitecing_parser" prefix="sitecing_parser" stack yylineno

WHITESPACE	[ \t]
ID	[A-Za-z_][A-Za-z0-9_]*
NOIDCHAR	[^A-Za-z0-9_]

%%

	/* Top level of the component source: recognise directives and open the
	 * matching start condition with a fresh modus_operandi. */
<INITIAL>{
	^\%\%class{WHITESPACE}+ {
		// TODO: signal error if we already have class name acquired from source.
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments|modus_operandi::flag_devour_whitespace));
		BEGIN(CLASSLINE);
	}
	^\%\%decl{WHITESPACE}+ {
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(DECLLINE);
	}
	^\%\%impl{WHITESPACE}+ {
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(IMPLLINE);
	}
	\<\%decl\> {
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(DECLBLOCK);
	}
	\<\%impl\> {
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(IMPLBLOCK);
	}
	^\%\%var{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(VARLINE);
	}
	^\%\%import{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(IMPORTLINE);
	}
	^\%\%import_type{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(IMPORTTYPELINE);
	}
	^\%\%derive{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(DERIVELINE);
	}
	\<\%constructor\> {
		modi.push_front(modus_operandi());
		anchor();
		BEGIN(CONSTRUCTOR);
	}
	\<\%destructor\> {
		modi.push_front(modus_operandi());
		anchor();
		BEGIN(DESTRUCTOR);
	}
	\<\%codemethod{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(CODEMETHODLINE);
	}
	\<\%method{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(METHODLINE);
	}
	^\%\%pragma{WHITESPACE}+ {
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(PRAGMALINE);
	}
	<<EOF>> {
		// Regular end of input: only the element pushed by preprocess()
		// may be left. Terminate whatever statement is still open.
		assert(modi.size()==1);
		M().modify(modus_operandi::modus_preop);
		LexerOutput(";",1);
		return 0;
	}
}
	/* End of input anywhere but at top level means an unterminated directive. */
<<EOF>> throw preprocessor_error(CODEPOINT,"unexpected end of file",lineno());

<CODEBLOCK,CODEMETHODBLOCK>{
	"<%output>" {
		anchor();
		yy_push_state(OUTPUTBLOCK);
	}
}

	/* "%%pragma name[=value] ..." - blank separated tokens up to end of line. */
<PRAGMALINE>{
	{WHITESPACE}+ {
		// A blank terminates the token gathered so far.
		parser_store_pragma(pragmas,M().output);
	}
	\n {
		// End of line terminates the last token and the directive.
		parser_store_pragma(pragmas,M().output);
		modi.pop_front();
		BEGIN(INITIAL);
		anchor();
	}
}

	/* "<%method type name(args)%>" header, up to the opening parenthesis. */
<METHODLINE>{
	{WHITESPACE}+ {
		modus_operandi& m = modi.front();
		if(!m.output.empty())
			parser_shift_lastid(m);
	}
	\* {
		modus_operandi& m = modi.front();
		ECHO;
		parser_shift_lastid(m);
	}
	\( {
		modus_operandi& m = modi.front();
		parser_settle_name(m);
		ECHO;
		BEGIN(METHODARGS);
	}
}
<METHODARGS>{
	\%\> {
		modus_operandi& m = modi.front();
		m._args = m.output;
		m.output.clear();
		anchor();
		BEGIN(METHODBLOCK);
	}
}

	/* Tags that may interrupt literal text. */
<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
	\<\%{WHITESPACE}+ {
		M().modify(modus_operandi::modus_postop);
		anchor();
		LexerOutput("(",1);
		yy_push_state(INLINE);
	}
	^\%{WHITESPACE} {
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_push_state(CODELINE);
	}
	\<\%code\> {
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_push_state(CODEBLOCK);
	}
	"</%output>" {
		if(YY_START!=OUTPUTBLOCK)
			throw preprocessor_error(CODEPOINT,"unexpected tag",lineno());
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_pop_state();
	}
}

<INLINE>\%\> LexerOutput(")",1); M().modus=modus_operandi::modus_preop; yy_pop_state();
<CODELINE>\n yy_pop_state();

	/* "<%codemethod type name(args)%>" header, up to the opening parenthesis. */
<CODEMETHODLINE>{
	{WHITESPACE}+ {
		modus_operandi& m = modi.front();
		if(!m.output.empty())
			parser_shift_lastid(m);
	}
	\* {
		modus_operandi& m = modi.front();
		ECHO;
		parser_shift_lastid(m);
	}
	\( {
		modus_operandi& m = modi.front();
		parser_settle_name(m);
		ECHO;
		BEGIN(CODEMETHODARGS);
	}
}
<CODEMETHODARGS>{
	\%\> {
		modus_operandi& m = modi.front();
		m._args = m.output;
		m.output.clear();
		m.flags=0;
		anchor();
		BEGIN(CODEMETHODBLOCK);
	}
}

	/* "%%import name = component" */
<IMPORTLINE>{
	{WHITESPACE}+ { }
	{ID} {
		if(!modi.front()._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		modi.front()._name = yytext;
	}
	\= {
		modi.front().output.clear();
		BEGIN(IMPORTCOMPONENT);
	}
}
<IMPORTCOMPONENT>{
	{WHITESPACE}+ { }
	\n {
		modus_operandi& m = M();
		parser_trim_value(m.output);
		parser_unquote_value(m.output);
		string c = combine_path(component_basename,m.output);
		member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

	/* "%%import_type name = component" */
<IMPORTTYPELINE>{
	{WHITESPACE}+ { }
	{ID} {
		if(!modi.front()._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		modi.front()._name = yytext;
	}
	\= {
		modi.front().output.clear();
		BEGIN(IMPORTTYPECOMPONENT);
	}
}
<IMPORTTYPECOMPONENT>{
	{WHITESPACE}+ { }
	\n {
		modus_operandi& m = M();
		parser_trim_value(m.output);
		parser_unquote_value(m.output);
		string c = combine_path(component_basename,m.output);
		member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true,true));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

	/* "%%derive name = component" */
<DERIVELINE>{
	{WHITESPACE}+ { }
	{ID} {
		if(!modi.front()._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		modi.front()._name = yytext;
	}
	\= {
		modi.front().output.clear();
		BEGIN(DERIVECOMPONENT);
	}
}
<DERIVECOMPONENT>{
	{WHITESPACE}+ { }
	\n {
		modus_operandi& m = M();
		parser_trim_value(m.output);
		parser_unquote_value(m.output);
		string c = combine_path(component_basename,m.output);
		ancestor_classes.push_back(ancestor_class(m._name,normalize_path(c,strip_leading_slash)));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

	/* "%%var type name[ = initializer]" */
<VARLINE>{
	{WHITESPACE}+ {
		modus_operandi& m = modi.front();
		if(!m.output.empty())
			parser_shift_lastid(m);
	}
	\* {
		modus_operandi& m = modi.front();
		ECHO;
		parser_shift_lastid(m);
	}
	\;|\n|\= {
		modus_operandi& m = modi.front();
		parser_settle_name(m);
		BEGIN(VARINIT);
		// Without '=' there is no initializer: feed VARINIT the newline
		// that finishes the declaration right away.
		if(*yytext!='=')
			unput('\n');
	}
}
<VARINIT>{
	\n {
		modus_operandi& m = modi.front();
		parser_trim_value(m.output);
		member_variables.push_back(member_variable(m._type,m._name,m.output));
		if(!m.output.empty())
			have_initializers=true;
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

<DECLLINE>\n {
	ECHO;
	decl += modi.front().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
<IMPLLINE>\n {
	ECHO;
	impl += modi.front().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
<CLASSLINE>\n {
	class_name = modi.front().output;
	modi.pop_front();
	BEGIN(INITIAL);
}

	/* C/C++ comments, string and character literals inside code-like states. */
<CLASSLINE,DECLLINE,IMPLLINE,VARLINE,VARINIT,IMPORTLINE,IMPORTCOMPONENT,CODEMETHODLINE,CODEMETHODARGS,INLINE,METHODLINE,METHODARGS,DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,CODELINE,CODEBLOCK,PRAGMALINE>{
	"/*" {
		yy_push_state(SLASHSTAR_COMMENT);
		if(!M().devour_comments()) {
			ECHO;
		}
	}
	"//" {
		yy_push_state(SLASHSLASH_COMMENT);
		if(!M().devour_comments()) {
			ECHO;
		}
	}
	\" {
		yy_push_state(STRING);
		ECHO;
	}
	\'\\.\' {
		ECHO;
	}
}

	/* Literal text: emitted as the contents of a C++ string literal, hence
	 * the escaping of quotes and control characters. */
<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
	\" soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\"",2);
	\n soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\n",2);
	\r soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\r",2);
	\t soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\t",2);
	\b soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\b",2);
	\a soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\a",2);
	. soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
	{WHITESPACE}+ soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
}

	/* Closing tags of the block directives. Every closing tag is recognised in
	 * every block state so that a mismatch can be reported. */
<DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,METHODBLOCK,CODEBLOCK>{
	\<\/\%decl\> {
		if(YY_START!=DECLBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		decl += modi.front().output;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%impl\> {
		if(YY_START!=IMPLBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		impl += modi.front().output;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%constructor\> {
		if(YY_START!=CONSTRUCTOR)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		member_functions.push_back(member_function("","","",modi.front().output));
		have_constructor = true;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%destructor\> {
		if(YY_START!=DESTRUCTOR)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		member_functions.push_back(member_function("","~","",modi.front().output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%codemethod\> {
		if(YY_START!=CODEMETHODBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		modus_operandi& m = modi.front();
		member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/%method\> {
		if(YY_START!=METHODBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		modus_operandi& m = modi.front();
		// Close any text/expression still open before storing the body.
		m.modify(modus_operandi::modus_code);
		member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/%code\> {
		if(YY_START!=CODEBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		yy_pop_state();
	}
	\n ECHO;
}

<SLASHSTAR_COMMENT>{
	"*/" {
		if(!M().devour_comments()) {
			ECHO;
		}
		yy_pop_state();
		// The comment as a whole acts as a single blank for the enclosing state.
		unput(' ');
	}
	\n {
		if(!M().devour_comments()) {
			ECHO;
		}
	}
}
<SLASHSLASH_COMMENT>{
	\n {
		if(!M().devour_comments()) {
			ECHO;
		}
		yy_pop_state();
		// Line-oriented states still have to see the end of line themselves.
		if(YY_START!=CODEBLOCK && YY_START!=CODEMETHODBLOCK && YY_START!=IMPLBLOCK && YY_START!=DECLBLOCK)
			unput('\n');
	}
}
<SLASHSTAR_COMMENT,SLASHSLASH_COMMENT>. {
	if(!M().devour_comments()) {
		ECHO;
	}
}

<STRING>{
	\\. ECHO;
	\" ECHO; yy_pop_state();
	. ECHO;
}

{WHITESPACE}+ {
	// NOTE(review): every start condition above is exclusive and INITIAL
	// already has an earlier {WHITESPACE}+ rule, so this rule looks
	// unreachable (flag_devour_whitespace would then have no effect).
	// Confirm whether "<*>" was intended.
	if(!(M().flags&modus_operandi::flag_devour_whitespace)) {
		ECHO;
	}
}

%%

/* Set up a parser bound to factory `f` with the default base class, base
 * header and skeleton. */
sitecing_parser::sitecing_parser(component_factory& f)
	: factory(f), have_initializers(false), have_constructor(false),
	base_class("sitecing::cgi_component"),
	base_header("sitecing/cgi_component.h"),
	skeleton(__SC_DEFAULT_SKELETON) { }

/*
 * Scan the component source file `in` and hand the collected pieces to the
 * enflesher.
 *
 * Throws preprocessor_error when the file cannot be opened or the scanner
 * returns a non-zero value; the rules themselves throw preprocessor_error on
 * malformed input.
 */
void sitecing_parser::preprocess(const string& in) {
	ifstream ifs(in.c_str(),ios::in);
	if(!ifs.good())
		throw preprocessor_error(CODEPOINT,"failed to open input file");
	input_file = in;
	// Bottom element: collects the top-level text, i.e. the body of main().
	modi.push_front(modus_operandi(0));
	switch_streams(&ifs,NULL);
	if(yylex())
		throw preprocessor_error(CODEPOINT,"unknown error");
	pragmas_t::const_iterator mp = pragmas.find("main");
	if(mp==pragmas.end()) {
		member_functions.push_back(member_function("void","main","(int _magic,va_list _args)",M().output));
	}else{
		// "%%pragma main=X": main() merely forwards to X::main().
		member_functions.push_back(
				member_function(
					"void","main","(int _magic,va_list _args)",
					mp->second+"::main(_magic,_args);"
					)
				);
	}
	// Member initializers need a constructor to live in; supply an empty one.
	if(have_initializers && !have_constructor)
		member_functions.push_back(member_function("","","",""));
	sitecing_enflesher enflesher(*this);
	enflesher.enflesh();
}

/* Scanner output (ECHO included) is appended to the current modus_operandi
 * instead of being written to a stream. */
void sitecing_parser::LexerOutput(const char* buf,int size) {
	assert(modi.size());
	M().output.append(buf,size);
}

/* Glue text emitted when switching from one output mode (row) to another
 * (column). NULL marks a transition that must never be requested. */
static const char *modus_transitions
	[sitecing_parser::modus_operandi::modi]
	[sitecing_parser::modus_operandi::modi] = {
	// To:
	//   code	preop			postop			text			From:
	{ "",		"(*(__SCIF->out))",	"(*(__SCIF->out))<<",	"(*(__SCIF->out))<<\"" },	// code
	{ ";",		"",			"<<",			"<<\"" },			// preop
	{ NULL,		NULL,			"",			"\"" },				// postop
	{ "\";",	"\"",			"\"<<",			"" }				// text
};

/* Switch to output mode `m`, appending the glue text the transition needs. */
void sitecing_parser::modus_operandi::modify(modus_t m) {
	const char * x = modus_transitions[modus][m];
	assert(x);
	output += x;
	modus = m;
}

/* Emit a #line marker unless literal text is currently being emitted. */
void sitecing_parser::soft_anchor() {
	if(M().modus!=modus_operandi::modus_text)
		anchor();
}

/* Emit a "#line <n> "<file>"" marker pointing at the current input position,
 * closing the string literal first when literal text is being emitted. */
void sitecing_parser::anchor() {
	if(M().modus==modus_operandi::modus_text)
		M().modify(modus_operandi::modus_preop);
	// Roomy enough for any int, so large line numbers are not truncated.
	char tmp[32];
	snprintf(tmp,sizeof(tmp),"%d",lineno());
	M().output += "\n#line ";
	M().output += tmp;
	M().output += " \"";
	M().output += input_file;
	M().output += "\"\n";
}
/* vim:set ft=lex: */