%{
	/*
	 * XXX: I have a strong feeling that this parser should be completely rewritten.
	 *
	 * Scanner specification for the sitecing_parser class (see the yyclass
	 * option below). Rule actions collect text in the `output` string of the
	 * current modus_operandi (obtained through M()) and hand finished pieces
	 * over to class_name, decl, impl, pragmas, member_variables,
	 * member_functions and ancestor_classes.
	 *
	 * NOTE(review): this copy looks damaged -- several #include directives
	 * have no target, many rule groups open with a bare `{` where a
	 * start-condition list would be expected, and two patterns read `<>` and
	 * `""`. Nothing has been reconstructed here; verify against the pristine
	 * source.
	 */
#include
#include
#include
#include
using namespace std;
#include
#include "sitecing/sitecing_util.h"
#include "sitecing/sitecing_exception.h"
using namespace sitecing;
#define sitecing_parser_flexlexer_once
#include "sitecing/sitecing_parser.h"
#include "sitecing/sitecing_enflesher.h"
#undef yyFlexLexer
#define yyFlexLexer sitecing_parserFlexLexer
%}

%x SLASHSTAR_COMMENT SLASHSLASH_COMMENT STRING
%x CODELINE CLASSLINE DECLLINE IMPLLINE DECLBLOCK IMPLBLOCK VARLINE VARINIT
%x IMPORTLINE IMPORTCOMPONENT
%x IMPORTTYPELINE IMPORTTYPECOMPONENT
%x DERIVELINE DERIVECOMPONENT
%x CONSTRUCTOR DESTRUCTOR CODEMETHODLINE CODEMETHODARGS
%x CODEMETHODBLOCK INLINE METHODLINE METHODARGS METHODBLOCK CODEBLOCK OUTPUTBLOCK
%x PRAGMALINE

%option 8bit c++ verbose noyywrap yyclass="sitecing_parser" prefix="sitecing_parser" stack yylineno

WHITESPACE	[ \t]
ID	[A-Za-z_][A-Za-z0-9_]*
NOIDCHAR	[^A-Za-z0-9_]

%%

{
	^\%\%class{WHITESPACE}+	{
		/* Class name line: both comments and whitespace are dropped while it is read. */
		// TODO: signal error if we already have class name acquired from source.
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments|modus_operandi::flag_devour_whitespace));
		BEGIN(CLASSLINE);
	}
	^\%\%decl{WHITESPACE}+	{
		/* One-line declaration; the collected text is appended to decl at the newline. */
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(DECLLINE);
	}
	^\%\%impl{WHITESPACE}+	{
		/* One-line implementation; the collected text is appended to impl at the newline. */
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(IMPLLINE);
	}
	\<\%decl\>	{
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(DECLBLOCK);
	}
	\<\%impl\>	{
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(IMPLBLOCK);
	}
	^\%\%var{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(VARLINE);
	}
	^\%\%import{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(IMPORTLINE);
	}
	^\%\%import_type{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(IMPORTTYPELINE);
	}
	^\%\%derive{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(DERIVELINE);
	}
	\<\%constructor\>	{
		modi.push_front(modus_operandi());
		anchor();
		BEGIN(CONSTRUCTOR);
	}
	\<\%destructor\>	{
		modi.push_front(modus_operandi());
		anchor();
		BEGIN(DESTRUCTOR);
	}
	\<\%codemethod{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(CODEMETHODLINE);
	}
	\<\%method{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(METHODLINE);
	}
	^\%\%pragma{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(PRAGMALINE);
	}
	<>	{
		/*
		 * NOTE(review): the pattern reads `<>` in this copy -- presumably the
		 * end-of-file rule. Exactly one modus may be left at this point; it
		 * is switched to modus_preop and a closing ';' is appended before
		 * scanning stops.
		 */
		assert(modi.size()==1);
		M().modify(modus_operandi::modus_preop);
		LexerOutput(";",1);
		return 0;
	}
}
<>	throw preprocessor_error(CODEPOINT,"unexpected end of file",lineno());

{
	"<%output>"	{
		anchor();
		yy_push_state(OUTPUTBLOCK);
	}
}

{
	{WHITESPACE}+	{
		/*
		 * A pragma word has ended: "name=value" is stored as name -> value,
		 * a bare word is stored as word -> word.
		 */
		modus_operandi& m = M();
		if(!m.output.empty()) {
			string::size_type eq = m.output.find('=');
			if(eq==string::npos) {
				pragmas[m.output]=m.output;
			}else{
				pragmas[m.output.substr(0,eq)] = m.output.substr(eq+1);
			}
			m.output.erase();
		}
	}
	\n	{
		/* Store the last word the same way as above, then drop this modus and go back to INITIAL. */
		modus_operandi& m = M();
		if(!m.output.empty()) {
			string::size_type eq = m.output.find('=');
			if(eq==string::npos) {
				pragmas[m.output]=m.output;
			}else{
				pragmas[m.output.substr(0,eq)] = m.output.substr(eq+1);
			}
			m.output.erase();
		}
		modi.pop_front();
		BEGIN(INITIAL);
		anchor();
	}
}

{
	{WHITESPACE}+	{
		/*
		 * A word has ended: the previously remembered word (if any) is
		 * appended to the type and the new word is remembered in _lastid.
		 */
		modus_operandi& m = M();
		if(!m.output.empty()) {
			if(!m._lastid.empty()) {
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._lastid = m.output;
			m.output.clear();
		}
	}
	\*	{
		/* The '*' is echoed into the current word first, then the word boundary is handled as for whitespace. */
		modus_operandi& m = M();
		ECHO;
		if(!m._lastid.empty()) {
			if(!m._type.empty()) m._type += ' ';
			m._type += m._lastid;
		}
		m._lastid = m.output;
		m.output.clear();
	}
	\(	{
		/*
		 * Argument list begins: the last word seen becomes the method name,
		 * whatever was remembered before it is appended to the type. The
		 * parenthesis itself is echoed so that it ends up in the arguments.
		 */
		modus_operandi& m = M();
		if(m.output.empty()) {
			m._name=m._lastid;
		}else{
			if(!m._lastid.empty()) { // XXX: lastid, I believe should never be emtpy...
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._name = m.output;
			m.output.clear();
		}
		ECHO;
		BEGIN(METHODARGS);
	}
}

{
	\%\>	{
		/* Everything collected since '(' is the argument list; the method body follows. */
		modus_operandi& m = M();
		m._args = m.output;
		m.output.clear();
		anchor();
		BEGIN(METHODBLOCK);
	}
}

{
	\<\%{WHITESPACE}+	{
		/* Inline expression: opened with '(' here; the matching ')' is emitted by the `%>` rule below. */
		M().modify(modus_operandi::modus_postop);
		anchor();
		LexerOutput("(",1);
		yy_push_state(INLINE);
	}
	^\%{WHITESPACE}	{
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_push_state(CODELINE);
	}
	\<\%code\>	{
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_push_state(CODEBLOCK);
	}
	""	{
		/*
		 * NOTE(review): the pattern reads as an empty string in this copy --
		 * presumably the closing output tag. It is only accepted while in
		 * OUTPUTBLOCK.
		 */
		if(YY_START!=OUTPUTBLOCK)
			throw preprocessor_error(CODEPOINT,"unexpected tag",lineno());
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_pop_state();
	}
}

\%\>	LexerOutput(")",1); M().modus=modus_operandi::modus_preop; yy_pop_state();
\n	yy_pop_state();

{
	{WHITESPACE}+	{
		/*
		 * A word has ended: the previously remembered word (if any) is
		 * appended to the type and the new word is remembered in _lastid.
		 */
		modus_operandi& m = M();
		if(!m.output.empty()) {
			if(!m._lastid.empty()) {
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._lastid = m.output;
			m.output.clear();
		}
	}
	\*	{
		/* The '*' is echoed into the current word first, then the word boundary is handled as for whitespace. */
		modus_operandi& m = M();
		ECHO;
		if(!m._lastid.empty()) {
			if(!m._type.empty()) m._type += ' ';
			m._type += m._lastid;
		}
		m._lastid = m.output;
		m.output.clear();
	}
	\(	{
		/* Argument list begins: settle the method name and type, echo '(' and collect the arguments. */
		modus_operandi& m = M();
		if(m.output.empty()) {
			m._name=m._lastid;
		}else{
			if(!m._lastid.empty()) { // XXX: lastid, I believe should never be emtpy...
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._name = m.output;
			m.output.clear();
		}
		ECHO;
		BEGIN(CODEMETHODARGS);
	}
}

{
	\%\>	{
		/*
		 * Everything collected since '(' is the argument list. The flags are
		 * reset for the body -- presumably so that comments inside it are no
		 * longer dropped.
		 */
		modus_operandi& m = M();
		m._args = m.output;
		m.output.clear();
		m.flags=0;
		anchor();
		BEGIN(CODEMETHODBLOCK);
	}
}

{
	{WHITESPACE}+	{ }
	{ID}	{
		/* Only a single identifier is accepted before the '='. */
		modus_operandi& m = M();
		if(!m._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		m._name = yytext;
	}
	\=	{
		M().output.clear();
		BEGIN(IMPORTCOMPONENT);
	}
}

{
	{WHITESPACE}+	{ }
	\n	{
		/*
		 * End of an import line: trim the collected component path, strip
		 * one pair of surrounding double quotes, resolve it against
		 * component_basename and record it as a member variable.
		 */
		modus_operandi& m = M();
		string::size_type t = m.output.find_first_not_of(" \t");
		if(t!=string::npos)
			m.output.erase(0,t);
		t = m.output.find_last_not_of(" \t;");
		if(t!=string::npos)
			m.output.erase(t+1);
		/* At least two characters are required for a quoted string; this
		 * also keeps the indexing and the second erase() within bounds. */
		if(m.output.length()>=2 && m.output[0]=='"' && m.output[m.output.length()-1]=='"') {
			m.output.erase(0,1);
			m.output.erase(m.output.length()-1);
		}
		string c = konforka::combine_path(component_basename,m.output);
		member_variables.push_back(member_variable(m._type,m._name,konforka::normalize_path(c,konforka::strip_leading_slash),true));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

{
	{WHITESPACE}+	{ }
	{ID}	{
		/* Only a single identifier is accepted before the '='. */
		modus_operandi& m = M();
		if(!m._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		m._name = yytext;
	}
	\=	{
		M().output.clear();
		BEGIN(IMPORTTYPECOMPONENT);
	}
}

{
	{WHITESPACE}+	{ }
	\n	{
		/*
		 * End of an import_type line: same processing as for a plain import,
		 * but member_variable receives one more `true` argument.
		 */
		modus_operandi& m = M();
		string::size_type t = m.output.find_first_not_of(" \t");
		if(t!=string::npos)
			m.output.erase(0,t);
		t = m.output.find_last_not_of(" \t;");
		if(t!=string::npos)
			m.output.erase(t+1);
		/* At least two characters are required for a quoted string; this
		 * also keeps the indexing and the second erase() within bounds. */
		if(m.output.length()>=2 && m.output[0]=='"' && m.output[m.output.length()-1]=='"') {
			m.output.erase(0,1);
			m.output.erase(m.output.length()-1);
		}
		string c = konforka::combine_path(component_basename,m.output);
		member_variables.push_back(member_variable(m._type,m._name,konforka::normalize_path(c,konforka::strip_leading_slash),true,true));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

{
	{WHITESPACE}+	{ }
	{ID}	{
		/* Only a single identifier is accepted before the '='. */
		modus_operandi& m = M();
		if(!m._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		m._name = yytext;
	}
	\=	{
		M().output.clear();
		BEGIN(DERIVECOMPONENT);
	}
}

{
	{WHITESPACE}+	{ }
	\n	{
		/*
		 * End of a derive line: trim and unquote the component path, resolve
		 * it against component_basename and record it as an ancestor class.
		 */
		modus_operandi& m = M();
		string::size_type t = m.output.find_first_not_of(" \t");
		if(t!=string::npos)
			m.output.erase(0,t);
		t = m.output.find_last_not_of(" \t;");
		if(t!=string::npos)
			m.output.erase(t+1);
		/* At least two characters are required for a quoted string; this
		 * also keeps the indexing and the second erase() within bounds. */
		if(m.output.length()>=2 && m.output[0]=='"' && m.output[m.output.length()-1]=='"') {
			m.output.erase(0,1);
			m.output.erase(m.output.length()-1);
		}
		string c = konforka::combine_path(component_basename,m.output);
		ancestor_classes.push_back(ancestor_class(m._name,konforka::normalize_path(c,konforka::strip_leading_slash)));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

{
	{WHITESPACE}+	{
		/*
		 * A word has ended: the previously remembered word (if any) is
		 * appended to the type and the new word is remembered in _lastid.
		 */
		modus_operandi& m = M();
		if(!m.output.empty()) {
			if(!m._lastid.empty()) {
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._lastid = m.output;
			m.output.clear();
		}
	}
	\*	{
		/* The '*' is echoed into the current word first, then the word boundary is handled as for whitespace. */
		modus_operandi& m = M();
		ECHO;
		if(!m._lastid.empty()) {
			if(!m._type.empty()) m._type += ' ';
			m._type += m._lastid;
		}
		m._lastid = m.output;
		m.output.clear();
	}
	\;|\n|\=	{
		/*
		 * The declarator is complete: settle the variable name and type.
		 * Unless an initializer follows ('='), a newline is pushed back so
		 * that the newline rule reached through VARINIT runs right away.
		 */
		modus_operandi& m = M();
		if(m.output.empty()) {
			m._name=m._lastid;
		}else{
			if(!m._lastid.empty()) { // XXX: lastid should never be emtpy, I believe?
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._name=m.output;
			m.output.clear();
		}
		BEGIN(VARINIT);
		if(*yytext!='=')
			unput('\n');
	}
}

{
	\n	{
		/*
		 * End of a variable line: whatever was collected (trimmed of blanks
		 * and trailing ';') is the initializer; a non-empty one sets
		 * have_initializers.
		 */
		modus_operandi& m = M();
		string::size_type t = m.output.find_first_not_of(" \t");
		if(t!=string::npos)
			m.output.erase(0,t);
		t = m.output.find_last_not_of(" \t;");
		if(t!=string::npos)
			m.output.erase(t+1);
		member_variables.push_back(member_variable(m._type,m._name,m.output));
		if(!m.output.empty())
			have_initializers=true;
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

\n	{
	ECHO;
	decl += M().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
\n	{
	ECHO;
	impl += M().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
\n	{
	class_name = M().output;
	modi.pop_front();
	BEGIN(INITIAL);
}

{
	"/*"	{
		/* The comment opener is echoed only when comments are not being dropped. */
		yy_push_state(SLASHSTAR_COMMENT);
		if(!M().devour_comments()) {
			ECHO;
		}
	}
	"//"	{
		yy_push_state(SLASHSLASH_COMMENT);
		if(!M().devour_comments()) {
			ECHO;
		}
	}
	\"	{
		yy_push_state(STRING);
		ECHO;
	}
	\'\\.\'	{
		/* An escaped character literal is copied whole. */
		ECHO;
	}
}

{
	\"	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\"",2);
	\n	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\n",2);
	\r	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\r",2);
	\t	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\t",2);
	\b	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\b",2);
	\a	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\a",2);
	\\	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\\",2);
	.	soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
	{WHITESPACE}+	soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
}

{
	\<\/\%decl\>	{
		/* Each closing tag below is accepted only in the state entered by its opening tag. */
		if(YY_START!=DECLBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		decl += M().output;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%impl\>	{
		if(YY_START!=IMPLBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		impl += M().output;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%constructor\>	{
		/* Recorded as a member function with empty type, name and arguments. */
		if(YY_START!=CONSTRUCTOR)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		member_functions.push_back(member_function("","","",M().output));
		have_constructor = true;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%destructor\>	{
		/* Recorded as a member function named "~" with empty type and arguments. */
		if(YY_START!=DESTRUCTOR)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		member_functions.push_back(member_function("","~","",M().output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%codemethod\>	{
		if(YY_START!=CODEMETHODBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		modus_operandi& m = M();
		member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/%method\>	{
		/* The body is switched back to modus_code before it is stored. */
		if(YY_START!=METHODBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		modus_operandi& m = M();
		m.modify(modus_operandi::modus_code);
		member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/%code\>	{
		if(YY_START!=CODEBLOCK)
			throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		yy_pop_state();
	}
	\n	ECHO;
}

{
	"*/"	{
		/* Leave the comment and push back a single space in its place. */
		if(!M().devour_comments()) {
			ECHO;
		}
		yy_pop_state();
		unput(' ');
	}
	\n	{
		if(!M().devour_comments()) {
			ECHO;
		}
	}
}

{
	\n	{
		/*
		 * End of a `//` comment. Outside the block states listed below the
		 * newline is pushed back so that the enclosing state sees it too.
		 */
		if(!M().devour_comments()) {
			ECHO;
		}
		yy_pop_state();
		if(YY_START!=CODEBLOCK && YY_START!=CODEMETHODBLOCK && YY_START!=IMPLBLOCK && YY_START!=DECLBLOCK)
			unput('\n');
	}
}
.	{
	if(!M().devour_comments()) {
		ECHO;
	}
}

{
	\\.	ECHO;
	\"	ECHO; yy_pop_state();
	.
ECHO;
}
{WHITESPACE}+	{
	if(!(M().flags&modus_operandi::flag_devour_whitespace)) {
		ECHO;
	}
}
%%

/**
 * Set up a parser bound to the given component factory. No initializers or
 * constructor have been seen yet; the base class, base header and skeleton
 * start out with their built-in defaults.
 * @param f the component factory this parser keeps a reference to.
 */
sitecing_parser::sitecing_parser(component_factory& f)
	: factory(f), have_initializers(false), have_constructor(false),
	base_class("sitecing::cgi_component"),
	base_header("sitecing/cgi_component.h"),
	skeleton(__SC_DEFAULT_SKELETON) { }

/**
 * Scan the component source file and hand the collected pieces to the
 * enflesher.
 * @param in path of the source file to read.
 * @throws preprocessor_error if the file cannot be opened or the scanner
 *	returns a non-zero value.
 */
void sitecing_parser::preprocess(const string& in) {
	ifstream ifs(in.c_str(),ios::in);
	if(!ifs.good())
		throw preprocessor_error(CODEPOINT,"failed to open input file");
	input_file = in;
	// the bottom modus collects the top-level page text
	modi.push_front(modus_operandi(0));
	switch_streams(&ifs,NULL);
	if(yylex())
		throw preprocessor_error(CODEPOINT,"unknown error");
	// A "main" pragma makes main() delegate to <value>::main(); without it
	// the text collected by the current modus becomes the body of main().
	pragmas_t::const_iterator mp = pragmas.find("main");
	if(mp==pragmas.end()) {
		member_functions.push_back(member_function("void","main","(int _magic,va_list _args)",M().output));
	}else{
		member_functions.push_back(
				member_function(
					"void","main","(int _magic,va_list _args)",
					mp->second+"::main(_magic,_args);"
				)
			);
	}
	// initializers were seen but no constructor was: add an empty member
	// function with empty type, name and arguments
	if(have_initializers && !have_constructor)
		member_functions.push_back(member_function("","","",""));
	sitecing_enflesher enflesher(*this);
	enflesher.enflesh();
}

/**
 * Append scanner output to the output string of the current modus.
 * @param buf the characters to append.
 * @param size number of characters in buf.
 */
void sitecing_parser::LexerOutput(const char* buf,int size) {
	assert(!modi.empty());
	M().output.append(buf,size);
}

/*
 * Text to emit when switching from one modus (row) to another (column).
 * NULL marks a transition that must never be requested.
 */
static const char * const modus_transitions
	[sitecing_parser::modus_operandi::modi]
	[sitecing_parser::modus_operandi::modi] = {
// To:
//	code	preop			postop			text				From:
	{ "",	"(*(__SCIF->out))",	"(*(__SCIF->out))<<",	"(*(__SCIF->out))<<\"" },	// code
	{ ";",	"",			"<<",			"<<\"" },			// preop
	{ NULL,	NULL,			"",			"\"" },				// postop
	{ "\";", "\"",			"\"<<",			"" }				// text
};

/**
 * Switch this modus to m, appending the glue text for that transition.
 * @param m the modus to switch to.
 * @throws preprocessor_error if the transition table has no entry for the
 *	requested switch (builds with assertions enabled abort first).
 */
void sitecing_parser::modus_operandi::modify(modus_t m) {
	const char * x = modus_transitions[modus][m];
	assert(x);
	// with NDEBUG the assert is gone and appending NULL would be undefined
	if(!x)
		throw preprocessor_error(CODEPOINT,"invalid modus transition");
	output += x;
	modus = m;
}

/**
 * Emit a #line anchor unless the current modus is already in text mode.
 */
void sitecing_parser::soft_anchor() {
	if(M().modus!=modus_operandi::modus_text)
		anchor();
}

/**
 * Append a `#line <number> "<input file>"` directive for the current line to
 * the current output, leaving text mode first if necessary.
 */
void sitecing_parser::anchor() {
	if(M().modus==modus_operandi::modus_text)
		M().modify(modus_operandi::modus_preop);
	M().output += "\n#line ";
	// three decimal digits per byte over-estimates any int; +2 covers the
	// sign and the terminating NUL, so the number is never truncated
	char tmp[sizeof(int)*3+2];
	snprintf(tmp,sizeof(tmp),"%d",lineno());
	M().output += tmp;
	M().output += " \"";
	M().output += input_file;
	M().output += "\"\n";
}
/* vim:set ft=lex: */