-rw-r--r-- | lib/sitecing_parser.ll | 594 |
1 files changed, 594 insertions, 0 deletions
%{
	/*
	 * lib/sitecing_parser.ll -- flex (C++) scanner of the site-C-ing
	 * preprocessor.
	 *
	 * The scanner reads a component source file and sorts its contents into
	 * the containers of sitecing_parser (class_name, decl, impl,
	 * member_variables, member_functions, ancestor_classes).  Scanned text is
	 * not written to a stream: LexerOutput() appends it to the output string
	 * of the innermost modus_operandi, i.e. modi.front().
	 *
	 * Start conditions, as used by the rules below:
	 *  INITIAL               top level text, becomes the body of main()
	 *  CLASSLINE             rest of a %%class line
	 *  DECLLINE, IMPLLINE    rest of a %%decl or %%impl line
	 *  DECLBLOCK, IMPLBLOCK  body of a decl or impl block tag
	 *  VARLINE, VARINIT      %%var declaration and its optional initializer
	 *  IMPORTLINE, IMPORTCOMPONENT          %%import name = path
	 *  IMPORTTYPELINE, IMPORTTYPECOMPONENT  %%import_type name = path
	 *  DERIVELINE, DERIVECOMPONENT          %%derive name = path
	 *  CONSTRUCTOR, DESTRUCTOR              body of the respective tag
	 *  CODEMETHODLINE, CODEMETHODARGS, CODEMETHODBLOCK   codemethod tag
	 *  METHODLINE, METHODARGS, METHODBLOCK               method tag
	 *  INLINE, CODELINE, CODEBLOCK, OUTPUTBLOCK  code embedded into text
	 *  SLASHSTAR_COMMENT, SLASHSLASH_COMMENT, STRING  lexical helpers
	 *
	 * XXX: I have a strong feeling that this parser should be completely rewritten.
	 */
#include <cstdio>
#include <iostream>
#include <fstream>
#include <cassert>
#include <stdexcept>
#include <string>
using namespace std;
#include "sitecing/sitecing_util.h"
#include "sitecing/sitecing_exception.h"
using namespace sitecing;
#define sitecing_parser_flexlexer_once
#include "sitecing/sitecing_parser.h"
#include "sitecing/sitecing_enflesher.h"
#undef yyFlexLexer
#define yyFlexLexer sitecing_parserFlexLexer

/*
 * Strips leading blanks (space, tab) and trailing blanks and semicolons from
 * s in place.  A string made only of such characters becomes empty.
 */
static void trim_statement(string& s) {
	// find_first_not_of yields npos for an all-blank string, which makes
	// erase drop the whole string.
	s.erase(0,s.find_first_not_of(" \t"));
	// npos+1 wraps around to 0, so a string of blanks and semicolons is
	// emptied here as well.
	s.erase(s.find_last_not_of(" \t;")+1);
}

/*
 * Normalizes the component path collected after the = sign of an import,
 * import_type or derive directive: trims it like trim_statement() and then
 * removes one pair of enclosing double quotes.  The length check keeps the
 * quote test from indexing into an empty or one-character string.
 */
static void trim_component_path(string& path) {
	trim_statement(path);
	if(path.length()>=2 && path[0]=='"' && path[path.length()-1]=='"') {
		path.erase(0,1);
		path.erase(path.length()-1);
	}
}
%}
%x SLASHSTAR_COMMENT SLASHSLASH_COMMENT STRING
%x CODELINE CLASSLINE DECLLINE IMPLLINE DECLBLOCK IMPLBLOCK VARLINE VARINIT
%x IMPORTLINE IMPORTCOMPONENT
%x IMPORTTYPELINE IMPORTTYPECOMPONENT
%x DERIVELINE DERIVECOMPONENT
%x CONSTRUCTOR DESTRUCTOR CODEMETHODLINE CODEMETHODARGS
%x CODEMETHODBLOCK INLINE METHODLINE METHODARGS METHODBLOCK CODEBLOCK OUTPUTBLOCK
%option 8bit c++ verbose noyywrap yyclass="sitecing_parser" prefix="sitecing_parser" stack yylineno

WHITESPACE	[ \t]
ID	[A-Za-z_][A-Za-z0-9_]*
NOIDCHAR	[^A-Za-z0-9_]

%%

<INITIAL>{
	^\%\%class{WHITESPACE}+	{
		// Each directive pushes its own modus_operandi, so that the text
		// it collects does not mix with the output of main().
		// TODO: signal error if we already have class name acquired from source.
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments|modus_operandi::flag_devour_whitespace));
		BEGIN(CLASSLINE);
	}
	^\%\%decl{WHITESPACE}+	{
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(DECLLINE);
	}
	^\%\%impl{WHITESPACE}+	{
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(IMPLLINE);
	}
	\<\%decl\>	{
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(DECLBLOCK);
	}
	\<\%impl\>	{
		modi.push_front(modus_operandi(0));
		anchor();
		BEGIN(IMPLBLOCK);
	}
	^\%\%var{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(VARLINE);
	}
	^\%\%import{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(IMPORTLINE);
	}
	^\%\%import_type{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(IMPORTTYPELINE);
	}
	^\%\%derive{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		BEGIN(DERIVELINE);
	}
	\<\%constructor\>	{
		modi.push_front(modus_operandi());
		anchor();
		BEGIN(CONSTRUCTOR);
	}
	\<\%destructor\>	{
		modi.push_front(modus_operandi());
		anchor();
		BEGIN(DESTRUCTOR);
	}
	\<\%codemethod{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(CODEMETHODLINE);
	}
	\<\%method{WHITESPACE}+	{
		modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
		anchor();
		BEGIN(METHODLINE);
	}
	<<EOF>>	{
		// Regular end of input: only the modus pushed by preprocess() may
		// be left.  Switch to preop and terminate the last statement.
		assert(modi.size()==1);
		M().modify(modus_operandi::modus_preop);
		LexerOutput(";",1);
		return 0;
	}
}
<<EOF>>	{
	// End of input in any other start condition means an unterminated
	// directive, tag, comment or string.
	throw preprocessor_error(CODEPOINT,"unexpected end of file",lineno());
}

<CODEBLOCK,CODEMETHODBLOCK>{
	"<%output>"	{
		// Text output nested into code; the matching closing tag pops the
		// state again.
		anchor();
		yy_push_state(OUTPUTBLOCK);
	}
}

<METHODLINE>{
	{WHITESPACE}+	{
		// A blank ends a word.  The word seen before it is appended to
		// the type and the new word becomes the candidate for the name.
		modus_operandi& m = modi.front();
		if(!m.output.empty()) {
			if(!m._lastid.empty()) {
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._lastid = m.output;
			m.output.clear();
		}
	}
	\*	{
		// The asterisk is echoed first, so it ends up at the tail of the
		// word that is then remembered in _lastid.
		modus_operandi& m = modi.front();
		ECHO;
		if(!m._lastid.empty()) {
			if(!m._type.empty()) m._type += ' ';
			m._type += m._lastid;
		}
		m._lastid = m.output;
		m.output.clear();
	}
	\(	{
		// The opening parenthesis ends the declarator: the last word is
		// the method name, everything before it is the return type.
		modus_operandi& m = modi.front();
		if(m.output.empty()) {
			m._name=m._lastid;
		}else{
			if(!m._lastid.empty()) { // XXX: lastid, I believe should never be empty...
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._name = m.output;
			m.output.clear();
		}
		// The parenthesis itself starts the argument list text.
		ECHO;
		BEGIN(METHODARGS);
	}
}
<METHODARGS>{
	\%\>	{
		// End of the opening tag: what was collected since the opening
		// parenthesis is the argument list, the method body follows.
		modus_operandi& m = modi.front();
		m._args = m.output;
		m.output.clear();
		anchor();
		BEGIN(METHODBLOCK);
	}
}

<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
	\<\%{WHITESPACE}+	{
		// Inline expression: emitted in parentheses in postop modus.
		M().modify(modus_operandi::modus_postop);
		anchor();
		LexerOutput("(",1);
		yy_push_state(INLINE);
	}
	^\%{WHITESPACE}	{
		// A line starting with a percent sign and a blank is a code line.
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_push_state(CODELINE);
	}
	\<\%code\>	{
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_push_state(CODEBLOCK);
	}
	"</%output>"	{
		// This rule is shared with INITIAL and METHODBLOCK, where a
		// closing output tag has nothing to close.
		if(YY_START!=OUTPUTBLOCK) throw preprocessor_error(CODEPOINT,"unexpected tag",lineno());
		M().modify(modus_operandi::modus_code);
		anchor();
		yy_pop_state();
	}
}

<INLINE>\%\>	{
	// Close the inline expression.  The modus is assigned directly instead
	// of going through modify(), so no transition text is emitted here.
	LexerOutput(")",1);
	M().modus=modus_operandi::modus_preop;
	yy_pop_state();
}
<CODELINE>\n	{
	// The newline ends the code line and is not copied to the output.
	yy_pop_state();
}

<CODEMETHODLINE>{
	{WHITESPACE}+	{
		// Same declarator splitting as in METHODLINE.
		modus_operandi& m = modi.front();
		if(!m.output.empty()) {
			if(!m._lastid.empty()) {
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._lastid = m.output;
			m.output.clear();
		}
	}
	\*	{
		modus_operandi& m = modi.front();
		ECHO;
		if(!m._lastid.empty()) {
			if(!m._type.empty()) m._type += ' ';
			m._type += m._lastid;
		}
		m._lastid = m.output;
		m.output.clear();
	}
	\(	{
		modus_operandi& m = modi.front();
		if(m.output.empty()) {
			m._name=m._lastid;
		}else{
			if(!m._lastid.empty()) { // XXX: lastid, I believe should never be empty...
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._name = m.output;
			m.output.clear();
		}
		ECHO;
		BEGIN(CODEMETHODARGS);
	}
}
<CODEMETHODARGS>{
	\%\>	{
		modus_operandi& m = modi.front();
		m._args = m.output;
		m.output.clear();
		// Clear flag_devour_comments, so that comments inside the body
		// are copied to the output.
		m.flags=0;
		anchor();
		BEGIN(CODEMETHODBLOCK);
	}
}

<IMPORTLINE>{
	{WHITESPACE}+	{ }
	{ID}	{
		// Exactly one identifier is allowed in front of the = sign.
		if(!modi.front()._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		modi.front()._name = yytext;
	}
	\=	{
		modi.front().output.clear();
		BEGIN(IMPORTCOMPONENT);
	}
}
<IMPORTCOMPONENT>{
	{WHITESPACE}+	{ }
	\n	{
		// End of the directive: register the imported component as a
		// member variable.
		modus_operandi& m = M();
		trim_component_path(m.output);
		string c = combine_path(component_basename,m.output);
		member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

<IMPORTTYPELINE>{
	{WHITESPACE}+	{ }
	{ID}	{
		if(!modi.front()._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		modi.front()._name = yytext;
	}
	\=	{
		modi.front().output.clear();
		BEGIN(IMPORTTYPECOMPONENT);
	}
}
<IMPORTTYPECOMPONENT>{
	{WHITESPACE}+	{ }
	\n	{
		// Same as for import, but with one more true argument passed to
		// member_variable.
		modus_operandi& m = M();
		trim_component_path(m.output);
		string c = combine_path(component_basename,m.output);
		member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true,true));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

<DERIVELINE>{
	{WHITESPACE}+	{ }
	{ID}	{
		if(!modi.front()._name.empty())
			throw preprocessor_error(CODEPOINT,"syntax error",lineno());
		modi.front()._name = yytext;
	}
	\=	{
		modi.front().output.clear();
		BEGIN(DERIVECOMPONENT);
	}
}
<DERIVECOMPONENT>{
	{WHITESPACE}+	{ }
	\n	{
		// End of the directive: register the ancestor class.
		modus_operandi& m = M();
		trim_component_path(m.output);
		string c = combine_path(component_basename,m.output);
		ancestor_classes.push_back(ancestor_class(m._name,normalize_path(c,strip_leading_slash)));
		modi.pop_front();
		BEGIN(INITIAL);
	}
}

<VARLINE>{
	{WHITESPACE}+	{
		// Same declarator splitting as in METHODLINE.
		modus_operandi& m = modi.front();
		if(!m.output.empty()) {
			if(!m._lastid.empty()) {
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._lastid = m.output;
			m.output.clear();
		}
	}
	\*	{
		modus_operandi& m = modi.front();
		ECHO;
		if(!m._lastid.empty()) {
			if(!m._type.empty()) m._type += ' ';
			m._type += m._lastid;
		}
		m._lastid = m.output;
		m.output.clear();
	}
	\;|\n|\=	{
		// Any of these ends the declarator; only the = sign is followed
		// by an initializer.
		modus_operandi& m = modi.front();
		if(m.output.empty()) {
			m._name=m._lastid;
		}else{
			if(!m._lastid.empty()) { // XXX: lastid should never be empty, I believe?
				if(!m._type.empty()) m._type += ' ';
				m._type += m._lastid;
			}
			m._name=m.output;
			m.output.clear();
		}
		BEGIN(VARINIT);
		// Without an initializer, feed a newline back so that VARINIT
		// finishes the declaration right away.
		if(*yytext!='=')
			unput('\n');
	}
}
<VARINIT>{
	\n	{
		// What was collected after the = sign is the initializer.
		modus_operandi& m = modi.front();
		trim_statement(m.output);
		member_variables.push_back(member_variable(m._type,m._name,m.output));
		if(!m.output.empty())
			have_initializers=true;
		modi.pop_front();
		BEGIN(INITIAL);
	}
}
<DECLLINE>\n	{
	// The newline belongs to the collected declaration text.
	ECHO;
	decl += modi.front().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
<IMPLLINE>\n	{
	ECHO;
	impl += modi.front().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
<CLASSLINE>\n	{
	class_name = modi.front().output;
	modi.pop_front();
	BEGIN(INITIAL);
}
<CLASSLINE,DECLLINE,IMPLLINE,VARLINE,VARINIT,IMPORTLINE,IMPORTCOMPONENT,IMPORTTYPELINE,IMPORTTYPECOMPONENT,DERIVELINE,DERIVECOMPONENT,CODEMETHODLINE,CODEMETHODARGS,INLINE,METHODLINE,METHODARGS,DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,CODELINE,CODEBLOCK>{
	"/*"	{
		// Comments are recognized in every code-like state.  The
		// import_type and derive states are listed along with the import
		// ones: all of them are entered with flag_devour_comments set,
		// and a comment must not leak into the component path.
		yy_push_state(SLASHSTAR_COMMENT);
		if(!M().devour_comments()) {
			ECHO;
		}
	}
	"//"	{
		yy_push_state(SLASHSLASH_COMMENT);
		if(!M().devour_comments()) {
			ECHO;
		}
	}
	\"	{
		// String literals are copied verbatim by the STRING state.
		yy_push_state(STRING);
		ECHO;
	}
	\'\\.\'	{
		// An escaped character literal, matched as a whole so that a
		// quote inside of it does not open a string.
		ECHO;
	}
}

<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
	\"	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\"",2);
	\n	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\n",2);
	\r	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\r",2);
	\t	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\t",2);
	\b	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\b",2);
	\a	soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\a",2);
	.	soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
	{WHITESPACE}+	soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
}

<DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,METHODBLOCK,CODEBLOCK>{
	\<\/\%decl\>	{
		// Every closing tag is recognized in every block state, so that a
		// tag closing the wrong block is reported as a mismatch.
		if(YY_START!=DECLBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		decl += modi.front().output;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%impl\>	{
		if(YY_START!=IMPLBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		impl += modi.front().output;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%constructor\>	{
		if(YY_START!=CONSTRUCTOR) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		// The constructor is stored as a member function with empty type,
		// name and arguments.
		member_functions.push_back(member_function("","","",modi.front().output));
		have_constructor = true;
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%destructor\>	{
		if(YY_START!=DESTRUCTOR) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		// The destructor is stored under the name consisting of a tilde.
		member_functions.push_back(member_function("","~","",modi.front().output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/\%codemethod\>	{
		if(YY_START!=CODEMETHODBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		modus_operandi& m = modi.front();
		member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/%method\>	{
		if(YY_START!=METHODBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		modus_operandi& m = modi.front();
		// Return to code modus first, which terminates pending output.
		m.modify(modus_operandi::modus_code);
		member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
		modi.pop_front();
		BEGIN(INITIAL);
	}
	\<\/%code\>	{
		if(YY_START!=CODEBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
		yy_pop_state();
	}
	\n	ECHO;
}

<SLASHSTAR_COMMENT>{
	"*/"	{
		if(!M().devour_comments()) {
			ECHO;
		}
		yy_pop_state();
		// Feed a blank back to the state being returned to, in place of
		// the comment.
		unput(' ');
	}
	\n	{
		if(!M().devour_comments()) {
			ECHO;
		}
	}
}
<SLASHSLASH_COMMENT>{
	\n	{
		if(!M().devour_comments()) {
			ECHO;
		}
		yy_pop_state();
		// Outside of these block states the newline is fed back to the
		// state being returned to, which gets to process it as well.
		if(YY_START!=CODEBLOCK && YY_START!=CODEMETHODBLOCK && YY_START!=IMPLBLOCK && YY_START!=DECLBLOCK)
			unput('\n');
	}
}
<SLASHSTAR_COMMENT,SLASHSLASH_COMMENT>.	{
	if(!M().devour_comments()) {
		ECHO;
	}
}
<STRING>{
	\\.	ECHO;
	\"	ECHO; yy_pop_state();
	.	ECHO;
}

{WHITESPACE}+	{
	// Catch-all for blanks in the states with no whitespace rule of
	// their own.
	if(!(M().flags&modus_operandi::flag_devour_whitespace)) {
		ECHO;
	}
}

%%

/*
 * Constructs a parser bound to the component factory f, with the default
 * base class, base header and skeleton.
 */
sitecing_parser::sitecing_parser(component_factory& f)
	: factory(f), have_initializers(false), have_constructor(false),
	base_class("sitecing::cgi_component"),
	base_header("sitecing/cgi_component.h"),
	skeleton(__SC_DEFAULT_SKELETON) {
	}

/*
 * Preprocesses the component source file in.
 *
 * The file is scanned with yylex(), the top level output collected by the
 * scanner is registered as the body of main(), and the result is handed to
 * sitecing_enflesher.
 *
 * Throws preprocessor_error if the file can not be opened, if yylex()
 * returns non-zero, or from within the scanner rules.
 */
void sitecing_parser::preprocess(const string& in) {
	ifstream ifs(in.c_str(),ios::in);
	if(!ifs.good())
		throw preprocessor_error(CODEPOINT,"failed to open input file");
	input_file = in;
	// The bottom modus collects the top level output of the component.
	modi.push_front(modus_operandi(0));
	switch_streams(&ifs,NULL);
	if(yylex())
		throw preprocessor_error(CODEPOINT,"unknown error");
	member_functions.push_back(member_function("void","main","(int _magic,va_list _args)",M().output));
	// Initializers were seen, but no constructor: register an empty one.
	if(have_initializers && !have_constructor)
		member_functions.push_back(member_function("","","",""));
	sitecing_enflesher enflesher(*this);
	enflesher.enflesh();
}

/*
 * Appends size bytes of buf to the output string of the current modus
 * (this is where ECHO ends up).
 */
void sitecing_parser::LexerOutput(const char* buf,int size) {
	assert(modi.size());
	M().output.append(buf,size);
}

/*
 * The text to emit when switching from one output modus (row) to another
 * (column).  NULL marks the transitions modify() rejects.
 */
static const char *modus_transitions
	[sitecing_parser::modus_operandi::modi]
	[sitecing_parser::modus_operandi::modi] = {
// To:
//	code	preop			postop			text				From:
	{ "",	"(*(__SCIF->out))",	"(*(__SCIF->out))<<",	"(*(__SCIF->out))<<\"" },	// code
	{ ";",	"",			"<<",			"<<\"" },			// preop
	{ NULL,	NULL,			"",			"\"" },				// postop
	{ "\";",	"\"",			"\"<<",			"" }				// text
};

/*
 * Switches the modus to m, appending the text modus_transitions holds for
 * this transition to the output.  A NULL table entry trips the assertion.
 */
void sitecing_parser::modus_operandi::modify(modus_t m) {
	const char * x = modus_transitions[modus][m];
	assert(x);
	output += x;
	modus = m;
}

/*
 * Emits an anchor, unless the current modus is text.
 */
void sitecing_parser::soft_anchor() {
	if(M().modus!=modus_operandi::modus_text)
		anchor();
}

/*
 * Appends a #line directive naming the current input line and file to the
 * current output.  If the current modus is text, it is switched to preop
 * first, which closes the string literal being emitted.
 */
void sitecing_parser::anchor() {
	if(M().modus==modus_operandi::modus_text)
		M().modify(modus_operandi::modus_preop);
	M().output += "\n#line ";
	// Roomy enough for any int, so that the line number is never truncated.
	char tmp[32];
	snprintf(tmp,sizeof(tmp),"%d",lineno());
	M().output += tmp;
	M().output += " \"";
	M().output += input_file;
	M().output += "\"\n";
}
/* vim:set ft=lex: */