summaryrefslogtreecommitdiffabout
path: root/lib/sitecing_parser.ll
Side-by-side diff
Diffstat (limited to 'lib/sitecing_parser.ll') (more/less context) (ignore whitespace changes)
-rw-r--r--lib/sitecing_parser.ll594
1 files changed, 594 insertions, 0 deletions
diff --git a/lib/sitecing_parser.ll b/lib/sitecing_parser.ll
new file mode 100644
index 0000000..6cb78f3
--- a/dev/null
+++ b/lib/sitecing_parser.ll
@@ -0,0 +1,594 @@
+%{
+ /*
+ * XXX: I have a strong feeling that this parser should be completely rewritten.
+ */
+#include <iostream>
+#include <fstream>
+#include <cassert>
+#include <stdexcept>
+using namespace std;
+#include "sitecing/sitecing_util.h"
+#include "sitecing/sitecing_exception.h"
+using namespace sitecing;
+#define sitecing_parser_flexlexer_once
+#include "sitecing/sitecing_parser.h"
+#include "sitecing/sitecing_enflesher.h"
+#undef yyFlexLexer
+#define yyFlexLexer sitecing_parserFlexLexer
+%}
+%x SLASHSTAR_COMMENT SLASHSLASH_COMMENT STRING
+%x CODELINE CLASSLINE DECLLINE IMPLLINE DECLBLOCK IMPLBLOCK VARLINE VARINIT
+%x IMPORTLINE IMPORTCOMPONENT
+%x IMPORTTYPELINE IMPORTTYPECOMPONENT
+%x DERIVELINE DERIVECOMPONENT
+%x CONSTRUCTOR DESTRUCTOR CODEMETHODLINE CODEMETHODARGS
+%x CODEMETHODBLOCK INLINE METHODLINE METHODARGS METHODBLOCK CODEBLOCK OUTPUTBLOCK
+%option 8bit c++ verbose noyywrap yyclass="sitecing_parser" prefix="sitecing_parser" stack yylineno
+
+WHITESPACE [ \t]
+ID [A-Za-z_][A-Za-z0-9_]*
+NOIDCHAR [^A-Za-z0-9_]
+
+%%
+
+<INITIAL>{
+ ^\%\%class{WHITESPACE}+ { // %%class line: pushes a modus flagged to devour comments and whitespace, then collects the line in CLASSLINE
+ // TODO: signal error if we already have class name acquired from source.
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments|modus_operandi::flag_devour_whitespace));
+ BEGIN(CLASSLINE);
+ }
+ ^\%\%decl{WHITESPACE}+ { // %%decl line: collected into a fresh modus (no flags) until the newline
+ modi.push_front(modus_operandi(0));
+ anchor();
+ BEGIN(DECLLINE);
+ }
+ ^\%\%impl{WHITESPACE}+ { // %%impl line: collected into a fresh modus (no flags) until the newline
+ modi.push_front(modus_operandi(0));
+ anchor();
+ BEGIN(IMPLLINE);
+ }
+ \<\%decl\> { // opening decl tag: same as the %%decl line, but runs until the closing tag
+ modi.push_front(modus_operandi(0));
+ anchor();
+ BEGIN(DECLBLOCK);
+ }
+ \<\%impl\> { // opening impl tag: same as the %%impl line, but runs until the closing tag
+ modi.push_front(modus_operandi(0));
+ anchor();
+ BEGIN(IMPLBLOCK);
+ }
+ ^\%\%var{WHITESPACE}+ { // %%var line: member variable declaration, parsed in VARLINE and VARINIT
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ anchor();
+ BEGIN(VARLINE);
+ }
+ ^\%\%import{WHITESPACE}+ { // %%import line: name and component path, parsed in IMPORTLINE and IMPORTCOMPONENT
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ BEGIN(IMPORTLINE);
+ }
+ ^\%\%import_type{WHITESPACE}+ { // %%import_type line: parsed in IMPORTTYPELINE and IMPORTTYPECOMPONENT
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ BEGIN(IMPORTTYPELINE);
+ }
+ ^\%\%derive{WHITESPACE}+ { // %%derive line: parsed in DERIVELINE and DERIVECOMPONENT
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ BEGIN(DERIVELINE);
+ }
+ \<\%constructor\> { // opening constructor tag: body is collected into a default-constructed modus
+ modi.push_front(modus_operandi());
+ anchor();
+ BEGIN(CONSTRUCTOR);
+ }
+ \<\%destructor\> { // opening destructor tag: body is collected into a default-constructed modus
+ modi.push_front(modus_operandi());
+ anchor();
+ BEGIN(DESTRUCTOR);
+ }
+ \<\%codemethod{WHITESPACE}+ { // opening codemethod tag: the signature is parsed in CODEMETHODLINE
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ anchor();
+ BEGIN(CODEMETHODLINE);
+ }
+ \<\%method{WHITESPACE}+ { // opening method tag: the signature is parsed in METHODLINE
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ anchor();
+ BEGIN(METHODLINE);
+ }
+ <<EOF>> { // end of input at top level: only the base modus may remain
+ assert(modi.size()==1);
+ M().modify(modus_operandi::modus_preop);
+ LexerOutput(";",1); // terminate whatever statement is still open with a semicolon
+ return 0;
+ }
+}
+<<EOF>> throw preprocessor_error(CODEPOINT,"unexpected end of file",lineno()); // end of input in any state that has no EOF rule of its own is an error
+
+<CODEBLOCK,CODEMETHODBLOCK>{
+ "<%output>" { // output section nested in code: emit a line anchor and stack the current state before entering OUTPUTBLOCK
+ anchor();
+ yy_push_state(OUTPUTBLOCK);
+ }
+}
+
+<METHODLINE>{
+ {WHITESPACE}+ { // a blank ends a token: the previous token moves from _lastid into _type, the one just collected becomes _lastid
+ modus_operandi& m = modi.front();
+ if(!m.output.empty()) {
+ if(!m._lastid.empty()) {
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._lastid = m.output;
+ m.output.clear();
+ }
+ }
+ \* { // a star also ends a token; it is echoed first so it stays attached to the token that becomes _lastid
+ modus_operandi& m = modi.front();
+ ECHO;
+ if(!m._lastid.empty()) {
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._lastid = m.output;
+ m.output.clear();
+ }
+ \( { // opening parenthesis: the last token seen is the method name, everything before it is the type
+ modus_operandi& m = modi.front();
+ if(m.output.empty()) {
+ m._name=m._lastid;
+ }else{
+ if(!m._lastid.empty()) { // XXX: lastid, I believe should never be empty...
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._name = m.output;
+ m.output.clear();
+ }
+ ECHO; // the parenthesis itself starts the argument text collected in METHODARGS
+ BEGIN(METHODARGS);
+ }
+}
+<METHODARGS>{
+ \%\> { // end of the opening method tag: everything collected since the parenthesis becomes the argument list
+ modus_operandi& m = modi.front();
+ m._args = m.output;
+ m.output.clear();
+ anchor();
+ BEGIN(METHODBLOCK);
+ }
+}
+
+<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
+ \<\%{WHITESPACE}+ { // inline expression tag: switch to postop, anchor, open a parenthesis and stack INLINE
+ M().modify(modus_operandi::modus_postop);
+ anchor();
+ LexerOutput("(",1);
+ yy_push_state(INLINE);
+ }
+ ^\%{WHITESPACE} { // percent sign at line start: switch to code modus and treat the rest of the line as code
+ M().modify(modus_operandi::modus_code);
+ anchor();
+ yy_push_state(CODELINE);
+ }
+ \<\%code\> { // opening code tag: switch to code modus until the matching closing tag pops the state
+ M().modify(modus_operandi::modus_code);
+ anchor();
+ yy_push_state(CODEBLOCK);
+ }
+ "</%output>" { // closing output tag: accepted only while in OUTPUTBLOCK, then back to code modus and the stacked state
+ if(YY_START!=OUTPUTBLOCK) throw preprocessor_error(CODEPOINT,"unexpected tag",lineno());
+ M().modify(modus_operandi::modus_code);
+ anchor();
+ yy_pop_state();
+ }
+}
+
+<INLINE>\%\> LexerOutput(")",1); M().modus=modus_operandi::modus_preop; yy_pop_state(); // closes the inline expression; modus is assigned directly instead of via modify(), so no transition text is emitted
+<CODELINE>\n yy_pop_state(); // end of a code line: return to the stacked state without echoing the newline
+
+<CODEMETHODLINE>{
+ {WHITESPACE}+ { // a blank ends a token: the previous token moves from _lastid into _type, the one just collected becomes _lastid
+ modus_operandi& m = modi.front();
+ if(!m.output.empty()) {
+ if(!m._lastid.empty()) {
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._lastid = m.output;
+ m.output.clear();
+ }
+ }
+ \* { // a star also ends a token; it is echoed first so it stays attached to the token that becomes _lastid
+ modus_operandi& m = modi.front();
+ ECHO;
+ if(!m._lastid.empty()) {
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._lastid = m.output;
+ m.output.clear();
+ }
+ \( { // opening parenthesis: the last token seen is the method name, everything before it is the type
+ modus_operandi& m = modi.front();
+ if(m.output.empty()) {
+ m._name=m._lastid;
+ }else{
+ if(!m._lastid.empty()) { // XXX: lastid, I believe should never be empty...
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._name = m.output;
+ m.output.clear();
+ }
+ ECHO; // the parenthesis itself starts the argument text collected in CODEMETHODARGS
+ BEGIN(CODEMETHODARGS);
+ }
+}
+<CODEMETHODARGS>{
+ \%\> { // end of the opening codemethod tag: everything collected since the parenthesis becomes the argument list
+ modus_operandi& m = modi.front();
+ m._args = m.output;
+ m.output.clear();
+ m.flags=0; // drop flag_devour_comments, which was set when the tag was opened
+ anchor();
+ BEGIN(CODEMETHODBLOCK);
+ }
+}
+
+<IMPORTLINE>{
+ {WHITESPACE}+ { }
+ {ID} { // the imported variable name; a second identifier before the equals sign is a syntax error
+ if(!modi.front()._name.empty())
+ throw preprocessor_error(CODEPOINT,"syntax error",lineno());
+ modi.front()._name = yytext;
+ }
+ \= { // equals sign: discard anything echoed so far and start collecting the component path
+ modi.front().output.clear();
+ BEGIN(IMPORTCOMPONENT);
+ }
+}
+<IMPORTCOMPONENT>{
+ {WHITESPACE}+ { }
+ \n {
+  // End of an %%import line: output holds the component path collected after the
+  // equals sign. It is trimmed, unquoted, resolved against component_basename and
+  // recorded as a member variable; then the modus pushed for this line is dropped.
+  modus_operandi& m = M();
+  // Strip leading blanks; erasing up to npos empties a value made only of blanks.
+  m.output.erase(0,m.output.find_first_not_of(" \t"));
+  // Strip trailing blanks and semicolons; if nothing else is left the value is empty.
+  const string::size_type last = m.output.find_last_not_of(" \t;");
+  if(last==string::npos)
+   m.output.clear();
+  else
+   m.output.erase(last+1);
+  // Unquote only when both quote characters are present: a shorter value has no
+  // matching pair and must not be indexed or erased past its end.
+  const string::size_type len = m.output.length();
+  if(len>=2 && m.output[0]=='"' && m.output[len-1]=='"') {
+   m.output.erase(len-1);
+   m.output.erase(0,1);
+  }
+  string c = combine_path(component_basename,m.output);
+  member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true));
+  modi.pop_front();
+  BEGIN(INITIAL);
+ }
+}
+
+<IMPORTTYPELINE>{
+ {WHITESPACE}+ { }
+ {ID} { // the imported type name; a second identifier before the equals sign is a syntax error
+ if(!modi.front()._name.empty())
+ throw preprocessor_error(CODEPOINT,"syntax error",lineno());
+ modi.front()._name = yytext;
+ }
+ \= { // equals sign: discard anything echoed so far and start collecting the component path
+ modi.front().output.clear();
+ BEGIN(IMPORTTYPECOMPONENT);
+ }
+}
+<IMPORTTYPECOMPONENT>{
+ {WHITESPACE}+ { }
+ \n {
+  // End of an %%import_type line: output holds the component path collected after
+  // the equals sign. It is trimmed, unquoted, resolved against component_basename
+  // and recorded as a member variable (with both trailing flags set); then the
+  // modus pushed for this line is dropped.
+  modus_operandi& m = M();
+  // Strip leading blanks; erasing up to npos empties a value made only of blanks.
+  m.output.erase(0,m.output.find_first_not_of(" \t"));
+  // Strip trailing blanks and semicolons; if nothing else is left the value is empty.
+  const string::size_type last = m.output.find_last_not_of(" \t;");
+  if(last==string::npos)
+   m.output.clear();
+  else
+   m.output.erase(last+1);
+  // Unquote only when both quote characters are present: a lone quote used to
+  // pass the old check and then erase past the end of the emptied string.
+  const string::size_type len = m.output.length();
+  if(len>=2 && m.output[0]=='"' && m.output[len-1]=='"') {
+   m.output.erase(len-1);
+   m.output.erase(0,1);
+  }
+  string c = combine_path(component_basename,m.output);
+  member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true,true));
+  modi.pop_front();
+  BEGIN(INITIAL);
+ }
+}
+
+<DERIVELINE>{
+ {WHITESPACE}+ { }
+ {ID} {
+  // The ancestor name; a second identifier before the equals sign is a syntax
+  // error, reported with the same message text as the %%import rules use.
+  if(!modi.front()._name.empty())
+   throw preprocessor_error(CODEPOINT,"syntax error",lineno());
+  modi.front()._name = yytext;
+ }
+ \= {
+  // Equals sign: discard anything echoed so far and start collecting the component path.
+  modi.front().output.clear();
+  BEGIN(DERIVECOMPONENT);
+ }
+}
+<DERIVECOMPONENT>{
+ {WHITESPACE}+ { }
+ \n {
+  // End of a %%derive line: output holds the component path collected after the
+  // equals sign. It is trimmed, unquoted, resolved against component_basename and
+  // recorded as an ancestor class; then the modus pushed for this line is dropped.
+  modus_operandi& m = M();
+  // Strip leading blanks; erasing up to npos empties a value made only of blanks.
+  m.output.erase(0,m.output.find_first_not_of(" \t"));
+  // Strip trailing blanks and semicolons; if nothing else is left the value is empty.
+  const string::size_type last = m.output.find_last_not_of(" \t;");
+  if(last==string::npos)
+   m.output.clear();
+  else
+   m.output.erase(last+1);
+  // Unquote only when both quote characters are present: a lone quote used to
+  // pass the old check and then erase past the end of the emptied string.
+  const string::size_type len = m.output.length();
+  if(len>=2 && m.output[0]=='"' && m.output[len-1]=='"') {
+   m.output.erase(len-1);
+   m.output.erase(0,1);
+  }
+  string c = combine_path(component_basename,m.output);
+  ancestor_classes.push_back(ancestor_class(m._name,normalize_path(c,strip_leading_slash)));
+  modi.pop_front();
+  BEGIN(INITIAL);
+ }
+}
+
+<VARLINE>{
+ {WHITESPACE}+ { // a blank ends a token: the previous token moves from _lastid into _type, the one just collected becomes _lastid
+ modus_operandi& m = modi.front();
+ if(!m.output.empty()) {
+ if(!m._lastid.empty()) {
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._lastid = m.output;
+ m.output.clear();
+ }
+ }
+ \* { // a star also ends a token; it is echoed first so it stays attached to the token that becomes _lastid
+ modus_operandi& m = modi.front();
+ ECHO;
+ if(!m._lastid.empty()) {
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._lastid = m.output;
+ m.output.clear();
+ }
+ \;|\n|\= { // end of the declarator: the last token seen is the variable name, everything before it is the type
+ modus_operandi& m = modi.front();
+ if(m.output.empty()) {
+ m._name=m._lastid;
+ }else{
+ if(!m._lastid.empty()) { // XXX: lastid should never be empty, I believe?
+ if(!m._type.empty()) m._type += ' ';
+ m._type += m._lastid;
+ }
+ m._name=m.output;
+ m.output.clear();
+ }
+ BEGIN(VARINIT);
+ if(*yytext!='=') // no initializer follows: push a newline back so VARINIT finishes the declaration right away
+ unput('\n');
+ }
+}
+<VARINIT>{
+ \n {
+  // End of a %%var line: output holds the initializer text, if any. It is trimmed
+  // and stored with the type and name gathered in VARLINE; a non-empty initializer
+  // sets have_initializers. Then the modus pushed for this line is dropped.
+  modus_operandi& m = modi.front();
+  // Strip leading blanks; erasing up to npos empties a value made only of blanks,
+  // so a stray blank after the equals sign no longer counts as an initializer.
+  m.output.erase(0,m.output.find_first_not_of(" \t"));
+  // Strip trailing blanks and semicolons; if nothing else is left the value is empty.
+  const string::size_type last = m.output.find_last_not_of(" \t;");
+  if(last==string::npos)
+   m.output.clear();
+  else
+   m.output.erase(last+1);
+  member_variables.push_back(member_variable(m._type,m._name,m.output));
+  if(!m.output.empty())
+   have_initializers=true;
+  modi.pop_front();
+  BEGIN(INITIAL);
+ }
+}
+<DECLLINE>\n { // end of a %%decl line: keep the newline, append the collected text to decl and drop the modus
+ ECHO;
+ decl += modi.front().output;
+ modi.pop_front();
+ BEGIN(INITIAL);
+}
+<IMPLLINE>\n { // end of a %%impl line: keep the newline, append the collected text to impl and drop the modus
+ ECHO;
+ impl += modi.front().output;
+ modi.pop_front();
+ BEGIN(INITIAL);
+}
+<CLASSLINE>\n { // end of a %%class line: the collected text becomes class_name; the newline is not echoed
+ class_name = modi.front().output;
+ modi.pop_front();
+ BEGIN(INITIAL);
+}
+<CLASSLINE,DECLLINE,IMPLLINE,VARLINE,VARINIT,IMPORTLINE,IMPORTCOMPONENT,IMPORTTYPELINE,IMPORTTYPECOMPONENT,DERIVELINE,DERIVECOMPONENT,CODEMETHODLINE,CODEMETHODARGS,INLINE,METHODLINE,METHODARGS,DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,CODELINE,CODEBLOCK>{
+ "/*" {
+  // Recognise comments and string literals in every state that carries C++ source
+  // or a directive line. The import_type and derive states are part of the list
+  // because their modus is pushed with flag_devour_comments, just like %%import;
+  // without them a trailing comment would end up inside the component path.
+  // The comment opener is echoed only when the current modus keeps comments.
+  yy_push_state(SLASHSTAR_COMMENT);
+  if(!M().devour_comments()) {
+   ECHO;
+  }
+ }
+ "//" {
+  // Same policy for comments that run to the end of the line.
+  yy_push_state(SLASHSLASH_COMMENT);
+  if(!M().devour_comments()) {
+   ECHO;
+  }
+ }
+ \" {
+  // A string literal is copied verbatim; STRING keeps comment openers inside it
+  // from being mistaken for comments.
+  yy_push_state(STRING);
+  ECHO;
+ }
+ \'\\.\' {
+  // An escaped character literal is copied as one token, so an escaped quote
+  // inside it cannot open a string.
+  ECHO;
+ }
+}
+
+<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
+ \" soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\"",2); // literal text: a double quote is emitted backslash-escaped
+ \n soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\n",2); // newline is emitted as its two-character escape
+ \r soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\r",2); // carriage return is emitted as its two-character escape
+ \t soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\t",2); // single tab is emitted as its two-character escape
+ \b soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\b",2); // backspace is emitted as its two-character escape
+ \a soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\a",2); // bell is emitted as its two-character escape
+ . soft_anchor(); M().modify(modus_operandi::modus_text); ECHO; // any other character is copied as is. NOTE(review): a literal backslash is not escaped here - confirm that is intended
+ {WHITESPACE}+ soft_anchor(); M().modify(modus_operandi::modus_text); ECHO; // a run of two or more blanks or tabs is copied unescaped in one go
+}
+
+<DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,METHODBLOCK,CODEBLOCK>{
+ \<\/\%decl\> { // closing decl tag: must match the current state; the collected text is appended to decl
+ if(YY_START!=DECLBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ decl += modi.front().output;
+ modi.pop_front();
+ BEGIN(INITIAL);
+ }
+ \<\/\%impl\> { // closing impl tag: must match the current state; the collected text is appended to impl
+ if(YY_START!=IMPLBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ impl += modi.front().output;
+ modi.pop_front();
+ BEGIN(INITIAL);
+ }
+ \<\/\%constructor\> { // closing constructor tag: body is stored as a member function with empty type, name and args
+ if(YY_START!=CONSTRUCTOR) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ member_functions.push_back(member_function("","","",modi.front().output));
+ have_constructor = true;
+ modi.pop_front();
+ BEGIN(INITIAL);
+ }
+ \<\/\%destructor\> { // closing destructor tag: body is stored as a member function whose name is a lone tilde
+ if(YY_START!=DESTRUCTOR) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ member_functions.push_back(member_function("","~","",modi.front().output));
+ modi.pop_front();
+ BEGIN(INITIAL);
+ }
+ \<\/\%codemethod\> { // closing codemethod tag: body is stored with the type, name and args parsed from the opening tag
+ if(YY_START!=CODEMETHODBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ modus_operandi& m = modi.front();
+ member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
+ modi.pop_front();
+ BEGIN(INITIAL);
+ }
+ \<\/%method\> { // closing method tag: like codemethod, but the modus is first switched back to code
+ if(YY_START!=METHODBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ modus_operandi& m = modi.front();
+ m.modify(modus_operandi::modus_code); // appends the transition text for leaving the current modus before the body is stored
+ member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
+ modi.pop_front();
+ BEGIN(INITIAL);
+ }
+ \<\/%code\> { // closing code tag: return to whichever state stacked CODEBLOCK
+ if(YY_START!=CODEBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
+ yy_pop_state();
+ }
+ \n ECHO; // newlines inside these blocks are copied through unchanged (in METHODBLOCK the earlier text rule for newline is listed first)
+}
+
+<SLASHSTAR_COMMENT>{
+ "*/" { // end of the block comment: echoed only if comments are kept, then back to the stacked state
+ if(!M().devour_comments()) {
+ ECHO;
+ }
+ yy_pop_state();
+ unput(' '); // push a blank back into the input so the resumed state sees a separator where the comment was
+ }
+ \n { // newlines inside the comment follow the same keep-or-devour policy
+ if(!M().devour_comments()) {
+ ECHO;
+ }
+ }
+}
+<SLASHSLASH_COMMENT>{
+ \n { // the newline ends the comment: echoed only if comments are kept, then back to the stacked state
+ if(!M().devour_comments()) {
+ ECHO;
+ }
+ yy_pop_state();
+ if(YY_START!=CODEBLOCK && YY_START!=CODEMETHODBLOCK && YY_START!=IMPLBLOCK && YY_START!=DECLBLOCK) // outside these four block states the resumed state must still see the newline
+ unput('\n');
+ }
+}
+<SLASHSTAR_COMMENT,SLASHSLASH_COMMENT>. { // any other character inside a comment: copied only when the current modus keeps comments
+ if(!M().devour_comments()) {
+ ECHO;
+ }
+}
+<STRING>{
+ \\. ECHO; // backslash escape: both characters are copied, so an escaped quote does not end the string
+ \" ECHO; yy_pop_state(); // closing quote: copied, then back to the stacked state
+ . ECHO; // any other character is copied verbatim
+}
+
+<INITIAL,CLASSLINE>{WHITESPACE}+ {
+ // Whitespace is echoed unless the current modus asks for it to be devoured.
+ // The rule must name CLASSLINE explicitly: every start condition in this file
+ // is exclusive, so a rule without a start condition is active in INITIAL only,
+ // where the text rule with the same pattern is listed first and always wins.
+ // CLASSLINE is the state whose modus is pushed with flag_devour_whitespace.
+ if(!(M().flags&modus_operandi::flag_devour_whitespace)) {
+  ECHO;
+ }
+}
+
+%%
+
+// Builds a parser bound to component factory f. No initializers or constructor
+// have been seen yet; base class, base header and skeleton start at their defaults.
+sitecing_parser::sitecing_parser(component_factory& f)
+ : factory(f),
+   have_initializers(false),
+   have_constructor(false),
+   base_class("sitecing::cgi_component"),
+   base_header("sitecing/cgi_component.h"),
+   skeleton(__SC_DEFAULT_SKELETON)
+{
+}
+
+// Runs the scanner over the file named by in and hands the result to the enflesher.
+// Throws preprocessor_error if the file cannot be opened or yylex() returns non-zero.
+void sitecing_parser::preprocess(const string& in) {
+ ifstream source(in.c_str(),ios::in);
+ if(!source.good())
+  throw preprocessor_error(CODEPOINT,"failed to open input file");
+ input_file = in;
+ // The base modus collects everything that ends up in the body of main().
+ modi.push_front(modus_operandi(0));
+ switch_streams(&source,NULL);
+ const int scan_result = yylex();
+ if(scan_result!=0)
+  throw preprocessor_error(CODEPOINT,"unknown error");
+ member_functions.push_back(member_function("void","main","(int _magic,va_list _args)",M().output));
+ // Initializers were seen but no constructor tag: add a member function with
+ // empty type, name, args and body.
+ const bool needs_constructor = have_initializers && !have_constructor;
+ if(needs_constructor)
+  member_functions.push_back(member_function("","","",""));
+ sitecing_enflesher builder(*this);
+ builder.enflesh();
+}
+
+// Scanner output hook: appends the size bytes at buf to the output buffer of the
+// current modus. At least one modus must be on the stack.
+void sitecing_parser::LexerOutput(const char* buf,int size) {
+ assert(modi.size()>0);
+ modus_operandi& current = M();
+ current.output.append(buf,size);
+}
+
+static const char *modus_transitions // glue text appended by modify() when the modus changes; indexed [current modus][new modus]
+ [sitecing_parser::modus_operandi::modi]
+ [sitecing_parser::modus_operandi::modi] = {
+// Columns are the modus being switched to:
+// code preop postop text; each row is the modus being left:
+ { "", "(*(__SCIF->out))", "(*(__SCIF->out))<<", "(*(__SCIF->out))<<\"" }, // code
+ { ";", "", "<<", "<<\"" }, // preop
+ { NULL, NULL, "", "\"" }, // postop (NULL marks a transition that modify() rejects with an assert)
+ { "\";", "\"", "\"<<", "" } // text
+};
+
+// Switches this modus to m, appending the glue text that modus_transitions holds
+// for the (current, new) pair. A NULL table entry trips the assert.
+void sitecing_parser::modus_operandi::modify(modus_t m) {
+ const char * const glue = modus_transitions[modus][m];
+ assert(glue);
+ output.append(glue);
+ modus = m;
+}
+
+// Emits a line anchor unless the current modus is already text.
+void sitecing_parser::soft_anchor() {
+ const bool already_in_text = (M().modus==modus_operandi::modus_text);
+ if(already_in_text)
+  return;
+ anchor();
+}
+// Appends a "#line <lineno> "<input_file>"" directive, on a line of its own, to
+// the output of the current modus so that compiler diagnostics point back at the
+// source being preprocessed.
+void sitecing_parser::anchor() {
+ // A directive cannot sit inside a string literal: leave text modus first, which
+ // appends the closing quote via the transition table.
+ if(M().modus==modus_operandi::modus_text)
+  M().modify(modus_operandi::modus_preop);
+ M().output += "\n#line ";
+ // Large enough for any int; the former 7-byte buffer silently truncated line
+ // numbers of seven or more digits, producing a wrong directive.
+ char tmp[32];
+ snprintf(tmp,sizeof(tmp),"%d",lineno());
+ M().output += tmp;
+ M().output += " \"";
+ M().output += input_file;
+ M().output += "\"\n";
+}
+/* vim:set ft=lex: */