summaryrefslogtreecommitdiffabout
path: root/lib/sitecing_parser.ll
Side-by-side diff
Diffstat (limited to 'lib/sitecing_parser.ll') (more/less context) (show whitespace changes)
-rw-r--r--lib/sitecing_parser.ll47
1 files changed, 46 insertions, 1 deletions
diff --git a/lib/sitecing_parser.ll b/lib/sitecing_parser.ll
index 6cb78f3..8ba8673 100644
--- a/lib/sitecing_parser.ll
+++ b/lib/sitecing_parser.ll
@@ -1,594 +1,639 @@
%{
/*
* XXX: I have a strong feeling that this parser should be completely rewritten.
*/
#include <iostream>
#include <fstream>
#include <cassert>
#include <stdexcept>
using namespace std;
#include "sitecing/sitecing_util.h"
#include "sitecing/sitecing_exception.h"
using namespace sitecing;
#define sitecing_parser_flexlexer_once
#include "sitecing/sitecing_parser.h"
#include "sitecing/sitecing_enflesher.h"
#undef yyFlexLexer
#define yyFlexLexer sitecing_parserFlexLexer
%}
%x SLASHSTAR_COMMENT SLASHSLASH_COMMENT STRING
%x CODELINE CLASSLINE DECLLINE IMPLLINE DECLBLOCK IMPLBLOCK VARLINE VARINIT
%x IMPORTLINE IMPORTCOMPONENT
%x IMPORTTYPELINE IMPORTTYPECOMPONENT
%x DERIVELINE DERIVECOMPONENT
%x CONSTRUCTOR DESTRUCTOR CODEMETHODLINE CODEMETHODARGS
%x CODEMETHODBLOCK INLINE METHODLINE METHODARGS METHODBLOCK CODEBLOCK OUTPUTBLOCK
+%x PRAGMALINE
%option 8bit c++ verbose noyywrap yyclass="sitecing_parser" prefix="sitecing_parser" stack yylineno
WHITESPACE [ \t]
ID [A-Za-z_][A-Za-z0-9_]*
NOIDCHAR [^A-Za-z0-9_]
%%
<INITIAL>{
^\%\%class{WHITESPACE}+ {
// "%%class" directive: collect the rest of the line in a fresh modus
// flagged to devour comments and whitespace; the CLASSLINE newline rule
// stores the collected text in class_name.
// TODO: signal error if we already have class name acquired from source.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments|modus_operandi::flag_devour_whitespace));
BEGIN(CLASSLINE);
}
^\%\%decl{WHITESPACE}+ {
// "%%decl" directive: one line of code, prefixed with a #line directive,
// which the DECLLINE newline rule appends to decl.
modi.push_front(modus_operandi(0));
anchor();
BEGIN(DECLLINE);
}
^\%\%impl{WHITESPACE}+ {
// "%%impl" directive: same as "%%decl", but the line ends up in impl.
modi.push_front(modus_operandi(0));
anchor();
BEGIN(IMPLLINE);
}
\<\%decl\> {
// "<%decl>" block: collected until "</%decl>" and appended to decl.
modi.push_front(modus_operandi(0));
anchor();
BEGIN(DECLBLOCK);
}
\<\%impl\> {
// "<%impl>" block: collected until "</%impl>" and appended to impl.
modi.push_front(modus_operandi(0));
anchor();
BEGIN(IMPLBLOCK);
}
^\%\%var{WHITESPACE}+ {
// "%%var" directive: VARLINE/VARINIT split the line into type, name and
// initializer and record a member variable.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
anchor();
BEGIN(VARLINE);
}
^\%\%import{WHITESPACE}+ {
// "%%import name = component": parsed by IMPORTLINE/IMPORTCOMPONENT.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
BEGIN(IMPORTLINE);
}
^\%\%import_type{WHITESPACE}+ {
// "%%import_type name = component": parsed by IMPORTTYPELINE/IMPORTTYPECOMPONENT.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
BEGIN(IMPORTTYPELINE);
}
^\%\%derive{WHITESPACE}+ {
// "%%derive name = component": parsed by DERIVELINE/DERIVECOMPONENT.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
BEGIN(DERIVELINE);
}
\<\%constructor\> {
// "<%constructor>" block: the body becomes a member function when the
// closing tag is seen.
modi.push_front(modus_operandi());
anchor();
BEGIN(CONSTRUCTOR);
}
\<\%destructor\> {
// "<%destructor>" block: handled like the constructor block.
modi.push_front(modus_operandi());
anchor();
BEGIN(DESTRUCTOR);
}
\<\%codemethod{WHITESPACE}+ {
// "<%codemethod ...%>" header: CODEMETHODLINE/CODEMETHODARGS pick up the
// return type, name and argument list.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
anchor();
BEGIN(CODEMETHODLINE);
}
\<\%method{WHITESPACE}+ {
// "<%method ...%>" header: METHODLINE/METHODARGS pick up the return type,
// name and argument list.
modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
anchor();
BEGIN(METHODLINE);
}
+ ^\%\%pragma{WHITESPACE}+ {
// "%%pragma" directive: tokens on the line are stored in pragmas by the
// PRAGMALINE rules.
+ modi.push_front(modus_operandi(modus_operandi::flag_devour_comments));
+ BEGIN(PRAGMALINE);
+ }
<<EOF>> {
// Regular end of input: only the modus pushed by preprocess() may remain.
// Switch it to preop (per modus_transitions this closes an open text
// literal), terminate the statement and stop the scanner; preprocess()
// treats a non-zero yylex() result as an error.
assert(modi.size()==1);
M().modify(modus_operandi::modus_preop);
LexerOutput(";",1);
return 0;
}
}
<<EOF>> throw preprocessor_error(CODEPOINT,"unexpected end of file",lineno());
<CODEBLOCK,CODEMETHODBLOCK>{
"<%output>" {
// Enter an output section nested inside code: emit a #line directive and
// push OUTPUTBLOCK so that "</%output>" can pop back to the enclosing
// code state.
anchor();
yy_push_state(OUTPUTBLOCK);
}
}
+<PRAGMALINE>{
+ {WHITESPACE}+ {
// Whitespace ends one pragma token. "name=value" is stored as
// pragmas[name]=value, a bare "name" as pragmas[name]=name; the token
// buffer is then reset for the next token.
+ modus_operandi& m = M();
+ if(!m.output.empty()) {
+ string::size_type eq = m.output.find('=');
+ if(eq==string::npos) {
+ pragmas[m.output]=m.output;
+ }else{
+ pragmas[m.output.substr(0,eq)] = m.output.substr(eq+1);
+ }
+ m.output.erase();
+ }
+ }
+ \n {
// End of the pragma line: store the last pending token the same way as
// the whitespace rule does, then drop the pragma modus and return to
// INITIAL. anchor() runs after pop_front(), so the #line directive goes
// to whichever modus M() refers to at that point.
+ modus_operandi& m = M();
+ if(!m.output.empty()) {
+ string::size_type eq = m.output.find('=');
+ if(eq==string::npos) {
+ pragmas[m.output]=m.output;
+ }else{
+ pragmas[m.output.substr(0,eq)] = m.output.substr(eq+1);
+ }
+ m.output.erase();
+ }
+ modi.pop_front();
+ BEGIN(INITIAL);
+ anchor();
+ }
+}
+
<METHODLINE>{
{WHITESPACE}+ {
    // A run of blanks closes the token gathered in output: the previously
    // remembered token moves into the type and the new one is remembered.
    modus_operandi& cur = modi.front();
    const bool have_token = !cur.output.empty();
    if(have_token) {
        const bool have_pending = !cur._lastid.empty();
        if(have_pending) {
            if(!cur._type.empty())
                cur._type += ' ';
            cur._type += cur._lastid;
        }
        cur._lastid = cur.output;
        cur.output.clear();
    }
}
\* {
    // Echo the '*' into output first so that it is part of the token that
    // gets remembered below.
    modus_operandi& cur = modi.front();
    ECHO;
    const bool have_pending = !cur._lastid.empty();
    if(have_pending) {
        if(!cur._type.empty())
            cur._type += ' ';
        cur._type += cur._lastid;
    }
    cur._lastid = cur.output;
    cur.output.clear();
}
\( {
    // The opening parenthesis ends the method name: it is either the token
    // still sitting in output or, if output is empty, the remembered one.
    modus_operandi& cur = modi.front();
    if(!cur.output.empty()) {
        if(!cur._lastid.empty()) { // a remembered token is expected here
            if(!cur._type.empty())
                cur._type += ' ';
            cur._type += cur._lastid;
        }
        cur._name = cur.output;
        cur.output.clear();
    }else{
        cur._name = cur._lastid;
    }
    // The '(' itself starts the argument list text.
    ECHO;
    BEGIN(METHODARGS);
}
}
<METHODARGS>{
\%\> {
// "%>" closes the "<%method ...%>" header: everything gathered since the
// opening parenthesis becomes the argument list, and the method body
// starts with a #line directive.
modus_operandi& m = modi.front();
m._args = m.output;
m.output.clear();
anchor();
BEGIN(METHODBLOCK);
}
}
<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
\<\%{WHITESPACE}+ {
// "<% expr %>": switch to the postop modus, emit a #line directive and
// open a parenthesis; the INLINE "%>" rule emits the closing one.
M().modify(modus_operandi::modus_postop);
anchor();
LexerOutput("(",1);
yy_push_state(INLINE);
}
^\%{WHITESPACE} {
// "% code" line: switch to the code modus until the end of the line.
M().modify(modus_operandi::modus_code);
anchor();
yy_push_state(CODELINE);
}
\<\%code\> {
// "<%code>" block: switch to the code modus until "</%code>".
M().modify(modus_operandi::modus_code);
anchor();
yy_push_state(CODEBLOCK);
}
"</%output>" {
// Only valid inside an output section; this rule is also active in
// INITIAL and METHODBLOCK, where the tag is rejected.
if(YY_START!=OUTPUTBLOCK) throw preprocessor_error(CODEPOINT,"unexpected tag",lineno());
M().modify(modus_operandi::modus_code);
anchor();
yy_pop_state();
}
}
<INLINE>\%\> LexerOutput(")",1); M().modus=modus_operandi::modus_preop; yy_pop_state(); /* close the inline expression; modus is set directly, so no transition text is emitted */
<CODELINE>\n yy_pop_state(); /* a "% code" line ends at the newline */
<CODEMETHODLINE>{
{WHITESPACE}+ {
    // A run of blanks closes the token gathered in output: the previously
    // remembered token moves into the type and the new one is remembered.
    modus_operandi& cur = modi.front();
    const bool have_token = !cur.output.empty();
    if(have_token) {
        const bool have_pending = !cur._lastid.empty();
        if(have_pending) {
            if(!cur._type.empty())
                cur._type += ' ';
            cur._type += cur._lastid;
        }
        cur._lastid = cur.output;
        cur.output.clear();
    }
}
\* {
    // Echo the '*' into output first so that it is part of the token that
    // gets remembered below.
    modus_operandi& cur = modi.front();
    ECHO;
    const bool have_pending = !cur._lastid.empty();
    if(have_pending) {
        if(!cur._type.empty())
            cur._type += ' ';
        cur._type += cur._lastid;
    }
    cur._lastid = cur.output;
    cur.output.clear();
}
\( {
    // The opening parenthesis ends the method name: it is either the token
    // still sitting in output or, if output is empty, the remembered one.
    modus_operandi& cur = modi.front();
    if(!cur.output.empty()) {
        if(!cur._lastid.empty()) { // a remembered token is expected here
            if(!cur._type.empty())
                cur._type += ' ';
            cur._type += cur._lastid;
        }
        cur._name = cur.output;
        cur.output.clear();
    }else{
        cur._name = cur._lastid;
    }
    // The '(' itself starts the argument list text.
    ECHO;
    BEGIN(CODEMETHODARGS);
}
}
<CODEMETHODARGS>{
\%\> {
// "%>" closes the "<%codemethod ...%>" header: the gathered text becomes
// the argument list. The modus flags are reset to 0 before the body is
// collected, which clears the devour-comments flag set for the header.
modus_operandi& m = modi.front();
m._args = m.output;
m.output.clear();
m.flags=0;
anchor();
BEGIN(CODEMETHODBLOCK);
}
}
<IMPORTLINE>{
{WHITESPACE}+ { }
{ID} {
// The variable name of "%%import name = component"; a second identifier
// before '=' is a syntax error.
if(!modi.front()._name.empty())
throw preprocessor_error(CODEPOINT,"syntax error",lineno());
modi.front()._name = yytext;
}
\= {
// Everything after '=' is gathered as the component path.
modi.front().output.clear();
BEGIN(IMPORTCOMPONENT);
}
}
<IMPORTCOMPONENT>{
{WHITESPACE}+ { }
\n {
    // End of a "%%import name = component" line: clean up the gathered
    // component path and record the import as a member variable.
    modus_operandi& m = M();
    // Trim leading blanks and trailing blanks/semicolons. When nothing but
    // such characters is present the find call yields npos and the string
    // is emptied, rather than being left untrimmed.
    m.output.erase(0,m.output.find_first_not_of(" \t"));
    const string::size_type last = m.output.find_last_not_of(" \t;");
    m.output.erase(last==string::npos ? 0 : last+1);
    // Strip one pair of surrounding double quotes. A pair needs at least
    // two characters; the length check also keeps the indexing in range
    // for an empty or one-character string.
    const string::size_type len = m.output.length();
    if(len>=2 && m.output[0]=='"' && m.output[len-1]=='"') {
        m.output.erase(len-1);
        m.output.erase(0,1);
    }
    // Resolve the path against component_basename and normalize it.
    string c = combine_path(component_basename,m.output);
    member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true));
    modi.pop_front();
    BEGIN(INITIAL);
}
}
<IMPORTTYPELINE>{
{WHITESPACE}+ { }
{ID} {
// The name of "%%import_type name = component"; a second identifier
// before '=' is a syntax error.
if(!modi.front()._name.empty())
throw preprocessor_error(CODEPOINT,"syntax error",lineno());
modi.front()._name = yytext;
}
\= {
// Everything after '=' is gathered as the component path.
modi.front().output.clear();
BEGIN(IMPORTTYPECOMPONENT);
}
}
<IMPORTTYPECOMPONENT>{
{WHITESPACE}+ { }
\n {
    // End of a "%%import_type name = component" line: clean up the gathered
    // component path and record the import as a member variable (with the
    // extra trailing "true" argument that distinguishes it from "%%import").
    modus_operandi& m = M();
    // Trim leading blanks and trailing blanks/semicolons. When nothing but
    // such characters is present the find call yields npos and the string
    // is emptied, rather than being left untrimmed.
    m.output.erase(0,m.output.find_first_not_of(" \t"));
    const string::size_type last = m.output.find_last_not_of(" \t;");
    m.output.erase(last==string::npos ? 0 : last+1);
    // Strip one pair of surrounding double quotes. A pair needs at least
    // two characters; the length check also keeps the indexing in range
    // for an empty or one-character string.
    const string::size_type len = m.output.length();
    if(len>=2 && m.output[0]=='"' && m.output[len-1]=='"') {
        m.output.erase(len-1);
        m.output.erase(0,1);
    }
    // Resolve the path against component_basename and normalize it.
    string c = combine_path(component_basename,m.output);
    member_variables.push_back(member_variable(m._type,m._name,normalize_path(c,strip_leading_slash),true,true));
    modi.pop_front();
    BEGIN(INITIAL);
}
}
<DERIVELINE>{
{WHITESPACE}+ { }
{ID} {
    // The name of "%%derive name = component"; a second identifier before
    // '=' is a syntax error. The message text matches the one used by the
    // %%import and %%import_type rules.
    if(!modi.front()._name.empty())
        throw preprocessor_error(CODEPOINT,"syntax error",lineno());
    modi.front()._name = yytext;
}
\= {
    // Everything after '=' is gathered as the component path.
    modi.front().output.clear();
    BEGIN(DERIVECOMPONENT);
}
}
<DERIVECOMPONENT>{
{WHITESPACE}+ { }
\n {
    // End of a "%%derive name = component" line: clean up the gathered
    // component path and record the ancestor class.
    modus_operandi& m = M();
    // Trim leading blanks and trailing blanks/semicolons. When nothing but
    // such characters is present the find call yields npos and the string
    // is emptied, rather than being left untrimmed.
    m.output.erase(0,m.output.find_first_not_of(" \t"));
    const string::size_type last = m.output.find_last_not_of(" \t;");
    m.output.erase(last==string::npos ? 0 : last+1);
    // Strip one pair of surrounding double quotes. A pair needs at least
    // two characters; the length check also keeps the indexing in range
    // for an empty or one-character string.
    const string::size_type len = m.output.length();
    if(len>=2 && m.output[0]=='"' && m.output[len-1]=='"') {
        m.output.erase(len-1);
        m.output.erase(0,1);
    }
    // Resolve the path against component_basename and normalize it.
    string c = combine_path(component_basename,m.output);
    ancestor_classes.push_back(ancestor_class(m._name,normalize_path(c,strip_leading_slash)));
    modi.pop_front();
    BEGIN(INITIAL);
}
}
<VARLINE>{
{WHITESPACE}+ {
    // A run of blanks closes the token gathered in output: the previously
    // remembered token moves into the type and the new one is remembered.
    modus_operandi& var = modi.front();
    const bool have_token = !var.output.empty();
    if(have_token) {
        const bool have_pending = !var._lastid.empty();
        if(have_pending) {
            if(!var._type.empty())
                var._type += ' ';
            var._type += var._lastid;
        }
        var._lastid = var.output;
        var.output.clear();
    }
}
\* {
    // Echo the '*' into output first so that it is part of the token that
    // gets remembered below.
    modus_operandi& var = modi.front();
    ECHO;
    const bool have_pending = !var._lastid.empty();
    if(have_pending) {
        if(!var._type.empty())
            var._type += ' ';
        var._type += var._lastid;
    }
    var._lastid = var.output;
    var.output.clear();
}
\;|\n|\= {
    // ';', newline or '=' ends the declarator. Look at the matched
    // character before anything else, well ahead of the unput() below.
    const bool saw_assignment = (*yytext=='=');
    modus_operandi& var = modi.front();
    if(!var.output.empty()) {
        if(!var._lastid.empty()) { // a remembered token is expected here
            if(!var._type.empty())
                var._type += ' ';
            var._type += var._lastid;
        }
        var._name = var.output;
        var.output.clear();
    }else{
        var._name = var._lastid;
    }
    BEGIN(VARINIT);
    // Without an initializer, feed a newline back so that the VARINIT
    // newline rule finishes the declaration right away.
    if(!saw_assignment)
        unput('\n');
}
}
<VARINIT>{
\n {
    // End of a "%%var" line: whatever was gathered after '=' is the
    // initializer of the member variable.
    modus_operandi& m = modi.front();
    // Trim leading blanks and trailing blanks/semicolons. When nothing but
    // such characters is present the find call yields npos and the
    // initializer is emptied, so that e.g. "%%var int x = ;" is not
    // recorded as having an initializer.
    m.output.erase(0,m.output.find_first_not_of(" \t"));
    const string::size_type last = m.output.find_last_not_of(" \t;");
    m.output.erase(last==string::npos ? 0 : last+1);
    member_variables.push_back(member_variable(m._type,m._name,m.output));
    // preprocess() adds an empty constructor when initializers exist and
    // no constructor block was given.
    if(!m.output.empty())
        have_initializers=true;
    modi.pop_front();
    BEGIN(INITIAL);
}
}
<DECLLINE>\n {
// End of a "%%decl" line: keep the newline in the collected text, append
// the text to decl and return to INITIAL.
ECHO;
decl += modi.front().output;
modi.pop_front();
BEGIN(INITIAL);
}
<IMPLLINE>\n {
// End of a "%%impl" line: same as above, but the text goes to impl.
ECHO;
impl += modi.front().output;
modi.pop_front();
BEGIN(INITIAL);
}
<CLASSLINE>\n {
// End of a "%%class" line: the collected text is the class name; the
// newline itself is not included.
class_name = modi.front().output;
modi.pop_front();
BEGIN(INITIAL);
}
-<CLASSLINE,DECLLINE,IMPLLINE,VARLINE,VARINIT,IMPORTLINE,IMPORTCOMPONENT,CODEMETHODLINE,CODEMETHODARGS,INLINE,METHODLINE,METHODARGS,DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,CODELINE,CODEBLOCK>{
+<CLASSLINE,DECLLINE,IMPLLINE,VARLINE,VARINIT,IMPORTLINE,IMPORTCOMPONENT,CODEMETHODLINE,CODEMETHODARGS,INLINE,METHODLINE,METHODARGS,DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,CODELINE,CODEBLOCK,PRAGMALINE>{
"/*" {
// Start of a C-style comment: the comment text is copied to the output
// unless the current modus devours comments.
yy_push_state(SLASHSTAR_COMMENT);
if(!M().devour_comments()) {
ECHO;
}
}
"//" {
// Start of a C++-style comment, handled the same way.
yy_push_state(SLASHSLASH_COMMENT);
if(!M().devour_comments()) {
ECHO;
}
}
\" {
// Start of a string literal: always copied, and scanned in STRING state
// so that its content is not mistaken for comments or tags.
yy_push_state(STRING);
ECHO;
}
\'\\.\' {
// An escaped character literal such as '\"' is copied as a unit, so the
// character inside it is not seen by the other rules.
ECHO;
}
}
<INITIAL,METHODBLOCK,OUTPUTBLOCK>{
\" soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\\"",2); /* literal text: switch to the text modus and emit the character as a two-character C escape */
\n soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\n",2);
\r soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\r",2);
\t soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\t",2);
\b soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\b",2);
\a soft_anchor(); M().modify(modus_operandi::modus_text); LexerOutput("\\a",2);
. soft_anchor(); M().modify(modus_operandi::modus_text); ECHO; /* any other character is copied verbatim; NOTE(review): that includes a literal backslash - confirm this is intended */
{WHITESPACE}+ soft_anchor(); M().modify(modus_operandi::modus_text); ECHO;
}
<DECLBLOCK,IMPLBLOCK,CONSTRUCTOR,DESTRUCTOR,CODEMETHODBLOCK,METHODBLOCK,CODEBLOCK>{
\<\/\%decl\> {
// Every closing tag is recognized in all of the block states listed
// above, so each rule first checks that it closes the block that is
// actually open and throws "tags mismatch" otherwise.
if(YY_START!=DECLBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
decl += modi.front().output;
modi.pop_front();
BEGIN(INITIAL);
}
\<\/\%impl\> {
if(YY_START!=IMPLBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
impl += modi.front().output;
modi.pop_front();
BEGIN(INITIAL);
}
\<\/\%constructor\> {
// The constructor is recorded as a member function with empty type, name
// and arguments; have_constructor keeps preprocess() from adding another.
if(YY_START!=CONSTRUCTOR) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
member_functions.push_back(member_function("","","",modi.front().output));
have_constructor = true;
modi.pop_front();
BEGIN(INITIAL);
}
\<\/\%destructor\> {
// The destructor is recorded with "~" as its name.
if(YY_START!=DESTRUCTOR) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
member_functions.push_back(member_function("","~","",modi.front().output));
modi.pop_front();
BEGIN(INITIAL);
}
\<\/\%codemethod\> {
// Type, name and arguments were collected by the CODEMETHODLINE and
// CODEMETHODARGS rules; output holds the body.
if(YY_START!=CODEMETHODBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
modus_operandi& m = modi.front();
member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
modi.pop_front();
BEGIN(INITIAL);
}
\<\/%method\> {
// A method body can end in any modus; switch to the code modus first so
// that the transition text is appended before the body is stored.
if(YY_START!=METHODBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
modus_operandi& m = modi.front();
m.modify(modus_operandi::modus_code);
member_functions.push_back(member_function(m._type,m._name,m._args,m.output));
modi.pop_front();
BEGIN(INITIAL);
}
\<\/%code\> {
// "<%code>" was entered with yy_push_state(), so pop back to the state
// it was opened from.
if(YY_START!=CODEBLOCK) throw preprocessor_error(CODEPOINT,"tags mismatch",lineno());
yy_pop_state();
}
\n ECHO;
}
<SLASHSTAR_COMMENT>{
"*/" {
// End of a C-style comment: copy the terminator unless comments are
// devoured, return to the enclosing state and push a single space back
// into the input so the comment still separates the surrounding tokens.
if(!M().devour_comments()) {
ECHO;
}
yy_pop_state();
unput(' ');
}
\n {
// Newlines inside the comment are treated like any other comment text.
if(!M().devour_comments()) {
ECHO;
}
}
}
<SLASHSLASH_COMMENT>{
\n {
// A C++-style comment ends at the newline. After popping, the newline is
// pushed back into the input unless the enclosing state is one of the
// four block states tested below, so that the enclosing state's own
// newline rule still sees it.
if(!M().devour_comments()) {
ECHO;
}
yy_pop_state();
if(YY_START!=CODEBLOCK && YY_START!=CODEMETHODBLOCK && YY_START!=IMPLBLOCK && YY_START!=DECLBLOCK)
unput('\n');
}
}
<SLASHSTAR_COMMENT,SLASHSLASH_COMMENT>. {
// Comment text: copied to the output only when the current modus does
// not devour comments.
if(!M().devour_comments()) {
ECHO;
}
}
<STRING>{
\\. ECHO; /* an escape sequence is copied as a unit, so an escaped quote does not end the string */
\" ECHO; yy_pop_state(); /* closing quote: back to the state the string started in */
. ECHO;
}
{WHITESPACE}+ {
// Copy whitespace unless the current modus is flagged to devour it.
// NOTE(review): this rule has no start-condition prefix and every start
// condition declared in this file is exclusive (%x), so flex should only
// activate it in INITIAL, where an earlier {WHITESPACE}+ rule already
// matches - confirm whether <*> or an explicit state list was intended.
if(!(M().flags&modus_operandi::flag_devour_whitespace)) {
ECHO;
}
}
%%
/*
 * Construct a parser bound to the component factory f. No initializers or
 * constructor have been seen yet; the base class, base header and skeleton
 * start out with the default values given below.
 */
sitecing_parser::sitecing_parser(component_factory& f)
: factory(f), have_initializers(false), have_constructor(false),
base_class("sitecing::cgi_component"),
base_header("sitecing/cgi_component.h"),
skeleton(__SC_DEFAULT_SKELETON) {
}
/*
 * Preprocess the component source file named by in.
 *
 * Opens the file, runs the scanner over it with a single base modus on the
 * stack, adds the generated main() member function and, where needed, an
 * empty constructor, then hands the collected data to sitecing_enflesher.
 *
 * Throws preprocessor_error if the file cannot be opened or if yylex()
 * returns non-zero; the scanner rules throw it for syntax problems.
 */
void sitecing_parser::preprocess(const string& in) {
ifstream ifs(in.c_str(),ios::in);
if(!ifs.good())
throw preprocessor_error(CODEPOINT,"failed to open input file");
input_file = in;
// The base modus collects everything that belongs to main(); the INITIAL
// <<EOF>> rule asserts that it is the only one left at the end.
modi.push_front(modus_operandi(0));
switch_streams(&ifs,NULL);
if(yylex())
throw preprocessor_error(CODEPOINT,"unknown error");
// With a "main" pragma, main() only forwards to <pragma value>::main();
// the text collected in the base modus is not used in that case.
+ pragmas_t::const_iterator mp = pragmas.find("main");
+ if(mp==pragmas.end()) {
member_functions.push_back(member_function("void","main","(int _magic,va_list _args)",M().output));
+ }else{
+ member_functions.push_back(
+ member_function(
+ "void","main","(int _magic,va_list _args)",
+ mp->second+"::main(_magic,_args);"
+ )
+ );
+ }
// Variable initializers were seen but no constructor block: add a
// constructor with an empty body.
if(have_initializers && !have_constructor)
member_functions.push_back(member_function("","","",""));
sitecing_enflesher enflesher(*this);
enflesher.enflesh();
}
void sitecing_parser::LexerOutput(const char* buf,int size) {
assert(modi.size());
M().output.append(buf,size);
}
/*
 * Text that modus_operandi::modify() appends to the output when switching
 * from one modus (row) to another (column). A NULL entry marks a
 * transition that modify() asserts against.
 */
static const char *modus_transitions
[sitecing_parser::modus_operandi::modi]
[sitecing_parser::modus_operandi::modi] = {
// To:
// code preop postop text From:
{ "", "(*(__SCIF->out))", "(*(__SCIF->out))<<", "(*(__SCIF->out))<<\"" }, // code
{ ";", "", "<<", "<<\"" }, // preop
{ NULL, NULL, "", "\"" }, // postop
{ "\";", "\"", "\"<<", "" } // text
};
/*
 * Switch this modus to m: append the glue text that modus_transitions
 * lists for the current -> m transition, then record m as the current
 * modus. Transitions with a NULL table entry trip the assertion.
 */
void sitecing_parser::modus_operandi::modify(modus_t m) {
    const char* const glue = modus_transitions[modus][m];
    assert(glue);
    output.append(glue);
    modus = m;
}
/*
 * Emit a #line directive via anchor() unless the current modus is already
 * text, so that a run of literal text is anchored only once, where it
 * starts.
 */
void sitecing_parser::soft_anchor() {
    const bool already_in_text = (M().modus==modus_operandi::modus_text);
    if(already_in_text)
        return;
    anchor();
}
/*
 * Append a "#line <lineno> "<input_file>"" directive, on a line of its
 * own, to the output of the current modus.
 *
 * If the modus is currently text, it is switched to preop first; per
 * modus_transitions this closes the open string literal, so the directive
 * does not end up inside it.
 */
void sitecing_parser::anchor() {
    if(M().modus==modus_operandi::modus_text)
        M().modify(modus_operandi::modus_preop);
    // Roomy enough for any int including sign and terminator; a 7-byte
    // buffer would silently truncate line numbers of seven or more digits
    // and produce a wrong #line directive.
    char tmp[32];
    snprintf(tmp,sizeof(tmp),"%d",lineno());
    M().output += "\n#line ";
    M().output += tmp;
    M().output += " \"";
    // NOTE(review): input_file is inserted as is; a path containing '"' or
    // '\\' would need escaping here - confirm whether that can occur.
    M().output += input_file;
    M().output += "\"\n";
}
/* vim:set ft=lex: */