-rw-r--r-- | src/cliche.rl | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/src/cliche.rl b/src/cliche.rl new file mode 100644 index 0000000..1f1f07f --- a/dev/null +++ b/src/cliche.rl | |||
@@ -0,0 +1,331 @@ | |||
#include <getopt.h>
#include <strings.h>

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "config.h"
9 | |||
/**
 * Incremental line counter over a character buffer.
 *
 * Remembers the position up to which newlines have already been counted
 * (`p`) and the line number at that position (`n`), so that repeated
 * queries only scan the text between the previous and the new position.
 */
struct line_counting_monkey {
	int n;		/* line number at position p, starting from 1 */
	const char *p;	/* position up to which '\n' characters were counted */
	const char *fn;	/* file name handed to the constructor, stored verbatim */

	/* Initializers are listed in declaration order, which is the order
	 * in which they actually run. */
	line_counting_monkey(const char *fn_,const char *p_) : n(1), p(p_), fn(fn_) { }

	/**
	 * Returns the line number at position pp.
	 *
	 * When pp lies beyond the remembered position the newlines in
	 * [p,pp) are added to the count and pp becomes the new remembered
	 * position. A null pp or a pp that is not beyond p leaves the state
	 * untouched and yields the current count.
	 */
	int lineno(const char *pp) {
		/* Ordering a null pointer against a buffer pointer with '>' is
		 * unspecified, so null is rejected explicitly. */
		if(pp && pp>p) {
			n += static_cast<int>(std::count(p,pp,'\n'));
			p = pp;
		}
		return n;
	}
};
25 | |||
26 | struct code_writing_monkey { | ||
27 | std::ostream& o; | ||
28 | enum omode_type { | ||
29 | om_code = 0, om_output, om_inline, om_literal, | ||
30 | oms | ||
31 | }; | ||
32 | omode_type omode; | ||
33 | int last_anchor, since_anchor; | ||
34 | |||
35 | code_writing_monkey(std::ostream& o_) : o(o_), omode(om_code), last_anchor(-1), since_anchor(0) { } | ||
36 | |||
37 | void modify(omode_type om,line_counting_monkey* lcm=0,const char *p=0) { | ||
38 | static const char *om_transitions[oms][oms] = { | ||
39 | // To: From: | ||
40 | // code output inline literal | ||
41 | { "", "CLICHE_OUTPUT_LITERAL(\n", "CLICHE_STREAM << (", "" }, // code | ||
42 | { ");\n", "", ");\n(CLICHE_STREAM) << (", 0 }, // output | ||
43 | { ");\n", ");\nCLICHE_OUTPUT_LITERAL(\n", "", 0 }, // inline | ||
44 | { " ", 0, 0, "" }, // literal | ||
45 | }; | ||
46 | assert(0 <= omode && omode < oms); | ||
47 | assert(0 <= om && om < oms); | ||
48 | const char *t = om_transitions[omode][om]; | ||
49 | assert(t); // TODO: complain? | ||
50 | o << t; | ||
51 | since_anchor += std::count(t,t+strlen(t),'\n'); | ||
52 | if(lcm && t && *t && om!=omode && p) anchor(*lcm,p); | ||
53 | omode = om; | ||
54 | } | ||
55 | |||
56 | void prologue() { | ||
57 | assert(omode==om_code); | ||
58 | o << | ||
59 | "#ifndef CLICHE_STREAM\n" | ||
60 | "# define CLICHE_STREAM (std::cout)\n" | ||
61 | "# define CLICHE_STREAM_AUTODEFINED\n" | ||
62 | "#endif\n" | ||
63 | "#ifndef CLICHE_OUTPUT_LITERAL\n" | ||
64 | "# define CLICHE_OUTPUT_LITERAL(sl) (CLICHE_STREAM).write((sl),sizeof(sl)-sizeof(\"\"))\n" | ||
65 | "#endif\n"; | ||
66 | } | ||
67 | void epilogue() { | ||
68 | modify(om_code); | ||
69 | o << "\n" | ||
70 | "#ifdef CLICHE_STREAM_AUTODEFINED\n" | ||
71 | "# undef CLICHE_STREAM\n" | ||
72 | "# undef CLICHE_STREAM_AUTODEFINED\n" | ||
73 | "#endif\n"; | ||
74 | } | ||
75 | |||
76 | void monkey(const char *d,size_t l=0) { | ||
77 | if(!(l || (l=strlen(d)))) return; | ||
78 | if(omode!=om_output && omode!=om_literal) { | ||
79 | since_anchor += std::count(d,d+l,'\n'); | ||
80 | o.write(d,l); | ||
81 | return; | ||
82 | } | ||
83 | o.put('"'); | ||
84 | const char *p=d; | ||
85 | while(l--) { | ||
86 | char c; | ||
87 | switch(*d) { | ||
88 | case '\r': c='r'; break; | ||
89 | case '\n': c='n'; break; | ||
90 | case '\t': c='t'; break; | ||
91 | case '\a': c='a'; break; | ||
92 | case '\b': c='b'; break; | ||
93 | case '\v': c='v'; break; | ||
94 | case '\f': c='f'; break; | ||
95 | case '\'': case '\"': case '\\': c=*d; break; | ||
96 | case 0: c='0'; break; | ||
97 | default: c=0; break; | ||
98 | }; | ||
99 | if(!c) { | ||
100 | ++d; | ||
101 | continue; | ||
102 | } | ||
103 | if(p!=d) o.write(p,d-p); | ||
104 | o.put('\\'); | ||
105 | if(c=='0') | ||
106 | o.write("000",3); | ||
107 | else | ||
108 | o.put(c); | ||
109 | p=++d; | ||
110 | } | ||
111 | if(p!=d) o.write(p,d-p); | ||
112 | o.write("\"\n",2); ++since_anchor; | ||
113 | } | ||
114 | |||
115 | void monkey_as(omode_type om,const char *d,size_t l=0,line_counting_monkey *lcm=0,const char *p=0) { modify(om,lcm,p); monkey(d,l); } | ||
116 | |||
117 | void anchor(line_counting_monkey& lcm,const char *p) { | ||
118 | // modify(om_code); | ||
119 | int l = lcm.lineno(p); | ||
120 | if(last_anchor>0 && last_anchor+since_anchor==l) return; | ||
121 | o << "\n#line " << (since_anchor=0,last_anchor=l) << " \"" << lcm.fn << "\"\n"; | ||
122 | } | ||
123 | }; | ||
124 | |||
125 | |||
%%{
	machine cliche;

	# The actions below are expanded inside main() and use its locals:
	# cwm (code writer), lcm (line counter), topmode, stack and top.

	linebreak = /[\r\n]/;

	# Runs before every fcall: refuse to push past the end of the call
	# stack instead of writing out of bounds on deeply nested input.
	prepush {
		if(top >= static_cast<int>(sizeof(stack)/sizeof(*stack))) {
			std::cerr << "cliche: blocks nested too deeply" << std::endl;
			exit(1);
		}
	}

	# Write the current token without changing the output mode.
	action monkey {
		cwm.monkey(ts,te-ts);
	}
	# Write the current token after switching to the named mode.
	action monkey_code {
		cwm.monkey_as(cwm.om_code,ts,te-ts,&lcm,p);
	}
	action monkey_output {
		cwm.monkey_as(cwm.om_output,ts,te-ts,&lcm,p);
	}
	action monkey_literal {
		cwm.monkey_as(cwm.om_literal,ts,te-ts,&lcm,p);
	}

	# Entered by fcall after the opening "/*" has been written: copies
	# every character through and returns on the closing "*/".
	slashstar_comment :=
		( any* :>> '*/' ) ${ cwm.monkey(fpc,1); } @{ fret; };

	# Output block: a line starting with '%' is code (the '%' itself is
	# dropped), anything else is handled one line at a time by outputline.
	outputblock := |*
		'%' (^linebreak)* linebreak { cwm.monkey_as(cwm.om_code,ts+1,te-ts-1,&lcm,p); };
		any=> { fhold; fcall outputline; };

	*|;
	outputline := |*
		(^linebreak)* linebreak -- ('</%output>' | '<%code>' | ('<%' space) ) { cwm.monkey_as(cwm.om_output,ts,te-ts,&lcm,p); fret; };
		'<%code>' { cwm.modify(cwm.om_code,&lcm,p); fcall codeblock; };
		# Leaves both outputline and the enclosing outputblock. When the
		# output block is the top level there is no caller to return to:
		# popping would read below the stack, so the stray tag is skipped.
		'</%output>' { if(top>1) { --top; fret; } };
		'<%' space { cwm.modify(cwm.om_inline,&lcm,p); fcall inlineblock; };
		(^linebreak)+ -- ( '%' | '<' ) => monkey_output;
		any => monkey_output;
	*|;

	# Inline expression: copied through until " %>"; quoted strings and
	# comments are matched as whole tokens.
	inlineblock := |*
		space '%>' { cwm.modify(cwm.om_code,&lcm,p); fret; };
		"'" ( [^'\\] | /\\./ )* "'" => monkey;
		'"' ( [^"\\] | /\\./ )* '"' => monkey;
		'/*' { cwm.monkey("/*",2); fcall slashstar_comment; };
		'//' (^linebreak)* (linebreak) => monkey;
		any => monkey;
	*|;

	# Literal block: handled one line at a time by literalline.
	literalblock := |*
		any => { fhold; fcall literalline; };
	*|;
	literalline := |*
		(^linebreak)* linebreak -- ('</%literal>' | ('<%' space) ) { cwm.monkey_as(cwm.om_literal,ts,te-ts,&lcm,p); fret; };
		# Leaves both literalline and the enclosing literalblock.
		'</%literal>' { --top; fret; };
		'<%' space { cwm.modify(cwm.om_code,&lcm,p); fcall inlineblock; };
		(^linebreak)+ -- ( '%' | '<' ) => monkey_literal;
		any => monkey_literal;
	*|;

	# Code block: copied through as code; quoted strings and comments are
	# matched as whole tokens so that tags inside them are not recognized.
	codeblock := |*
		'<%output>'{ fcall outputblock; };
		'<%literal>'{ fcall literalblock; };
		# When the code block is the top level there is nothing to return
		# to; a stray closing tag is skipped rather than popping an empty
		# stack.
		'</%code>'{ if(top>0) { fret; } };
		"'" ( [^'\\] | /\\./ )* "'" => monkey_code;
		'"' ( [^"\\] | /\\./ )* '"' => monkey_code;
		'/*' { cwm.monkey("/*",2); fcall slashstar_comment; };
		'//' (^linebreak)* (linebreak) => monkey_code;
		any => monkey_code;
	*|;

	# Entry point: re-reads the first character in the scanner selected
	# by the top-level mode.
	main := any >{
		fhold;
		switch(topmode) {
			case code_writing_monkey::om_output: fgoto outputblock;
			case code_writing_monkey::om_code: fgoto codeblock;
			default: ;/* TODO: WTD? */
		};
	};
}%%

%% write data;
203 | |||
204 | static const char *biname = 0; | ||
205 | static void display_usage() { | ||
206 | std::cerr << PACKAGE " Version " VERSION "\n" | ||
207 | "Copyright (c) 2011 Klever Group\n" | ||
208 | "\n" | ||
209 | " " << biname << " [otpions] [input-file]\n" | ||
210 | "\n" | ||
211 | #ifdef HAVE_GETOPT_LONG | ||
212 | " -h, --help\n" | ||
213 | " --usage display this text\n" | ||
214 | " -V, --version display version number\n" | ||
215 | " -L, --license show license\n" | ||
216 | " -o <file>, --output=<file> write output to the named file\n" | ||
217 | " -t code|output, --top=code|output\n" | ||
218 | #else | ||
219 | " -h display this text\n" | ||
220 | " -V display version number\n" | ||
221 | " -L show license\n" | ||
222 | " -o <file> write output to the named file\n" | ||
223 | " -t code|output\n" | ||
224 | #endif | ||
225 | " set toplevel processing mode [output]\n" | ||
226 | " -C same as -t=code\n" | ||
227 | " -O same as -t=output (default)\n" | ||
228 | "\n"; | ||
229 | } | ||
230 | |||
231 | int main(int argc,char *argv[]) { | ||
232 | biname = *argv; | ||
233 | std::string ofile; | ||
234 | code_writing_monkey::omode_type topmode = code_writing_monkey::om_output; | ||
235 | while(true) { | ||
236 | static const char shopts[] = "hVLo:t:CO"; | ||
237 | #if HAVE_GETOPT_LONG | ||
238 | static struct option opts[] = { | ||
239 | { "help", no_argument, 0, 'h' }, | ||
240 | { "usage", no_argument, 0, 'h' }, | ||
241 | { "version", no_argument, 0, 'V' }, | ||
242 | { "license", no_argument, 0, 'L' }, | ||
243 | { "output", required_argument, 0, 'o' }, | ||
244 | { "top", required_argument, 0, 't' }, | ||
245 | { NULL, 0, 0, 0 } | ||
246 | }; | ||
247 | int c = getopt_long(argc,argv,shopts,opts,NULL); | ||
248 | #else | ||
249 | int c = getopt(argc,argv,shopts); | ||
250 | #endif | ||
251 | if(c==-1) break; | ||
252 | switch(c) { | ||
253 | case 't': | ||
254 | if(!strcasecmp(optarg,"code")) { | ||
255 | topmode = code_writing_monkey::om_code; | ||
256 | break; | ||
257 | }else if(!strcasecmp(optarg,"output")) { | ||
258 | topmode = code_writing_monkey::om_output; | ||
259 | break; | ||
260 | } | ||
261 | std::cerr << "Unkown '" << optarg << "' mode" << std::endl; | ||
262 | case '?': /* unknown option */ | ||
263 | case 'h': display_usage(); exit(0); break; | ||
264 | case 'V': std::cerr << VERSION << std::endl; exit(0); break; | ||
265 | case 'L': | ||
266 | extern const char *COPYING; | ||
267 | std::cerr << COPYING << std::endl; | ||
268 | exit(0); break; | ||
269 | case 'o': ofile = optarg; break; | ||
270 | case 'C': topmode = code_writing_monkey::om_code; break; | ||
271 | case 'O': topmode = code_writing_monkey::om_output; break; | ||
272 | default: | ||
273 | std::cerr << "Huh?" << std::endl; | ||
274 | exit(1); break; | ||
275 | } | ||
276 | } | ||
277 | #undef LS | ||
278 | if((optind+1)!=argc) { | ||
279 | display_usage(); exit(1); | ||
280 | /* TODO: or use stdin if no parameter specified? */ | ||
281 | } | ||
282 | |||
283 | std::string ifile = argv[optind]; | ||
284 | if(ofile.empty()) ofile = ifile+".cc"; | ||
285 | std::ifstream ist(ifile.c_str(),std::ios::in); | ||
286 | std::ofstream ost(ofile.c_str(),std::ios::out); | ||
287 | if(!ost) { | ||
288 | std::cerr << "failed to open '" << ofile << "' for writing" << std::endl; | ||
289 | exit(2); | ||
290 | } | ||
291 | |||
292 | int cs, act; | ||
293 | char *ts, *te; | ||
294 | int stack[128], top=0; | ||
295 | %% write init; | ||
296 | char input[512]; | ||
297 | int have = 0; | ||
298 | char *eof = 0; | ||
299 | code_writing_monkey cwm(ost); | ||
300 | cwm.prologue(); | ||
301 | line_counting_monkey lcm(ifile.c_str(),input); | ||
302 | cwm.anchor(lcm,0); | ||
303 | while(!eof) { | ||
304 | if(have==sizeof(input)) { | ||
305 | std::cerr << "No space to read in" << std::endl; | ||
306 | break; | ||
307 | } | ||
308 | char *p = input+have; | ||
309 | int lw = sizeof(input)-have; | ||
310 | int lp = ist.read(p,lw).gcount(); | ||
311 | char *pe = p+lp; | ||
312 | eof = (lp==lw)?0:pe; | ||
313 | %%write exec; | ||
314 | if(cs==cliche_error) { | ||
315 | std::cerr << "cliche error" << std::endl; | ||
316 | break; | ||
317 | } | ||
318 | if(ts) { | ||
319 | lcm.lineno(ts); | ||
320 | te = ((char*)memmove(input,ts,have=pe-ts)) + (te-ts); | ||
321 | ts = input; | ||
322 | }else{ | ||
323 | lcm.lineno(pe); | ||
324 | have = 0; | ||
325 | } | ||
326 | lcm.p = input; | ||
327 | } | ||
328 | cwm.epilogue(); | ||
329 | return 0; | ||
330 | } | ||
331 | /* vim:set ft=ragel ts=8 sw=8 cin si ai: */ | ||