libs/web: prepare template parser, dispatcher and i18n class for upcoming po format...
[project/luci.git] / libs / web / src / template_parser.c
1 /*
2  * LuCI Template - Parser implementation
3  *
4  *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
5  *
6  *  Licensed under the Apache License, Version 2.0 (the "License");
7  *  you may not use this file except in compliance with the License.
8  *  You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  *  Unless required by applicable law or agreed to in writing, software
13  *  distributed under the License is distributed on an "AS IS" BASIS,
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  *  See the License for the specific language governing permissions and
16  *  limitations under the License.
17  */
18
19 #include "template_parser.h"
20
21
/*
 * Lua source fragments wrapped around a chunk, one row per chunk type.
 * Column 0 is emitted in front of the chunk, column 1 behind it; NULL means
 * that nothing is emitted on that side. Rows are selected with data->type.
 * The per-row labels are inferred from the generated code and the order of
 * the T_TYPE_* names used in this file -- confirm against template_parser.h.
 */
const char * gen_code[7][2] = {
	/* presumably plain text */
	[0] = { "write(\"", "\")" },

	/* presumably comment: produces no code at all */
	[1] = { NULL, NULL },

	/* presumably expression */
	[2] = { "write(tostring(", " or \"\"))" },

	/* presumably include */
	[3] = { "include(\"", "\")" },

	/* presumably escaped translation */
	[4] = { "write(pcdata(translate(\"", "\")))" },

	/* presumably raw translation */
	[5] = { "write(translate(\"", "\"))" },

	/* presumably verbatim Lua code: only a separating blank is appended */
	[6] = { NULL, " " }
};
32
/*
 * Length-bounded strstr(): locate the first occurrence of the ndlen bytes at
 * needle within the first hslen bytes of haystack. Neither buffer has to be
 * NUL-terminated.
 *
 * Returns a pointer to the first match inside haystack, or NULL if there is
 * no match, if the needle does not fit into the haystack, or if either
 * length is not positive (an empty needle never matches).
 */
static char *strfind(char *haystack, int hslen, const char *needle, int ndlen)
{
	int i;
	int last;

	/* Nothing to search; also avoids touching needle[0] of an empty needle */
	if( (hslen <= 0) || (ndlen <= 0) )
		return NULL;

	/* Last offset at which the complete needle still fits */
	last = hslen - ndlen;

	for( i = 0; i <= last; i++ )
	{
		/* Cheap first-byte test before comparing the whole needle */
		if( (haystack[i] == needle[0]) && !memcmp(&haystack[i], needle, (size_t)ndlen) )
			return &haystack[i];
	}

	return NULL;
}
61
62 /*
63  * Inspect current read buffer and find the number of "vague" characters at the end
64  * which could indicate an opening token. Returns the number of "vague" chars.
65  * The last continuous sequence of whitespace, optionally followed by a "<" is
66  * treated as "vague" because whitespace may be discarded if the upcoming opening
67  * token indicates pre-whitespace-removal ("<%-"). A single remaining "<" char
68  * can't be differentiated from an opening token ("<%"), so it's kept to be processed
69  * in the next cycle.
70  */
71 static int stokscan(struct template_parser *data, int off, int no_whitespace)
72 {
73         int i;
74         int skip = 0;
75         int tokoff = data->bufsize - 1;
76
77         for( i = tokoff; i >= off; i-- )
78         {
79                 if( data->buf[i] == T_TOK_START[0] )
80                 {
81                         skip = tokoff - i + 1;
82                         tokoff = i - 1;
83                         break;
84                 }
85         }
86
87         if( !no_whitespace )
88         {
89                 for( i = tokoff; i >= off; i-- )
90                 {
91                         if( isspace(data->buf[i]) )
92                                 skip++;
93                         else
94                                 break;
95                 }
96         }
97
98         return skip;
99 }
100
101 /*
102  * Similar to stokscan() but looking for closing token indicators.
103  * Matches "-", optionally followed by a "%" char.
104  */
105 static int etokscan(struct template_parser *data)
106 {
107         int skip = 0;
108
109         if( (data->bufsize > 0) && (data->buf[data->bufsize-1] == T_TOK_END[0]) )
110                 skip++;
111
112         if( (data->bufsize > skip) && (data->buf[data->bufsize-skip-1] == T_TOK_SKIPWS[0]) )
113                 skip++;
114
115         return skip;
116 }
117
118 /*
119  * Generate Lua expressions from the given raw code, write it into the
120  * output buffer and set the lua_Reader specific size pointer.
121  * Takes parser-state, lua_Reader's size pointer and generator flags
122  * as parameter. The given flags indicate whether leading or trailing
123  * code should be added. Returns a pointer to the output buffer.
124  */
125 static const char * generate_expression(struct template_parser *data, size_t *sz, int what)
126 {
127         char tmp[T_OUTBUFSZ];
128         int i;
129         int size = 0;
130         int start = 0;
131         int whitespace = 0;
132
133         memset(tmp, 0, T_OUTBUFSZ);
134
135         /* Inject leading expression code (if any) */
136         if( (what & T_GEN_START) && (gen_code[data->type][0] != NULL) )
137         {
138                 memcpy(tmp, gen_code[data->type][0], strlen(gen_code[data->type][0]));
139                 size += strlen(gen_code[data->type][0]);
140         }
141
142         /* Parse source buffer */
143         for( i = 0; i < data->outsize; i++ )
144         {
145                 /* Skip leading whitespace for non-raw and non-expr chunks */
146                 if( !start && isspace(data->out[i]) && (data->type == T_TYPE_I18N ||
147            data->type == T_TYPE_I18N_RAW || data->type == T_TYPE_INCLUDE) )
148                         continue;
149                 else if( !start )
150                         start = 1;
151
152                 /* Found whitespace after i18n key */
153                 if( data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW )
154                 {
155                         /* Is initial whitespace, insert space */
156                         if( !whitespace && isspace(data->out[i]) )
157                         {
158                                 tmp[size++] = ' ';
159                                 whitespace = 1;
160                         }
161
162                         /* Suppress subsequent whitespace, escape special chars */
163                         else if( !isspace(data->out[i]) )
164                         {
165                                 if( data->out[i] == '\\' || data->out[i] == '"' )
166                                         tmp[size++] = '\\';
167
168                                 tmp[size++] = data->out[i];
169                                 whitespace = 0;
170                         }
171                 }
172
173                 /* Escape quotes, backslashes and newlines for plain and include expressions */
174                 else if( (data->type == T_TYPE_TEXT || data->type == T_TYPE_INCLUDE) &&
175                     (data->out[i] == '\\' || data->out[i] == '"' || data->out[i] == '\n' || data->out[i] == '\t') )
176                 {
177                         tmp[size++] = '\\';
178
179                         switch(data->out[i])
180                         {
181                                 case '\n':
182                                         tmp[size++] = 'n';
183                                         break;
184
185                                 case '\t':
186                                         tmp[size++] = 't';
187                                         break;
188
189                                 default:
190                                         tmp[size++] = data->out[i];
191                         }
192                 }
193
194                 /* Normal char */
195                 else
196                 {
197                         tmp[size++] = data->out[i];
198                 }
199         }
200
201         /* Inject trailing expression code (if any) */
202         if( (what & T_GEN_END) && (gen_code[data->type][1] != NULL) )
203         {
204                 /* Strip trailing space for i18n expressions */
205                 if( data->type == T_TYPE_I18N || data->type == T_TYPE_I18N_RAW )
206                         if( (size > 0) && (tmp[size-1] == ' ') )
207                                 size--;
208
209                 memcpy(&tmp[size], gen_code[data->type][1], strlen(gen_code[data->type][1]));
210                 size += strlen(gen_code[data->type][1]);
211         }
212
213         *sz = data->outsize = size;
214         memset(data->out, 0, T_OUTBUFSZ);
215         memcpy(data->out, tmp, size);
216
217         //printf("<<<%i|%i|%i|%s>>>\n", what, data->type, *sz, data->out);
218
219         return data->out;
220 }
221
222 /*
223  * Move the number of bytes specified in data->bufsize from the
224  * given source pointer to the beginning of the read buffer.
225  */
226 static void bufmove(struct template_parser *data, const char *src)
227 {
228         if( data->bufsize > 0 )
229                 memmove(data->buf, src, data->bufsize);
230         else if( data->bufsize < 0 )
231                 data->bufsize = 0;
232
233         data->buf[data->bufsize] = 0;
234 }
235
236 /*
237  * Move the given amount of bytes from the given source pointer
238  * to the output buffer and set data->outputsize.
239  */
240 static void bufout(struct template_parser *data, const char *src, int len)
241 {
242         if( len >= 0 )
243         {
244                 memset(data->out, 0, T_OUTBUFSZ);
245                 memcpy(data->out, src, len);
246                 data->outsize = len;
247         }
248         else
249         {
250                 data->outsize = 0;
251         }
252 }
253
254 /*
255  * lua_Reader compatible function that parses template code on demand from
256  * the given file handle.
257  */
258 const char *template_reader(lua_State *L, void *ud, size_t *sz)
259 {
260         struct template_parser *data = ud;
261         char *match = NULL;
262         int off = 0;
263         int ignore = 0;
264         int genflags = 0;
265         int readlen = 0;
266         int vague = 0;
267
268         while( !(data->flags & T_FLAG_EOF) || (data->bufsize > 0) )
269         {
270                 /* Fill buffer */
271                 if( !(data->flags & T_FLAG_EOF) && (data->bufsize < T_READBUFSZ) )
272                 {
273                         if( (readlen = read(data->fd, &data->buf[data->bufsize], T_READBUFSZ - data->bufsize)) > 0 )
274                                 data->bufsize += readlen;
275                         else if( readlen == 0 )
276                                 data->flags |= T_FLAG_EOF;
277                         else
278                                 return NULL;
279                 }
280
281                 /* Evaluate state */
282                 switch(data->state)
283                 {
284                         /* Plain text chunk (before "<%") */
285                         case T_STATE_TEXT_INIT:
286                         case T_STATE_TEXT_NEXT:
287                                 off = 0; ignore = 0; *sz = 0;
288                                 data->type = T_TYPE_TEXT;
289
290                                 /* Skip leading whitespace if requested */
291                                 if( data->flags & T_FLAG_SKIPWS )
292                                 {
293                                         data->flags &= ~T_FLAG_SKIPWS;
294                                         while( (off < data->bufsize) && isspace(data->buf[off]) )
295                                                 off++;
296                                 }
297
298                                 /* Found "<%" */
299                                 if( (match = strfind(&data->buf[off], data->bufsize - off - 1, T_TOK_START, strlen(T_TOK_START))) != NULL )
300                                 {
301                                         readlen = (int)(match - &data->buf[off]);
302                                         data->bufsize -= (readlen + strlen(T_TOK_START) + off);
303                                         match += strlen(T_TOK_START);
304
305                                         /* Check for leading '-' */
306                                         if( match[0] == T_TOK_SKIPWS[0] )
307                                         {
308                                                 data->bufsize--;
309                                                 match++;
310
311                                                 while( (readlen > 1) && isspace(data->buf[off+readlen-1]) )
312                                                 {
313                                                         readlen--;
314                                                 }
315                                         }
316
317                                         bufout(data, &data->buf[off], readlen);
318                                         bufmove(data, match);
319                                         data->state = T_STATE_CODE_INIT;
320                                 }
321
322                                 /* Maybe plain chunk */
323                                 else
324                                 {
325                                         /* Preserve trailing "<" or white space, maybe a start token */
326                                         vague = stokscan(data, off, 0);
327
328                                         /* We can process some bytes ... */
329                                         if( vague < data->bufsize )
330                                         {
331                                                 readlen = data->bufsize - vague - off;
332                                         }
333
334                                         /* No bytes to process, so try to remove at least whitespace ... */
335                                         else
336                                         {
337                                                 /* ... but try to preserve trailing "<" ... */
338                                                 vague = stokscan(data, off, 1);
339
340                                                 if( vague < data->bufsize )
341                                                 {
342                                                         readlen = data->bufsize - vague - off;
343                                                 }
344
345                                                 /* ... no chance, push out buffer */
346                                                 else
347                                                 {
348                                                         readlen = vague - off;
349                                                         vague   = 0;
350                                                 }
351                                         }
352
353                                         bufout(data, &data->buf[off], readlen);
354
355                                         data->state   = T_STATE_TEXT_NEXT;
356                                         data->bufsize = vague;
357                                         bufmove(data, &data->buf[off+readlen]);
358                                 }
359
360                                 if( ignore || data->outsize == 0 )
361                                         continue;
362                                 else
363                                         return generate_expression(data, sz, T_GEN_START | T_GEN_END);
364
365                                 break;
366
367                         /* Ignored chunk (inside "<%# ... %>") */
368                         case T_STATE_SKIP:
369                                 ignore = 1;
370
371                         /* Initial code chunk ("<% ...") */
372                         case T_STATE_CODE_INIT:
373                                 off = 0;
374
375                                 /* Check for leading '-' */
376                                 if( data->buf[off] == T_TOK_SKIPWS[0] )
377                                         off++;
378
379                                 /* Determine code type */
380                                 switch(data->buf[off])
381                                 {
382                                         case '#':
383                                                 ignore = 1;
384                                                 off++;
385                                                 data->type = T_TYPE_COMMENT;
386                                                 break;
387
388                                         case '=':
389                                                 off++;
390                                                 data->type = T_TYPE_EXPR;
391                                                 break;
392
393                                         case '+':
394                                                 off++;
395                                                 data->type = T_TYPE_INCLUDE;
396                                                 break;
397
398                                         case ':':
399                                                 off++;
400                                                 data->type = T_TYPE_I18N;
401                                                 break;
402
403                                         case '_':
404                                                 off++;
405                                                 data->type = T_TYPE_I18N_RAW;
406                                                 break;
407
408                                         default:
409                                                 data->type = T_TYPE_CODE;
410                                                 break;
411                                 }
412
413                         /* Subsequent code chunk ("..." or "... %>") */ 
414                         case T_STATE_CODE_NEXT:
415                                 /* Found "%>" */
416                                 if( (match = strfind(&data->buf[off], data->bufsize - off, T_TOK_END, strlen(T_TOK_END))) != NULL )
417                                 {
418                                         genflags = ( data->state == T_STATE_CODE_INIT )
419                                                 ? (T_GEN_START | T_GEN_END) : T_GEN_END;
420
421                                         readlen = (int)(match - &data->buf[off]);
422
423                                         /* Check for trailing '-' */
424                                         if( (match > data->buf) && (*(match-1) == T_TOK_SKIPWS[0]) )
425                                         {
426                                                 readlen--;
427                                                 data->flags |= T_FLAG_SKIPWS;
428                                         }
429
430                                         bufout(data, &data->buf[off], readlen);
431
432                                         data->state = T_STATE_TEXT_INIT;
433                                         data->bufsize -= ((int)(match - &data->buf[off]) + strlen(T_TOK_END) + off);
434                                         bufmove(data, &match[strlen(T_TOK_END)]);
435                                 }
436
437                                 /* Code chunk */
438                                 else
439                                 {
440                                         genflags = ( data->state == T_STATE_CODE_INIT ) ? T_GEN_START : 0;
441
442                                         /* Preserve trailing "%" and "-", maybe an end token */
443                                         vague   = etokscan(data);
444                                         readlen = data->bufsize - off - vague;
445                                         bufout(data, &data->buf[off], readlen);
446
447                                         data->state   = T_STATE_CODE_NEXT;
448                                         data->bufsize = vague;
449                                         bufmove(data, &data->buf[readlen+off]);
450                                 }
451
452                                 if( ignore || (data->outsize == 0 && !genflags) )
453                                         continue;
454                                 else
455                                         return generate_expression(data, sz, genflags);
456
457                                 break;
458                 }
459         }
460
461         *sz = 0;
462         return NULL;
463 }
464
465