libs/web: fix template parsing of default texts in i18n expressions
[project/luci.git] / libs / web / src / template_parser.c
1 /*
2  * LuCI Template - Parser implementation
3  *
4  *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
5  *
6  *  Licensed under the Apache License, Version 2.0 (the "License");
7  *  you may not use this file except in compliance with the License.
8  *  You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  *  Unless required by applicable law or agreed to in writing, software
13  *  distributed under the License is distributed on an "AS IS" BASIS,
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  *  See the License for the specific language governing permissions and
16  *  limitations under the License.
17  */
18
19 #include "template_parser.h"
20
21
/*
 * Lua code wrapped around a generated chunk. The first index is the chunk
 * type (the table is indexed with data->type), the second selects the
 * leading ([0]) or trailing ([1]) fragment. A NULL entry means that nothing
 * is injected on that side.
 */
const char * gen_code[6][2] = {
        [0] = { "write(\"",             "\")"   },
        [1] = { NULL,                   NULL    },
        [2] = { "write(tostring(",      "))"    },
        [3] = { "include(\"",           "\")"   },
        [4] = { "write(translate(\"",   "\"))"  },
        [5] = { NULL,                   " "     }
};
31
/*
 * Length-bounded substring search: look for the first ndlen bytes of needle
 * within the first hslen bytes of haystack. Neither buffer has to be
 * NUL-terminated. Returns a pointer to the first occurrence inside haystack
 * or NULL if there is none.
 */
static char *strfind(char *haystack, int hslen, const char *needle, int ndlen)
{
        int pos;
        int k;

        for( pos = 0; pos < hslen; pos++ )
        {
                /* Cheap reject: first needle byte must match */
                if( haystack[pos] != needle[0] )
                        continue;

                /* A multi byte needle has to fit into the remaining haystack */
                if( (ndlen != 1) && ((pos + ndlen) > hslen) )
                        continue;

                /* Compare the remaining needle bytes */
                k = 1;

                while( (k < ndlen) && (haystack[pos+k] == needle[k]) )
                        k++;

                if( k >= ndlen )
                        return &haystack[pos];
        }

        return NULL;
}
60
61 /*
62  * Inspect current read buffer and find the number of "vague" characters at the end
63  * which could indicate an opening token. Returns the number of "vague" chars.
64  * The last continuous sequence of whitespace, optionally followed by a "<" is
65  * treated as "vague" because whitespace may be discarded if the upcoming opening
66  * token indicates pre-whitespace-removal ("<%-"). A single remaining "<" char
67  * can't be differentiated from an opening token ("<%"), so it's kept to be processed
68  * in the next cycle.
69  */
70 static int stokscan(struct template_parser *data, int off, int no_whitespace)
71 {
72         int i;
73         int skip = 0;
74         int tokoff = data->bufsize - 1;
75
76         for( i = tokoff; i >= off; i-- )
77         {
78                 if( data->buf[i] == T_TOK_START[0] )
79                 {
80                         skip = tokoff - i + 1;
81                         tokoff = i - 1;
82                         break;
83                 }
84         }
85
86         if( !no_whitespace )
87         {
88                 for( i = tokoff; i >= off; i-- )
89                 {
90                         if( isspace(data->buf[i]) )
91                                 skip++;
92                         else
93                                 break;
94                 }
95         }
96
97         return skip;
98 }
99
100 /*
101  * Similar to stokscan() but looking for closing token indicators.
102  * Matches "-", optionally followed by a "%" char.
103  */
104 static int etokscan(struct template_parser *data)
105 {
106         int skip = 0;
107
108         if( (data->bufsize > 0) && (data->buf[data->bufsize-1] == T_TOK_END[0]) )
109                 skip++;
110
111         if( (data->bufsize > skip) && (data->buf[data->bufsize-skip-1] == T_TOK_SKIPWS[0]) )
112                 skip++;
113
114         return skip;
115 }
116
117 /*
118  * Generate Lua expressions from the given raw code, write it into the
119  * output buffer and set the lua_Reader specific size pointer.
120  * Takes parser-state, lua_Reader's size pointer and generator flags
121  * as parameter. The given flags indicate whether leading or trailing
122  * code should be added. Returns a pointer to the output buffer.
123  */
124 static const char * generate_expression(struct template_parser *data, size_t *sz, int what)
125 {
126         char tmp[T_OUTBUFSZ];
127         int i;
128         int size = 0;
129         int start = 0;
130         int i18n_hasdef = 0;
131
132         memset(tmp, 0, T_OUTBUFSZ);
133
134         /* Inject leading expression code (if any) */
135         if( (what & T_GEN_START) && (gen_code[data->type][0] != NULL) )
136         {
137                 memcpy(tmp, gen_code[data->type][0], strlen(gen_code[data->type][0]));
138                 size += strlen(gen_code[data->type][0]);
139         }
140
141         /* Parse source buffer */
142         for( i = 0; i < data->outsize; i++ )
143         {
144                 /* Skip leading whitespace for non-raw and non-expr chunks */
145                 if( !start && isspace(data->out[i]) && (data->type == T_TYPE_I18N || data->type == T_TYPE_INCLUDE) )
146                         continue;
147                 else if( !start )
148                         start = 1;
149
150                 /* Found whitespace after i18n key */
151                 if( (data->type == T_TYPE_I18N) && (i18n_hasdef == 1) )
152                 {
153                         /* At non-whitespace char, inject seperator token */
154                         if( !isspace(data->out[i]) )
155                         {
156                                 memcpy(&tmp[size], T_TOK_I18NSEP, strlen(T_TOK_I18NSEP));
157                                 size += strlen(T_TOK_I18NSEP);
158                                 i18n_hasdef = 2;
159                         }
160
161                         /* At further whitespace, skip */
162                         else
163                         {
164                                 continue;
165                         }
166                 }
167
168                 /* Escape quotes, backslashes and newlines for plain, i18n and include expressions */
169                 if( (data->type == T_TYPE_TEXT || data->type == T_TYPE_I18N || data->type == T_TYPE_INCLUDE) &&
170                     (data->out[i] == '\\' || data->out[i] == '"' || data->out[i] == '\n' || data->out[i] == '\t') )
171                 {
172                         tmp[size++] = '\\';
173
174                         switch(data->out[i])
175                         {
176                                 case '\n':
177                                         tmp[size++] = 'n';
178                                         break;
179
180                                 case '\t':
181                                         tmp[size++] = 't';
182                                         break;
183
184                                 default:
185                                         tmp[size++] = data->out[i];
186                         }
187                 }
188
189                 /* Found first whitespace in i18n expression, raise flag */
190                 else if( isspace(data->out[i]) && (data->type == T_TYPE_I18N) && (i18n_hasdef == 0) )
191                 {
192                         i18n_hasdef = 1;
193                 }
194
195                 /* Normal char */
196                 else
197                 {
198                         tmp[size++] = data->out[i];
199                 }
200         }
201
202         /* Processed i18n expression without default text, inject separator */
203         if( (data->type == T_TYPE_I18N) && (i18n_hasdef < 2) )
204         {
205                 memcpy(&tmp[size], T_TOK_I18NSEP, strlen(T_TOK_I18NSEP));
206                 size += strlen(T_TOK_I18NSEP);
207         }
208
209         /* Inject trailing expression code (if any) */
210         if( (what & T_GEN_END) && (gen_code[data->type][1] != NULL) )
211         {
212                 memcpy(&tmp[size], gen_code[data->type][1], strlen(gen_code[data->type][1]));
213                 size += strlen(gen_code[data->type][1]);
214         }
215
216         *sz = data->outsize = size;
217         memset(data->out, 0, T_OUTBUFSZ);
218         memcpy(data->out, tmp, size);
219
220         //printf("<<<%i|%i|%i|%s>>>\n", what, data->type, *sz, data->out);
221
222         return data->out;
223 }
224
225 /*
226  * Move the number of bytes specified in data->bufsize from the
227  * given source pointer to the beginning of the read buffer.
228  */
229 static void bufmove(struct template_parser *data, const char *src)
230 {
231         if( data->bufsize > 0 )
232                 memmove(data->buf, src, data->bufsize);
233         else if( data->bufsize < 0 )
234                 data->bufsize = 0;
235
236         data->buf[data->bufsize] = 0;
237 }
238
239 /*
240  * Move the given amount of bytes from the given source pointer
241  * to the output buffer and set data->outputsize.
242  */
243 static void bufout(struct template_parser *data, const char *src, int len)
244 {
245         if( len >= 0 )
246         {
247                 memset(data->out, 0, T_OUTBUFSZ);
248                 memcpy(data->out, src, len);
249                 data->outsize = len;
250         }
251         else
252         {
253                 data->outsize = 0;
254         }
255 }
256
257 /*
258  * lua_Reader compatible function that parses template code on demand from
259  * the given file handle.
260  */
261 const char *template_reader(lua_State *L, void *ud, size_t *sz)
262 {
263         struct template_parser *data = ud;
264         char *match = NULL;
265         int off = 0;
266         int ignore = 0;
267         int genflags = 0;
268         int readlen = 0;
269         int vague = 0;
270
271         while( !(data->flags & T_FLAG_EOF) || (data->bufsize > 0) )
272         {
273                 /* Fill buffer */
274                 if( !(data->flags & T_FLAG_EOF) && (data->bufsize < T_READBUFSZ) )
275                 {
276                         if( (readlen = read(data->fd, &data->buf[data->bufsize], T_READBUFSZ - data->bufsize)) > 0 )
277                                 data->bufsize += readlen;
278                         else if( readlen == 0 )
279                                 data->flags |= T_FLAG_EOF;
280                         else
281                                 return NULL;
282                 }
283
284                 /* Evaluate state */
285                 switch(data->state)
286                 {
287                         /* Plain text chunk (before "<%") */
288                         case T_STATE_TEXT_INIT:
289                         case T_STATE_TEXT_NEXT:
290                                 off = 0; ignore = 0; *sz = 0;
291                                 data->type = T_TYPE_TEXT;
292
293                                 /* Skip leading whitespace if requested */
294                                 if( data->flags & T_FLAG_SKIPWS )
295                                 {
296                                         data->flags &= ~T_FLAG_SKIPWS;
297                                         while( (off < data->bufsize) && isspace(data->buf[off]) )
298                                                 off++;
299                                 }
300
301                                 /* Found "<%" */
302                                 if( (match = strfind(&data->buf[off], data->bufsize - off - 1, T_TOK_START, strlen(T_TOK_START))) != NULL )
303                                 {
304                                         readlen = (int)(match - &data->buf[off]);
305                                         data->bufsize -= (readlen + strlen(T_TOK_START) + off);
306                                         match += strlen(T_TOK_START);
307
308                                         /* Check for leading '-' */
309                                         if( match[0] == T_TOK_SKIPWS[0] )
310                                         {
311                                                 data->bufsize--;
312                                                 match++;
313
314                                                 while( (readlen > 1) && isspace(data->buf[off+readlen-1]) )
315                                                 {
316                                                         readlen--;
317                                                 }
318                                         }
319
320                                         bufout(data, &data->buf[off], readlen);
321                                         bufmove(data, match);
322                                         data->state = T_STATE_CODE_INIT;
323                                 }
324
325                                 /* Maybe plain chunk */
326                                 else
327                                 {
328                                         /* Preserve trailing "<" or white space, maybe a start token */
329                                         vague = stokscan(data, off, 0);
330
331                                         /* We can process some bytes ... */
332                                         if( vague < data->bufsize )
333                                         {
334                                                 readlen = data->bufsize - vague - off;
335                                         }
336
337                                         /* No bytes to process, so try to remove at least whitespace ... */
338                                         else
339                                         {
340                                                 /* ... but try to preserve trailing "<" ... */
341                                                 vague = stokscan(data, off, 1);
342
343                                                 if( vague < data->bufsize )
344                                                 {
345                                                         readlen = data->bufsize - vague - off;
346                                                 }
347
348                                                 /* ... no chance, push out buffer */
349                                                 else
350                                                 {
351                                                         readlen = vague - off;
352                                                         vague   = 0;
353                                                 }
354                                         }
355
356                                         bufout(data, &data->buf[off], readlen);
357
358                                         data->state   = T_STATE_TEXT_NEXT;
359                                         data->bufsize = vague;
360                                         bufmove(data, &data->buf[off+readlen]);
361                                 }
362
363                                 if( ignore || data->outsize == 0 )
364                                         continue;
365                                 else
366                                         return generate_expression(data, sz, T_GEN_START | T_GEN_END);
367
368                                 break;
369
370                         /* Ignored chunk (inside "<%# ... %>") */
371                         case T_STATE_SKIP:
372                                 ignore = 1;
373
374                         /* Initial code chunk ("<% ...") */
375                         case T_STATE_CODE_INIT:
376                                 off = 0;
377
378                                 /* Check for leading '-' */
379                                 if( data->buf[off] == T_TOK_SKIPWS[0] )
380                                         off++;
381
382                                 /* Determine code type */
383                                 switch(data->buf[off])
384                                 {
385                                         case '#':
386                                                 ignore = 1;
387                                                 off++;
388                                                 data->type = T_TYPE_COMMENT;
389                                                 break;
390
391                                         case '=':
392                                                 off++;
393                                                 data->type = T_TYPE_EXPR;
394                                                 break;
395
396                                         case '+':
397                                                 off++;
398                                                 data->type = T_TYPE_INCLUDE;
399                                                 break;
400
401                                         case ':':
402                                                 off++;
403                                                 data->type = T_TYPE_I18N;
404                                                 break;
405
406                                         default:
407                                                 data->type = T_TYPE_CODE;
408                                                 break;
409                                 }
410
411                         /* Subsequent code chunk ("..." or "... %>") */ 
412                         case T_STATE_CODE_NEXT:
413                                 /* Found "%>" */
414                                 if( (match = strfind(&data->buf[off], data->bufsize - off, T_TOK_END, strlen(T_TOK_END))) != NULL )
415                                 {
416                                         genflags = ( data->state == T_STATE_CODE_INIT )
417                                                 ? (T_GEN_START | T_GEN_END) : T_GEN_END;
418
419                                         readlen = (int)(match - &data->buf[off]);
420
421                                         /* Check for trailing '-' */
422                                         if( (match > data->buf) && (*(match-1) == T_TOK_SKIPWS[0]) )
423                                         {
424                                                 readlen--;
425                                                 data->flags |= T_FLAG_SKIPWS;
426                                         }
427
428                                         bufout(data, &data->buf[off], readlen);
429
430                                         data->state = T_STATE_TEXT_INIT;
431                                         data->bufsize -= ((int)(match - &data->buf[off]) + strlen(T_TOK_END) + off);
432                                         bufmove(data, &match[strlen(T_TOK_END)]);
433                                 }
434
435                                 /* Code chunk */
436                                 else
437                                 {
438                                         genflags = ( data->state == T_STATE_CODE_INIT ) ? T_GEN_START : 0;
439
440                                         /* Preserve trailing "%" and "-", maybe an end token */
441                                         vague   = etokscan(data);
442                                         readlen = data->bufsize - off - vague;
443                                         bufout(data, &data->buf[off], readlen);
444
445                                         data->state   = T_STATE_CODE_NEXT;
446                                         data->bufsize = vague;
447                                         bufmove(data, &data->buf[readlen+off]);
448                                 }
449
450                                 if( ignore || (data->outsize == 0 && !genflags) )
451                                         continue;
452                                 else
453                                         return generate_expression(data, sz, genflags);
454
455                                 break;
456                 }
457         }
458
459         *sz = 0;
460         return NULL;
461 }
462
463