libs/web/src/template_parser.c

   1 /*
   2  * LuCI Template - Parser implementation
   3  *
   4  *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
   5  *
   6  *  Licensed under the Apache License, Version 2.0 (the "License");
   7  *  you may not use this file except in compliance with the License.
   8  *  You may obtain a copy of the License at
   9  *
  10  *      http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  *  Unless required by applicable law or agreed to in writing, software
  13  *  distributed under the License is distributed on an "AS IS" BASIS,
  14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  *  See the License for the specific language governing permissions and
  16  *  limitations under the License.
  17  */
  18
  19 #include "template_parser.h"
  20
  21
  22 /* leading and trailing code for different types */
  23 const char * gen_code[6][2] = {
  24         { "write(\"",                   "\")"   },
  25         { NULL,                                 NULL    },
  26         { "write(tostring(",    "))"    },
  27         { "include(\"",                 "\")"   },
  28         { "write(translate(\"", "\"))"  },
  29         { NULL,                                 " "             }
  30 };
  31
  32 /* Simple strstr() like function that takes len arguments for both haystack and needle. */
  33 static char *strfind(char *haystack, int hslen, const char *needle, int ndlen)
  34 {
  35         int match = 0;
  36         int i, j;
  37
  38         for( i = 0; i < hslen; i++ )
  39         {
  40                 if( haystack[i] == needle[0] )
  41                 {
  42                         match = ((ndlen == 1) || ((i + ndlen) <= hslen));
  43
  44                         for( j = 1; (j < ndlen) && ((i + j) < hslen); j++ )
  45                         {
  46                                 if( haystack[i+j] != needle[j] )
  47                                 {
  48                                         match = 0;
  49                                         break;
  50                                 }
  51                         }
  52
  53                         if( match )
  54                                 return &haystack[i];
  55                 }
  56         }
  57
  58         return NULL;
  59 }
  60
  61 /*
  62  * Inspect current read buffer and find the number of "vague" characters at the end
  63  * which could indicate an opening token. Returns the number of "vague" chars.
  64  * The last continuous sequence of whitespace, optionally followed by a "<" is
  65  * treated as "vague" because whitespace may be discarded if the upcoming opening
  66  * token indicates pre-whitespace-removal ("<%-"). A single remaining "<" char
  67  * can't be differentiated from an opening token ("<%"), so it's kept to be processed
  68  * in the next cycle.
  69  */
  70 static int stokscan(struct template_parser *data, int off, int no_whitespace)
  71 {
  72         int i;
  73         int skip = 0;
  74         int tokoff = data->bufsize - 1;
  75
  76         for( i = tokoff; i >= off; i-- )
  77         {
  78                 if( data->buf[i] == T_TOK_START[0] )
  79                 {
  80                         skip = tokoff - i + 1;
  81                         tokoff = i - 1;
  82                         break;
  83                 }
  84         }
  85
  86         if( !no_whitespace )
  87         {
  88                 for( i = tokoff; i >= off; i-- )
  89                 {
  90                         if( isspace(data->buf[i]) )
  91                                 skip++;
  92                         else
  93                                 break;
  94                 }
  95         }
  96
  97         return skip;
  98 }
  99
 100 /*
 101  * Similar to stokscan() but looking for closing token indicators.
 102  * Matches "-", optionally followed by a "%" char.
 103  */
 104 static int etokscan(struct template_parser *data)
 105 {
 106         int skip = 0;
 107
 108         if( (data->bufsize > 0) && (data->buf[data->bufsize-1] == T_TOK_END[0]) )
 109                 skip++;
 110
 111         if( (data->bufsize > skip) && (data->buf[data->bufsize-skip-1] == T_TOK_SKIPWS[0]) )
 112                 skip++;
 113
 114         return skip;
 115 }
 116
 117 /*
 118  * Generate Lua expressions from the given raw code, write it into the
 119  * output buffer and set the lua_Reader specific size pointer.
 120  * Takes parser-state, lua_Reader's size pointer and generator flags
 121  * as parameter. The given flags indicate whether leading or trailing
 122  * code should be added. Returns a pointer to the output buffer.
 123  */
 124 static const char * generate_expression(struct template_parser *data, size_t *sz, int what)
 125 {
 126         char tmp[T_OUTBUFSZ];
 127         int i;
 128         int size = 0;
 129         int start = 0;
 130         int i18n_hasdef = 0;
 131
 132         memset(tmp, 0, T_OUTBUFSZ);
 133
 134         /* Inject leading expression code (if any) */
 135         if( (what & T_GEN_START) && (gen_code[data->type][0] != NULL) )
 136         {
 137                 memcpy(tmp, gen_code[data->type][0], strlen(gen_code[data->type][0]));
 138                 size += strlen(gen_code[data->type][0]);
 139         }
 140
 141         /* Parse source buffer */
 142         for( i = 0; i < data->outsize; i++ )
 143         {
 144                 /* Skip leading whitespace for non-raw and non-expr chunks */
 145                 if( !start && isspace(data->out[i]) && (data->type == T_TYPE_I18N || data->type == T_TYPE_INCLUDE) )
 146                         continue;
 147                 else if( !start )
 148                         start = 1;
 149
 150                 /* Found whitespace after i18n key */
 151                 if( (data->type == T_TYPE_I18N) && (i18n_hasdef == 1) )
 152                 {
 153                         /* At non-whitespace char, inject seperator token */
 154                         if( !isspace(data->out[i]) )
 155                         {
 156                                 memcpy(&tmp[size], T_TOK_I18NSEP, strlen(T_TOK_I18NSEP));
 157                                 size += strlen(T_TOK_I18NSEP);
 158                                 i18n_hasdef = 2;
 159                         }
 160
 161                         /* At further whitespace, skip */
 162                         else
 163                         {
 164                                 continue;
 165                         }
 166                 }
 167
 168                 /* Escape quotes, backslashes and newlines for plain, i18n and include expressions */
 169                 if( (data->type == T_TYPE_TEXT || data->type == T_TYPE_I18N || data->type == T_TYPE_INCLUDE) &&
 170                     (data->out[i] == '\\' || data->out[i] == '"' || data->out[i] == '\n' || data->out[i] == '\t') )
 171                 {
 172                         tmp[size++] = '\\';
 173
 174                         switch(data->out[i])
 175                         {
 176                                 case '\n':
 177                                         tmp[size++] = 'n';
 178                                         break;
 179
 180                                 case '\t':
 181                                         tmp[size++] = 't';
 182                                         break;
 183
 184                                 default:
 185                                         tmp[size++] = data->out[i];
 186                         }
 187                 }
 188
 189                 /* Found first whitespace in i18n expression, raise flag */
 190                 else if( isspace(data->out[i]) && (data->type == T_TYPE_I18N) && (i18n_hasdef == 0) )
 191                 {
 192                         i18n_hasdef = 1;
 193                 }
 194
 195                 /* Normal char */
 196                 else
 197                 {
 198                         tmp[size++] = data->out[i];
 199                 }
 200         }
 201
 202         /* Processed i18n expression without default text, inject separator */
 203         if( (data->type == T_TYPE_I18N) && (i18n_hasdef < 2) )
 204         {
 205                 memcpy(&tmp[size], T_TOK_I18NSEP, strlen(T_TOK_I18NSEP));
 206                 size += strlen(T_TOK_I18NSEP);
 207         }
 208
 209         /* Inject trailing expression code (if any) */
 210         if( (what & T_GEN_END) && (gen_code[data->type][1] != NULL) )
 211         {
 212                 memcpy(&tmp[size], gen_code[data->type][1], strlen(gen_code[data->type][1]));
 213                 size += strlen(gen_code[data->type][1]);
 214         }
 215
 216         *sz = data->outsize = size;
 217         memset(data->out, 0, T_OUTBUFSZ);
 218         memcpy(data->out, tmp, size);
 219
 220         //printf("<<<%i|%i|%i|%s>>>\n", what, data->type, *sz, data->out);
 221
 222         return data->out;
 223 }
 224
 225 /*
 226  * Move the number of bytes specified in data->bufsize from the
 227  * given source pointer to the beginning of the read buffer.
 228  */
 229 static void bufmove(struct template_parser *data, const char *src)
 230 {
 231         if( data->bufsize > 0 )
 232                 memmove(data->buf, src, data->bufsize);
 233         else if( data->bufsize < 0 )
 234                 data->bufsize = 0;
 235
 236         data->buf[data->bufsize] = 0;
 237 }
 238
 239 /*
 240  * Move the given amount of bytes from the given source pointer
 241  * to the output buffer and set data->outputsize.
 242  */
 243 static void bufout(struct template_parser *data, const char *src, int len)
 244 {
 245         if( len >= 0 )
 246         {
 247                 memset(data->out, 0, T_OUTBUFSZ);
 248                 memcpy(data->out, src, len);
 249                 data->outsize = len;
 250         }
 251         else
 252         {
 253                 data->outsize = 0;
 254         }
 255 }
 256
 257 /*
 258  * lua_Reader compatible function that parses template code on demand from
 259  * the given file handle.
 260  */
 261 const char *template_reader(lua_State *L, void *ud, size_t *sz)
 262 {
 263         struct template_parser *data = ud;
 264         char *match = NULL;
 265         int off = 0;
 266         int ignore = 0;
 267         int genflags = 0;
 268         int readlen = 0;
 269         int vague = 0;
 270
 271         while( !(data->flags & T_FLAG_EOF) || (data->bufsize > 0) )
 272         {
 273                 /* Fill buffer */
 274                 if( !(data->flags & T_FLAG_EOF) && (data->bufsize < T_READBUFSZ) )
 275                 {
 276                         if( (readlen = read(data->fd, &data->buf[data->bufsize], T_READBUFSZ - data->bufsize)) > 0 )
 277                                 data->bufsize += readlen;
 278                         else if( readlen == 0 )
 279                                 data->flags |= T_FLAG_EOF;
 280                         else
 281                                 return NULL;
 282                 }
 283
 284                 /* Evaluate state */
 285                 switch(data->state)
 286                 {
 287                         /* Plain text chunk (before "<%") */
 288                         case T_STATE_TEXT_INIT:
 289                         case T_STATE_TEXT_NEXT:
 290                                 off = 0; ignore = 0; *sz = 0;
 291                                 data->type = T_TYPE_TEXT;
 292
 293                                 /* Skip leading whitespace if requested */
 294                                 if( data->flags & T_FLAG_SKIPWS )
 295                                 {
 296                                         data->flags &= ~T_FLAG_SKIPWS;
 297                                         while( (off < data->bufsize) && isspace(data->buf[off]) )
 298                                                 off++;
 299                                 }
 300
 301                                 /* Found "<%" */
 302                                 if( (match = strfind(&data->buf[off], data->bufsize - off - 1, T_TOK_START, strlen(T_TOK_START))) != NULL )
 303                                 {
 304                                         readlen = (int)(match - &data->buf[off]);
 305                                         data->bufsize -= (readlen + strlen(T_TOK_START) + off);
 306                                         match += strlen(T_TOK_START);
 307
 308                                         /* Check for leading '-' */
 309                                         if( match[0] == T_TOK_SKIPWS[0] )
 310                                         {
 311                                                 data->bufsize--;
 312                                                 match++;
 313
 314                                                 while( (readlen > 1) && isspace(data->buf[off+readlen-1]) )
 315                                                 {
 316                                                         readlen--;
 317                                                 }
 318                                         }
 319
 320                                         bufout(data, &data->buf[off], readlen);
 321                                         bufmove(data, match);
 322                                         data->state = T_STATE_CODE_INIT;
 323                                 }
 324
 325                                 /* Maybe plain chunk */
 326                                 else
 327                                 {
 328                                         /* Preserve trailing "<" or white space, maybe a start token */
 329                                         vague = stokscan(data, off, 0);
 330
 331                                         /* We can process some bytes ... */
 332                                         if( vague < data->bufsize )
 333                                         {
 334                                                 readlen = data->bufsize - vague - off;
 335                                         }
 336
 337                                         /* No bytes to process, so try to remove at least whitespace ... */
 338                                         else
 339                                         {
 340                                                 /* ... but try to preserve trailing "<" ... */
 341                                                 vague = stokscan(data, off, 1);
 342
 343                                                 if( vague < data->bufsize )
 344                                                 {
 345                                                         readlen = data->bufsize - vague - off;
 346                                                 }
 347
 348                                                 /* ... no chance, push out buffer */
 349                                                 else
 350                                                 {
 351                                                         readlen = vague - off;
 352                                                         vague   = 0;
 353                                                 }
 354                                         }
 355
 356                                         bufout(data, &data->buf[off], readlen);
 357
 358                                         data->state   = T_STATE_TEXT_NEXT;
 359                                         data->bufsize = vague;
 360                                         bufmove(data, &data->buf[off+readlen]);
 361                                 }
 362
 363                                 if( ignore || data->outsize == 0 )
 364                                         continue;
 365                                 else
 366                                         return generate_expression(data, sz, T_GEN_START | T_GEN_END);
 367
 368                                 break;
 369
 370                         /* Ignored chunk (inside "<%# ... %>") */
 371                         case T_STATE_SKIP:
 372                                 ignore = 1;
 373
 374                         /* Initial code chunk ("<% ...") */
 375                         case T_STATE_CODE_INIT:
 376                                 off = 0;
 377
 378                                 /* Check for leading '-' */
 379                                 if( data->buf[off] == T_TOK_SKIPWS[0] )
 380                                         off++;
 381
 382                                 /* Determine code type */
 383                                 switch(data->buf[off])
 384                                 {
 385                                         case '#':
 386                                                 ignore = 1;
 387                                                 off++;
 388                                                 data->type = T_TYPE_COMMENT;
 389                                                 break;
 390
 391                                         case '=':
 392                                                 off++;
 393                                                 data->type = T_TYPE_EXPR;
 394                                                 break;
 395
 396                                         case '+':
 397                                                 off++;
 398                                                 data->type = T_TYPE_INCLUDE;
 399                                                 break;
 400
 401                                         case ':':
 402                                                 off++;
 403                                                 data->type = T_TYPE_I18N;
 404                                                 break;
 405
 406                                         default:
 407                                                 data->type = T_TYPE_CODE;
 408                                                 break;
 409                                 }
 410
 411                         /* Subsequent code chunk ("..." or "... %>") */
 412                         case T_STATE_CODE_NEXT:
 413                                 /* Found "%>" */
 414                                 if( (match = strfind(&data->buf[off], data->bufsize - off, T_TOK_END, strlen(T_TOK_END))) != NULL )
 415                                 {
 416                                         genflags = ( data->state == T_STATE_CODE_INIT )
 417                                                 ? (T_GEN_START | T_GEN_END) : T_GEN_END;
 418
 419                                         readlen = (int)(match - &data->buf[off]);
 420
 421                                         /* Check for trailing '-' */
 422                                         if( (match > data->buf) && (*(match-1) == T_TOK_SKIPWS[0]) )
 423                                         {
 424                                                 readlen--;
 425                                                 data->flags |= T_FLAG_SKIPWS;
 426                                         }
 427
 428                                         bufout(data, &data->buf[off], readlen);
 429
 430                                         data->state = T_STATE_TEXT_INIT;
 431                                         data->bufsize -= ((int)(match - &data->buf[off]) + strlen(T_TOK_END) + off);
 432                                         bufmove(data, &match[strlen(T_TOK_END)]);
 433                                 }
 434
 435                                 /* Code chunk */
 436                                 else
 437                                 {
 438                                         genflags = ( data->state == T_STATE_CODE_INIT ) ? T_GEN_START : 0;
 439
 440                                         /* Preserve trailing "%" and "-", maybe an end token */
 441                                         vague   = etokscan(data);
 442                                         readlen = data->bufsize - off - vague;
 443                                         bufout(data, &data->buf[off], readlen);
 444
 445                                         data->state   = T_STATE_CODE_NEXT;
 446                                         data->bufsize = vague;
 447                                         bufmove(data, &data->buf[readlen+off]);
 448                                 }
 449
 450                                 if( ignore || (data->outsize == 0 && !genflags) )
 451                                         continue;
 452                                 else
 453                                         return generate_expression(data, sz, genflags);
 454
 455                                 break;
 456                 }
 457         }
 458
 459         *sz = 0;
 460         return NULL;
 461 }
 462
 463