2 * Copyright (C) 2013-2014 Jo-Philipp Wich <jow@openwrt.org>
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
31 int (*parse)(const char *buf, struct jp_opcode *op, struct jp_state *s);
38 (((x) >= 'a') ? (10 + (x) - 'a') : \
39 (((x) >= 'A') ? (10 + (x) - 'A') : dec(x)))
/*
 * Stores the given codepoint as a UTF-8 multibyte sequence into the given
 * output buffer and subtracts the required number of bytes from the given
 * remaining size.
 *
 * out:  address of the caller's write cursor; advanced past every byte
 *       that gets written
 * rem:  address of the remaining buffer capacity in bytes; decremented
 *       once per byte written
 * code: codepoint to encode
 *
 * Returns false if the multibyte sequence would not fit into the buffer,
 * otherwise true. On failure neither the cursor nor the remaining size is
 * modified. Codepoints outside of 1..0x10FFFF are not encoded: nothing is
 * written for them and true is returned.
 */
static bool
utf8enc(char **out, int *rem, int code)
{
	if (code > 0 && code <= 0x7F)
	{
		if (*rem < 1)
			return false;

		/*
		 * The parentheses matter: "*(*out)++" advances the caller's cursor,
		 * whereas "*(*out++)" would only bump the local pointer-to-pointer
		 * and leave the cursor in place, so the next write would clobber
		 * this byte.
		 */
		*(*out)++ = code; (*rem)--;
		return true;
	}
	else if (code > 0 && code <= 0x7FF)
	{
		if (*rem < 2)
			return false;

		*(*out)++ = ((code >>  6) & 0x1F) | 0xC0; (*rem)--;
		*(*out)++ = ( code        & 0x3F) | 0x80; (*rem)--;
		return true;
	}
	else if (code > 0 && code <= 0xFFFF)
	{
		if (*rem < 3)
			return false;

		*(*out)++ = ((code >> 12) & 0x0F) | 0xE0; (*rem)--;
		*(*out)++ = ((code >>  6) & 0x3F) | 0x80; (*rem)--;
		*(*out)++ = ( code        & 0x3F) | 0x80; (*rem)--;
		return true;
	}
	else if (code > 0 && code <= 0x10FFFF)
	{
		if (*rem < 4)
			return false;

		*(*out)++ = ((code >> 18) & 0x07) | 0xF0; (*rem)--;
		*(*out)++ = ((code >> 12) & 0x3F) | 0x80; (*rem)--;
		*(*out)++ = ((code >>  6) & 0x3F) | 0x80; (*rem)--;
		*(*out)++ = ( code        & 0x3F) | 0x80; (*rem)--;
		return true;
	}

	/* not an encodable codepoint: skip it without reporting an error */
	return true;
}
97 * Parses a string literal from the given buffer.
99 * Returns a negative value on error, otherwise the amount of consumed
100 * characters from the given buffer.
103 * -1 Unterminated string
104 * -2 Invalid escape sequence
105 * -3 String literal too long
109 parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
112 char str[128] = { 0 };
114 const char *in = buf;
116 int rem = sizeof(str) - 1;
121 /* continuation of escape sequence */
127 if (isxdigit(in[1]) && isxdigit(in[2]) &&
128 isxdigit(in[3]) && isxdigit(in[4]))
130 if (!utf8enc(&out, &rem,
131 hex(in[1]) * 16 * 16 * 16 +
132 hex(in[2]) * 16 * 16 +
136 s->error_pos = s->off + (in - buf);
144 s->error_pos = s->off + (in - buf);
150 else if (in[0] == 'x')
152 if (isxdigit(in[1]) && isxdigit(in[2]))
154 if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2])))
156 s->error_pos = s->off + (in - buf);
164 s->error_pos = s->off + (in - buf);
169 /* \377, \77 or \7 */
170 else if (in[0] >= '0' && in[0] <= '7')
173 if (in[1] >= '0' && in[1] <= '7' &&
174 in[2] >= '0' && in[2] <= '7')
176 code = dec(in[0]) * 8 * 8 +
182 s->error_pos = s->off + (in - buf);
186 if (!utf8enc(&out, &rem, code))
188 s->error_pos = s->off + (in - buf);
196 else if (in[1] >= '0' && in[1] <= '7')
198 if (!utf8enc(&out, &rem, dec(in[0]) * 8 + dec(in[1])))
200 s->error_pos = s->off + (in - buf);
210 if (!utf8enc(&out, &rem, dec(in[0])))
212 s->error_pos = s->off + (in - buf);
220 /* single character escape */
225 s->error_pos = s->off + (in - buf);
231 case 'a': *out = '\a'; break;
232 case 'b': *out = '\b'; break;
233 case 'e': *out = '\e'; break;
234 case 'f': *out = '\f'; break;
235 case 'n': *out = '\n'; break;
236 case 'r': *out = '\r'; break;
237 case 't': *out = '\t'; break;
238 case 'v': *out = '\v'; break;
239 default: *out = *in; break;
249 /* begin of escape sequence */
250 else if (*in == '\\')
256 /* terminating quote */
259 op->str = strdup(str);
260 return (in - buf) + 2;
268 s->error_pos = s->off + (in - buf);
281 * Parses a label from the given buffer.
283 * Returns a negative value on error, otherwise the amount of consumed
284 * characters from the given buffer.
291 parse_label(const char *buf, struct jp_opcode *op, struct jp_state *s)
293 char str[128] = { 0 };
295 const char *in = buf;
296 int rem = sizeof(str) - 1;
298 while (*in == '_' || isalnum(*in))
302 s->error_pos = s->off + (in - buf);
309 if (!strcmp(str, "true") || !strcmp(str, "false"))
311 op->num = (str[0] == 't');
316 op->str = strdup(str);
324 * Parses a number literal from the given buffer.
326 * Returns a negative value on error, otherwise the amount of consumed
327 * characters from the given buffer.
330 * -2 Invalid number character
334 parse_number(const char *buf, struct jp_opcode *op, struct jp_state *s)
337 int n = strtol(buf, &e, 10);
341 s->error_pos = s->off;
350 static const struct token tokens[] = {
360 { T_BROPEN, "[", 1 },
361 { T_BRCLOSE, "]", 1 },
363 { T_PCLOSE, ")", 1 },
371 { T_WILDCARD, "*", 1 },
372 { T_STRING, "'", 1, parse_string },
373 { T_STRING, "\"", 1, parse_string },
374 { T_LABEL, "_", 1, parse_label },
375 { T_LABEL, "az", 0, parse_label },
376 { T_LABEL, "AZ", 0, parse_label },
377 { T_NUMBER, "-", 1, parse_number },
378 { T_NUMBER, "09", 0, parse_number },
381 const char *tokennames[23] = {
397 [T_WILDCARD] = "'*'",
401 [T_NUMBER] = "Number",
402 [T_STRING] = "String",
409 match_token(const char *ptr, struct jp_opcode *op, struct jp_state *s)
412 const struct token *tok;
414 for (i = 0, tok = &tokens[0];
415 i < sizeof(tokens) / sizeof(tokens[0]);
416 i++, tok = &tokens[i])
418 if ((tok->plen > 0 && !strncmp(ptr, tok->pat, tok->plen)) ||
419 (tok->plen == 0 && *ptr >= tok->pat[0] && *ptr <= tok->pat[1]))
421 op->type = tok->type;
424 return tok->parse(ptr, op, s);
430 s->error_pos = s->off;
435 jp_get_token(struct jp_state *s, const char *input, int *mlen)
437 struct jp_opcode op = { 0 };
439 *mlen = match_token(input, &op, s);
443 s->error_code = *mlen;
446 else if (op.type == 0)
451 return jp_alloc_op(s, op.type, op.num, op.str, NULL);