X-Git-Url: http://git.archive.openwrt.org/?p=project%2Fjsonpath.git;a=blobdiff_plain;f=lexer.c;h=c016d41efd43dbef99d0a1a4d25722370614eab4;hp=3703d56d7bc4500ec07213a2d140442c21e5ff52;hb=HEAD;hpb=f3830138661374ca10fe6a0b6f2f4b949dea3e5c diff --git a/lexer.c b/lexer.c index 3703d56..c016d41 100644 --- a/lexer.c +++ b/lexer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2014 Jo-Philipp Wich + * Copyright (C) 2013-2014 Jo-Philipp Wich * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -18,6 +18,7 @@ #include #include #include +#include #include "ast.h" #include "lexer.h" @@ -28,7 +29,7 @@ struct token { int type; const char *pat; int plen; - int (*parse)(const char *buf, struct jp_opcode *op); + int (*parse)(const char *buf, struct jp_opcode *op, struct jp_state *s); }; #define dec(o) \ @@ -55,7 +56,7 @@ utf8enc(char **out, int *rem, int code) if (*rem < 1) return false; - *(*out++) = code; (*rem)--; + *(*out)++ = code; (*rem)--; return true; } else if (code > 0 && code <= 0x7FF) @@ -106,7 +107,7 @@ utf8enc(char **out, int *rem, int code) */ static int -parse_string(const char *buf, struct jp_opcode *op) +parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s) { char q = *(buf++); char str[128] = { 0 }; @@ -132,12 +133,16 @@ parse_string(const char *buf, struct jp_opcode *op) hex(in[2]) * 16 * 16 + hex(in[3]) * 16 + hex(in[4]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 5; } else { + s->error_pos = s->off + (in - buf); return -2; } } @@ -148,12 +153,16 @@ parse_string(const char *buf, struct jp_opcode *op) if (isxdigit(in[1]) && isxdigit(in[2])) { if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 3; } else { + s->error_pos = s->off + (in - buf); return -2; } } @@ -170,10 +179,16 @@ parse_string(const char *buf, struct jp_opcode *op) dec(in[2]); if (code > 255) + { + s->error_pos = s->off + (in - buf); return -2; + } if (!utf8enc(&out, &rem, code)) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 3; } @@ -182,7 +197,10 @@ parse_string(const char *buf, struct jp_opcode *op) else if (in[1] >= '0' && in[1] <= '7') { if (!utf8enc(&out, &rem, dec(in[0]) * 8 + dec(in[1]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 2; } @@ -191,7 +209,10 @@ parse_string(const char *buf, struct jp_opcode *op) else { if (!utf8enc(&out, &rem, dec(in[0]))) + { + s->error_pos = s->off + (in - buf); return -3; + } in += 1; } @@ -201,7 +222,10 @@ parse_string(const char *buf, struct jp_opcode *op) else { if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); return -3; + } switch (in[0]) { @@ -213,7 +237,21 @@ parse_string(const char *buf, struct jp_opcode *op) case 'r': *out = '\r'; break; case 't': *out = '\t'; break; case 'v': *out = '\v'; break; - default: *out = *in; break; + default: + /* in regexp mode, retain backslash */ + if (q == '/') + { + if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); + return -3; + } + + *out++ = '\\'; + } + + *out = *in; + break; } in++; @@ -241,7 +279,10 @@ parse_string(const char *buf, struct jp_opcode *op) else { if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); return -3; + } *out++ = *in++; } @@ -252,6 +293,58 @@ parse_string(const char *buf, struct jp_opcode *op) /* + * Parses a regexp literal from the given buffer. + * + * Returns a negative value on error, otherwise the amount of consumed + * characters from the given buffer. + * + * Error values: + * -1 Unterminated regexp + * -2 Invalid escape sequence + * -3 Regexp literal too long + */ + +static int +parse_regexp(const char *buf, struct jp_opcode *op, struct jp_state *s) +{ + int len = parse_string(buf, op, s); + const char *p; + + if (len >= 2) + { + op->num = REG_NOSUB | REG_NEWLINE; + + for (p = buf + len; p; p++) + { + switch (*p) + { + case 'e': + op->num |= REG_EXTENDED; + len++; + break; + + case 'i': + op->num |= REG_ICASE; + len++; + break; + + case 's': + op->num &= ~REG_NEWLINE; + len++; + break; + + default: + return len; + } + } + + } + + return len; +} + + +/* * Parses a label from the given buffer. * * Returns a negative value on error, otherwise the amount of consumed @@ -262,7 +355,7 @@ parse_string(const char *buf, struct jp_opcode *op) */ static int -parse_label(const char *buf, struct jp_opcode *op) +parse_label(const char *buf, struct jp_opcode *op, struct jp_state *s) { char str[128] = { 0 }; char *out = str; @@ -272,7 +365,10 @@ parse_label(const char *buf, struct jp_opcode *op) while (*in == '_' || isalnum(*in)) { if (rem-- < 1) + { + s->error_pos = s->off + (in - buf); return -3; + } *out++ = *in++; } @@ -302,13 +398,16 @@ parse_label(const char *buf, struct jp_opcode *op) */ static int -parse_number(const char *buf, struct jp_opcode *op) +parse_number(const char *buf, struct jp_opcode *op, struct jp_state *s) { char *e; int n = strtol(buf, &e, 10); if (e == buf) + { + s->error_pos = s->off; return -2; + } op->num = n; @@ -335,8 +434,10 @@ static const struct token tokens[] = { { T_LT, "<", 1 }, { T_GT, ">", 1 }, { T_EQ, "=", 1 }, + { T_MATCH, "~", 1 }, { T_NOT, "!", 1 }, { T_WILDCARD, "*", 1 }, + { T_REGEXP, "/", 1, parse_regexp }, { T_STRING, "'", 1, parse_string }, { T_STRING, "\"", 1, parse_string }, { T_LABEL, "_", 1, parse_label }, @@ -346,7 +447,7 @@ static const struct token tokens[] = { { T_NUMBER, "09", 0, parse_number }, }; -const char *tokennames[23] = { +const char *tokennames[25] = { [0] = "End of file", [T_AND] = "'&&'", [T_OR] = "'||'", @@ -357,12 +458,14 @@ const char *tokennames[23] = { [T_GE] = "'>='", [T_LT] = "'<'", [T_LE] = "'<='", + [T_MATCH] = "'~'", [T_NOT] = "'!'", [T_LABEL] = "Label", [T_ROOT] = "'$'", [T_THIS] = "'@'", [T_DOT] = "'.'", [T_WILDCARD] = "'*'", + [T_REGEXP] = "/.../", [T_BROPEN] = "'['", [T_BRCLOSE] = "']'", [T_BOOL] = "Bool", @@ -374,7 +477,7 @@ const char *tokennames[23] = { static int -match_token(const char *ptr, struct jp_opcode *op) +match_token(const char *ptr, struct jp_opcode *op, struct jp_state *s) { int i; const struct token *tok; @@ -389,13 +492,14 @@ match_token(const char *ptr, struct jp_opcode *op) op->type = tok->type; if (tok->parse) - return tok->parse(ptr, op); + return tok->parse(ptr, op, s); return tok->plen; } } - return -1; + s->error_pos = s->off; + return -4; } struct jp_opcode * @@ -403,10 +507,17 @@ jp_get_token(struct jp_state *s, const char *input, int *mlen) { struct jp_opcode op = { 0 }; - *mlen = match_token(input, &op); + *mlen = match_token(input, &op, s); - if (*mlen < 0 || op.type == 0) + if (*mlen < 0) + { + s->error_code = *mlen; return NULL; + } + else if (op.type == 0) + { + return NULL; + } return jp_alloc_op(s, op.type, op.num, op.str, NULL); }