X-Git-Url: http://git.archive.openwrt.org/?a=blobdiff_plain;f=lexer.c;h=c016d41efd43dbef99d0a1a4d25722370614eab4;hb=c7e938d6582a436dddc938539e72dd1320625c54;hp=b1615ad2cf8b09fc9ef0d72d572d5699ccce7075;hpb=afa3a10096e6d3ad50dc9d8250f40d8f23a9ad42;p=project%2Fjsonpath.git

diff --git a/lexer.c b/lexer.c
index b1615ad..c016d41 100644
--- a/lexer.c
+++ b/lexer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2014 Jo-Philipp Wich
+ * Copyright (C) 2013-2014 Jo-Philipp Wich
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <regex.h>
 
 #include "ast.h"
 #include "lexer.h"
@@ -55,7 +56,7 @@ utf8enc(char **out, int *rem, int code)
 		if (*rem < 1)
 			return false;
 
-		*(*out++) = code; (*rem)--;
+		*(*out)++ = code; (*rem)--;
 		return true;
 	}
 	else if (code > 0 && code <= 0x7FF)
@@ -236,7 +237,21 @@ parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
 				case 'r': *out = '\r'; break;
 				case 't': *out = '\t'; break;
 				case 'v': *out = '\v'; break;
-				default: *out = *in; break;
+				default:
+					/* in regexp mode, retain backslash */
+					if (q == '/')
+					{
+						if (rem-- < 1)
+						{
+							s->error_pos = s->off + (in - buf);
+							return -3;
+						}
+
+						*out++ = '\\';
+					}
+
+					*out = *in;
+					break;
 				}
 
 				in++;
@@ -278,6 +293,65 @@ parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
 
 
 /*
+ * Parses a regexp literal from the given buffer.
+ *
+ * Returns a negative value on error, otherwise the amount of consumed
+ * characters from the given buffer.
+ *
+ * Any flag characters directly following the literal are consumed as well
+ * and folded into op->num, which starts out as REG_NOSUB | REG_NEWLINE:
+ *  e	sets REG_EXTENDED
+ *  i	sets REG_ICASE
+ *  s	clears REG_NEWLINE
+ *
+ * Error values:
+ *  -1	Unterminated regexp
+ *  -2	Invalid escape sequence
+ *  -3	Regexp literal too long
+ */
+
+static int
+parse_regexp(const char *buf, struct jp_opcode *op, struct jp_state *s)
+{
+	int len = parse_string(buf, op, s);
+	const char *p;
+
+	if (len >= 2)
+	{
+		op->num = REG_NOSUB | REG_NEWLINE;
+
+		/* scan trailing flags; ends at NUL or the first non-flag character */
+		for (p = buf + len; *p; p++)
+		{
+			switch (*p)
+			{
+			case 'e':
+				op->num |= REG_EXTENDED;
+				len++;
+				break;
+
+			case 'i':
+				op->num |= REG_ICASE;
+				len++;
+				break;
+
+			case 's':
+				op->num &= ~REG_NEWLINE;
+				len++;
+				break;
+
+			default:
+				return len;
+			}
+		}
+
+	}
+
+	return len;
+}
+
+
+/*
  * Parses a label from the given buffer.
  *
  * Returns a negative value on error, otherwise the amount of consumed
@@ -367,8 +441,10 @@ static const struct token tokens[] = {
 	{ T_LT, "<", 1 },
 	{ T_GT, ">", 1 },
 	{ T_EQ, "=", 1 },
+	{ T_MATCH, "~", 1 },
 	{ T_NOT, "!", 1 },
 	{ T_WILDCARD, "*", 1 },
+	{ T_REGEXP, "/", 1, parse_regexp },
 	{ T_STRING, "'", 1, parse_string },
 	{ T_STRING, "\"", 1, parse_string },
 	{ T_LABEL, "_", 1, parse_label },
@@ -378,7 +454,7 @@ static const struct token tokens[] = {
 	{ T_NUMBER, "09", 0, parse_number },
 };
 
-const char *tokennames[23] = {
+const char *tokennames[25] = {
 	[0] = "End of file",
 	[T_AND] = "'&&'",
 	[T_OR] = "'||'",
@@ -389,12 +465,14 @@ const char *tokennames[23] = {
 	[T_GE] = "'>='",
 	[T_LT] = "'<'",
 	[T_LE] = "'<='",
+	[T_MATCH] = "'~'",
 	[T_NOT] = "'!'",
 	[T_LABEL] = "Label",
 	[T_ROOT] = "'$'",
 	[T_THIS] = "'@'",
 	[T_DOT] = "'.'",
 	[T_WILDCARD] = "'*'",
+	[T_REGEXP] = "/.../",
 	[T_BROPEN] = "'['",
 	[T_BRCLOSE] = "']'",
 	[T_BOOL] = "Bool",