/*
- * Copyright (C) 2013-2014 Jo-Philipp Wich <jow@openwrt.org>
+ * Copyright (C) 2013-2014 Jo-Philipp Wich <jo@mein.io>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <regex.h>
#include "ast.h"
#include "lexer.h"
int type;
const char *pat;
int plen;
- int (*parse)(const char *buf, struct jp_opcode *op);
+ int (*parse)(const char *buf, struct jp_opcode *op, struct jp_state *s);
};
#define dec(o) \
if (*rem < 1)
return false;
- *(*out++) = code; (*rem)--;
+ *(*out)++ = code; (*rem)--;
return true;
}
else if (code > 0 && code <= 0x7FF)
*/
static int
-parse_string(const char *buf, struct jp_opcode *op)
+parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
{
char q = *(buf++);
char str[128] = { 0 };
hex(in[2]) * 16 * 16 +
hex(in[3]) * 16 +
hex(in[4])))
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
in += 5;
}
else
{
+ s->error_pos = s->off + (in - buf);
return -2;
}
}
if (isxdigit(in[1]) && isxdigit(in[2]))
{
if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2])))
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
in += 3;
}
else
{
+ s->error_pos = s->off + (in - buf);
return -2;
}
}
dec(in[2]);
if (code > 255)
+ {
+ s->error_pos = s->off + (in - buf);
return -2;
+ }
if (!utf8enc(&out, &rem, code))
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
in += 3;
}
else if (in[1] >= '0' && in[1] <= '7')
{
if (!utf8enc(&out, &rem, dec(in[0]) * 8 + dec(in[1])))
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
in += 2;
}
else
{
if (!utf8enc(&out, &rem, dec(in[0])))
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
in += 1;
}
else
{
if (rem-- < 1)
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
switch (in[0])
{
case 'r': *out = '\r'; break;
case 't': *out = '\t'; break;
case 'v': *out = '\v'; break;
- default: *out = *in; break;
+ default:
+ /* in regexp mode, retain backslash */
+ if (q == '/')
+ {
+ if (rem-- < 1)
+ {
+ s->error_pos = s->off + (in - buf);
+ return -3;
+ }
+
+ *out++ = '\\';
+ }
+
+ *out = *in;
+ break;
}
in++;
else
{
if (rem-- < 1)
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
*out++ = *in++;
}
/*
+ * Parses a regexp literal from the given buffer.
+ *
+ * Returns a negative value on error, otherwise the number of characters
+ * consumed from the given buffer, including any trailing flag characters
+ * ('e', 'i' or 's').
+ *
+ * Error values:
+ * -1 Unterminated regexp
+ * -2 Invalid escape sequence
+ * -3 Regexp literal too long
+ */
+
+static int
+parse_regexp(const char *buf, struct jp_opcode *op, struct jp_state *s)
+{
+	/*
+	 * The pattern itself is read by parse_string(), which treats a leading
+	 * '/' as the quote character and keeps backslashes of unknown escapes.
+	 */
+	int len = parse_string(buf, op, s);
+	const char *p;
+
+	/*
+	 * Anything shorter than two characters (including the negative error
+	 * codes of parse_string()) is handed back to the caller unchanged.
+	 */
+	if (len < 2)
+		return len;
+
+	/*
+	 * Default flag set stored in op->num; presumably handed to regcomp()
+	 * by the consumer of this opcode - verify against the caller.
+	 */
+	op->num = REG_NOSUB | REG_NEWLINE;
+
+	/*
+	 * Consume the flag characters directly following the closing
+	 * delimiter. The scan stops at the first character that is not a
+	 * known flag, or at the terminating NUL byte of the input.
+	 */
+	for (p = buf + len; *p; p++)
+	{
+		switch (*p)
+		{
+		case 'e':
+			/* 'e': extended regular expression syntax */
+			op->num |= REG_EXTENDED;
+			break;
+
+		case 'i':
+			/* 'i': case insensitive matching */
+			op->num |= REG_ICASE;
+			break;
+
+		case 's':
+			/* 's': drop the REG_NEWLINE default */
+			op->num &= ~REG_NEWLINE;
+			break;
+
+		default:
+			/* not a flag, it belongs to the next token */
+			return len;
+		}
+
+		/* a recognized flag is part of the regexp literal */
+		len++;
+	}
+
+	return len;
+}
+
+
+/*
* Parses a label from the given buffer.
*
* Returns a negative value on error, otherwise the amount of consumed
*/
static int
-parse_label(const char *buf, struct jp_opcode *op)
+parse_label(const char *buf, struct jp_opcode *op, struct jp_state *s)
{
char str[128] = { 0 };
char *out = str;
while (*in == '_' || isalnum(*in))
{
if (rem-- < 1)
+ {
+ s->error_pos = s->off + (in - buf);
return -3;
+ }
*out++ = *in++;
}
*/
static int
-parse_number(const char *buf, struct jp_opcode *op)
+parse_number(const char *buf, struct jp_opcode *op, struct jp_state *s)
{
char *e;
int n = strtol(buf, &e, 10);
if (e == buf)
+ {
+ s->error_pos = s->off;
return -2;
+ }
op->num = n;
{ T_LT, "<", 1 },
{ T_GT, ">", 1 },
{ T_EQ, "=", 1 },
+ { T_MATCH, "~", 1 },
{ T_NOT, "!", 1 },
{ T_WILDCARD, "*", 1 },
+ { T_REGEXP, "/", 1, parse_regexp },
{ T_STRING, "'", 1, parse_string },
{ T_STRING, "\"", 1, parse_string },
{ T_LABEL, "_", 1, parse_label },
{ T_NUMBER, "09", 0, parse_number },
};
-const char *tokennames[23] = {
+const char *tokennames[25] = {
[0] = "End of file",
[T_AND] = "'&&'",
[T_OR] = "'||'",
[T_GE] = "'>='",
[T_LT] = "'<'",
[T_LE] = "'<='",
+ [T_MATCH] = "'~'",
[T_NOT] = "'!'",
[T_LABEL] = "Label",
[T_ROOT] = "'$'",
[T_THIS] = "'@'",
[T_DOT] = "'.'",
[T_WILDCARD] = "'*'",
+ [T_REGEXP] = "/.../",
[T_BROPEN] = "'['",
[T_BRCLOSE] = "']'",
[T_BOOL] = "Bool",
static int
-match_token(const char *ptr, struct jp_opcode *op)
+match_token(const char *ptr, struct jp_opcode *op, struct jp_state *s)
{
int i;
const struct token *tok;
op->type = tok->type;
if (tok->parse)
- return tok->parse(ptr, op);
+ return tok->parse(ptr, op, s);
return tok->plen;
}
}
- return -1;
+ s->error_pos = s->off;
+ return -4;
}
struct jp_opcode *
{
struct jp_opcode op = { 0 };
- *mlen = match_token(input, &op);
+ *mlen = match_token(input, &op, s);
- if (*mlen < 0 || op.type == 0)
+ if (*mlen < 0)
+ {
+ s->error_code = *mlen;
return NULL;
+ }
+ else if (op.type == 0)
+ {
+ return NULL;
+ }
return jp_alloc_op(s, op.type, op.num, op.str, NULL);
}