lexer, parser, matcher: extend grammar to allow comma separated keys/indexes as more...
[project/jsonpath.git] / lexer.l
1 %{
2 /*
3  * Copyright (C) 2013 Jo-Philipp Wich <jow@openwrt.org>
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>

#include "parser.h"
21
/*
 * Scanner entry point. YY_DECL replaces the signature flex would otherwise
 * give to the generated yylex(), so that the scanner takes the parser state
 * as its only argument. The prototype is spelled through the same macro so
 * the declaration and the generated definition cannot drift apart.
 */
#define YY_DECL int yylex(struct jp_state *s)

YY_DECL;
25
26 static void
27 str_put(struct jp_state *s, char c)
28 {
29         if ((s->str_ptr - s->str_buf + 1) < sizeof(s->str_buf))
30                 *s->str_ptr++ = c;
31 }
32
33 static void
34 str_decode(struct jp_state *s, const char *input, int base)
35 {
36         int code;
37         char *end;
38
39         code = strtoul(input, &end, base);
40
41         if (end == input || *end)
42                 return;
43
44         if (code > 0 && code <= 0x7F)
45         {
46                 str_put(s, code);
47         }
48         else if (code > 0 && code <= 0x7FF)
49         {
50                 str_put(s, ((code >>  6) & 0x1F) | 0xC0);
51                 str_put(s, ( code        & 0x3F) | 0x80);
52         }
53         else if (code > 0 && code <= 0xFFFF)
54         {
55                 str_put(s, ((code >> 12) & 0x0F) | 0xE0);
56                 str_put(s, ((code >>  6) & 0x3F) | 0x80);
57                 str_put(s, ( code        & 0x3F) | 0x80);
58         }
59         else if (code > 0 && code <= 0x10FFFF)
60         {
61                 str_put(s, ((code >> 18) & 0x07) | 0xF0);
62                 str_put(s, ((code >> 12) & 0x3F) | 0x80);
63                 str_put(s, ((code >>  6) & 0x3F) | 0x80);
64                 str_put(s, ( code        & 0x3F) | 0x80);
65         }
66 }
67
68 %}
69
%option outfile="lexer.c" header-file="lexer.h"
%option noyywrap nounput noinput

/* path structure */
DOT             "."
LABEL           [a-zA-Z_][a-zA-Z0-9_]*

BROPEN          "["
BRCLOSE         "]"
POPEN           "("
PCLOSE          ")"
COMMA           ","

ROOT            "$"
THIS            "@"

/* comparison and logical operators */
LT              "<"
LE              "<="
GT              ">"
GE              ">="
NE              "!="
EQ              "="
NOT             "!"
AND             "&&"
OR              "||"

/* literal values */
NUMBER          -?[0-9]+
WILDCARD        "*"
BOOL            (true|false)

/*
 * One or more blanks. The pattern has to consume at least one character:
 * a rule that can match the empty string never advances the input when it
 * is selected. Non-empty matches are the same as with the former "*" form.
 */
WS              [ \t\n]+

/* exclusive start condition that is active inside a quoted string */
%x              STRING
102
103 %%
104
["'] {
        /*
         * Opening quote: start from an all-zero string buffer, remember
         * which quote character was used and switch to the STRING start
         * condition until the matching quote is seen.
         */
        memset(s->str_buf, 0, sizeof(s->str_buf));
        s->str_quote = yytext[0];
        s->str_ptr = s->str_buf;
        BEGIN(STRING);
}
111
<STRING>{
        ["'] {
                /*
                 * Only the quote character that opened the string ends it
                 * and yields the collected text as a T_STRING token. The
                 * other quote character is ordinary string content.
                 */
                if (*yytext == s->str_quote)
                {
                        BEGIN(INITIAL);
                        yylval.op = jp_alloc_op(T_STRING, 0, s->str_buf);
                        return T_STRING;
                }

                str_put(s, *yytext);
        }

        \\([0-3][0-7]{1,2}|[0-7]{0,2}) {
                /* octal escape: decode the digits after the backslash */
                str_decode(s, yytext + 1, 8);
        }
        \\x[A-Fa-f0-9]{2} {
                /* two digit hex escape: skip the backslash and the x */
                str_decode(s, yytext + 2, 16);
        }
        \\u[A-Fa-f0-9]{4} {
                /* four digit hex escape: skip the backslash and the u */
                str_decode(s, yytext + 2, 16);
        }
        \\a                             { str_put(s, '\a'); }
        \\b                             { str_put(s, '\b'); }
        \\e                             { str_put(s, '\e'); }
        \\f                             { str_put(s, '\f'); }
        \\n                             { str_put(s, '\n'); }
        \\r                             { str_put(s, '\r'); }
        \\t                             { str_put(s, '\t'); }
        \\v                             { str_put(s, '\v'); }
        \\. {
                /*
                 * Any other escaped character stands for itself, for
                 * instance an escaped quote or an escaped backslash.
                 * yytext[0] is the backslash, so the character to keep
                 * is yytext[1].
                 */
                str_put(s, yytext[1]);
        }
        [^\\"']+ {
                /*
                 * Plain run of text: copy it up to the terminating NUL.
                 * A local cursor is used so that yytext itself is not
                 * modified by the action.
                 */
                const char *p;

                for (p = yytext; *p; p++)
                        str_put(s, *p);
        }
}
138
{BOOL} {
        /* BOOL matches only true or false, the first letter tells which */
        yylval.op = jp_alloc_op(T_BOOL, (*yytext == 't'), NULL);
        return T_BOOL;
}

{NUMBER} {
        /*
         * atoi() has undefined behaviour when the digits do not fit into
         * an int. strtol() saturates at the limits of long instead, and
         * the value is then clamped to the int range before it is stored.
         */
        long num = strtol(yytext, NULL, 10);

        if (num > INT_MAX)
                num = INT_MAX;
        else if (num < INT_MIN)
                num = INT_MIN;

        yylval.op = jp_alloc_op(T_NUMBER, (int)num, NULL);
        return T_NUMBER;
}

{LABEL} {
        /* the matched identifier text is passed on as the label string */
        yylval.op = jp_alloc_op(T_LABEL, 0, yytext);
        return T_LABEL;
}

{WILDCARD} {
        /* the wildcard carries neither a number nor a string */
        yylval.op = jp_alloc_op(T_WILDCARD, 0, NULL);
        return T_WILDCARD;
}
158
{DOT}           return T_DOT;
{BROPEN}        return T_BROPEN;
{BRCLOSE}       return T_BRCLOSE;
{POPEN}         return T_POPEN;
{PCLOSE}        return T_PCLOSE;
{COMMA}         return T_UNION; /* comma separates alternative keys or indexes */

{ROOT}          return T_ROOT;
{THIS}          return T_THIS;

{LT}            return T_LT;
{LE}            return T_LE;
{GT}            return T_GT;
{GE}            return T_GE;
{EQ}            return T_EQ;
{NE}            return T_NE;
{NOT}           return T_NOT;
{AND}           return T_AND;
{OR}            return T_OR;

{WS}            { /* whitespace between tokens is discarded */ }
180
181 %%