Initial commit of LuCI2
[project/luci2/ui.git] / luci2 / src / io / multipart_parser.c
/* Based on node-formidable by Felix Geisendörfer
 * Igor Afonov - afonov@gmail.com - 2012
 * MIT License - http://www.opensource.org/licenses/mit-license.php
 */
5
#include "multipart_parser.h"

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
11
/*
 * Debug logger for the parser.
 *
 * When DEBUG_MULTIPART is defined, writes a prefixed, printf-style formatted
 * message followed by a newline to stderr.  Otherwise it does nothing.
 *
 * Note: __FILE__ and __LINE__ are expanded inside this helper, so the prefix
 * identifies this function rather than the call site.
 */
static void multipart_log(const char * format, ...)
{
#ifdef DEBUG_MULTIPART
    va_list args;
    va_start(args, format);

    fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
    vfprintf(stderr, format, args);
    fprintf(stderr, "\n");

    /* Every va_start must be matched by va_end before the function returns. */
    va_end(args);
#else
    (void) format;
#endif
}
23
/*
 * Invoke the optional notification callback settings->on_<FOR>(p).
 *
 * Relies on `p` (the parser) and `i` (current input offset) being in scope
 * at the expansion site.  If the callback is set and returns non-zero, the
 * enclosing function returns `i` immediately.
 */
#define NOTIFY_CB(FOR)                                                 \
do {                                                                   \
  if (p->settings->on_##FOR &&                                         \
      p->settings->on_##FOR(p) != 0)                                   \
    return i;                                                          \
} while (0)
32
/*
 * Invoke the optional data callback settings->on_<FOR>(p, ptr, len).
 *
 * Relies on `p` (the parser) and `i` (current input offset) being in scope
 * at the expansion site.  If the callback is set and returns non-zero, the
 * enclosing function returns `i` immediately.
 */
#define EMIT_DATA_CB(FOR, ptr, len)                                    \
do {                                                                   \
  if (p->settings->on_##FOR &&                                         \
      p->settings->on_##FOR(p, ptr, len) != 0)                         \
    return i;                                                          \
} while (0)
41
42
/* Line-terminator byte values the parser compares input against. */
#define LF 0x0A  /* line feed */
#define CR 0x0D  /* carriage return */
45
46 struct multipart_parser {
47   void * data;
48
49   size_t index;
50   size_t boundary_length;
51
52   unsigned char state;
53
54   const multipart_parser_settings* settings;
55
56   char* lookbehind;
57   char multipart_boundary[1];
58 };
59
/* Positions of the parsing state machine, stored in multipart_parser.state. */
enum state {
  s_uninitialized = 1,
  s_start,                      /* reset boundary index, then match boundary */
  s_start_boundary,             /* matching the first boundary + CR LF */
  s_header_field_start,         /* remember where a header name begins */
  s_header_field,               /* inside a header name, up to ':' or CR */
  s_headers_almost_done,        /* CR seen instead of a name; LF expected */
  s_header_value_start,         /* skipping spaces before a header value */
  s_header_value,               /* inside a header value, up to CR */
  s_header_value_almost_done,   /* CR ended the value; LF expected */
  s_part_data_start,            /* headers complete; remember data start */
  s_part_data,                  /* streaming part data until a CR */
  s_part_data_almost_boundary,  /* CR seen in data; LF may start a boundary */
  s_part_data_boundary,         /* matching boundary bytes after CR LF */
  s_part_data_almost_end,       /* boundary matched; '-' or CR expected */
  s_part_data_end,              /* CR after boundary; LF expected */
  s_part_data_final_hyphen,     /* first '-' after boundary; second expected */
  s_end                         /* closing delimiter seen; input is ignored */
};
79
80 multipart_parser* multipart_parser_init
81     (const char *boundary, const multipart_parser_settings* settings) {
82
83   multipart_parser* p = malloc(sizeof(multipart_parser) +
84                                strlen(boundary) +
85                                strlen(boundary) + 9);
86
87   strcpy(p->multipart_boundary, boundary);
88   p->boundary_length = strlen(boundary);
89
90   p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
91
92   p->index = 0;
93   p->state = s_start;
94   p->settings = settings;
95
96   return p;
97 }
98
99 void multipart_parser_free(multipart_parser* p) {
100   free(p);
101 }
102
103 void multipart_parser_set_data(multipart_parser *p, void *data) {
104     p->data = data;
105 }
106
107 void *multipart_parser_get_data(multipart_parser *p) {
108     return p->data;
109 }
110
111 size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
112   size_t i = 0;
113   size_t mark = 0;
114   char c, cl;
115   int is_last = 0;
116
117   while(i < len) {
118     c = buf[i];
119     is_last = (i == (len - 1));
120     switch (p->state) {
121       case s_start:
122         multipart_log("s_start");
123         p->index = 0;
124         p->state = s_start_boundary;
125
126       /* fallthrough */
127       case s_start_boundary:
128         multipart_log("s_start_boundary");
129         if (p->index == p->boundary_length) {
130           if (c != CR) {
131             return i;
132           }
133           p->index++;
134           break;
135         } else if (p->index == (p->boundary_length + 1)) {
136           if (c != LF) {
137             return i;
138           }
139           p->index = 0;
140           NOTIFY_CB(part_data_begin);
141           p->state = s_header_field_start;
142           break;
143         }
144         if (c != p->multipart_boundary[p->index]) {
145           return i;
146         }
147         p->index++;
148         break;
149
150       case s_header_field_start:
151         multipart_log("s_header_field_start");
152         mark = i;
153         p->state = s_header_field;
154
155       /* fallthrough */
156       case s_header_field:
157         multipart_log("s_header_field");
158         if (c == CR) {
159           p->state = s_headers_almost_done;
160           break;
161         }
162
163         if (c == '-') {
164           break;
165         }
166
167         if (c == ':') {
168           EMIT_DATA_CB(header_field, buf + mark, i - mark);
169           p->state = s_header_value_start;
170           break;
171         }
172
173         cl = tolower(c);
174         if (cl < 'a' || cl > 'z') {
175           multipart_log("invalid character in header name");
176           return i;
177         }
178         if (is_last)
179             EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
180         break;
181
182       case s_headers_almost_done:
183         multipart_log("s_headers_almost_done");
184         if (c != LF) {
185           return i;
186         }
187
188         p->state = s_part_data_start;
189         break;
190
191       case s_header_value_start:
192         multipart_log("s_header_value_start");
193         if (c == ' ') {
194           break;
195         }
196
197         mark = i;
198         p->state = s_header_value;
199
200       /* fallthrough */
201       case s_header_value:
202         multipart_log("s_header_value");
203         if (c == CR) {
204           EMIT_DATA_CB(header_value, buf + mark, i - mark);
205           p->state = s_header_value_almost_done;
206         }
207         if (is_last)
208             EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
209         break;
210
211       case s_header_value_almost_done:
212         multipart_log("s_header_value_almost_done");
213         if (c != LF) {
214           return i;
215         }
216         p->state = s_header_field_start;
217         break;
218
219       case s_part_data_start:
220         multipart_log("s_part_data_start");
221         NOTIFY_CB(headers_complete);
222         mark = i;
223         p->state = s_part_data;
224
225       /* fallthrough */
226       case s_part_data:
227         multipart_log("s_part_data");
228         if (c == CR) {
229             EMIT_DATA_CB(part_data, buf + mark, i - mark);
230             mark = i;
231             p->state = s_part_data_almost_boundary;
232             p->lookbehind[0] = CR;
233             break;
234         }
235         if (is_last)
236             EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
237         break;
238
239       case s_part_data_almost_boundary:
240         multipart_log("s_part_data_almost_boundary");
241         if (c == LF) {
242             p->state = s_part_data_boundary;
243             p->lookbehind[1] = LF;
244             p->index = 0;
245             break;
246         }
247         EMIT_DATA_CB(part_data, p->lookbehind, 1);
248         p->state = s_part_data;
249         mark = i --;
250         break;
251
252       case s_part_data_boundary:
253         multipart_log("s_part_data_boundary");
254         if (p->multipart_boundary[p->index] != c) {
255           EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
256           p->state = s_part_data;
257           mark = i --;
258           break;
259         }
260         p->lookbehind[2 + p->index] = c;
261         if ((++ p->index) == p->boundary_length) {
262             NOTIFY_CB(part_data_end);
263             p->state = s_part_data_almost_end;
264         }
265         break;
266
267       case s_part_data_almost_end:
268         multipart_log("s_part_data_almost_end");
269         if (c == '-') {
270             p->state = s_part_data_final_hyphen;
271             break;
272         }
273         if (c == CR) {
274             p->state = s_part_data_end;
275             break;
276         }
277         return i;
278
279       case s_part_data_final_hyphen:
280         multipart_log("s_part_data_final_hyphen");
281         if (c == '-') {
282             NOTIFY_CB(body_end);
283             p->state = s_end;
284             break;
285         }
286         return i;
287
288       case s_part_data_end:
289         multipart_log("s_part_data_end");
290         if (c == LF) {
291             p->state = s_header_field_start;
292             NOTIFY_CB(part_data_begin);
293             break;
294         }
295         return i;
296
297       case s_end:
298         multipart_log("s_end: %02X", (int) c);
299         break;
300
301       default:
302         multipart_log("Multipart parser unrecoverable error");
303         return 0;
304     }
305     ++ i;
306   }
307
308   return len;
309 }