* luci/libs/http: added more sanity checks to mime decoder
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 local ltn12 = require("luci.ltn12")
19
-- Module-wide limits. They are plain (non-local) assignments, so they become
-- fields of the module declared above.
HTTP_MAX_CONTENT      = 1024*4          -- 4 kB maximum content size
HTTP_URLENC_MAXKEYLEN = 1024            -- maximum allowed size of urlencoded parameter names
TSRC_BLOCKSIZE        = 2048            -- target block size for throttling sources (not referenced in the code visible in this file)
23
24
-- Reverse urlencoding on a string.
-- Unless no_plus is truthy every "+" is first turned into a space; afterwards
-- each "%XX" escape (two hex digits) is replaced by the byte it denotes.
-- Anything that is not a string is handed back untouched.
-- Returns the decoded value.
function urldecode( str, no_plus )

        if type(str) ~= "string" then
                return str
        end

        local decoded = str

        if not no_plus then
                decoded = decoded:gsub( "+", " " )
        end

        -- Assigning to a single variable drops gsub's replacement count
        decoded = decoded:gsub(
                "%%([a-fA-F0-9][a-fA-F0-9])",
                function( digits )
                        return string.char( tonumber( digits, 16 ) )
                end
        )

        return decoded
end
43
44
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url.
--
-- If url contains a "?", only the text after the last "?" is treated as the
-- query string; otherwise the whole argument is. A query string that starts
-- right at the first character ("?a=1") or that is empty ("/index?") is
-- handled as well: the former yields key "a", the latter yields no pairs.
--
-- url: string holding either a full url or a bare query string
-- tbl: optional table to store the pairs in (a fresh one is created if omitted)
--
-- Returns the table. Keys and values are urldecoded; a key without "=value"
-- maps to "", and a key seen more than once maps to an array of its values in
-- order of appearance.
function urldecode_params( url, tbl )

        local params = tbl or { }

        -- Greedy ".*" makes the capture start behind the LAST "?"
        local query = url:match( "^.*%?(.*)$" )
        if query then
                url = query
        end

        for pair in url:gmatch( "[^&;]+" ) do

                -- find key and value (either match may be nil, which
                -- urldecode passes through)
                local key = urldecode( pair:match("^([^=]+)")     )
                local val = urldecode( pair:match("^[^=]+=(.+)$") )

                -- store
                if type(key) == "string" and key:len() > 0 then
                        if type(val) ~= "string" then val = "" end

                        local existing = params[key]

                        if not existing then
                                params[key] = val
                        elseif type(existing) ~= "table" then
                                -- second occurrence: promote to an array
                                params[key] = { existing, val }
                        else
                                existing[#existing + 1] = val
                        end
                end
        end

        return params
end
77
78
-- Urlencode a string.
-- Every byte outside the set  a-z A-Z 0-9 $ _ - . + ! * ' ( ) ,  is replaced
-- by "%" followed by its two-digit lowercase hex code.
-- Anything that is not a string is handed back untouched.
-- Returns the encoded string.
function urlencode( str )

        if type(str) ~= "string" then
                return str
        end

        -- A single local receives only gsub's first result (the new string)
        local encoded = str:gsub(
                "([^a-zA-Z0-9$_%-%.%+!*'(),])",
                function( chr )
                        return string.format( "%%%02x", string.byte( chr ) )
                end
        )

        return encoded
end
98
99
-- Encode given table to an urlencoded string.
--
-- tbl: table mapping parameter names to values. A value that is itself a
--      table is treated as an array and emitted as one "key=value" pair per
--      element (the shape urldecode_params produces for repeated keys).
--      Keys and scalar values are converted with tostring() before encoding.
--
-- Returns the pairs joined by "&" without a leading separator, or "" for an
-- empty table. Pair order follows pairs() and is therefore unspecified.
function urlencode_params( tbl )
        local parts = { }

        local function emit( k, v )
                parts[#parts + 1] =
                        urlencode( tostring(k) ) .. "=" ..
                        urlencode( tostring(v) )
        end

        for k, v in pairs(tbl) do
                if type(v) == "table" then
                        for _, item in ipairs(v) do
                                emit( k, item )
                        end
                else
                        emit( k, v )
                end
        end

        -- table.concat only puts the separator BETWEEN entries
        return table.concat( parts, "&" )
end
113
114
-- Parameter helper: open a new, empty value slot for key in tbl.
--   no entry yet      -> tbl[key] becomes ""
--   entry is a string -> tbl[key] becomes { old string, "" }
--   anything else     -> "" is appended to the existing entry via table.insert
local function __initval( tbl, key )
        local current = tbl[key]

        if current == nil then
                tbl[key] = ""
                return
        end

        if type(current) == "string" then
                tbl[key] = { current, "" }
                return
        end

        table.insert( current, "" )
end
125
-- Parameter helper: append chunk to the value currently being built for key.
-- For an array entry the last element grows, otherwise the entry itself.
local function __appendval( tbl, key, chunk )
        local slot = tbl[key]

        if type(slot) ~= "table" then
                tbl[key] = slot .. chunk
                return
        end

        local last = #slot
        slot[last] = slot[last] .. chunk
end
133
-- Parameter helper: run handler over the value most recently built for key
-- and store its result in place. Does nothing when no handler is given.
local function __finishval( tbl, key, handler )
        if not handler then
                return
        end

        local slot = tbl[key]

        if type(slot) == "table" then
                local last = #slot
                slot[last] = handler( slot[last] )
        else
                tbl[key] = handler( slot )
        end
end
143
144
-- Registry of parser states, indexed by state name
local process_states = { }

-- State "magic": consume the first line of an http message.
-- A request line fills msg.type, msg.request_method (lowercased),
-- msg.request_uri and msg.http_version; a status line fills msg.type,
-- msg.status_code (kept as a string), msg.status_message and
-- msg.http_version. In both cases msg.headers is reset to an empty table.
--
-- Returns  true, nil        for an empty line (skipped, state unchanged),
--          true, function   when the line was accepted; the function feeds
--                           subsequent chunks to the "headers" state,
--          nil,  message    when chunk is nil or matches neither form.
process_states['magic'] = function( msg, chunk, err )

        -- Successor state shared by requests and responses
        local function to_headers( chunk )
                return process_states['headers']( msg, chunk )
        end

        -- No data at all: can't handle it
        if chunk == nil then
                return nil, "Invalid HTTP message magic"
        end

        -- ignore empty lines before request
        if #chunk == 0 then
                return true, nil
        end

        -- Request line?
        local method, uri, req_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

        if method then
                msg.type           = "request"
                msg.request_method = method:lower()
                msg.request_uri    = uri
                msg.http_version   = tonumber( req_ver )
                msg.headers        = { }

                return true, to_headers
        end

        -- Status line?
        local res_ver, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

        if code then
                msg.type           = "response"
                msg.status_code    = code
                msg.status_message = message
                msg.http_version   = tonumber( res_ver )
                msg.headers        = { }

                return true, to_headers
        end

        -- Neither of both
        return nil, "Invalid HTTP message magic"
end
201
202
-- State "headers": consume one header line.
-- A line of the form "Name: value" (name starting with an uppercase letter)
-- is stored as msg.headers[Name] = value.
--
-- Returns  true,  nil       after storing a header,
--          false, nil       on an empty line (end of the header section),
--          nil,   message   on a malformed line or when chunk is nil.
process_states['headers'] = function( msg, chunk )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Look for a valid header format
        local hdr, val = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

        local valid =
                type(hdr) == "string" and hdr:len() > 0 and
                type(val) == "string" and val:len() > 0

        if valid then
                msg.headers[hdr] = val
                return true, nil
        end

        -- Empty line, we won't accept data anymore
        if #chunk == 0 then
                return false, nil
        end

        -- Junk data
        return nil, "Invalid HTTP header received"
end
230
231
-- State "urldecode-init": set up decoding of an urlencoded body.
-- Copies msg.env.CONTENT_LENGTH (as a number) to msg.content_length, checks it
-- against HTTP_MAX_CONTENT, seeds the look-behind buffer with the first chunk
-- and resets the processed-bytes counter.
--
-- Returns  true, function   to continue in the "urldecode-key" state,
--          nil,  message    on EOF, missing Content-Length or oversize.
process_states['urldecode-init'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        if not msg.env.CONTENT_LENGTH then
                return nil, "Missing Content-Length header"
        end

        -- Recorded even if the size check below rejects the request
        msg.content_length = tonumber(msg.env.CONTENT_LENGTH)

        if not ( msg.content_length <= HTTP_MAX_CONTENT ) then
                return nil, "Request exceeds maximum allowed size"
        end

        -- Initialize buffer
        msg._urldecbuffer = chunk
        msg._urldeclength = 0

        -- Switch to urldecode-key state
        local function to_key( chunk )
                return process_states['urldecode-key']( msg, chunk, filecb )
        end

        return true, to_key
end
260
261
-- State "urldecode-key": read and validate one parameter name.
-- The look-behind buffer plus the new chunk is searched for "="; everything
-- before it is the (urlencoded) key. On success the bytes up to and including
-- the "=" are counted in msg._urldeclength, the remainder is kept in
-- msg._urldecbuffer and msg._urldeccallback is installed to receive the value:
--   with filecb:    filecb( key, chunk, eof ) is invoked for every piece
--   without filecb: the value is collected in msg.params[key] and urldecoded
--                   once its end is signalled
--
-- Returns  true, function   to continue in the "urldecode-value" state,
--          nil,  message    on EOF, when more than Content-Length bytes were
--                           already consumed, when no "=" is present in the
--                           buffered data, or when the key is longer than
--                           HTTP_URLENC_MAXKEYLEN.
process_states['urldecode-key'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Prevent oversized requests
        if msg._urldeclength >= msg.content_length then
                return nil, "Request exceeds maximum allowed size"
        end

        -- Combine look-behind buffer with current chunk
        local buffer = msg._urldecbuffer .. chunk
        local spos, epos = buffer:find( "=", 1, true )

        -- No key/value separator in the buffered data
        -- (message text kept as is; callers may depend on it)
        if not spos then
                return nil, "POST data exceeds maximum allowed length"
        end

        -- Check that key doesn't exceed maximum allowed key length
        if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
                return nil, "POST parameter exceeds maximum allowed length"
        end

        local key = urldecode( buffer:sub( 1, spos - 1 ) )

        -- Prepare buffers
        msg._urldeclength = msg._urldeclength + epos
        msg._urldecbuffer = buffer:sub( epos + 1, #buffer )

        -- Use file callback or store values inside msg.params
        if filecb then
                msg._urldeccallback = function( chunk, eof )
                        -- hand over the decoded parameter name, not an
                        -- undefined global
                        filecb( key, chunk, eof )
                end
        else
                __initval( msg.params, key )

                msg._urldeccallback = function( chunk, eof )
                        __appendval( msg.params, key, chunk )

                        -- FIXME: Use a filter
                        if eof then
                                __finishval( msg.params, key, urldecode )
                        end
                end
        end

        -- Proceed with urldecode-value state
        return true, function( chunk )
                return process_states['urldecode-value']( msg, chunk, filecb )
        end
end
318
319
-- State "urldecode-value": read one parameter value.
-- Pieces of the value are handed to msg._urldeccallback( data, eof ), which is
-- installed by the "urldecode-key" state. msg._urldecbuffer holds data that was
-- received but not yet passed on, msg._urldeclength counts the bytes that have
-- been passed on or skipped as separators.
--
-- Returns  true, function   when a "&" or ";" ended the value; the function
--                           continues in the "urldecode-key" state,
--          true             when more data is wanted for the current value,
--          false            when input is finished,
--          nil,  message    when the counted length differs from
--                           msg.content_length at the end.
process_states['urldecode-value'] = function( msg, chunk, filecb )

        if chunk ~= nil then

                -- Combine look-behind buffer with current chunk
                local buffer = msg._urldecbuffer .. chunk

                -- Nothing buffered and an empty chunk: treated as end of input
                if #buffer == 0 then
                        -- Compare processed length
                        if msg._urldeclength == msg.content_length then
                                -- Cleanup of the decoder's private fields
                                msg._urldeclength   = nil
                                msg._urldecbuffer   = nil
                                msg._urldeccallback = nil

                                -- We won't accept data anymore
                                return false
                        else
                                return nil, "Content-Length mismatch"
                        end
                end

                -- Check for end of value
                local spos, epos = buffer:find("[&;]")
                if spos then

                        -- Pass everything before the separator on and signal eof;
                        -- the text behind the separator stays buffered and the
                        -- separator itself is included in the length count
                        msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
                        msg._urldecbuffer = buffer:sub( epos + 1, #buffer )
                        msg._urldeclength = msg._urldeclength + epos

                        -- Back to urldecode-key state
                        return true, function( chunk )
                                return process_states['urldecode-key']( msg, chunk, filecb )
                        end
                else
                        -- No separator yet. #buffer > #chunk holds exactly when the
                        -- look-behind buffer was non-empty before this chunk arrived.
                        if #buffer > #chunk then
                                -- Pass the old look-behind part on (value not finished)
                                msg._urldeccallback( buffer:sub( 1, #buffer - #chunk ), false )

                                -- Count what was passed on and keep only the new chunk buffered
                                msg._urldeclength = msg._urldeclength + #buffer - #chunk
                                msg._urldecbuffer = buffer:sub( #buffer - #chunk + 1, #buffer )

                        -- Look-behind buffer was empty: just keep the new chunk
                        else
                                msg._urldecbuffer = buffer
                        end

                        -- Keep feeding me
                        return true
                end
        else
                -- Source is exhausted: signal eof for the current value
                msg._urldeccallback( "", true )
                return false
        end
end
381
382
-- Creates a header source from a given socket.
-- The returned LTN12 source reads one line per call via sock:receive("*l")
-- and strips a trailing CR from it.
--
-- sock: object providing receive( pattern ) which returns either the line, or
--       nil, an error string and optionally the partial data read so far.
--
-- Error mapping when no complete line could be read:
--   error is "timeout"            -> nil, "timeout"
--   non-empty partial data        -> nil, "Line exceeds maximum allowed length"
--   no partial data (nil or "")   -> nil, "Unexpected EOF"
function header_source( sock )
        return ltn12.source.simplify( function()

                local chunk, err, part = sock:receive("*l")

                -- No complete line available
                if chunk == nil then
                        if err == "timeout" then
                                return nil, err
                        end

                        -- An empty string is truthy in Lua, so test the
                        -- content explicitly instead of using "part and ..."
                        local truncated = ( part ~= nil and part ~= "" )

                        return nil, truncated
                                and "Line exceeds maximum allowed length"
                                or  "Unexpected EOF"
                end

                -- Line ok: strip trailing CR (the single assignment drops
                -- gsub's replacement count)
                chunk = chunk:gsub("\r$","")

                return chunk, nil
        end )
end
409
410
-- Decode MIME encoded data (multipart/form-data).
--
-- src:    LTN12 source delivering the message body
-- msg:    message table; msg.env.CONTENT_TYPE must carry the boundary,
--         msg.env.CONTENT_LENGTH (optional) caps the accepted body size,
--         msg.params receives the decoded fields
-- filecb: optional function( headers, data, eof ); when given, sections whose
--         Content-Disposition has a filename are passed to it instead of
--         being collected, and msg.params[name] is set to the file name
--
-- Returns nil, "Invalid Content-Type found" when no boundary can be
-- extracted, otherwise whatever luci.ltn12.pump.all() returns for the
-- internal sink. The sink reports malformed input with nil plus a message
-- instead of raising a Lua error.
function mimedecode_message_body( src, msg, filecb )

        if msg and msg.env.CONTENT_TYPE then
                msg.mime_boundary = msg.env.CONTENT_TYPE:match("^multipart/form%-data; boundary=(.+)$")
        end

        if not msg.mime_boundary then
                return nil, "Invalid Content-Type found"
        end


        -- Strip all complete "Name: value\r\n" lines from the start of chunk
        -- into field.headers. If the blank line ending the header section is
        -- found it is stripped too and field.name / field.file are derived
        -- from Content-Disposition.
        -- Returns the unconsumed rest and whether the header section ended.
        local function parse_headers( chunk, field )

                local stat
                repeat
                        chunk, stat = chunk:gsub(
                                "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n",
                                function(k,v)
                                        field.headers[k] = v
                                        return ""
                                end
                        )
                until stat == 0

                chunk, stat = chunk:gsub("^\r\n","")

                -- End of headers
                if stat > 0 then
                        if field.headers["Content-Disposition"] then
                                if field.headers["Content-Disposition"]:match("^form%-data; ") then
                                        field.name = field.headers["Content-Disposition"]:match('name="(.-)"')
                                        field.file = field.headers["Content-Disposition"]:match('filename="(.+)"$')
                                end
                        end

                        if not field.headers["Content-Type"] then
                                field.headers["Content-Type"] = "text/plain"
                        end

                        return chunk, true
                end

                return chunk, false
        end


        local tlen   = 0        -- bytes received so far
        local inhdr  = false    -- header section of current field incomplete
        local field  = nil      -- section currently being processed
        local store  = nil      -- receiver for the current section's data
        local lchunk = nil      -- look-behind data not yet passed on

        -- Tolerate a Content-Length that is still a string
        local clen   = tonumber( msg.env.CONTENT_LENGTH )

        -- Install the data receiver for the current field. Must run whenever
        -- a header section is completed, no matter in which chunk.
        -- Returns true, or nil plus a message if the field has no name.
        local function open_field()
                local name = field.name

                if not name then
                        return nil, "Invalid Content-Disposition header"
                end

                if field.file and filecb then
                        msg.params[name] = field.file
                        store = filecb
                else
                        __initval( msg.params, name )

                        -- bind the name now so the closure cannot pick up
                        -- a later field
                        store = function( hdr, buf, eof )
                                __appendval( msg.params, name, buf )
                        end
                end

                return true
        end

        local function snk( chunk )

                -- kept local; these must not leak into the module table
                local eof, ok, err

                tlen = tlen + ( chunk and #chunk or 0 )

                if clen and tlen > clen then
                        return nil, "Message body size exceeds Content-Length"
                end

                if chunk and not lchunk then
                        -- first data: prepend CRLF so the initial boundary
                        -- matches the same search string as all later ones
                        lchunk = "\r\n" .. chunk

                elseif lchunk then
                        local data = lchunk .. ( chunk or "" )
                        local spos, epos, found

                        repeat
                                spos, epos = data:find( "\r\n--" .. msg.mime_boundary .. "\r\n", 1, true )

                                if not spos then
                                        spos, epos = data:find( "\r\n--" .. msg.mime_boundary .. "--\r\n", 1, true )
                                end


                                if spos then
                                        local predata = data:sub( 1, spos - 1 )

                                        if inhdr then
                                                -- headers of the section that ends
                                                -- here were still pending
                                                predata, eof = parse_headers( predata, field )

                                                if not eof then
                                                        return nil, "Invalid MIME section header"
                                                end

                                                ok, err = open_field()
                                                if not ok then
                                                        return nil, err
                                                end
                                        end

                                        if store then
                                                store( field.headers, predata, true )
                                        end


                                        field = { headers = { } }
                                        found = found or true

                                        data, eof = parse_headers( data:sub( epos + 1, #data ), field )
                                        inhdr = not eof

                                        if eof then
                                                ok, err = open_field()
                                                if not ok then
                                                        return nil, err
                                                end
                                        end
                                end
                        until not spos


                        if found then
                                -- keep the last 78 bytes as look-behind, pass
                                -- the rest on
                                if #data > 78 then
                                        lchunk = data:sub( #data - 78 + 1, #data )
                                        data   = data:sub( 1, #data - 78 )

                                        if store and field and field.name then
                                                store( field.headers, data )
                                        else
                                                return nil, "Invalid MIME section header"
                                        end
                                else
                                        lchunk, data = data, nil
                                end
                        else
                                if inhdr then
                                        lchunk, eof = parse_headers( data, field )
                                        inhdr = not eof

                                        if eof then
                                                ok, err = open_field()
                                                if not ok then
                                                        return nil, err
                                                end
                                        end
                                elseif store and field then
                                        store( field.headers, lchunk )
                                        lchunk, chunk = chunk, nil
                                else
                                        -- data arrived before any boundary
                                        return nil, "Invalid MIME section header"
                                end
                        end
                end

                return true
        end

        return luci.ltn12.pump.all( src, snk )
end
558
559
-- Decode urlencoded data.
-- Pumps source through the urldecode state machine, starting with the
-- "urldecode-init" state. Returns true once the pump signals the end without
-- an error, or nil plus the error message otherwise.
function urldecode_message_body( source, msg )

        -- Initial state of the LTN12 sink
        local function first_state( chunk )
                return process_states['urldecode-init']( msg, chunk )
        end

        local sink = ltn12.sink.simplify( first_state )

        -- Throttling LTN12 source: as long as the decoder still holds
        -- buffered data it is fed empty chunks instead of fresh input
        local function tsrc()
                local pending = msg._urldecbuffer

                if pending == nil or #pending == 0 then
                        return source()
                end

                return ""
        end

        -- Pump input data...
        repeat
                local ok, err = ltn12.pump.step( tsrc, sink )

                if not ok then
                        -- failed step
                        if err then
                                return nil, err
                        end

                        -- eof
                        return true
                end
        until false
end
596
597
-- Parse a http message header.
-- Pumps source line by line through the "magic" and "headers" states.
--
-- Returns the message table on success, or nil plus an error message.
-- Besides the fields set by the states, the table carries
--   params  query parameters of the request uri (empty table if there are
--           none or if the message is a response)
--   env     CGI-like variables; REQUEST_METHOD, REQUEST_URI and SCRIPT_NAME
--           are nil for responses, HTTP_* entries are nil for absent headers
function parse_message_header( source )

        -- err is declared here so it does not end up in the module table
        local ok, err = true, nil
        local msg     = { }

        local sink = ltn12.sink.simplify(
                function( chunk )
                        return process_states['magic']( msg, chunk )
                end
        )

        -- Pump input data...
        while ok do

                -- get data
                ok, err = ltn12.pump.step( source, sink )

                -- error
                if not ok and err then
                        return nil, err

                -- eof
                elseif not ok then

                        -- Both are nil when the message is a response
                        local method = msg.request_method
                        local uri    = msg.request_uri

                        -- Process get parameters; "%?." requires at least one
                        -- character behind the "?", i.e. a non-empty query
                        if uri and ( method == "get" or method == "post" ) and
                           uri:match("%?.")
                        then
                                msg.params = urldecode_params( uri )
                        else
                                msg.params = { }
                        end

                        -- Populate common environment variables
                        msg.env = {
                                CONTENT_LENGTH    = tonumber(msg.headers['Content-Length']);
                                CONTENT_TYPE      = msg.headers['Content-Type'];
                                REQUEST_METHOD    = method and method:upper();
                                REQUEST_URI       = uri;
                                -- uri without the query part; the parentheses
                                -- keep only gsub's first result
                                SCRIPT_NAME       = uri and ( uri:gsub("%?.*$","") );
                                SCRIPT_FILENAME   = "";         -- XXX implement me
                                SERVER_PROTOCOL   = "HTTP/" .. string.format("%.1f", msg.http_version)
                        }

                        -- Populate HTTP_* environment variables
                        for _, hdr in ipairs( {
                                'Accept',
                                'Accept-Charset',
                                'Accept-Encoding',
                                'Accept-Language',
                                'Connection',
                                'Cookie',
                                'Host',
                                'Referer',
                                'User-Agent',
                        } ) do
                                local var = 'HTTP_' .. hdr:upper():gsub("%-","_")
                                local val = msg.headers[hdr]

                                msg.env[var] = val
                        end
                end
        end

        return msg
end
665
666
-- Parse a http message body.
--
-- source: LTN12 source delivering the body
-- msg:    message table as returned by parse_message_header()
-- filecb: optional callback, see below
--
-- Dispatch:
--   POST with multipart/form-data            -> mimedecode_message_body()
--   POST with application/x-www-form-urlencoded (media type parameters such
--   as "; charset=..." are ignored)          -> urldecode_message_body()
--   anything else: if filecb is a function it is used as the LTN12 sink and
--   fed chunk by chunk; otherwise the body is collected in msg.content
--   (msg.content_length holds its size) up to HTTP_MAX_CONTENT bytes.
--
-- Returns true when the whole body was processed, or nil plus an error
-- message.
function parse_message_body( source, msg, filecb )

        local is_post = ( msg.env.REQUEST_METHOD == "POST" )
        local ctype   = msg.env.CONTENT_TYPE

        -- Bare media type without parameters, e.g. "text/plain"
        local mtype = is_post and ctype and ctype:match( "^%s*([^;%s]+)" )

        -- Is it multipart/mime ?
        if is_post and ctype and ctype:match("^multipart/form%-data") then

                return mimedecode_message_body( source, msg, filecb )

        -- Is it application/x-www-form-urlencoded ?
        elseif mtype == "application/x-www-form-urlencoded" then

                return urldecode_message_body( source, msg, filecb )

        -- Unhandled encoding
        else

                local sink

                -- If we have a file callback then feed it
                if type(filecb) == "function" then
                        sink = filecb

                -- ... else append to .content
                else
                        msg.content = ""
                        msg.content_length = 0

                        sink = function( chunk )
                                -- the pump passes nil to signal the end of input
                                if chunk == nil then
                                        return true
                                end

                                if ( msg.content_length + #chunk ) <= HTTP_MAX_CONTENT then

                                        msg.content        = msg.content        .. chunk
                                        msg.content_length = msg.content_length + #chunk

                                        return true
                                else
                                        return nil, "POST data exceeds maximum allowed length"
                                end
                        end
                end

                -- Pump data until the step fails: with an error message that is
                -- a failure, without one it is the regular end of input. A
                -- successful step returns no error either, so it must be told
                -- apart by ok, not by err.
                while true do
                        local ok, err = ltn12.pump.step( source, sink )

                        if not ok and err then
                                return nil, err
                        elseif not ok then
                                return true
                        end
                end
        end
end
724
-- Status codes: maps numeric http status codes to their reason phrases
statusmsg = {
        [200] = "OK",
        [301] = "Moved Permanently",
        [304] = "Not Modified",
        [400] = "Bad Request",
        [403] = "Forbidden",
        [404] = "Not Found",
        [405] = "Method Not Allowed",
        [411] = "Length Required",
        [412] = "Precondition Failed",
        [500] = "Internal Server Error",
        [503] = "Service Unavailable",   -- standard phrase for 503
}