* luci/libs: fix eof handling for urldecode_message_body() in protocol.lua
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("ltn12")
19 require("luci.http.protocol.filter")
20
HTTP_MAX_CONTENT      = 1024*4          -- 4 kB maximum content size
HTTP_URLENC_MAXKEYLEN = 1024            -- maximum allowed size of urlencoded parameter names
23
24
-- Decode an urlencoded string.
-- Plus signs are turned into spaces and every "%XX" hex escape is replaced
-- by the byte it encodes. Anything that is not a string is handed back
-- untouched.
-- Returns the decoded value.
function urldecode( str )

	if type(str) ~= "string" then
		return str
	end

	-- Plus signs first, so that an escaped "%2B" still ends up as a literal "+"
	local spaced = str:gsub( "+", " " )

	local decoded = spaced:gsub(
		"%%([a-fA-F0-9][a-fA-F0-9])",
		function( hex )
			return string.char( tonumber( hex, 16 ) )
		end
	)

	return decoded
end
39
40
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url.
-- If the url contains a "?", only the text behind the last "?" is parsed;
-- otherwise the whole string is treated as the parameter list.
-- Pairs are added to the optional table "tbl" (a new table is used when it
-- is omitted). A key that occurs more than once is stored as an array of
-- its values, a key without value is stored as empty string.
-- Returns a table value with urldecoded values.
function urldecode_params( url, tbl )

	local params = tbl or { }

	-- Keep only what follows the last "?". Taking the (possibly empty) tail
	-- also covers urls that start or end with "?", which must not end up
	-- as a parameter name themselves.
	if url:find( "?", 1, true ) then
		url = url:match( "([^?]*)$" )
	end

	for pair in url:gmatch( "[^&;]+" ) do

		-- find key and value
		local key = urldecode( pair:match("^([^=]+)")     )
		local val = urldecode( pair:match("^[^=]+=(.+)$") )

		-- store
		if type(key) == "string" and key:len() > 0 then
			if type(val) ~= "string" then val = "" end

			if not params[key] then
				-- first occurrence
				params[key] = val
			elseif type(params[key]) ~= "table" then
				-- second occurrence, convert to a list
				params[key] = { params[key], val }
			else
				-- any further occurrence
				table.insert( params[key], val )
			end
		end
	end

	return params
end
73
74
-- Encode given string in urlencoded format.
-- Letters, digits and the characters $ _ - . + ! * ' ( ) , are kept, every
-- other byte is written as "%xx" with lowercase hex digits. Anything that
-- is not a string is handed back untouched.
-- Returns the encoded string.
function urlencode( str )

	if type(str) ~= "string" then
		return str
	end

	local encoded = str:gsub(
		"([^a-zA-Z0-9$_%-%.+!*'(),])",
		function( chr )
			return string.format( "%%%02x", chr:byte() )
		end
	)

	return encoded
end
94
95
-- Encode given table to urlencoded string.
-- Every key/value pair becomes "key=value", pairs are joined with "&".
-- A value that is itself a table (the form urldecode_params() produces for
-- repeated keys) is written as one "key=value" pair per array entry.
-- The order of the pairs follows pairs() and is therefore unspecified.
-- Returns the encoded string, which is empty for an empty table.
function urlencode_params( tbl )
	local parts = { }

	local function add( k, v )
		parts[#parts + 1] = urlencode(k) .. "=" .. urlencode(v)
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				add( k, item )
			end
		else
			add( k, v )
		end
	end

	-- Joining afterwards avoids the leading "&" a naive append would produce
	return table.concat( parts, "&" )
end
109
110
-- Table of our process states.
-- Maps a state name to its handler function. Handlers are called as
-- handler( msg, chunk [, filecb] ) and return
--   true [, next]  - chunk accepted; "next", if given, is the function that
--                    receives the following chunk
--   false          - processing finished, no more data is accepted
--   nil, message   - processing failed
local process_states = { }
113
-- Extract "magic", the first line of a http message.
-- For a request line the message type "request", the lowercased method,
-- the requested uri and the http version are stored in msg; for a status
-- line the message type "response", the status code, the status message
-- and the http version are stored. In both cases msg.headers is set to an
-- empty table and header parsing is selected as next state.
process_states['magic'] = function( msg, chunk )

	-- Nothing to inspect on EOF
	if chunk == nil then
		return nil, "Invalid HTTP message magic"
	end

	-- Every recognized first line is followed by header parsing
	local function to_headers( line )
		return process_states['headers']( msg, line )
	end

	-- Is it a request?
	local verb, target, req_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

	if verb then
		msg.type           = "request"
		msg.request_method = verb:lower()
		msg.request_uri    = target
		msg.http_version   = tonumber( req_ver )
		msg.headers        = { }

		return true, to_headers
	end

	-- Is it a response?
	local res_ver, status, reason = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

	if status then
		msg.type           = "response"
		msg.status_code    = status
		msg.status_message = reason
		msg.http_version   = tonumber( res_ver )
		msg.headers        = { }

		return true, to_headers
	end

	-- Can't handle it
	return nil, "Invalid HTTP message magic"
end
163
164
-- Extract headers from given string.
-- A line of the form "Name: value" is stored in msg.headers under its name
-- exactly as written. An empty line ends the header section, anything else
-- is rejected.
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Look for a valid header format
	local name, value = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

	local have_name  = ( type(name)  == "string" and name:len()  > 0 )
	local have_value = ( type(value) == "string" and value:len() > 0 )

	-- Valid header line, proceed
	if have_name and have_value then
		msg.headers[name] = value
		return true, nil
	end

	-- Empty line, we won't accept data anymore
	if #chunk == 0 then
		return false, nil
	end

	-- Junk data
	return nil, "Invalid HTTP header received"
end
192
193
-- Find first MIME boundary
-- The body has to start with "--" .. msg.mime_boundary .. "\r\n". Input is
-- collected until enough bytes for that comparison are available, so a
-- boundary line that is split over several chunks is still recognized.
-- Whatever follows the boundary line is stored in msg._mimebuffer and the
-- mime-headers state is selected.
process_states['mime-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	local marker = "--" .. msg.mime_boundary .. "\r\n"

	-- Collected in a field of its own: msg._mimebuffer is watched by the
	-- throttling source in mimedecode_message_body() and must only hold
	-- data that comes after the first boundary
	local buffer = ( msg._mimeinitbuffer or "" ) .. chunk

	-- Not enough data to decide yet, keep it and wait for more
	if #buffer < #marker then
		msg._mimeinitbuffer = buffer
		return true
	end

	msg._mimeinitbuffer = nil

	if buffer:sub( 1, #marker ) ~= marker then
		return nil, "Invalid MIME boundary"
	end

	-- Store remaining data in buffer
	msg._mimebuffer = buffer:sub( #marker + 1 )

	-- Switch to header processing state
	return true, function( chunk )
		return process_states['mime-headers']( msg, chunk, filecb )
	end
end
220
221
-- Read MIME part headers
-- Header lines are removed from the front of the buffered data and collected
-- until the empty line that ends the header block is found. Then the
-- Content-Disposition header decides how the data of this part is handled:
-- a part with a filename is streamed to filecb (if one was given), every
-- other part is collected in msg.params under its field name.
-- Each part gets a header table of its own, headers of an earlier part are
-- never visible in a later one.
process_states['mime-headers'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	chunk = msg._mimebuffer .. chunk

	-- Headers of one part may arrive over several calls, so they are
	-- collected in msg until the header block is complete
	if not msg._mimeheaders then
		msg._mimeheaders = { }
	end

	local function __storehdr( k, v )
		msg._mimeheaders[k] = v
		return ""
	end

	-- Read all header lines
	local ok, count = 1, 0
	while ok > 0 do
		chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
		count = count + ok
	end

	-- Headers processed, check for empty line
	chunk, ok = chunk:gsub( "^\r\n", "" )

	-- Store remaining buffer contents
	msg._mimebuffer = chunk

	-- End of headers
	if ok > 0 then

		-- Detach the header table from msg: the next part starts with a
		-- fresh one and the table handed to filecb is not modified later
		local headers = msg._mimeheaders
		msg._mimeheaders = nil

		-- When no Content-Type header is given assume text/plain
		if not headers['Content-Type'] then
			headers['Content-Type'] = 'text/plain'
		end

		-- Content-Disposition is required, abort without
		local disposition = headers['Content-Disposition']
		if not disposition then
			return nil, "Missing Content-Disposition MIME section header"
		end

		-- Check for "form-data" token, abort on unknown Content-Disposition
		if not disposition:match("^form%-data; ") then
			return nil, "Unexpected Content-Disposition MIME section header"
		end

		-- Check for field name, filename
		local field = disposition:match('name="(.-)"')
		local file  = disposition:match('filename="(.+)"$')

		-- Without a name there is no key to store the value under
		if not field then
			return nil, "Missing field name in Content-Disposition MIME section header"
		end

		-- Is a file field and we have a callback
		if file and filecb then
			msg.params[field] = file
			msg._mimecallback = function( chunk, eof )
				filecb( {
					name    = field;
					file    = file;
					headers = headers
				}, chunk, eof )
			end

		-- Treat as form field
		else
			msg.params[field] = ""
			msg._mimecallback = function( chunk, eof )
				msg.params[field] = msg.params[field] .. chunk
			end
		end

		-- Header was valid, continue with mime-data
		return true, function( chunk )
			return process_states['mime-data']( msg, chunk, filecb )
		end

	-- Some headers were parsed or the buffer is still small
	elseif count > 0 or #chunk < 128 then
		-- Keep feeding me with chunks
		return true, nil
	end

	-- Buffer looks like garbage
	return nil, "Malformed MIME section header"
end
312
313
-- Read MIME part data
-- The buffered data is searched for the next boundary. Data in front of a
-- boundary is passed to msg._mimecallback with eof set; a regular boundary
-- leads back to the mime-headers state, the final boundary ends processing.
-- As long as no boundary is found, the previously buffered data is passed
-- on without eof and the current chunk is kept as look-behind buffer.
process_states['mime-data'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local data      = msg._mimebuffer .. chunk
	local delimiter = "\r\n--" .. msg.mime_boundary

	-- Look for MIME boundary
	local from, to = data:find( delimiter .. "\r\n", 1, true )

	if from then
		-- Content data
		msg._mimecallback( data:sub( 1, from - 1 ), true )

		-- Store remainder
		msg._mimebuffer = data:sub( to + 1, #data )

		-- Next state is mime-header processing
		return true, function( chunk )
			return process_states['mime-headers']( msg, chunk, filecb )
		end
	end

	-- Look for the final MIME boundary
	from, to = data:find( delimiter .. "--\r\n", 1, true )

	if from then
		-- Content data
		msg._mimecallback( data:sub( 1, from - 1 ), true )

		-- We processed the final MIME boundary, cleanup
		msg._mimebuffer   = nil
		msg._mimeheaders  = nil
		msg._mimecallback = nil

		-- We won't accept data anymore
		return false
	end

	-- We're somewhere within a data section
	local held = #data - #chunk

	if held > 0 then
		-- Flush the data buffered earlier, keep the current chunk
		msg._mimecallback( data:sub( 1, held ), false )
		msg._mimebuffer = data:sub( held + 1, #data )
	else
		-- Nothing was buffered before, just keep the new data
		msg._mimebuffer = data
	end

	-- Keep feeding me
	return true
end
373
374
-- Init urldecoding stream
-- Validates the Content-Length from msg.env, stores it as number in
-- msg.content_length, takes the first chunk as initial buffer and selects
-- the urldecode-key state.
-- Fails when the Content-Length is missing, is not a non-negative number or
-- is larger than HTTP_MAX_CONTENT.
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	if not msg.env.CONTENT_LENGTH then
		return nil, "Missing Content-Length header"
	end

	-- tonumber() yields nil for junk like "abc"; comparing nil with a
	-- number would raise an error instead of rejecting the request
	local length = tonumber( msg.env.CONTENT_LENGTH )

	if not length or length < 0 then
		return nil, "Invalid Content-Length header"
	end

	msg.content_length = length

	if length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function( chunk )
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
403
404
-- Process urldecoding stream, read and validate parameter key
-- The buffered data is searched for "=". The text in front of it is
-- urldecoded and used as parameter name, msg.params[name] is initialized
-- with an empty string and the urldecode-value state is selected.
-- If a file callback is given the value is passed to it as
-- filecb( { name = <parameter name> }, chunk, eof ), mirroring the table
-- argument used for MIME parts; otherwise the value is collected in
-- msg.params and urldecoded once it is complete.
process_states['urldecode-key'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk
	local spos, epos = buffer:find("=")

	-- No "=" in the buffered data
	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Prepare buffers
	msg.params[key]     = ""
	msg._urldeclength   = msg._urldeclength + epos
	msg._urldecbuffer   = buffer:sub( epos + 1, #buffer )

	-- Use file callback or store values inside msg.params
	if filecb then
		msg._urldeccallback = function( chunk, eof )
			-- The parameter name is "key"; there is no variable "field"
			-- in this state
			filecb( { name = key }, chunk, eof )
		end
	else
		msg._urldeccallback = function( chunk, eof )
			msg.params[key] = msg.params[key] .. chunk

			-- FIXME: Use a filter
			if eof then
				msg.params[key] = urldecode( msg.params[key] )
			end
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
460
461
-- Process urldecoding stream, read parameter value
-- Data up to the next "&" or ";" is passed to msg._urldeccallback with eof
-- set and the urldecode-key state is selected again. Without a separator
-- the previously buffered data is passed on without eof and the current
-- chunk is kept as buffer. An empty buffer ends processing if the number
-- of processed bytes equals msg.content_length.
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- End of input: send EOF to the callback
	if chunk == nil then
		msg._urldeccallback( "", true )
		return false
	end

	-- Combine look-behind buffer with current chunk
	local data = msg._urldecbuffer .. chunk

	-- Nothing left to process
	if #data == 0 then
		-- Compare processed length
		if msg._urldeclength ~= msg.content_length then
			return nil, "Content-Length mismatch"
		end

		-- Cleanup
		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil

		-- We won't accept data anymore
		return false
	end

	-- Check for end of value
	local from, to = data:find("[&;]")

	if from then
		-- Flush buffer, send eof
		msg._urldeccallback( data:sub( 1, from - 1 ), true )
		msg._urldecbuffer = data:sub( to + 1, #data )
		msg._urldeclength = msg._urldeclength + to

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	-- We're somewhere within a value
	local held = #data - #chunk

	if held > 0 then
		-- Flush the data buffered earlier, keep the current chunk
		msg._urldeccallback( data:sub( 1, held ), false )
		msg._urldeclength = msg._urldeclength + held
		msg._urldecbuffer = data:sub( held + 1, #data )
	else
		-- Nothing was buffered before, just keep the new data
		msg._urldecbuffer = data
	end

	-- Keep feeding me
	return true
end
523
524
-- Decode MIME encoded data.
-- The boundary is taken from msg.env.CONTENT_TYPE when that is available,
-- otherwise a boundary already stored in msg.mime_boundary is used.
-- Data is read from the LTN12 source "source" and run through the mime-*
-- process states; see process_states['mime-headers'] for the use of filecb.
-- Returns true when the body was processed, nil plus an error message
-- otherwise.
function mimedecode_message_body( source, msg, filecb )

	-- Find mime boundary
	if msg and msg.env and msg.env.CONTENT_TYPE then

		local bound = msg.env.CONTENT_TYPE:match("^multipart/form%-data; boundary=(.+)")

		if bound then
			msg.mime_boundary = bound
		else
			return nil, "No MIME boundary found or invalid content type given"
		end
	end

	-- Every mime state relies on the boundary, so refuse to start without it
	if not msg or not msg.mime_boundary then
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Create an initial LTN12 sink
	-- The whole MIME parsing process is implemented as fancy sink, sinks replace themselves
	-- depending on current processing state (init, header, data). Return the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['mime-init']( msg, chunk, filecb )
		end
	)

	-- Create a throttling LTN12 source
	-- Frequent state switching in the mime parsing process leads to unwanted buffer aggregation.
	-- This source checks whether there's still data in our internal read buffer and returns an
	-- empty string if there's already enough data in the processing queue. If the internal buffer
	-- runs empty we're calling the original source to get the next chunk of data.
	local tsrc = function()

		-- XXX: we should probably keep the maximum buffer size in sync with
		--      the blocksize of our original source... but doesn't really matter
		if msg._mimebuffer ~= nil and #msg._mimebuffer > 256 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
580
581
-- Decode urlencoded data.
-- Data is read from the LTN12 source "source" and run through the
-- urldecode-* process states, which store the decoded parameters in
-- msg.params. No file callback is handed to the states.
-- Returns true when the body was processed, nil plus an error message
-- otherwise.
function urldecode_message_body( source, msg )

	-- Create an initial LTN12 sink
	-- Return the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['urldecode-init']( msg, chunk )
		end
	)

	-- Create a throttling LTN12 source
	-- As long as there is unprocessed data in the internal buffer an empty
	-- string is returned, so the current state works on the buffer alone;
	-- the original source is only asked once the buffer is empty or unset.
	local tsrc = function()
		if msg._urldecbuffer ~= nil and #msg._urldecbuffer > 0 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
618
619
-- Parse a http message
-- Parses the header and, if that succeeded, the body of the message in
-- "data"; filecb is handed on to parse_message_body().
-- Returns the message table, or nil plus an error message if the header
-- could not be parsed. If only the body failed, the message table is
-- returned together with the error message as second value.
-- NOTE(review): _linereader and HTTP_MAX_READBUF are not defined in the
-- part of this file visible here - confirm they are provided elsewhere.
function parse_message( data, filecb )

	local reader       = _linereader( data, HTTP_MAX_READBUF )
	local message, err = parse_message_header( reader )

	if not message then
		return nil, err
	end

	local ok, body_err = parse_message_body( reader, message, filecb )

	-- Keep handing out the message as before, but don't hide the failure
	if not ok then
		return message, body_err
	end

	return message
end
632
633
-- Parse a http message header
-- Reads the first line and the header lines from the LTN12 source "source".
-- After the empty line that ends the headers, msg.params is filled from the
-- query string of get/post requests and msg.env is populated with CGI-like
-- variables. For response messages the request specific variables
-- (REQUEST_METHOD, REQUEST_URI, SCRIPT_NAME) are left unset.
-- Returns the message table, or nil plus an error message.
function parse_message_header( source )

	local msg = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data...
	while true do

		-- get data
		local ok, err = ltn12.pump.step( source, sink )

		-- error
		if not ok and err then
			return nil, err

		-- end of headers
		elseif not ok then
			break
		end
	end

	-- Both are nil for a response message
	local method = msg.request_method
	local uri    = msg.request_uri

	-- Process get parameters
	if uri and ( method == "get" or method == "post" ) and uri:match("?") then
		msg.params = urldecode_params( uri )
	else
		msg.params = { }
	end

	-- Populate common environment variables
	msg.env = {
		CONTENT_LENGTH    = msg.headers['Content-Length'];
		CONTENT_TYPE      = msg.headers['Content-Type'];
		REQUEST_METHOD    = method and method:upper();
		REQUEST_URI       = uri;
		SCRIPT_NAME       = uri and ( uri:gsub("?.+$","") );
		SCRIPT_FILENAME   = "";         -- XXX implement me

		-- The version is stored as number; format it with one decimal so
		-- that version 1.0 is not written as "HTTP/1"
		SERVER_PROTOCOL   = string.format( "HTTP/%.1f", msg.http_version )
	}

	-- Populate HTTP_* environment variables
	for _, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		local var = 'HTTP_' .. hdr:upper():gsub("%-","_")
		local val = msg.headers[hdr]

		msg.env[var] = val
	end

	return msg
end
701
702
-- Parse a http message body
-- POST bodies of type multipart/form-data and
-- application/x-www-form-urlencoded are handed to the matching decoder.
-- Any other body is passed chunk by chunk to filecb if that is a function,
-- otherwise it is collected in msg.content (msg.content_length holds its
-- size), limited to HTTP_MAX_CONTENT bytes.
-- Returns true on success, nil plus an error message otherwise.
function parse_message_body( source, msg, filecb )

	-- Install an additional filter if we're operating on chunked transfer
	-- coding and client is HTTP/1.1 capable
	if msg.http_version == 1.1 and
	   msg.headers['Transfer-Encoding'] and
	   msg.headers['Transfer-Encoding']:find("chunked")
	then
		source = ltn12.source.chain(
			source, luci.http.protocol.filter.decode_chunked
		)
	end

	local is_post = ( msg.env.REQUEST_METHOD == "POST" )
	local ctype   = msg.env.CONTENT_TYPE

	-- Is it multipart/mime ?
	if is_post and ctype and ctype:match("^multipart/form%-data") then
		return mimedecode_message_body( source, msg, filecb )

	-- Is it application/x-www-form-urlencoded ?
	elseif is_post and ctype and ctype == "application/x-www-form-urlencoded" then
		return urldecode_message_body( source, msg, filecb )
	end

	-- Unhandled encoding
	-- If a file callback is given then feed it chunk by chunk, else
	-- store whole buffer in message.content
	local sink

	-- If we have a file callback then feed it
	if type(filecb) == "function" then
		sink = filecb

	-- ... else append to .content
	else
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- The pump calls the sink with nil once the source is
			-- exhausted, there is nothing to append then
			if chunk == nil then
				return true
			end

			if ( msg.content_length + #chunk ) <= HTTP_MAX_CONTENT then

				msg.content        = msg.content        .. chunk
				msg.content_length = msg.content_length + #chunk

				return true
			else
				return nil, "POST data exceeds maximum allowed length"
			end
		end
	end

	-- Pump data until the source is exhausted or an error occurs
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok and err then
			return nil, err
		elseif not ok then
			return true
		end
	end
end
773
774
-- Push a response to a socket
-- Writes the status line, the response headers and an empty line to the
-- sink function "sinkout"; if "sourceout" is given, its data is pumped to
-- sinkout afterwards. The status text is looked up in statusmsg, "Unknown"
-- is used for codes without entry. The "Connection" header of the response
-- is always set to "close". "sinkerr" is currently not used.
function push_response(request, response, sourceout, sinkout, sinkerr)
	local code   = response.status

	-- Accept the code as number or numeric string and never fail the whole
	-- response just because there is no text for it
	local reason = statusmsg[code] or statusmsg[tonumber(code)] or "Unknown"

	sinkout(request.env.SERVER_PROTOCOL .. " " .. code .. " " .. reason .. "\r\n")

	-- A response without any headers still gets the Connection header
	response.headers = response.headers or { }

	-- FIXME: Add support for keep-alive
	response.headers["Connection"] = "close"

	for k,v in pairs(response.headers) do
		sinkout(k .. ": " .. v .. "\r\n")
	end

	sinkout("\r\n")

	if sourceout then
		ltn12.pump.all(sourceout, sinkout)
	end
end
793
794
-- Status codes
-- Maps numeric HTTP status codes to the text used in the status line.
statusmsg = {
	[200] = "OK",
	[301] = "Moved Permanently",
	[302] = "Found",
	[304] = "Not Modified",
	[400] = "Bad Request",
	[403] = "Forbidden",
	[404] = "Not Found",
	[405] = "Method Not Allowed",
	[411] = "Length Required",
	[413] = "Request Entity Too Large",
	[500] = "Internal Server Error",
	[501] = "Not Implemented",
	[503] = "Service Unavailable",
}