* libs/http: fix blocksize-related mime decoding problems in protocol.lua
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 local ltn12 = require("luci.ltn12")
19
-- Largest message body, in bytes, that is buffered or urldecoded (4 kB)
HTTP_MAX_CONTENT      = 4 * 1024
-- Longest accepted name, in bytes, of an urlencoded parameter
HTTP_URLENC_MAXKEYLEN = 1024
-- Look-behind buffer size, in bytes, above which the throttling sources
-- hand out empty chunks instead of reading more input
TSRC_BLOCKSIZE        = 2048
23
24
-- Decode an urlencoded string.
-- @param str      Value to decode; non-string values are handed back untouched
-- @param no_plus  When set, "+" characters are kept instead of becoming spaces
-- @return         The decoded string (or the unmodified non-string input)
function urldecode( str, no_plus )

	-- Nothing to decode for nil, numbers, tables, ...
	if type(str) ~= "string" then
		return str
	end

	local decoded = str

	if not no_plus then
		decoded = decoded:gsub( "+", " " )
	end

	-- Replace every %XX escape by the byte it denotes; malformed escapes
	-- do not match the pattern and are therefore left as they are
	decoded = decoded:gsub( "%%([a-fA-F0-9][a-fA-F0-9])", function( hex )
		return string.char( tonumber( hex, 16 ) )
	end )

	return decoded
end
43
44
-- Extract and split urlencoded data pairs, separated by either "&" or ";".
-- When the input contains a "?", only the text behind the last "?" is parsed;
-- otherwise the whole input is treated as the query string.
-- @param url  URL or bare query string
-- @param tbl  Optional table to add the parameters to
-- @return     Table of urldecoded values; a key that occurs several times
--             maps to an array of its values, a key without value maps to ""
function urldecode_params( url, tbl )

	local params = tbl or { }

	-- Cut off everything up to the last "?". Unlike a substitution that
	-- demands text on both sides of the "?", this also copes with a leading
	-- "?" and with an empty query ("/path?"), which must not turn the path
	-- into a parameter name.
	local query = url:match( "^.*%?(.*)$" ) or url

	for pair in query:gmatch( "[^&;]+" ) do

		-- Split at the first "="; pairs without a key ("=x") do not match
		local raw_key, raw_val = pair:match( "^([^=]+)=?(.*)$" )
		local key = urldecode( raw_key )
		local val = urldecode( raw_val )

		if type(key) == "string" and key:len() > 0 then
			if type(val) ~= "string" then val = "" end

			local known = params[key]

			if not known then
				params[key] = val
			elseif type(known) ~= "table" then
				-- Second occurrence: promote to a list
				params[key] = { known, val }
			else
				known[#known + 1] = val
			end
		end
	end

	return params
end
77
78
-- Encode given string in urlencoded format.
-- Every byte outside of a-z, A-Z, 0-9 and $ _ - . ! * ' ( ) , is replaced by
-- a lower-case %xx escape. "+" is escaped as well: urldecode() turns a literal
-- "+" into a space, so leaving it unescaped would corrupt values on the way
-- back.
-- @param str  Value to encode; non-string values are handed back untouched
-- @return     The encoded string (or the unmodified non-string input)
function urlencode( str )

	if type(str) ~= "string" then
		return str
	end

	-- Assigning to a single local drops gsub's substitution count
	local encoded = str:gsub(
		"([^a-zA-Z0-9$_%-%.!*'(),])",
		function( chr )
			return string.format( "%%%02x", string.byte( chr ) )
		end
	)

	return encoded
end
98
99
-- Encode given table to urlencoded string.
-- Keys and values are passed through urlencode() and joined as "key=value"
-- pairs separated by "&". A value that is itself a table contributes one pair
-- per array element, mirroring the lists urldecode_params() produces for
-- repeated keys. The order of the pairs follows pairs() and is unspecified.
-- @param tbl  Table of parameters
-- @return     The encoded string, "" for an empty table
function urlencode_params( tbl )
	local parts = { }

	local function add( k, v )
		parts[#parts + 1] = urlencode(k) .. "=" .. urlencode(v)
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				add( k, item )
			end
		else
			add( k, v )
		end
	end

	-- Joining afterwards avoids a leading "&": an empty string is true in
	-- Lua, so testing the accumulator itself cannot detect "first pair"
	return table.concat( parts, "&" )
end
113
114
-- Parameter helper: open a new, empty value slot for key.
-- The first value is stored as a plain string; a second value converts the
-- entry into a list and further values are appended to that list.
local function __initval( tbl, key )
	local current = tbl[key]

	if current == nil then
		tbl[key] = ""
		return
	end

	if type(current) == "string" then
		tbl[key] = { current, "" }
	else
		table.insert( current, "" )
	end
end
125
-- Parameter helper: append chunk to the value currently being collected for
-- key, i.e. to the last list element or to the plain string value.
local function __appendval( tbl, key, chunk )
	local slot = tbl[key]

	if type(slot) ~= "table" then
		tbl[key] = slot .. chunk
		return
	end

	local last = #slot
	slot[last] = slot[last] .. chunk
end
133
-- Parameter helper: run handler over the value currently being collected for
-- key (last list element or plain value) and store its result in place.
-- Does nothing when no handler is given.
local function __finishval( tbl, key, handler )
	if not handler then
		return
	end

	local slot = tbl[key]

	if type(slot) == "table" then
		local last = #slot
		slot[last] = handler( slot[last] )
	else
		tbl[key] = handler( slot )
	end
end
143
144
-- Registry of parser states, keyed by state name. Each state is a function
-- taking the message table and the next chunk; the other blocks of this file
-- wrap them into LTN12 sinks.
local process_states = { }

-- State 'magic': interpret the first line of a HTTP message.
-- Recognises request lines ("METHOD uri HTTP/x.y") and status lines
-- ("HTTP/x.y code message") and records the pieces in msg.
-- @param msg    Message table to fill (type, request_method, request_uri or
--               status_code, status_message, plus http_version and headers)
-- @param chunk  One input line, nil on end of input
-- @param err    Unused
-- @return       true when the line was consumed, followed by the next state
--               once the magic was recognised; nil and an error message else
process_states['magic'] = function( msg, chunk, err )

	-- Follow-up state: every further line goes to the header parser
	local function to_headers( line )
		return process_states['headers']( msg, line )
	end

	if chunk == nil then
		return nil, "Invalid HTTP message magic"
	end

	-- Blank lines in front of the message are skipped
	if #chunk == 0 then
		return true, nil
	end

	-- Request line?
	local method, uri, req_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

	if method then
		msg.type           = "request"
		msg.request_method = method:lower()
		msg.request_uri    = uri
		msg.http_version   = tonumber( req_ver )
		msg.headers        = { }

		return true, to_headers
	end

	-- Status line?
	local res_ver, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

	if code then
		msg.type           = "response"
		msg.status_code    = code
		msg.status_message = message
		msg.http_version   = tonumber( res_ver )
		msg.headers        = { }

		return true, to_headers
	end

	-- Neither of both
	return nil, "Invalid HTTP message magic"
end
201
202
-- State 'headers': parse one "Name: value" header line into msg.headers.
-- @param msg    Message table; msg.headers receives the parsed header
-- @param chunk  One input line, nil on end of input
-- @return       true for an accepted header line, false for the empty line
--               that ends the header section, nil and an error message for
--               malformed lines or premature end of input
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- A header name starts with an upper-case letter and is separated from
	-- its non-empty value by a colon and at least one space
	local name, value = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

	local usable =
		type(name)  == "string" and name:len()  > 0 and
		type(value) == "string" and value:len() > 0

	if usable then
		msg.headers[name] = value
		return true, nil
	end

	if #chunk == 0 then
		-- End of the header section, stop accepting data
		return false, nil
	end

	return nil, "Invalid HTTP header received"
end
230
231
-- State 'mime-init': find the first MIME boundary of a multipart body.
-- Input is collected in msg._mimebuffer until a complete boundary line
-- ("--" .. boundary .. CRLF) can be compared, so a boundary line that is
-- split across chunks is neither dropped nor rejected prematurely.
-- @param msg     Message table; reads msg.mime_boundary, writes msg._mimebuffer
-- @param chunk   Next piece of body data, nil on end of input
-- @param filecb  File callback, handed on to the following states
-- @return        true while more data is needed; true plus the 'mime-headers'
--                state once the boundary was found; nil and an error message
--                otherwise
process_states['mime-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	local marker = "--" .. msg.mime_boundary .. "\r\n"
	local buffer = ( msg._mimebuffer or "" ) .. chunk

	-- Not enough data for a full boundary line yet
	if #buffer < #marker then
		-- The throttling source of mimedecode_message_body() only hands out
		-- empty chunks once the buffer exceeds TSRC_BLOCKSIZE; waiting any
		-- longer for an oversized boundary would never terminate
		if #buffer > TSRC_BLOCKSIZE then
			return nil, "Invalid MIME boundary"
		end

		msg._mimebuffer = buffer
		return true
	end

	if buffer:sub( 1, #marker ) ~= marker then
		return nil, "Invalid MIME boundary"
	end

	-- Store remaining data in buffer
	msg._mimebuffer = buffer:sub( #marker + 1 )

	-- Switch to header processing state
	return true, function( chunk )
		return process_states['mime-headers']( msg, chunk, filecb )
	end
end
258
259
-- State 'mime-headers': read the header section of one MIME part.
-- Complete header lines are moved from the look-behind buffer into
-- msg._mimeheaders; once the terminating empty line was seen the part is
-- classified by its Content-Disposition and msg._mimecallback is installed
-- to receive the part's data.
-- @param msg     Message table; uses msg._mimebuffer, msg._mimeheaders and
--                msg.params, writes msg._mimecallback
-- @param chunk   Next piece of body data, nil on end of input
-- @param filecb  Optional file callback, invoked as
--                filecb( { name, file, headers }, chunk, eof ) for parts that
--                carry a filename
-- @return        true while more data is needed; true plus the 'mime-data'
--                state once the headers are complete; nil and an error
--                message otherwise
process_states['mime-headers'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	chunk = msg._mimebuffer .. chunk

	if not msg._mimeheaders then
		msg._mimeheaders = { }
	end

	local function __storehdr( k, v )
		msg._mimeheaders[k] = v
		return ""
	end

	-- Strip complete header lines off the front of the buffer, one per pass
	local ok, count = 1, 0
	while ok > 0 do
		chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
		count = count + ok
	end

	-- Headers processed, check for empty line
	chunk, ok = chunk:gsub( "^\r\n", "" )

	-- Store remaining buffer contents
	msg._mimebuffer = chunk

	-- Header section not finished yet
	if ok == 0 then
		-- Either we made progress or the buffer is still small: wait for more
		if count > 0 or #chunk < 128 then
			return true, nil
		end

		-- Buffer looks like garbage
		return nil, "Malformed MIME section header"
	end

	-- Detach this part's headers from the message so that the next part
	-- starts with an empty table and cannot inherit e.g. a Content-Type
	local headers = msg._mimeheaders
	msg._mimeheaders = nil

	-- When no Content-Type header is given assume text/plain
	if not headers['Content-Type'] then
		headers['Content-Type'] = 'text/plain'
	end

	local disposition = headers['Content-Disposition']

	if not disposition then
		-- Content-Disposition is required, abort without
		return nil, "Missing Content-Disposition MIME section header"
	end

	if not disposition:match("^form%-data; ") then
		-- Unknown Content-Disposition, abort
		return nil, "Unexpected Content-Disposition MIME section header"
	end

	-- The separator in front of "name" keeps the pattern from matching the
	-- tail of a filename="..." parameter
	local field = disposition:match('[;%s]name="(.-)"')
	local file  = disposition:match('filename="(.+)"$')

	if not field then
		-- Without a name there is no key to store the value under
		return nil, "Missing field name in Content-Disposition MIME section header"
	end

	if file and filecb then
		-- File field with a callback: remember the file name, stream the data
		msg.params[field] = file
		msg._mimecallback = function( chunk, eof )
			filecb( {
				name    = field;
				file    = file;
				headers = headers
			}, chunk, eof )
		end
	else
		-- Treat as form field and collect the data in msg.params
		__initval( msg.params, field )

		msg._mimecallback = function( chunk, eof )
			__appendval( msg.params, field, chunk )
		end
	end

	-- Header was valid, continue with mime-data
	return true, function( chunk )
		return process_states['mime-data']( msg, chunk, filecb )
	end
end
351
352
-- State 'mime-data': stream the content of one MIME part to msg._mimecallback.
-- Data is passed on as soon as it cannot be part of a boundary any more. The
-- tail of the buffer that might be the beginning of a boundary line is held
-- back, so a boundary is found no matter how the input is split into chunks.
-- @param msg     Message table; uses msg.mime_boundary, msg._mimebuffer and
--                msg._mimecallback
-- @param chunk   Next piece of body data, nil on end of input
-- @param filecb  File callback, handed on to the 'mime-headers' state
-- @return        true while inside the part; true plus the 'mime-headers'
--                state after a part boundary; false after the closing
--                boundary; nil and an error message on premature end of input
process_states['mime-data'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._mimebuffer .. chunk
	local delim  = "\r\n--" .. msg.mime_boundary

	-- Boundary in front of another part?
	local spos, epos = buffer:find( delim .. "\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- Store remainder
		msg._mimebuffer = buffer:sub( epos + 1 )

		-- Next state is mime-header processing
		return true, function( chunk )
			return process_states['mime-headers']( msg, chunk, filecb )
		end
	end

	-- Closing boundary?
	spos, epos = buffer:find( delim .. "--\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- We processed the final MIME boundary, cleanup
		msg._mimebuffer   = nil
		msg._mimeheaders  = nil
		msg._mimecallback = nil

		-- We won't accept data anymore
		return false
	end

	-- No complete boundary in the buffer. The longest boundary line is
	-- delim .. "--\r\n", so at most one byte less than that can be an
	-- unfinished boundary at the end of the buffer. The cap keeps the
	-- retained tail at or below TSRC_BLOCKSIZE; above it the throttling
	-- source of mimedecode_message_body() would stop reading new input.
	local keep = math.min( #delim + 3, TSRC_BLOCKSIZE )

	if #buffer > keep then
		-- Flush everything that is certainly content
		msg._mimecallback( buffer:sub( 1, #buffer - keep ), false )
		msg._mimebuffer = buffer:sub( #buffer - keep + 1 )
	else
		-- Too little data to decide, keep collecting
		msg._mimebuffer = buffer
	end

	-- Keep feeding me
	return true
end
412
413
-- State 'urldecode-init': validate the request size and set up the
-- urldecoding buffers.
-- @param msg     Message table; reads msg.env.CONTENT_LENGTH, writes
--                msg.content_length, msg._urldecbuffer and msg._urldeclength
-- @param chunk   First piece of body data, nil on end of input
-- @param filecb  Optional file callback, handed on to the following states
-- @return        true plus the 'urldecode-key' state, or nil and an error
--                message
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	if not msg.env.CONTENT_LENGTH then
		return nil, "Missing Content-Length header"
	end

	msg.content_length = tonumber(msg.env.CONTENT_LENGTH)

	-- tonumber() yields nil for a non-numeric header value; comparing nil
	-- with a number would raise a runtime error instead of failing cleanly
	if not msg.content_length or msg.content_length < 0 then
		return nil, "Invalid Content-Length header"
	end

	if msg.content_length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function(chunk)
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
442
443
-- State 'urldecode-key': read and validate the name of the next parameter.
-- The name ends at the first "=" in the buffered data. Depending on filecb
-- the value is afterwards streamed to the callback or collected in msg.params.
-- @param msg     Message table; uses msg._urldecbuffer, msg._urldeclength,
--                msg.content_length and msg.params, writes msg._urldeccallback
-- @param chunk   Next piece of body data, nil on end of input
-- @param filecb  Optional callback, invoked as filecb( key, chunk, eof )
-- @return        true plus the 'urldecode-value' state, or nil and an error
--                message
process_states['urldecode-key'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk
	local spos, epos = buffer:find("=")

	-- No "=" in the buffered data: no parameter name can be delimited
	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Account for the consumed name and keep the rest for the value state
	msg._urldeclength = msg._urldeclength + epos
	msg._urldecbuffer = buffer:sub( epos + 1, #buffer )

	if filecb then
		-- Stream the value to the callback, identified by its parameter name
		msg._urldeccallback = function( chunk, eof )
			filecb( key, chunk, eof )
		end
	else
		-- Collect the value in msg.params
		__initval( msg.params, key )

		msg._urldeccallback = function( chunk, eof )
			__appendval( msg.params, key, chunk )

			-- FIXME: Use a filter
			-- The value can only be decoded once it is complete, an escape
			-- sequence may be split across chunks
			if eof then
				__finishval( msg.params, key, urldecode )
			end
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
500
501
-- State 'urldecode-value': stream the value of the current parameter to
-- msg._urldeccallback until a "&" or ";" delimiter or the end of the body.
-- @param msg     Message table; uses msg._urldecbuffer, msg._urldeclength,
--                msg.content_length and msg._urldeccallback
-- @param chunk   Next piece of body data, nil on end of input
-- @param filecb  Optional callback, handed on to the 'urldecode-key' state
-- @return        true while inside the value; true plus the 'urldecode-key'
--                state after a delimiter; false once the body is finished;
--                nil and an error message on a Content-Length mismatch
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- Drop the decoder state once the body is fully consumed
	local function cleanup()
		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil
	end

	if chunk == nil then
		-- End of input: the buffer still holds the not yet delivered end of
		-- the value, hand it over together with the EOF flag
		msg._urldeccallback( msg._urldecbuffer or "", true )
		cleanup()

		return false
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk

	-- Nothing buffered and nothing new: treat as end of the body
	if #buffer == 0 then
		-- Compare processed length
		if msg._urldeclength ~= msg.content_length then
			return nil, "Content-Length mismatch"
		end

		-- Finish the value that is still open, otherwise it would never
		-- see its EOF and stay undecoded
		msg._urldeccallback( "", true )
		cleanup()

		-- We won't accept data anymore
		return false
	end

	-- Check for end of value
	local spos, epos = buffer:find("[&;]")

	if spos then
		-- Flush buffer, send eof
		msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
		msg._urldecbuffer = buffer:sub( epos + 1, #buffer )
		msg._urldeclength = msg._urldeclength + epos

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	local buffered = #buffer - #chunk

	if buffered > 0 then
		-- Deliver the previously buffered data, keep the new chunk
		msg._urldeccallback( buffer:sub( 1, buffered ), false )

		msg._urldeclength = msg._urldeclength + buffered
		msg._urldecbuffer = buffer:sub( buffered + 1, #buffer )
	else
		-- Buffer was empty, just store the new data
		msg._urldecbuffer = buffer
	end

	-- Keep feeding me
	return true
end
563
564
-- Creates a header source from a given socket.
-- The source yields one line per call with the trailing CR removed.
-- @param sock  Object providing receive("*l") returning line, error and
--              partial data
-- @return      LTN12 source; on failure it yields nil plus "timeout"
--              (passed through) or a message derived from the partial data
function header_source( sock )

	local function read_line()
		local line, err, part = sock:receive("*l")

		-- Line ok
		if line ~= nil then
			-- Strip trailing CR
			line = line:gsub("\r$","")
			return line, nil
		end

		-- Timeouts are reported as they are
		if err == "timeout" then
			return nil, err
		end

		-- Any other failure: partial data means the line was cut off
		if part then
			return nil, "Line exceeds maximum allowed length"
		end

		return nil, "Unexpected EOF"
	end

	return ltn12.source.simplify( read_line )
end
591
592
-- Decode MIME encoded data.
-- Determines the MIME boundary from msg.env.CONTENT_TYPE (a boundary already
-- present in msg.mime_boundary is used when no content type is set) and pumps
-- the body through the 'mime-*' states.
-- @param source  LTN12 source delivering the message body
-- @param msg     Message table as returned by parse_message_header()
-- @param filecb  Optional file callback for parts carrying a filename
-- @return        true on success, nil and an error message otherwise
function mimedecode_message_body( source, msg, filecb )

	-- Find mime boundary
	if msg and msg.env.CONTENT_TYPE then

		local bound = msg.env.CONTENT_TYPE:match("^multipart/form%-data; boundary=(.+)")

		if bound then
			-- The parameter value may be a quoted string, the boundary lines
			-- in the body never carry the quotes
			msg.mime_boundary = bound:match('^"(.+)"$') or bound
		else
			return nil, "No MIME boundary found or invalid content type given"
		end
	end

	-- Without a boundary the states below would fail with a runtime error
	if not msg or not msg.mime_boundary then
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Create an initial LTN12 sink
	-- The whole MIME parsing process is implemented as fancy sink, sinks
	-- replace themselves depending on the current processing state (init,
	-- header, data). Start with the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['mime-init']( msg, chunk, filecb )
		end
	)

	-- Create a throttling LTN12 source
	-- Frequent state switching in the mime parsing process leads to unwanted
	-- buffer aggregation. This source checks whether there is still data in
	-- the internal read buffer and returns an empty string if enough data is
	-- queued already. Otherwise the original source is asked for the next
	-- chunk of data.
	local tsrc = function()
		if msg._mimebuffer ~= nil and #msg._mimebuffer > TSRC_BLOCKSIZE then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		local ok, err = ltn12.pump.step( tsrc, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof
			return true
		end
	end
end
648
649
-- Decode urlencoded data.
-- Pumps the body through the 'urldecode-*' states, which collect the
-- parameters in msg.params.
-- @param source  LTN12 source delivering the message body
-- @param msg     Message table as returned by parse_message_header()
-- @return        true on success, nil and an error message otherwise
function urldecode_message_body( source, msg )

	-- Entry state of the sink; the states replace each other from here on
	local function first_state( chunk )
		return process_states['urldecode-init']( msg, chunk )
	end

	local sink = ltn12.sink.simplify( first_state )

	-- Throttling source: hands out empty chunks while the look-behind buffer
	-- holds more than TSRC_BLOCKSIZE bytes, reads from the real source else
	local function throttled()
		local pending = msg._urldecbuffer

		if pending == nil or #pending <= TSRC_BLOCKSIZE then
			return source()
		end

		return ""
	end

	-- Pump input data...
	while true do
		local ok, err = ltn12.pump.step( throttled, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof
			return true
		end
	end
end
686
687
-- Parse a http message header.
-- Reads the first line and the header lines from source and derives the
-- request parameters and a CGI-like environment from them.
-- @param source  LTN12 source yielding one line per chunk, e.g. the result
--                of header_source()
-- @return        Message table with type, headers, params and env (plus the
--                request or status fields), or nil and an error message
function parse_message_header( source )

	local msg  = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data until the header states refuse further input
	while true do
		-- Both results are local, nothing leaks into the global environment
		local ok, err = ltn12.pump.step( source, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof: the empty line behind the headers was reached
			break
		end
	end

	-- Both are nil for response messages
	local method = msg.request_method
	local uri    = msg.request_uri

	-- Process get parameters
	if ( method == "get" or method == "post" ) and uri:find( "?", 1, true ) then
		msg.params = urldecode_params( uri )
	else
		msg.params = { }
	end

	-- Populate common environment variables; the request related entries
	-- stay unset for responses instead of indexing a nil value
	msg.env = {
		CONTENT_LENGTH    = msg.headers['Content-Length'];
		CONTENT_TYPE      = msg.headers['Content-Type'];
		REQUEST_METHOD    = method and method:upper();
		REQUEST_URI       = uri;
		-- Path without query string; also drops a bare trailing "?"
		SCRIPT_NAME       = uri and ( uri:gsub( "%?.*$", "" ) );
		SCRIPT_FILENAME   = "";         -- XXX implement me
		SERVER_PROTOCOL   = "HTTP/" .. string.format("%.1f", msg.http_version)
	}

	-- Populate HTTP_* environment variables
	for _, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		local var = 'HTTP_' .. hdr:upper():gsub("%-","_")
		local val = msg.headers[hdr]

		msg.env[var] = val
	end

	return msg
end
755
756
-- Parse a http message body.
-- POST bodies of type multipart/form-data or
-- application/x-www-form-urlencoded are decoded into msg.params. Anything
-- else is fed chunk by chunk to filecb or, without a callback, collected in
-- msg.content (limited to HTTP_MAX_CONTENT bytes).
-- @param source  LTN12 source delivering the message body
-- @param msg     Message table as returned by parse_message_header()
-- @param filecb  Optional callback; used as file callback for multipart
--                data and directly as LTN12 sink for unhandled encodings
-- @return        true on success, nil and an error message otherwise
function parse_message_body( source, msg, filecb )

	local ctype   = msg.env.CONTENT_TYPE
	local is_post = ( msg.env.REQUEST_METHOD == "POST" )

	-- Is it multipart/mime ?
	if is_post and ctype and ctype:match("^multipart/form%-data") then
		return mimedecode_message_body( source, msg, filecb )
	end

	-- Is it application/x-www-form-urlencoded ?
	-- Matched as prefix so that parameters such as "; charset=UTF-8" do not
	-- push the request into the raw handling below
	if is_post and ctype and ctype:match("^application/x%-www%-form%-urlencoded") then
		return urldecode_message_body( source, msg, filecb )
	end

	-- Unhandled encoding
	local sink

	if type(filecb) == "function" then
		-- If we have a file callback then feed it
		sink = filecb
	else
		-- ... else append to .content
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- The pump passes nil at the end of the input, nothing to store
			if chunk == nil then
				return true
			end

			if ( msg.content_length + #chunk ) > HTTP_MAX_CONTENT then
				return nil, "POST data exceeds maximum allowed length"
			end

			msg.content        = msg.content        .. chunk
			msg.content_length = msg.content_length + #chunk

			return true
		end
	end

	-- Pump data until the source is exhausted; a successful step must not
	-- end the loop, otherwise only the first chunk would be read
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof
			return true
		end
	end
end
814
-- Status codes
-- Maps numeric HTTP status codes to their reason phrases.
statusmsg = {
	[200] = "OK",
	[301] = "Moved Permanently",
	[304] = "Not Modified",
	[400] = "Bad Request",
	[403] = "Forbidden",
	[404] = "Not Found",
	[405] = "Method Not Allowed",
	[411] = "Length Required",
	[412] = "Precondition Failed",
	[500] = "Internal Server Error",
	-- The standard reason phrase for 503 is "Service Unavailable"
	[503] = "Service Unavailable",
}