* luci/libs/http: replaced mime decoder in http.protocol
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 local ltn12 = require("luci.ltn12")
19
-- Module-wide limits. HTTP_MAX_CONTENT and HTTP_URLENC_MAXKEYLEN are checked by
-- the body decoders further down in this file.
-- NOTE(review): TSRC_BLOCKSIZE is not referenced in the visible part of this
-- file - confirm whether it is still needed.
HTTP_MAX_CONTENT      = 1024*4          -- 4 kB maximum content size
HTTP_URLENC_MAXKEYLEN = 1024            -- maximum allowed size of urlencoded parameter names
TSRC_BLOCKSIZE        = 2048            -- target block size for throttling sources
23
24
-- Decode an urlencoded string.
-- Each "%XX" hex escape is replaced by the byte it denotes. Unless no_plus is
-- set, every "+" is turned into a space before the escapes are resolved.
-- Values that are not strings are handed back untouched.
-- Returns the decoded value.
function urldecode( str, no_plus )

	if type(str) ~= "string" then
		return str
	end

	local decoded = str

	if not no_plus then
		decoded = decoded:gsub( "+", " " )
	end

	-- Only well-formed two digit escapes are touched, anything else stays as is
	decoded = decoded:gsub(
		"%%([a-fA-F0-9][a-fA-F0-9])",
		function( hex )
			return string.char( tonumber( hex, 16 ) )
		end
	)

	return decoded
end
43
44
-- Extract and split urlencoded data pairs, separated by either "&" or ";".
-- url may be a complete URL (only the part after the last "?" is parsed then)
-- or a bare query string without any "?".
-- tbl is an optional table to store the values in; a new one is created
-- when it is omitted.
-- A key that occurs once maps to a string, a key that occurs repeatedly maps
-- to a list of strings in order of appearance. Keys without a value map to "".
-- Returns a table value with urldecoded values.
function urldecode_params( url, tbl )

	local params = tbl or { }

	-- Reduce a full URL to its query string. The capture may be empty
	-- ("/foo?"), in which case there is simply nothing to parse.
	if url:find( "?", 1, true ) then
		url = url:match( "%?([^?]*)$" )
	end

	for pair in url:gmatch( "[^&;]+" ) do

		-- find key and value
		local key = urldecode( pair:match("^([^=]+)")     )
		local val = urldecode( pair:match("^[^=]+=(.+)$") )

		-- store
		if type(key) == "string" and key:len() > 0 then
			if type(val) ~= "string" then val = "" end

			local current = params[key]

			if not current then
				params[key] = val
			elseif type(current) ~= "table" then
				params[key] = { current, val }
			else
				table.insert( current, val )
			end
		end
	end

	return params
end
77
78
-- Encode given string in urlencoded format.
-- Every byte except letters, digits and $ _ - . ! * ' ( ) , is replaced by a
-- lowercase "%xx" escape. "+" is escaped as well: urldecode() reads a literal
-- "+" as a space, so leaving it untouched would corrupt the value on the way
-- back.
-- Values that are not strings are handed back untouched.
-- Returns the encoded string.
function urlencode( str )

	local function __chrenc( chr )
		return string.format(
			"%%%02x", string.byte( chr )
		)
	end

	if type(str) == "string" then
		str = str:gsub(
			"([^a-zA-Z0-9$_%-%.!*'(),])",
			__chrenc
		)
	end

	return str
end
98
99
-- Encode given table to urlencoded string.
-- Every key/value pair becomes "key=value", pairs are joined with "&" (no
-- leading or trailing separator). A value that is a list, as produced by
-- urldecode_params() for repeated keys, emits one pair per list item.
-- The order of the pairs follows pairs() and is therefore unspecified.
-- Returns the encoded string, "" for an empty table.
function urlencode_params( tbl )
	local parts = { }

	for k, v in pairs(tbl) do
		local key = urlencode(k)

		if type(v) == "table" then
			for _, item in ipairs(v) do
				parts[#parts + 1] = key .. "=" .. urlencode(item)
			end
		else
			parts[#parts + 1] = key .. "=" .. urlencode(v)
		end
	end

	return table.concat( parts, "&" )
end
113
114
-- Parameter helper: open a fresh, empty value for key in tbl.
-- The first occurrence of a key stores "", the second one turns the existing
-- string into a two element list, every further one appends "" to that list.
local function __initval( tbl, key )
	local current = tbl[key]

	if current == nil then
		tbl[key] = ""
		return
	end

	if type(current) == "string" then
		tbl[key] = { current, "" }
		return
	end

	table.insert( current, "" )
end
125
-- Parameter helper: append chunk to the value of key that was opened last.
-- For list values this is the final list element, otherwise the string itself.
local function __appendval( tbl, key, chunk )
	local slot = tbl[key]

	if type(slot) ~= "table" then
		tbl[key] = slot .. chunk
		return
	end

	local last = #slot
	slot[last] = slot[last] .. chunk
end
133
-- Parameter helper: run handler over the value of key that was opened last
-- and store its result in place. Without a handler nothing happens.
local function __finishval( tbl, key, handler )
	if not handler then
		return
	end

	local slot = tbl[key]

	if type(slot) == "table" then
		local last = #slot
		slot[last] = handler( slot[last] )
	else
		tbl[key] = handler( slot )
	end
end
143
144
-- Table of our process states
local process_states = { }

-- Extract "magic", the first line of a http message.
-- A request line fills msg.type, msg.request_method (lowercased),
-- msg.request_uri and msg.http_version; a status line fills msg.type,
-- msg.status_code (kept as string), msg.status_message and msg.http_version.
-- In both cases msg.headers is reset to an empty table.
-- Returns true plus the follow-up state function on success, true alone for
-- an empty line and nil plus an error message for anything else.
process_states['magic'] = function( msg, chunk, err )

	if chunk == nil then
		return nil, "Invalid HTTP message magic"
	end

	-- ignore empty lines before request
	if #chunk == 0 then
		return true, nil
	end

	-- Both message kinds continue with header parsing
	local function parse_headers_next( line )
		return process_states['headers']( msg, line )
	end

	-- Request line: METHOD URI HTTP/x.y
	local method, uri, req_version = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

	if method then
		msg.type           = "request"
		msg.request_method = method:lower()
		msg.request_uri    = uri
		msg.http_version   = tonumber( req_version )
		msg.headers        = { }

		return true, parse_headers_next
	end

	-- Status line: HTTP/x.y CODE MESSAGE
	local res_version, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

	if code then
		msg.type           = "response"
		msg.status_code    = code
		msg.status_message = message
		msg.http_version   = tonumber( res_version )
		msg.headers        = { }

		return true, parse_headers_next
	end

	-- Can't handle it
	return nil, "Invalid HTTP message magic"
end
201
202
-- Extract headers from given string.
-- A line of the form "Name: value" is stored in msg.headers[Name].
-- Returns true for a stored header (more lines are expected), false for the
-- empty line that terminates the header section and nil plus an error
-- message for malformed lines or a missing chunk.
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Look for a valid header format
	local name, value = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

	local is_header =
		type(name)  == "string" and name:len()  > 0 and
		type(value) == "string" and value:len() > 0

	if is_header then
		msg.headers[name] = value

		-- Valid header line, proceed
		return true, nil
	end

	if #chunk == 0 then
		-- Empty line, we won't accept data anymore
		return false, nil
	end

	-- Junk data
	return nil, "Invalid HTTP header received"
end
230
231
-- Init urldecoding stream
-- Validates msg.env.CONTENT_LENGTH, stores it as number in
-- msg.content_length and seeds the decoder buffer with the first chunk.
-- Returns true plus the "urldecode-key" state on success, nil plus an error
-- message if the chunk is missing or the Content-Length is absent, not a
-- non-negative number or larger than HTTP_MAX_CONTENT.
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	if not msg.env.CONTENT_LENGTH then
		return nil, "Missing Content-Length header"
	end

	msg.content_length = tonumber(msg.env.CONTENT_LENGTH)

	-- tonumber() yields nil for non-numeric header values; comparing nil
	-- with a number would raise a runtime error instead of failing cleanly
	if not msg.content_length or msg.content_length < 0 then
		return nil, "Invalid Content-Length header"
	end

	if msg.content_length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function(chunk)
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
260
261
-- Process urldecoding stream, read and validate parameter key
-- Looks for the "=" that ends the key in the buffered data plus the new
-- chunk, then installs msg._urldeccallback for the value that follows:
-- with a file callback every value chunk is forwarded as
-- filecb( key, chunk, eof ), otherwise the value is collected in
-- msg.params[key] and urldecoded once it is complete.
-- Returns true plus the "urldecode-value" state on success, nil plus an
-- error message otherwise.
process_states['urldecode-key'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk
	local spos, epos = buffer:find("=")

	-- No key terminator in sight
	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Prepare buffers
	msg._urldeclength = msg._urldeclength + epos
	msg._urldecbuffer = buffer:sub( epos + 1, #buffer )

	-- Use file callback or store values inside msg.params
	if filecb then
		msg._urldeccallback = function( chunk, eof )
			-- Pass the decoded parameter name; the former code passed an
			-- undefined global here, so the callback always received nil
			filecb( key, chunk, eof )
		end
	else
		__initval( msg.params, key )

		msg._urldeccallback = function( chunk, eof )
			__appendval( msg.params, key, chunk )

			-- FIXME: Use a filter
			if eof then
				__finishval( msg.params, key, urldecode )
			end
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
318
319
-- Process urldecoding stream, read parameter value
-- Value data is handed to msg._urldeccallback( data, eof ); the value ends at
-- the next "&" or ";" or when the source is exhausted.
-- Returns true plus the "urldecode-key" state after a complete value, true
-- alone while more value data is expected, false once the body is finished
-- and nil plus an error message on a Content-Length mismatch.
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- Source is exhausted: close the current value and stop
	if chunk == nil then
		msg._urldeccallback( "", true )
		return false
	end

	-- Combine look-behind buffer with current chunk
	local data = msg._urldecbuffer .. chunk

	-- Nothing buffered and nothing new: the body has been consumed
	if #data == 0 then
		if msg._urldeclength ~= msg.content_length then
			return nil, "Content-Length mismatch"
		end

		-- Cleanup
		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil

		-- We won't accept data anymore
		return false
	end

	-- Check for end of value
	local first, last = data:find("[&;]")

	if first then
		-- Flush buffer, send eof
		msg._urldeccallback( data:sub( 1, first - 1 ), true )
		msg._urldecbuffer = data:sub( last + 1, #data )
		msg._urldeclength = msg._urldeclength + last

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	-- Somewhere within a value: "held" is the size of the data that was
	-- already buffered before this chunk arrived
	local held = #data - #chunk

	if held > 0 then
		-- Flush the previously buffered part, keep the new chunk
		msg._urldeccallback( data:sub( 1, held ), false )

		msg._urldeclength = msg._urldeclength + held
		msg._urldecbuffer = data:sub( held + 1, #data )
	else
		-- Nothing was buffered yet, keep the new data
		msg._urldecbuffer = data
	end

	-- Keep feeding me
	return true
end
381
382
-- Creates a header source from a given socket
-- The returned LTN12 source reads one line per call via sock:receive("*l")
-- and strips a trailing CR from it. If no line could be read it returns nil
-- plus the error "timeout", "Line exceeds maximum allowed length" (a partial
-- result was reported by the socket) or "Unexpected EOF".
function header_source( sock )
	return ltn12.source.simplify( function()

		local line, err, partial = sock:receive("*l")

		-- Line ok
		if line ~= nil then
			-- Strip trailing CR
			local stripped = line:gsub("\r$","")

			return stripped, nil
		end

		-- Timeouts are passed through verbatim
		if err == "timeout" then
			return nil, err
		end

		return nil, partial
			and "Line exceeds maximum allowed length"
			or  "Unexpected EOF"
	end )
end
409
410
-- Decode MIME encoded data.
-- Reads a multipart/form-data body from the LTN12 source src. The boundary is
-- taken from msg.env.CONTENT_TYPE and remembered in msg.mime_boundary.
-- Parts that carry a filename are streamed to filecb (if given) as
-- filecb( headers, data, eof ) while msg.params[name] receives the file name;
-- all other parts are collected as strings in msg.params[name].
-- Data in front of the first delimiter and behind the closing delimiter is
-- ignored.
-- Returns the result of the pump on success, nil plus an error message if the
-- Content-Type is unusable or a part has broken headers.
function mimedecode_message_body( src, msg, filecb )

	if msg and msg.env.CONTENT_TYPE then
		msg.mime_boundary = msg.env.CONTENT_TYPE:match("^multipart/form%-data; boundary=(.+)$")
	end

	if not msg or not msg.mime_boundary then
		return nil, "Invalid Content-Type found"
	end


	-- Strip complete header lines from the start of chunk and record them in
	-- field.headers. Returns the unparsed remainder and a flag that is true
	-- once the blank line ending the header section has been consumed.
	local function parse_headers( chunk, field )

		local stat
		repeat
			chunk, stat = chunk:gsub(
				"^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n",
				function(k,v)
					field.headers[k] = v
					return ""
				end
			)
		until stat == 0

		chunk, stat = chunk:gsub("^\r\n","")

		-- End of headers
		if stat > 0 then
			local disposition = field.headers["Content-Disposition"]

			if disposition and disposition:match("^form%-data; ") then
				-- Anchor on the ";" so that name= is not found inside filename=
				field.name = disposition:match(';%s*name="(.-)"')
				field.file = disposition:match('filename="(.+)"$')
			end

			if not field.headers["Content-Type"] then
				field.headers["Content-Type"] = "text/plain"
			end

			return chunk, true
		end

		return chunk, false
	end


	local field    = { headers = { } }
	local inhdr    = false   -- headers of the current part are still incomplete
	local store    = nil     -- receives the body data of the current part
	local lchunk   = nil     -- look-behind buffer carried into the next step
	local finished = false   -- the closing delimiter has been seen

	local delimiter = "\r\n--" .. msg.mime_boundary .. "\r\n"
	local closer    = "\r\n--" .. msg.mime_boundary .. "--\r\n"

	-- Select the storage for the part whose headers have just been completed.
	local function open_field()
		local name = field.name

		if not name then
			return nil, "Invalid Content-Disposition header"
		end

		if field.file and filecb then
			msg.params[name] = field.file
			store = filecb
		else
			__initval( msg.params, name )

			store = function( hdr, buf, eof )
				__appendval( msg.params, name, buf )
			end
		end

		return true
	end

	local function snk( chunk )

		-- Everything behind the closing delimiter is epilogue
		if finished then
			return true
		end

		if chunk and not lchunk then
			-- Prepend a line break so the very first delimiter matches too
			lchunk = "\r\n" .. chunk

		elseif lchunk then
			local data = lchunk .. ( chunk or "" )
			local spos, epos, found, eof, ok, err

			repeat
				local closing = false

				spos, epos = data:find( delimiter, 1, true )

				if not spos then
					spos, epos = data:find( closer, 1, true )
					closing = ( spos ~= nil )
				end

				if spos then
					local predata = data:sub( 1, spos - 1 )

					-- The headers of this part were split across chunks,
					-- their remainder sits in front of the body data
					if inhdr then
						predata, eof = parse_headers( predata, field )

						if not eof then
							return nil, "Invalid MIME section header"
						end

						ok, err = open_field()
						if not ok then
							return nil, err
						end

						inhdr = false
					end

					-- Final piece of the part that ends here
					if store then
						store( field.headers, predata, true )
					end

					field = { headers = { } }
					store = nil
					found = true

					if closing then
						finished = true
						lchunk   = nil
						return true
					end

					data, eof = parse_headers( data:sub( epos + 1, #data ), field )
					inhdr = not eof

					if eof then
						ok, err = open_field()
						if not ok then
							return nil, err
						end
					end
				end
			until not spos


			if found then
				if inhdr then
					-- Unparsed header bytes must not reach any store
					lchunk = data
				elseif #data > 78 then
					-- Keep enough bytes to recognize a delimiter that is
					-- split across two chunks
					lchunk = data:sub( #data - 78 + 1, #data )
					data   = data:sub( 1, #data - 78 )

					if store then
						store( field.headers, data )
					end
				else
					lchunk, data = data, nil
				end
			else
				if inhdr then
					lchunk, eof = parse_headers( data, field )
					inhdr = not eof

					if eof then
						ok, err = open_field()
						if not ok then
							return nil, err
						end
					end
				else
					-- No store yet means we are still in the preamble
					if store then
						store( field.headers, lchunk )
					end

					lchunk, chunk = chunk, nil
				end
			end
		end

		return true
	end

	return ltn12.pump.all( src, snk )
end
547
548
-- Decode urlencoded data.
-- Pumps the LTN12 source through the urldecode state machine, starting with
-- the "urldecode-init" state.
-- Returns true once the pump signals its end without an error, nil plus the
-- error message otherwise.
function urldecode_message_body( source, msg )

	-- Initial LTN12 sink; the state functions replace themselves as they go
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['urldecode-init']( msg, chunk )
		end
	)

	-- Throttling LTN12 source: as long as the decoder still holds buffered
	-- data it is fed empty chunks, fresh input is only requested once the
	-- buffer has been drained
	local function tsrc()
		local pending = msg._urldecbuffer

		if pending ~= nil and #pending > 0 then
			return ""
		end

		return source()
	end

	-- Pump input data...
	while true do
		local ok, err = ltn12.pump.step( tsrc, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof
			return true
		end
	end
end
585
586
-- Parse a http message header
-- Pumps the line based LTN12 source through the "magic" and header states
-- and returns the resulting message table. Besides the fields set by the
-- states it carries:
--   msg.params  decoded query parameters (GET and POST requests whose uri
--               contains a "?"), an empty table otherwise
--   msg.env     CGI style environment (CONTENT_*, REQUEST_*, SCRIPT_NAME,
--               SERVER_PROTOCOL and HTTP_* for a fixed set of headers)
-- For response messages the request related env entries are simply absent.
-- Returns nil plus an error message if the header could not be parsed.
function parse_message_header( source )

	local msg  = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data until the sink stops accepting it
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof
			break
		end
	end

	-- Both are nil when a response was parsed
	local method = msg.request_method
	local uri    = msg.request_uri

	-- Process get parameters
	if uri and ( method == "get" or method == "post" ) and
	   uri:find( "?", 1, true )
	then
		msg.params = urldecode_params( uri )
	else
		msg.params = { }
	end

	-- Populate common environment variables
	msg.env = {
		CONTENT_LENGTH    = msg.headers['Content-Length'];
		CONTENT_TYPE      = msg.headers['Content-Type'];
		REQUEST_METHOD    = method and method:upper();
		REQUEST_URI       = uri;
		SCRIPT_NAME       = uri and ( uri:gsub("%?.+$","") );
		SCRIPT_FILENAME   = "";         -- XXX implement me
		SERVER_PROTOCOL   = "HTTP/" .. string.format("%.1f", msg.http_version)
	}

	-- Populate HTTP_* environment variables
	for i, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		local var = 'HTTP_' .. hdr:upper():gsub("%-","_")
		local val = msg.headers[hdr]

		msg.env[var] = val
	end

	return msg
end
654
655
-- Parse a http message body
-- POST bodies of type multipart/form-data and
-- application/x-www-form-urlencoded are handed to the matching decoder.
-- Any other body is pumped completely: to filecb if it is a function (it is
-- then used directly as LTN12 sink), otherwise into msg.content /
-- msg.content_length, limited to HTTP_MAX_CONTENT bytes.
-- Returns a true value on success, nil plus an error message on failure.
function parse_message_body( source, msg, filecb )

	local is_post = ( msg.env.REQUEST_METHOD == "POST" )
	local ctype   = msg.env.CONTENT_TYPE

	-- Is it multipart/mime ?
	if is_post and ctype and ctype:match("^multipart/form%-data") then
		return mimedecode_message_body( source, msg, filecb )

	-- Is it application/x-www-form-urlencoded ?
	elseif is_post and ctype and ctype == "application/x-www-form-urlencoded" then
		return urldecode_message_body( source, msg, filecb )
	end


	-- Unhandled encoding
	local sink

	-- If we have a file callback then feed it
	if type(filecb) == "function" then
		sink = filecb

	-- ... else append to .content
	else
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- The pump passes nil once the source is exhausted
			if chunk == nil then
				return true
			end

			if ( msg.content_length + #chunk ) <= HTTP_MAX_CONTENT then

				msg.content        = msg.content        .. chunk
				msg.content_length = msg.content_length + #chunk

				return true
			else
				return nil, "POST data exceeds maximum allowed length"
			end
		end
	end

	-- Pump data until the source is exhausted; a failed step without an
	-- error message marks the regular end of the input
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok and err then
			return nil, err
		elseif not ok then
			return true
		end
	end
end
713
-- Status codes
-- Maps the numeric HTTP status codes known to this module to their reason
-- phrases.
statusmsg = {
	[200] = "OK",
	[301] = "Moved Permanently",
	[304] = "Not Modified",
	[400] = "Bad Request",
	[403] = "Forbidden",
	[404] = "Not Found",
	[405] = "Method Not Allowed",
	[411] = "Length Required",
	[412] = "Precondition Failed",
	[500] = "Internal Server Error",
	[503] = "Service Unavailable",
}