d557f78e5e2bb1546ab0194a40e8be54d6c1183a
[project/luci.git] / libs / web / luasrc / http / protocol.lua
1 --[[                                                                            
2                                                                                 
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>           
5                                                                                 
6 Licensed under the Apache License, Version 2.0 (the "License");                 
7 you may not use this file except in compliance with the License.                
8 You may obtain a copy of the License at                                         
9                                                                                 
10         http://www.apache.org/licenses/LICENSE-2.0                              
11                                                                                 
12 $Id$                             
13                                                                                 
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("luci.util")
19
20
-- Module-wide protocol defaults and limits.
HTTP_MAX_CONTENT     = 1024 * 1024      -- upper bound for body data (1 MB)
HTTP_DEFAULT_CTYPE   = "text/html"      -- content type used by default
HTTP_DEFAULT_VERSION = "1.0"            -- HTTP version used by default
HTTP_DEFAULT_LINEBUF = 4096             -- block size handed to the line reader
25
-- Decode an urlencoded string.
-- Every "+" becomes a space and every "%XX" escape (two hex digits) becomes
-- the byte it denotes. Malformed escapes are left untouched.
-- Non-string arguments are returned unchanged.
-- Returns the decoded value.
function urldecode( str )

        if type(str) ~= "string" then
                return str
        end

        -- Plus signs have to be translated first, otherwise a decoded "%2B"
        -- would be turned into a space as well. "+" is a magic character in
        -- Lua patterns and is therefore escaped.
        local decoded = str:gsub( "%+", " " )

        decoded = decoded:gsub( "%%(%x%x)", function( hex )
                return string.char( tonumber( hex, 16 ) )
        end )

        return decoded
end
40
41
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url or query string.
-- If the argument contains a "?", only the text after the first "?" is
-- parsed; otherwise the whole argument is treated as the query string.
-- Keys that occur more than once are collected into a table of values, and
-- keys without a value are stored with an empty string.
-- Returns a table with urldecoded keys and values (empty for non-string input).
function urldecode_params( url )

        local params = { }

        if type(url) ~= "string" then
                return params
        end

        -- Plain search: "?" is a magic pattern character. Cutting at the
        -- first "?" also copes with "?a=1" and "/path?".
        local qpos = url:find( "?", 1, true )
        if qpos then
                url = url:sub( qpos + 1 )
        end

        -- Each maximal run of non-separator characters is one key/value pair
        for pair in url:gmatch( "[^&;]+" ) do

                -- find key and value
                local key = urldecode( pair:match("^([^=]+)")     )
                local val = urldecode( pair:match("^[^=]+=(.+)$") )

                -- store
                if type(key) == "string" and #key > 0 then
                        if type(val) ~= "string" then val = "" end

                        local current = params[key]

                        if current == nil then
                                params[key] = val
                        elseif type(current) ~= "table" then
                                params[key] = { current, val }
                        else
                                current[#current + 1] = val
                        end
                end
        end

        return params
end
74
75
-- Encode given string in urlencoded format.
-- Letters, digits and the characters $ _ - . ! * ' ( ) , are kept, every
-- other byte is written as "%xx" with two lowercase hex digits.
-- "+" is escaped as well, because urldecode() reads a literal "+" as a
-- space; leaving it unescaped would corrupt the value on the way back.
-- Non-string arguments are returned unchanged.
-- Returns the encoded string.
function urlencode( str )

        if type(str) ~= "string" then
                return str
        end

        local encoded = str:gsub(
                "([^a-zA-Z0-9$_%-%.!*'(),])",
                function( chr )
                        return string.format( "%%%02x", string.byte( chr ) )
                end
        )

        return encoded
end
95
96
-- Encode given table to urlencoded string.
-- Each entry becomes "key=value"; entries are joined with "&" without a
-- leading separator. A value that is itself a table is written as one
-- "key=item" pair per array element. Keys and values are converted with
-- tostring() before encoding. The order of the pairs follows pairs() and
-- is therefore unspecified.
-- Returns the encoded string ("" for an empty table).
function urlencode_params( tbl )

        local parts = { }

        local function add( key, value )
                parts[#parts + 1] =
                        urlencode( tostring( key ) ) .. "=" ..
                        urlencode( tostring( value ) )
        end

        for k, v in pairs(tbl) do
                if type(v) == "table" then
                        for _, item in ipairs(v) do
                                add( k, item )
                        end
                else
                        add( k, v )
                end
        end

        return table.concat( parts, "&" )
end
110
111
-- Split the value of a Content-Disposition header of a mime part.
-- Returns true, the field name, a file flag and the filename if the value
-- describes a form-data part, or false otherwise.
local function _mime_disposition( value )

        local hdrvals = luci.util.split( value, '; ' )

        -- Valid form data part? The name attribute may be missing entirely.
        if hdrvals[1] ~= "form-data" or not hdrvals[2] or
           not hdrvals[2]:match("^name=")
        then
                return false
        end

        local field    = hdrvals[2]:match('^name="(.+)"$')
        local is_file  = false
        local filename = nil

        -- Do we got a file upload field?
        if #hdrvals == 3 and hdrvals[3]:match("^filename=") then
                is_file  = true
                filename = hdrvals[3]:match('^filename="(.+)"$')
        end

        return true, field, is_file, filename
end

-- Decode MIME encoded (multipart/form-data) data.
--   data      source handed to _linereader()
--   boundary  part delimiter without the leading "--"; if nil, it is taken
--             from the first line that looks like "--<text>"
--   filecb    optional function( field, filename, chunk, eof ); when given,
--             the content of file parts is passed to it piecewise and the
--             field is stored with its filename instead of its content
-- Reading stops once HTTP_MAX_CONTENT bytes have been consumed.
-- Returns a table with decoded values.
function mimedecode( data, boundary, filecb )

        local params = { }

        -- create a line reader
        local reader = _linereader( data, HTTP_DEFAULT_LINEBUF )

        -- state variables
        local in_part = false   -- inside a valid form-data part
        local in_file = false   -- current part is a file upload
        local in_fbeg = false   -- no content line of the file part seen yet
        local in_size = true    -- still below HTTP_MAX_CONTENT

        local filename
        local buffer
        local field
        local clen = 0

        local has_filecb = ( type(filecb) == "function" )

        -- try to read all mime parts
        for line in reader do

                -- update content length
                clen = clen + #line

                if clen >= HTTP_MAX_CONTENT then
                        in_size = false
                end

                -- when no boundary is given, try to find it
                if not boundary then
                        boundary = line:match("^%-%-([^\r\n]+)\r?\n$")
                end

                local at_boundary = boundary and
                        #line > #boundary + 2 and
                        line:sub( 1, 2 ) == "--" and
                        line:sub( 3, 2 + #boundary ) == boundary

                -- Got a valid boundary line or reached max allowed size.
                if at_boundary or not in_size then

                        -- Flush the data of the previous mime part.
                        -- When field and/or buffer are nil the previous
                        -- section is discarded due to format violations.
                        if type(field)  == "string" and #field > 0 and
                           type(buffer) == "string"
                        then
                                -- The line break preceding a boundary belongs
                                -- to the boundary, not to the content. Since
                                -- data is read line by line it ended up in
                                -- the buffer, so strip it before storing.
                                buffer = buffer:gsub("\r?\n$","")

                                -- File part with callback: final call with eof
                                if in_file and has_filecb then
                                        filecb( field, filename, buffer, true )
                                        params[field] = filename

                                -- Store buffer.
                                else
                                        params[field] = buffer
                                end
                        end

                        -- Reset vars
                        buffer   = ""
                        filename = nil
                        field    = nil
                        in_file  = false
                        in_fbeg  = false
                        in_part  = false

                        -- Abort here if we reached maximum allowed size
                        if not in_size then break end

                        -- The closing delimiter is "--" boundary "--", so the
                        -- trailing dashes sit right behind the boundary text.
                        local is_last =
                                #line >= #boundary + 4 and
                                line:sub( #boundary + 3, #boundary + 4 ) == "--"

                        -- It's a middle boundary: read the part headers
                        if not is_last then
                                local _, headers = extract_headers( reader )
                                local disposition = headers['Content-Disposition']

                                if disposition then
                                        local ok, name, is_file, fname =
                                                _mime_disposition( disposition )

                                        if ok then
                                                field    = name
                                                filename = fname
                                                in_file  = is_file
                                                in_fbeg  = is_file

                                                -- Entering next part processing
                                                in_part = true
                                        end
                                end
                        end

                -- Processing content
                elseif in_part then

                        -- XXX: Would be really good to switch from line based
                        --      to buffered reading here.

                        -- File part with callback: hand over the previously
                        -- buffered line and keep only the current one, so the
                        -- line break before the next boundary can be stripped.
                        if in_file and has_filecb then
                                if in_fbeg then
                                        -- Nothing buffered before the first line
                                        in_fbeg = false
                                else
                                        filecb( field, filename, buffer, false )
                                        buffer = ""
                                end
                        end

                        -- Append data to buffer
                        buffer = buffer .. line
                end
        end

        return params
end
259
260
-- Extract "magic", the first line of a http message.
-- Only the first line delivered by the reader is inspected.
-- For a request line the lower-cased method, the requested uri and nil are
-- returned. For a status line the string "response", the numeric status
-- code and the human readable status description are returned.
-- Any other line yields nil.
function extract_magic( reader )

        for first in reader do

                -- Request line: METHOD uri HTTP/x.y
                local verb, target = first:match("^([A-Z]+) ([^ ]+) HTTP/[01]%.[019]\r?\n$")

                if verb then
                        return verb:lower(), target, nil
                end

                -- Status line: HTTP/x.y code description
                local status, reason = first:match("^HTTP/[01]%.[019] ([0-9]+) ([^\r\n]+)\r?\n$")

                -- Neither request nor response, can't handle it
                if not status then
                        return nil
                end

                return "response", status + 0, reason
        end
end
291
292
-- Extract header lines from the given line reader.
-- Lines of the form "Name: value" are stored in tbl (or in a new table if
-- tbl is not given). Reading stops at the first empty line, at the first
-- line that is not a header, or when the reader is exhausted.
-- Returns the number of bytes accepted (header lines plus the terminating
-- empty line, if any) followed by the table of headers.
function extract_headers( reader, tbl )

        local found  = tbl or { }
        local nbytes = 0

        for raw in reader do

                -- Look for a valid header format
                local name, value = raw:match( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r?\n$" )

                local is_header =
                        type(name)  == "string" and #name  > 0 and
                        type(value) == "string" and #value > 0

                if not is_header then
                        -- An empty line ends the header section and is counted
                        if raw:match("^\r?\n$") then
                                return nbytes + #raw, found
                        end

                        -- junk data, don't add length
                        return nbytes, found
                end

                nbytes = nbytes + #raw
                found[name] = value
        end

        return nbytes, found
end
324
325
-- Parse a complete http message.
-- The header is parsed first; if that succeeds, the body is parsed from the
-- same line reader and filecb is passed on to the body parser.
-- Returns the message table built by parse_message_header(), or nil if
-- the header could not be parsed.
function parse_message( data, filecb )

        local lines = _linereader( data, HTTP_DEFAULT_LINEBUF )
        local msg   = parse_message_header( lines )

        -- Without a usable header there is no body to process
        if not msg then
                return msg
        end

        parse_message_body( lines, msg, filecb )

        return msg
end
338
339
-- Parse a http message header.
-- data is handed to _linereader(), so it may also be an existing reader
-- function. The first line is classified by extract_magic() and the header
-- lines are collected by extract_headers().
-- Returns a message table with the fields type, request_method,
-- status_code, status_message, request_uri, headers, params and env, or nil
-- if the first line is not recognized or no valid headers follow.
-- For responses request_uri, env.REQUEST_URI and env.SCRIPT_NAME are nil.
function parse_message_header( data )

        -- Create a line reader
        local reader = _linereader( data, HTTP_DEFAULT_LINEBUF )

        -- Try to extract magic
        local method, arg1, arg2 = extract_magic( reader )

        -- Does it look like a valid message?
        if not method then
                return nil
        end

        local is_response = ( method == "response" )

        local message = {
                type           = is_response and "response" or "request",
                request_method = method,
                status_code    = is_response and arg1 or 200,
                status_message = is_response and arg2 or nil
        }

        -- arg1 is the uri for requests only. Written as a plain "if"
        -- because "x and nil or y" always evaluates to y.
        if not is_response then
                message.request_uri = arg1
        end

        -- Parse headers
        local hlen, hdrs = extract_headers( reader )

        -- Valid headers?
        if not ( hlen > 2 and type(hdrs) == "table" ) then
                return nil
        end

        message.headers = hdrs

        local uri = message.request_uri

        -- Process get parameters
        if uri and ( method == "get" or method == "post" ) and
           uri:find( "?", 1, true )
        then
                message.params = urldecode_params( uri )
        else
                message.params = { }
        end

        -- Populate common environment variables
        message.env = {
                CONTENT_LENGTH    = hdrs['Content-Length'],
                CONTENT_TYPE      = hdrs['Content-Type'],
                REQUEST_METHOD    = message.request_method,
                REQUEST_URI       = uri,
                -- uri without its query string; the parentheses drop the
                -- substitution count returned by gsub
                SCRIPT_NAME       = uri and ( uri:gsub( "%?.*$", "" ) ),
                SCRIPT_FILENAME   = ""          -- XXX implement me
        }

        -- Populate HTTP_* environment variables
        for _, hdr in ipairs( {
                'Accept',
                'Accept-Charset',
                'Accept-Encoding',
                'Accept-Language',
                'Connection',
                'Cookie',
                'Host',
                'Referer',
                'User-Agent',
        } ) do
                local var = 'HTTP_' .. hdr:upper():gsub("%-","_")

                message.env[var] = hdrs[hdr]
        end

        return message
end
414
415
-- Parse a http message body.
--   reader   line reader positioned behind the message headers
--   message  message table as built by parse_message_header()
--   filecb   optional function( field, filename, chunk, eof )
-- Only "post" messages that carry a content type are processed:
--   * multipart/form-data is decoded by mimedecode() and the resulting
--     fields are merged into message.params
--   * application/x-www-form-urlencoded chunks are decoded by
--     urldecode_params() and merged into message.params
--   * any other content is passed to filecb under the field name "_post",
--     or appended to message.content if no callback is given
-- For the last two cases reading stops once the declared Content-Length,
-- capped at HTTP_MAX_CONTENT, has been consumed. Without a usable
-- Content-Length header the cap alone applies.
function parse_message_body( reader, message, filecb )

        if type(message) ~= "table" or type(message.env) ~= "table" then
                return
        end

        local env   = message.env
        local ctype = env.CONTENT_TYPE

        -- Process post method only
        if not ctype or type(env.REQUEST_METHOD) ~= "string" or
           env.REQUEST_METHOD:lower() ~= "post"
        then
                return
        end

        -- Number of body bytes to accept; "received" counts from zero
        local limit = tonumber( env.CONTENT_LENGTH )
        if not limit or limit > HTTP_MAX_CONTENT then
                limit = HTTP_MAX_CONTENT
        end

        local received   = 0
        local has_filecb = ( type(filecb) == "function" )

        -- Is it multipart/form-data ?
        if ctype:match("^multipart/form%-data") then
                local fields = mimedecode(
                        reader, ctype:match("boundary=(.+)"), filecb
                )

                for k, v in pairs( fields ) do
                        message.params[k] = v
                end

        -- Is it x-www-form-urlencoded?
        elseif ctype:match('^application/x%-www%-form%-urlencoded') then
                -- XXX: readline isn't the best solution here
                for chunk in reader do
                        for k, v in pairs( urldecode_params( chunk ) ) do
                                message.params[k] = v
                        end

                        received = received + #chunk
                        if received >= limit then
                                break
                        end
                end

        -- Unhandled encoding
        -- If a file callback is given then feed it chunk by chunk, else
        -- store whole buffer in message.content
        else
                for chunk in reader do

                        -- We have a callback, feed it.
                        if has_filecb then
                                filecb( "_post", nil, chunk, false )

                        -- Append to .content buffer.
                        elseif type(message.content) == "string" then
                                message.content = message.content .. chunk
                        else
                                message.content = chunk
                        end

                        received = received + #chunk
                        if received >= limit then
                                break
                        end
                end

                -- Send eof to callback
                if has_filecb then
                        filecb( "_post", nil, "", true )
                end
        end
end
487
488
-- Create an iterator that delivers the content of obj line by line.
--   obj    a string, a userdata object with a read() or receive() method,
--          or a function (returned as is, assumed to be a reader already)
--   bufsz  block size used when pulling data from obj (at least 256)
-- Each call of the iterator returns a chunk and a flag. The flag is true if
-- the chunk is a complete line including its line break, and false if the
-- chunk is data without a line break (a line longer than the block size or
-- the unterminated rest of the input). Once the source is exhausted a
-- single empty chunk is returned, after that the iterator returns nil.
-- For any other type of obj an iterator that returns nil is handed out.
function _linereader( obj, bufsz )

        bufsz = ( bufsz and bufsz >= 256 ) and bufsz or 256

        local function is_empty( x )
                return type(x) ~= "string" or #x == 0
        end

        local pos     = 1       -- next position to read (string sources)
        local pending = ""      -- data read but not yet handed out
        local done    = false   -- source is exhausted
        local fetch

        -- object is string
        if type(obj) == "string" then

                -- Never hold more than bufsz bytes at once
                fetch = function()
                        return obj:sub( pos, pos + bufsz - #pending - 1 )
                end

        -- object implements a receive() or read() function
        elseif type(obj) == "userdata" and
               ( type(obj.receive) == "function" or type(obj.read) == "function" )
        then

                if type(obj.read) == "function" then
                        fetch = function() return obj:read( bufsz ) end
                else
                        -- A failing receive() may hand back what it got so
                        -- far as third result (LuaSocket convention); use it
                        -- instead of dropping the data.
                        fetch = function()
                                local block, _, partial = obj:receive( bufsz )
                                return block or partial
                        end
                end

        -- object is a function
        elseif type(obj) == "function" then

                return obj

        -- no usable data type
        else

                -- dummy iterator
                return function() return nil end
        end

        -- generic block to line algorithm
        return function()
                if done then
                        return nil
                end

                local first, last = pending:find( "\r?\n" )

                -- Only touch the source if no complete line is buffered, so
                -- stream sources are not read (and waited for) needlessly.
                if not first then
                        local block = fetch()

                        if is_empty( block ) then
                                block = ""
                        end

                        pos     = pos + #block
                        pending = pending .. block

                        first, last = pending:find( "\r?\n" )
                end

                if first then
                        local line = pending:sub( 1, last )
                        pending = pending:sub( last + 1 )
                        return line, true
                end

                -- No line break available: hand out everything we have.
                -- An empty rest means the source delivered nothing, i.e. eof.
                local rest = pending
                pending = ""
                done = is_empty( rest )

                return rest, false
        end
end
557         end
558 end