* Use CRLF instead of LF in HTTP headers
[project/luci.git] / libs / web / luasrc / http / protocol.lua
1 --[[                                                                            
2                                                                                 
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>           
5                                                                                 
6 Licensed under the Apache License, Version 2.0 (the "License");                 
7 you may not use this file except in compliance with the License.                
8 You may obtain a copy of the License at                                         
9                                                                                 
10         http://www.apache.org/licenses/LICENSE-2.0                              
11                                                                                 
12 $Id$                             
13                                                                                 
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("luci.util")
19
20
21 HTTP_MAX_CONTENT     = 1024^2           -- 1 MB maximum content size
22 HTTP_MAX_READBUF     = 1024             -- 1 kB read buffer size
23
24 HTTP_DEFAULT_CTYPE   = "text/html"      -- default content type
25 HTTP_DEFAULT_VERSION = "1.0"            -- HTTP default version
26
27
-- Decode an urlencoded string.
-- Every "+" becomes a space and every %XX escape (two hex digits) becomes the
-- byte it denotes. Escapes that are not followed by two hex digits are left
-- as they are.
-- Returns the decoded string; values that are not strings are returned
-- unchanged.
function urldecode( str )

	-- Nothing to decode for non-string input.
	if type(str) ~= "string" then
		return str
	end

	-- Turn the two hex digits of an escape into the matching character.
	local function hex_to_char( digits )
		return string.char( tonumber( digits, 16 ) )
	end

	-- gsub also returns a replacement count; assigning to a single local
	-- keeps only the resulting string.
	local spaced  = str:gsub( "+", " " )
	local decoded = spaced:gsub( "%%([a-fA-F0-9][a-fA-F0-9])", hex_to_char )

	return decoded
end
42
43
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url or bare query string.
-- If the input contains a "?", only the text behind the first "?" is parsed.
-- Returns a table mapping each urldecoded key to its urldecoded value:
--   * a key without a value ("flag" or "flag=") maps to ""
--   * a key that occurs more than once maps to an array holding its values
--     in order of appearance
-- Input that is not a string yields an empty table.
function urldecode_params( url )

	local params = { }

	if type(url) ~= "string" then
		return params
	end

	-- Cut off everything up to and including the first "?". A plain-text
	-- find is used because "?" is a pattern magic character, and because a
	-- pattern based strip would leave inputs such as "/path?" or "?a=1"
	-- untouched, turning the path into a bogus parameter name.
	local qmark = url:find( "?", 1, true )
	if qmark then
		url = url:sub( qmark + 1 )
	end

	for _, pair in ipairs( luci.util.split( url, "[&;]+", nil, true ) ) do

		-- find key and value; both matches yield nil for an empty pair
		local key = urldecode( pair:match( "^([^=]+)" ) )
		local val = urldecode( pair:match( "^[^=]+=(.+)$" ) )

		-- store
		if type(key) == "string" and #key > 0 then
			if type(val) ~= "string" then val = "" end

			local known = params[key]

			if known == nil then
				params[key] = val
			elseif type(known) == "table" then
				known[#known + 1] = val
			else
				-- second occurrence: promote the single value to a list
				params[key] = { known, val }
			end
		end
	end

	return params
end
76
77
-- Encode given string in urlencoded format.
-- Every byte outside of a-z, A-Z, 0-9 and $ _ - . + ! * ' ( ) , is replaced
-- by "%" followed by its two digit lower case hex code.
-- Returns the encoded string; values that are not strings are returned
-- unchanged.
function urlencode( str )

	-- Only strings can be encoded.
	if type(str) ~= "string" then
		return str
	end

	-- Keep only the string result of gsub, not the replacement count.
	local encoded = str:gsub(
		"([^a-zA-Z0-9$_%-%.+!*'(),])",
		function( chr )
			return string.format( "%%%02x", string.byte( chr ) )
		end
	)

	return encoded
end
97
98
-- Encode given table to urlencoded string.
-- Each entry becomes "key=value" with both sides passed through urlencode();
-- the entries are joined with "&". A value that is itself a table is treated
-- as a list and emitted as one "key=item" pair per array element, which is
-- the shape urldecode_params() produces for repeated keys.
-- Keys and values are converted with tostring() before encoding.
-- Returns the encoded string, "" for an empty table. The order of the pairs
-- follows pairs() and is therefore unspecified.
function urlencode_params( tbl )

	-- Collect the pairs and join once at the end. Joining via table.concat
	-- puts "&" only between pairs; testing the accumulator string for
	-- truthiness does not work because "" is true in Lua and would produce
	-- a leading "&".
	local parts = { }

	local function add( key, value )
		parts[#parts + 1] =
			urlencode( tostring( key ) ) .. "=" ..
			urlencode( tostring( value ) )
	end

	for k, v in pairs( tbl ) do
		if type(v) == "table" then
			for _, item in ipairs( v ) do
				add( k, item )
			end
		else
			add( k, v )
		end
	end

	return table.concat( parts, "&" )
end
112
113
-- Decode MIME encoded (multipart) data.
--
-- data     : input handed to _linereader() (string, object with read() or
--            receive(), or a line iterator function)
-- boundary : part delimiter without the leading "--"; when nil, it is taken
--            from the first line of the form "--<text>"
-- filecb   : optional function( field, filename, chunk, eof ). When given,
--            the content of file upload parts is streamed to it line by line
--            instead of being collected; the final call has eof set to true.
--
-- Returns a table with decoded values: plain fields map to their content,
-- file fields handled by filecb map to the submitted file name. Parts without
-- a usable 'Content-Disposition: form-data; name="..."' header are dropped.
-- Decoding stops once HTTP_MAX_CONTENT bytes of input lines have been read;
-- the part collected up to that point is still stored.
function mimedecode( data, boundary, filecb )

	local params = { }

	-- create a line reader
	local reader = _linereader( data, HTTP_MAX_READBUF )

	local use_filecb = ( type(filecb) == "function" )

	-- state variables
	local in_part = false   -- inside the content of an accepted part
	local in_file = false   -- the current part is a file upload
	local in_fbeg = false   -- the next content line is the first of a file part
	local in_size = true    -- still below HTTP_MAX_CONTENT

	local filename
	local buffer
	local field
	local clen = 0

	-- try to read all mime parts
	for line in reader do

		-- update content length
		clen = clen + #line

		if clen >= HTTP_MAX_CONTENT then
			in_size = false
		end

		-- when no boundary is given, try to find it
		if not boundary then
			boundary = line:match("^%-%-([^\r\n]+)\r?\n$")
		end

		local at_boundary = boundary
			and #line > #boundary + 2
			and line:sub( 1, 2 ) == "--"
			and line:sub( 3, 2 + #boundary ) == boundary

		-- Got a valid boundary line or reached max allowed size.
		if at_boundary or not in_size then

			-- Flush the data of the previous mime part.
			-- When field and/or buffer are nil the previous section is
			-- discarded entirely due to format violations.
			if type(field)  == "string" and #field > 0 and
			   type(buffer) == "string"
			then
				-- The line break preceding a boundary belongs to the
				-- boundary, not to the content. Reading line by line
				-- leaves it at the end of the last content line, so
				-- strip it before storing the buffer.
				buffer = buffer:gsub( "\r?\n$", "" )

				-- File part with callback: final call, signalling eof.
				if in_file and use_filecb then
					filecb( field, filename, buffer, true )
					params[field] = filename

				-- Store buffer.
				else
					params[field] = buffer
				end
			end

			-- Reset vars. in_part is cleared as well so that content of a
			-- part with unusable headers is skipped instead of being
			-- collected and thrown away at the next boundary.
			buffer   = ""
			filename = nil
			field    = nil
			in_file  = false
			in_fbeg  = false
			in_part  = false

			-- Abort here if we reached maximum allowed size
			if not in_size then break end

			-- The closing delimiter is "--" .. boundary .. "--", i.e. the
			-- two bytes right behind the boundary text are dashes. For a
			-- middle delimiter they are the line break instead.
			local is_last = line:sub( #boundary + 3, #boundary + 4 ) == "--"

			if not is_last then

				-- Read the part headers
				local _, headers = extract_headers( reader )
				local disposition = headers['Content-Disposition']

				if disposition then

					-- "form-data; name=...; filename=..."
					local hdrvals = luci.util.split( disposition, '; ' )

					-- Valid form data part?
					if hdrvals[1] == "form-data" and hdrvals[2] and
					   hdrvals[2]:match("^name=")
					then
						-- Store field identifier
						field = hdrvals[2]:match('^name="(.+)"$')

						-- Do we got a file upload field?
						if #hdrvals == 3 and hdrvals[3]:match("^filename=") then
							in_file  = true
							in_fbeg  = true
							filename = hdrvals[3]:match('^filename="(.+)"$')
						end

						-- Entering next part processing
						in_part = true
					end
				end
			end

		-- Processing content
		elseif in_part then

			-- File part with callback: hand over the previously read line
			-- and start a fresh buffer. The callback lags one line behind
			-- so that the line break in front of the next boundary can be
			-- stripped from the last chunk.
			if in_file and use_filecb then
				if in_fbeg then
					-- first content line: nothing buffered yet
					in_fbeg = false
				else
					filecb( field, filename, buffer, false )
					buffer = ""
				end
			end

			-- Append data to buffer
			buffer = buffer .. line
		end
	end

	return params
end
261
262
-- Extract "magic", the first line of a http message.
-- Only the first line delivered by reader is inspected.
--   * request line  ("METHOD uri HTTP/x.y"):
--       returns the lower cased method, the requested uri and nil
--   * status line   ("HTTP/x.y code text"):
--       returns "response", the status code as number and the human
--       readable status description
--   * anything else: returns nil
-- If reader delivers no line at all, nothing is returned.
function extract_magic( reader )

	for line in reader do

		-- Is it a request?
		local method, uri = line:match("^([A-Z]+) ([^ ]+) HTTP/[01]%.[019]\r?\n$")
		if method then
			return method:lower(), uri, nil
		end

		-- Is it a response?
		local code, message = line:match("^HTTP/[01]%.[019] ([0-9]+) ([^\r\n]+)\r?\n$")
		if code then
			return "response", tonumber( code ), message
		end

		-- Can't handle it
		return nil
	end
end
293
294
-- Extract headers from the lines delivered by reader.
-- Lines of the form "Name: value" (name starting with an upper case letter)
-- are stored in tbl, or in a new table if tbl is not given. Reading stops at
-- the first line that is not a header.
-- Returns two values: the number of bytes taken up by the accepted header
-- lines and the table of headers. An empty line terminating the headers is
-- included in the byte count; any other non-header line is not.
function extract_headers( reader, tbl )

	local found  = tbl or { }
	local nbytes = 0

	-- Iterate line by line
	for line in reader do

		-- Both captures use "+", so a successful match always delivers two
		-- non-empty strings.
		local name, value = line:match( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r?\n$" )

		if not name then
			-- End of the header block: count a blank separator line,
			-- ignore the length of junk data.
			if line:match("^\r?\n$") then
				nbytes = nbytes + #line
			end

			return nbytes, found
		end

		found[name] = value
		nbytes = nbytes + #line
	end

	-- reader ran dry before a terminating line was seen
	return nbytes, found
end
326
327
-- Parse a http message: first line, headers and body.
-- data   : input handed to _linereader()
-- filecb : optional callback handed on to parse_message_body()
-- Returns the message table produced by parse_message_header(), or nil if
-- the header could not be parsed.
function parse_message( data, filecb )

	-- A single reader is shared by both parsing steps, so the body is read
	-- from the position where header parsing stopped.
	local lines  = _linereader( data, HTTP_MAX_READBUF )
	local parsed = parse_message_header( lines )

	if not parsed then
		return parsed
	end

	parse_message_body( lines, parsed, filecb )

	return parsed
end
340
341
-- Parse a http message header (first line plus header fields).
-- data : input handed to _linereader()
--
-- Returns a message table, or nothing if the first line is not a valid
-- request/status line or the header block is too short (extract_headers()
-- must report more than 2 bytes).
--
-- Fields of the returned table:
--   type            "request" or "response"
--   request_method  lower cased method, or "response"
--   request_uri     requested uri (requests only, nil for responses)
--   status_code     status code of a response, 200 for requests
--   status_message  status description (responses only)
--   headers         table of header fields as read by extract_headers()
--   params          decoded query parameters for get/post requests whose
--                   uri contains a "?", otherwise an empty table
--   env             CGI style variables (CONTENT_LENGTH, CONTENT_TYPE,
--                   REQUEST_METHOD, REQUEST_URI, SCRIPT_NAME,
--                   SCRIPT_FILENAME and HTTP_* for common headers)
function parse_message_header( data )

	-- Create a line reader
	local reader = _linereader( data, HTTP_MAX_READBUF )

	-- Try to extract magic
	local method, arg1, arg2 = extract_magic( reader )

	-- Does it look like a valid message?
	if not method then
		return
	end

	local is_response = ( method == "response" )

	local message = {
		type           = is_response and "response" or "request",
		request_method = method
	}

	-- extract_magic() returns ( "response", code, text ) for a status line
	-- and ( method, uri, nil ) for a request line. The two cases are told
	-- apart explicitly: an "x and nil or y" expression always evaluates to
	-- y and would store the status code as request uri.
	if is_response then
		message.status_code    = arg1
		message.status_message = arg2
	else
		message.status_code = 200
		message.request_uri = arg1
	end

	-- Parse headers
	local hlen, hdrs = extract_headers( reader )

	-- Valid headers?
	if not ( hlen > 2 and type(hdrs) == "table" ) then
		return
	end

	message.headers = hdrs

	local uri = message.request_uri

	-- Process get parameters ("?" is searched as plain text)
	if ( method == "get" or method == "post" ) and uri:find( "?", 1, true ) then
		message.params = urldecode_params( uri )
	else
		message.params = { }
	end

	-- The script name is the uri without its query string. Responses have
	-- no uri, so there is nothing to derive it from.
	local script_name
	if uri then
		script_name = uri:gsub( "%?.*$", "" )
	end

	-- Populate common environment variables
	message.env = {
		CONTENT_LENGTH    = hdrs['Content-Length'];
		CONTENT_TYPE      = hdrs['Content-Type'];
		REQUEST_METHOD    = message.request_method;
		REQUEST_URI       = uri;
		SCRIPT_NAME       = script_name;
		SCRIPT_FILENAME   = ""          -- XXX implement me
	}

	-- Populate HTTP_* environment variables
	for _, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		-- e.g. "User-Agent" -> "HTTP_USER_AGENT"
		local var = 'HTTP_' .. hdr:upper():gsub( "%-", "_" )

		message.env[var] = hdrs[hdr]
	end

	return message
end
416
417
-- Parse a http message body.
-- reader  : line/chunk iterator positioned at the start of the body
-- message : message table as built by parse_message_header(); its env table
--           supplies REQUEST_METHOD, CONTENT_TYPE and CONTENT_LENGTH
-- filecb  : optional function( field, filename, chunk, eof )
--
-- Only post requests that carry a content type are processed:
--   * multipart/form-data is decoded with mimedecode() and merged into
--     message.params
--   * application/x-www-form-urlencoded is read up to the first line end,
--     the content length or HTTP_MAX_CONTENT, decoded with
--     urldecode_params() and merged into message.params
--   * any other content type is either fed to filecb chunk by chunk (field
--     name "_post", followed by a final eof call) or, without callback,
--     collected in message.content
-- Nothing is returned; message is modified in place. Anything that is not a
-- message table with an env table is ignored.
function parse_message_body( reader, message, filecb )

	if type(message) ~= "table" or type(message.env) ~= "table" then
		return
	end

	local env    = message.env
	local method = env.REQUEST_METHOD
	local ctype  = env.CONTENT_TYPE

	-- Process post method only
	if type(method) ~= "string" or method:lower() ~= "post" or
	   type(ctype)  ~= "string"
	then
		return
	end

	-- A missing or non-numeric Content-Length falls back to the size limit
	-- instead of raising an arithmetic error.
	local clen = tonumber( env.CONTENT_LENGTH ) or HTTP_MAX_CONTENT

	local has_filecb = ( type(filecb) == "function" )

	-- Copy decoded values into message.params
	local function merge( values )
		if type(message.params) ~= "table" then
			message.params = { }
		end

		for k, v in pairs( values ) do
			message.params[k] = v
		end
	end

	-- Is it multipart/form-data ?
	if ctype:match("^multipart/form%-data") then

		-- The boundary parameter may be quoted and may be followed by
		-- further parameters; take just the delimiter text. When it cannot
		-- be determined nil is passed and mimedecode() looks for it in
		-- the data.
		local boundary = ctype:match('boundary="([^"]+)"')
		              or ctype:match("boundary=([^;%s]+)")

		-- Read multipart/mime data
		merge( mimedecode( reader, boundary, filecb ) )

	-- Is it x-www-form-urlencoded?
	elseif ctype:match('^application/x%-www%-form%-urlencoded') then

		-- Read post data
		local chunks = { }
		local size   = 0

		for chunk, eol in reader do

			chunks[#chunks + 1] = chunk
			size = size + #chunk

			-- Abort on eol or if maximum allowed size or content
			-- length is reached
			if eol or size >= HTTP_MAX_CONTENT or size >= clen then
				break
			end
		end

		-- A line end terminating the data is not part of the last value
		local post_data = table.concat( chunks ):gsub( "[\r\n]+$", "" )

		-- Parse params
		merge( urldecode_params( post_data ) )

	-- Unhandled encoding
	-- If a file callback is given then feed it chunk by chunk, else
	-- store whole buffer in message.content
	else

		local len = 0

		for chunk in reader do

			len = len + #chunk

			-- We have a callback, feed it.
			if has_filecb then
				filecb( "_post", nil, chunk, false )

			-- Append to .content buffer.
			elseif type(message.content) == "string" then
				message.content = message.content .. chunk
			else
				message.content = chunk
			end

			-- Abort if maximum allowed size or content length is reached
			if len >= HTTP_MAX_CONTENT or len >= clen then
				break
			end
		end

		-- Send eof to callback
		if has_filecb then
			filecb( "_post", nil, "", true )
		end
	end
end
500
501
-- Wrap given object into a line read iterator.
--
-- obj   : data source; one of
--           * a string
--           * a table or userdata with a read( n ) or receive( n ) method
--             (read is preferred when both exist)
--           * a function, which is returned unchanged
--         anything else yields an iterator that immediately returns nil
-- bufsz : maximum number of bytes per returned chunk, at least 256
--
-- Each call of the returned iterator delivers two values: the next chunk of
-- data and a flag. If the chunk is a complete line it includes the line end
-- ("\n" or "\r\n") and the flag is true. If no line end was found within
-- bufsz bytes, the buffered data is returned with the flag set to false.
-- Once the source is exhausted the iterator returns nil; no empty trailing
-- chunk is produced.
function _linereader( obj, bufsz )

	bufsz = ( bufsz and bufsz >= 256 ) and bufsz or 256

	local pos      = 1      -- next unread byte of a string source
	local pending  = ""     -- data read from the source but not yet returned
	local finished = false  -- set once source and pending data are used up

	local read_block

	-- object is string
	if type(obj) == "string" then

		read_block = function( n ) return obj:sub( pos, pos + n - 1 ) end

	-- object implements a receive() or read() function
	elseif ( type(obj) == "userdata" or type(obj) == "table" ) and
	       ( type(obj.receive) == "function" or type(obj.read) == "function" )
	then

		if type(obj.read) == "function" then
			read_block = function( n ) return obj:read( n ) end
		else
			read_block = function( n ) return obj:receive( n ) end
		end

	-- object is a function
	elseif type(obj) == "function" then

		return obj

	-- no usable data type
	else

		-- dummy iterator
		return function() return nil end
	end

	-- generic block to line algorithm
	return function()

		if finished then
			return nil
		end

		-- Top up the pending data to at most bufsz bytes. Anything but a
		-- string (nil on eof or error) counts as "no more data".
		local block = read_block( bufsz - #pending )

		if type(block) ~= "string" then
			block = ""
		end

		pos = pos + #block

		local buffer = pending .. block
		local _, endpos = buffer:find( "\r?\n" )

		-- Found a line end: return the line, keep the rest for later
		if endpos then
			pending = buffer:sub( endpos + 1 )
			return buffer:sub( 1, endpos ), true
		end

		-- No line end within the buffer: hand out what is there
		pending = ""

		-- Nothing buffered and nothing read means the source is exhausted.
		-- Signal the end right away instead of returning an empty chunk.
		if #buffer == 0 then
			finished = true
			return nil
		end

		return buffer, false
	end
end