01d3128b2525f4e9ffd9604f655054037598f442
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("ltn12")
19 require("luci.util")
20
-- Size limits enforced by the body parsers in this module.
HTTP_MAX_CONTENT      = 1024*4          -- 4 kB maximum content size
HTTP_URLENC_MAXKEYLEN = 1024            -- maximum allowed size of urlencoded parameter names
23
24
-- Decode an urlencoded string.
-- Every "+" becomes a space and every "%XX" hex escape becomes the byte it
-- encodes. Malformed escapes (e.g. a lone "%" or "%zz") are left untouched.
-- Non-string arguments are passed through unchanged.
-- Returns the decoded value.
function urldecode( str )

	local function __chrdec( hex )
		return string.char( tonumber( hex, 16 ) )
	end

	if type(str) == "string" then
		-- "+" is a pattern quantifier, so it is escaped to match literally.
		-- The outer parentheses drop gsub's second result (the match count).
		str = ( str:gsub( "%+", " " ):gsub( "%%(%x%x)", __chrdec ) )
	end

	return str
end
39
40
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url.
-- If the url contains a "?", only the text after the first "?" is parsed;
-- otherwise the whole string is treated as the query string.
-- Pairs are stored in tbl when given, else in a fresh table. A key that
-- occurs more than once ends up as an array of its values, in order of
-- appearance. A key without "=value" is stored with an empty string.
-- Returns the table with urldecoded keys and values.
function urldecode_params( url, tbl )

	local params = tbl or { }

	-- Plain find: "?" is a pattern magic character. The first "?" starts the
	-- query, any later "?" belongs to the data.
	local qpos = url:find( "?", 1, true )
	if qpos then
		url = url:sub( qpos + 1 )
	end

	-- Walk over all non-empty segments between runs of "&" / ";"
	for pair in url:gmatch( "[^&;]+" ) do

		-- find key and value
		local key = urldecode( pair:match("^([^=]+)")     )
		local val = urldecode( pair:match("^[^=]+=(.+)$") )

		-- store
		if type(key) == "string" and key:len() > 0 then
			if type(val) ~= "string" then val = "" end

			if not params[key] then
				params[key] = val
			elseif type(params[key]) ~= "table" then
				-- second occurrence: promote to a list
				params[key] = { params[key], val }
			else
				table.insert( params[key], val )
			end
		end
	end

	return params
end
73
74
-- Encode given string in urlencoded format.
-- Letters, digits and the characters $ _ - . ! * ' ( ) , are kept as they
-- are; every other byte is written as a lowercase "%xx" escape. "+" is
-- escaped as well, because urldecode() turns a literal "+" into a space and
-- the value would otherwise not survive a round trip.
-- Non-string arguments are passed through unchanged.
-- Returns the encoded string.
function urlencode( str )

	local function __chrenc( chr )
		return string.format( "%%%02x", string.byte( chr ) )
	end

	if type(str) == "string" then
		-- The outer parentheses drop gsub's second result (the match count)
		str = ( str:gsub( "([^a-zA-Z0-9$_%-%.!*'(),])", __chrenc ) )
	end

	return str
end
94
95
-- Encode given table to urlencoded string.
-- Each key/value pair becomes "key=value", pairs are joined with "&".
-- A table value is treated as a list and emits one pair per element, which
-- mirrors the multi-value tables produced by urldecode_params().
-- The order of the pairs follows pairs() and is therefore unspecified.
-- Returns the encoded string (empty for an empty table).
function urlencode_params( tbl )
	local parts = { }

	local function __add( k, v )
		parts[#parts + 1] = urlencode( tostring(k) ) .. "=" .. urlencode( tostring(v) )
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				__add( k, item )
			end
		else
			__add( k, v )
		end
	end

	-- Joining at the end avoids a stray leading "&"
	return table.concat( parts, "&" )
end
109
110
-- Table of our process states.
-- Each entry is a function( msg, chunk [, filecb] ) used as the body of an
-- LTN12 sink. The handlers return
--   true [, next]   chunk accepted; continue with the handler "next" if given
--   false           processing finished, no more data wanted
--   nil, message    processing failed
local process_states = { }

-- Extract "magic", the first line of a http message.
-- Recognises a request line ("METHOD uri HTTP/x.y") or a status line
-- ("HTTP/x.y code message") and stores the pieces in msg:
--   request:  type, request_method (lowercased), request_uri, http_version
--   response: type, status_code (kept as the matched string), status_message,
--             http_version
-- In both cases msg.headers is reset to an empty table and the next state
-- is header parsing. Both patterns are anchored at the end, so the chunk
-- has to be exactly one line without its line terminator.
process_states['magic'] = function( msg, chunk )

	-- Both message types continue with header parsing
	local function __headers( chunk )
		return process_states['headers']( msg, chunk )
	end

	if chunk ~= nil then

		-- Is it a request?
		local method, uri, http_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

		if method then
			msg.type           = "request"
			msg.request_method = method:lower()
			msg.request_uri    = uri
			msg.http_version   = http_ver
			msg.headers        = { }

			return true, __headers
		end

		-- Is it a response?
		local code, message
		http_ver, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

		if code then
			msg.type           = "response"
			msg.status_code    = code
			msg.status_message = message
			msg.http_version   = http_ver
			msg.headers        = { }

			return true, __headers
		end
	end

	-- Can't handle it (also reached when the stream ends before any line)
	return nil, "Invalid HTTP message magic"
end
163
164
-- Extract headers from given string.
-- Expects one header line per chunk. A line of the form "Name: value" is
-- stored as msg.headers[Name] = value; the name has to start with an
-- uppercase letter and the whitespace after the colon is optional.
-- Returns
--   true          header stored, feed the next line
--   false         empty line seen, the header section is complete
--   nil, message  malformed line or end of stream inside the headers
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Empty line, we won't accept data anymore
	if #chunk == 0 then
		return false, nil
	end

	-- Look for a valid header format. Both captures are non-empty whenever
	-- the pattern matches, so no further length checks are needed.
	local hdr, val = chunk:match( "^([A-Z][A-Za-z0-9%-_]+):[ \t]*(.+)$" )

	if hdr then
		msg.headers[hdr] = val

		-- Valid header line, proceed
		return true, nil
	end

	-- Junk data
	return nil, "Invalid HTTP header received"
end
192
193
-- Find first MIME boundary.
-- The body has to start with "--" .. msg.mime_boundary .. "\r\n". Input is
-- collected in msg._mimebuffer until enough bytes are available to decide,
-- so a delimiter that arrives split over several chunks is handled.
-- Returns
--   true          more data needed
--   true, next    delimiter found; the remaining bytes are left in
--                 msg._mimebuffer and "next" is the mime-headers state
--   nil, message  data does not start with the delimiter, or end of stream
process_states['mime-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- RFC 2046 limits boundaries to 70 characters. Rejecting longer ones
	-- also keeps the look-behind buffer small.
	if #msg.mime_boundary > 70 then
		return nil, "Invalid MIME boundary"
	end

	local delimiter = "--" .. msg.mime_boundary .. "\r\n"
	local buffer    = ( msg._mimebuffer or "" ) .. chunk

	if #buffer < #delimiter then
		-- Not enough data to decide yet; bail out early if what we have
		-- cannot become the delimiter anymore
		if delimiter:sub( 1, #buffer ) ~= buffer then
			return nil, "Invalid MIME boundary"
		end

		msg._mimebuffer = buffer
		return true
	end

	if buffer:sub( 1, #delimiter ) ~= delimiter then
		return nil, "Invalid MIME boundary"
	end

	-- Store remaining data in buffer
	msg._mimebuffer = buffer:sub( #delimiter + 1 )

	-- Switch to header processing state
	return true, function( chunk )
		return process_states['mime-headers']( msg, chunk, filecb )
	end
end
220
221
-- Read MIME part headers.
-- Consumes "Name: value\r\n" lines from the front of the buffered data and
-- collects them in msg._mimeheaders until the empty line that ends the
-- header block is found. At that point
--   * Content-Type defaults to 'text/plain' when the part gave none,
--   * Content-Disposition has to be present and start with "form-data; ",
--   * msg._mimecallback is set up for the part data: file fields (a
--     filename is given and filecb is set) are passed to
--     filecb( { name, file, headers }, chunk, eof ), everything else is
--     appended to msg.params[name].
-- Returns
--   true          need more data
--   true, next    headers complete, "next" is the mime-data state
--   nil, message  malformed or unsupported headers, or end of stream
process_states['mime-headers'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	chunk = msg._mimebuffer .. chunk

	if not msg._mimeheaders then
		msg._mimeheaders = { }
	end

	local function __storehdr( k, v )
		msg._mimeheaders[k] = v
		return ""
	end

	-- Read all header lines; the anchored gsub strips one line per pass
	local ok, count = 1, 0
	while ok > 0 do
		chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
		count = count + ok
	end

	-- Headers processed, check for empty line
	chunk, ok = chunk:gsub( "^\r\n", "" )

	-- Store remaining buffer contents
	msg._mimebuffer = chunk

	-- End of headers
	if ok > 0 then

		-- Detach the collected headers so that the next part starts with a
		-- clean table instead of inheriting the headers of this one
		local headers = msg._mimeheaders
		msg._mimeheaders = nil

		-- When no Content-Type header is given assume text/plain
		if not headers['Content-Type'] then
			headers['Content-Type'] = 'text/plain'
		end

		-- Content-Disposition is required, abort without
		local disposition = headers['Content-Disposition']
		if not disposition then
			return nil, "Missing Content-Disposition MIME section header"
		end

		-- Check for "form-data" token
		if not disposition:match("^form%-data; ") then
			return nil, "Unexpected Content-Disposition MIME section header"
		end

		-- Check for field name, filename. The leading [ ;] keeps the name
		-- pattern from matching inside filename="..."
		local field = disposition:match('[ ;]name="(.-)"')
		local file  = disposition:match('filename="(.+)"$')

		-- Without a field name there is no key to store the part under
		if not field then
			return nil, "Missing field name in Content-Disposition MIME section header"
		end

		-- Is a file field and we have a callback
		if file and filecb then
			msg.params[field] = file
			msg._mimecallback = function( chunk, eof )
				filecb( {
					name    = field;
					file    = file;
					headers = headers
				}, chunk, eof )
			end

		-- Treat as form field
		else
			msg.params[field] = ""
			msg._mimecallback = function( chunk, eof )
				msg.params[field] = msg.params[field] .. chunk
			end
		end

		-- Header was valid, continue with mime-data
		return true, function( chunk )
			return process_states['mime-data']( msg, chunk, filecb )
		end

	-- Header block not finished: either we made progress in this call or
	-- the buffer is still short enough to be an incomplete line
	elseif count > 0 or #chunk < 128 then
		-- Keep feeding me with chunks
		return true, nil
	end

	-- Buffer looks like garbage
	return nil, "Malformed MIME section header"
end
312
313
-- Read MIME part data.
-- Searches the buffered data for the next part delimiter
-- ("\r\n--boundary\r\n") or the closing delimiter ("\r\n--boundary--\r\n").
-- Data in front of a delimiter is passed to msg._mimecallback( data, true ).
-- While no delimiter is visible, data is passed on with eof = false, except
-- for a tail that is kept back because it could be the beginning of a
-- delimiter that is completed by a later chunk.
-- Returns
--   true          need more data
--   true, next    part finished, "next" is the mime-headers state
--   false         closing delimiter processed, parser state cleaned up
--   nil, message  end of stream inside a part
process_states['mime-data'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local buffer    = msg._mimebuffer .. chunk
	local delimiter = "\r\n--" .. msg.mime_boundary

	-- Look for MIME boundary
	local spos, epos = buffer:find( delimiter .. "\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- Store remainder
		msg._mimebuffer = buffer:sub( epos + 1 )

		-- Next state is mime-header processing
		return true, function( chunk )
			return process_states['mime-headers']( msg, chunk, filecb )
		end
	end

	-- Look for the closing delimiter
	spos = buffer:find( delimiter .. "--\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- We processed the final MIME boundary, cleanup
		msg._mimebuffer   = nil
		msg._mimeheaders  = nil
		msg._mimecallback = nil

		-- We won't accept data anymore
		return false
	end

	-- We're somewhere within a data section. The longest marker is the
	-- closing delimiter (#delimiter + 4 bytes), so at most #delimiter + 3
	-- trailing bytes can belong to a marker that is not complete yet.
	-- The cap of 256 matches the buffer threshold of the throttling source
	-- in mimedecode_message_body(), which only reads new input while the
	-- buffer does not exceed that size.
	local keep = math.min( #delimiter + 3, 256 )

	if #buffer > keep then
		-- Flush everything that cannot be part of a delimiter
		msg._mimecallback( buffer:sub( 1, #buffer - keep ), false )
		msg._mimebuffer = buffer:sub( #buffer - keep + 1 )
	else
		-- Too little data to flush safely, keep all of it
		msg._mimebuffer = buffer
	end

	-- Keep feeding me
	return true
end
373
374
-- Init urldecoding stream.
-- Reads the Content-Length header into msg.content_length, checks it
-- against HTTP_MAX_CONTENT and initialises the decoder state:
-- msg._urldecbuffer holds the first chunk, msg._urldeclength counts the
-- bytes processed so far.
-- Returns
--   true, next    "next" is the urldecode-key state
--   nil, message  header missing, not a number, too large, or end of stream
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	local declared = msg.headers['Content-Length']
	if not declared then
		return nil, "Missing Content-Length header"
	end

	-- tonumber() yields nil for non-numeric input, which must not reach
	-- the comparison below
	msg.content_length = tonumber( declared )
	if not msg.content_length or msg.content_length < 0 then
		return nil, "Invalid Content-Length header"
	end

	if msg.content_length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function( chunk )
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
403
404
-- Process urldecoding stream, read and validate parameter key.
-- Looks for "=" in the buffered data; the text in front of it is the
-- (urldecoded) parameter name. The name is registered in msg.params with an
-- empty value and msg._urldeccallback is set up to receive the value data:
-- with a filecb the data goes to filecb( { name = key }, chunk, eof ),
-- otherwise it is appended to msg.params[key].
-- Returns
--   true, next    "next" is the urldecode-value state
--   nil, message  on error or end of stream
-- NOTE(review): value data is handed on exactly as received; only the key
-- is passed through urldecode(). Confirm whether callers decode values.
-- NOTE(review): a key that is split over two chunks is reported as an error
-- because no "=" is visible yet.
process_states['urldecode-key'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk
	local spos, epos = buffer:find( "=", 1, true )

	-- No key/value separator in sight
	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Prepare buffers
	msg.params[key]   = ""
	msg._urldeclength = msg._urldeclength + epos
	msg._urldecbuffer = buffer:sub( epos + 1 )

	-- Use file callback or store values inside msg.params
	if filecb then
		-- Same descriptor shape as the one used for MIME parts, reduced to
		-- the field name
		local info = { name = key }

		msg._urldeccallback = function( chunk, eof )
			filecb( info, chunk, eof )
		end
	else
		msg._urldeccallback = function( chunk, eof )
			msg.params[key] = msg.params[key] .. chunk
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
456
457
-- Process urldecoding stream, read parameter value.
-- Value data is passed to msg._urldeccallback( data, eof ). A "&" or ";"
-- ends the value (eof = true) and switches back to the urldecode-key state.
-- The stream is finished when no data is left, signalled either by an
-- empty chunk with an empty buffer or by the nil chunk at the end of the
-- source. The number of processed bytes then has to equal
-- msg.content_length.
-- Returns
--   true          need more data
--   true, next    value finished, "next" is the urldecode-key state
--   false         all data processed, decoder state cleaned up
--   nil, message  processed length differs from Content-Length
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- End of data: verify the length, close the current value, clean up
	local function __finish()
		if msg._urldeclength ~= msg.content_length then
			return nil, "Content-Length mismatch"
		end

		-- The last value is not followed by a separator, so tell the
		-- callback here that it is complete
		msg._urldeccallback( "", true )

		-- Cleanup
		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil

		-- We won't accept data anymore
		return false
	end

	-- End of the source: the nil chunk is the regular LTN12 end-of-stream
	-- signal. Hand over whatever is still buffered, then finish.
	if chunk == nil then
		local pending = msg._urldecbuffer or ""

		if #pending > 0 then
			msg._urldeccallback( pending, false )
			msg._urldeclength = msg._urldeclength + #pending
			msg._urldecbuffer = ""
		end

		return __finish()
	end

	-- Combine look-behind buffer with current chunk
	local buffer = msg._urldecbuffer .. chunk

	-- Check for EOF
	if #buffer == 0 then
		return __finish()
	end

	-- Check for end of value
	local spos, epos = buffer:find("[&;]")
	if spos then

		-- Flush buffer, send eof
		msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
		msg._urldecbuffer = buffer:sub( epos + 1 )
		msg._urldeclength = msg._urldeclength + epos

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	-- We're somewhere within a value
	if #buffer > #chunk then
		-- Flush the previously buffered data, keep the new chunk
		local flushed = #buffer - #chunk

		msg._urldeccallback( buffer:sub( 1, flushed ), false )

		msg._urldeclength = msg._urldeclength + flushed
		msg._urldecbuffer = buffer:sub( flushed + 1 )

	-- Nothing was buffered before, store the new data
	else
		msg._urldecbuffer = buffer
	end

	-- Keep feeding me
	return true
end
517
518
-- Decode MIME encoded data.
-- source  LTN12 source delivering the raw message body
-- msg     message table; the boundary is taken from its Content-Type
--         header ("multipart/form-data; boundary=...", optionally quoted).
--         Without that header a msg.mime_boundary set by the caller is used.
-- filecb  optional callback for file fields, see the mime-headers state
-- Returns true when the body was processed, or nil plus an error message.
function mimedecode_message_body( source, msg, filecb )

	-- Find mime boundary
	if msg and msg.headers and msg.headers['Content-Type'] then

		local bound = msg.headers['Content-Type']:match(
			'^multipart/form%-data;%s*boundary="?([^";]+)"?'
		)

		if bound then
			msg.mime_boundary = bound
		else
			return nil, "No MIME boundary found or invalid content type given"
		end
	end

	-- The state machine cannot run without a boundary
	if not ( msg and msg.mime_boundary ) then
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Create an initial LTN12 sink
	-- The whole MIME parsing process is implemented as fancy sink, sinks replace themselves
	-- depending on current processing state (init, header, data). Return the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['mime-init']( msg, chunk, filecb )
		end
	)

	-- Create a throttling LTN12 source
	-- Frequent state switching in the mime parsing process leads to unwanted buffer aggregation.
	-- This source checks whether there's still data in our internal read buffer and returns an
	-- empty string if there's already enough data in the processing queue. If the internal buffer
	-- runs empty we're calling the original source to get the next chunk of data.
	local tsrc = function()

		-- XXX: we should probably keep the maximum buffer size in sync with
		--      the blocksize of our original source... but doesn't really matter
		if msg._mimebuffer ~= nil and #msg._mimebuffer > 256 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
574
575
-- Decode urlencoded data.
-- source  LTN12 source delivering the raw message body
-- msg     message table; needs msg.headers (Content-Length) and msg.params
-- Returns true when the body was processed, or nil plus an error message.
-- NOTE(review): parse_message_body() passes a third argument (filecb) which
-- is not accepted here, so the decoder states always run without a file
-- callback and store the values in msg.params.
function urldecode_message_body( source, msg )

	-- Create an initial LTN12 sink
	-- Return the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['urldecode-init']( msg, chunk )
		end
	)

	-- Create a throttling LTN12 source
	-- Returns an empty string as long as the decoder still has buffered
	-- data to work on and only then asks the original source for more.
	local tsrc = function()
		if msg._urldecbuffer ~= nil and #msg._urldecbuffer > 0 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
612
613
-- Parse a http message.
-- data    input handed to _linereader()
-- filecb  optional callback, passed on to parse_message_body()
-- Returns the message table, or nil plus an error message when the header
-- could not be parsed. When only the body fails, the message is still
-- returned (as before) and the error message is added as second result.
-- NOTE(review): _linereader and HTTP_MAX_READBUF are not defined in the
-- visible part of this file; confirm they are provided elsewhere.
function parse_message( data, filecb )

	local reader = _linereader( data, HTTP_MAX_READBUF )

	local message, err = parse_message_header( reader )
	if not message then
		return nil, err
	end

	local ok, body_err = parse_message_body( reader, message, filecb )
	if not ok then
		return message, body_err
	end

	return message
end
626
627
-- Parse a http message header.
-- Pumps the source through the magic/headers states and then fills in
--   msg.params  query parameters of a get/post request uri, else empty
--   msg.env     CGI style variables derived from request line and headers
-- For response messages the request specific env entries stay unset.
-- Returns the message table, or nil plus an error message.
function parse_message_header( source )

	local msg  = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data until the header states stop accepting data
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- end of header section
			break
		end
	end

	local method = msg.request_method
	local uri    = msg.request_uri

	-- Process get parameters
	if ( method == "get" or method == "post" ) and uri:find( "?", 1, true ) then
		msg.params = urldecode_params( uri )
	else
		msg.params = { }
	end

	-- Populate common environment variables.
	-- method and uri are nil for response messages, hence the guards.
	msg.env = {
		CONTENT_LENGTH    = msg.headers['Content-Length'];
		CONTENT_TYPE      = msg.headers['Content-Type'];
		REQUEST_METHOD    = method and method:upper();
		REQUEST_URI       = uri;
		-- parentheses keep only the string result of gsub
		SCRIPT_NAME       = uri and ( uri:gsub( "%?.*$", "" ) );
		SCRIPT_FILENAME   = ""          -- XXX implement me
	}

	-- Populate HTTP_* environment variables
	for _, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		local var = 'HTTP_' .. ( hdr:upper():gsub( "%-", "_" ) )

		msg.env[var] = msg.headers[hdr]
	end

	return msg
end
694
695
-- Parse a http message body.
-- POST requests with a multipart/form-data or
-- application/x-www-form-urlencoded content type (parameters such as
-- "; charset=..." are allowed) are handed to the matching decoder.
-- Everything else is treated as opaque content: with a filecb the callback
-- is used directly as LTN12 sink, i.e. it is called as filecb( chunk, err )
-- and has to return a true value to receive further chunks; without one
-- the data is collected in msg.content, limited to HTTP_MAX_CONTENT bytes.
-- Returns true on success, or nil plus an error message.
function parse_message_body( source, msg, filecb )

	local method = msg.env.REQUEST_METHOD
	local ctype  = msg.env.CONTENT_TYPE

	-- Is it multipart/mime ?
	if method == "POST" and ctype and
	   ctype:match("^multipart/form%-data")
	then

		return mimedecode_message_body( source, msg, filecb )

	-- Is it application/x-www-form-urlencoded ?
	elseif method == "POST" and ctype and
	       ( ctype == "application/x-www-form-urlencoded" or
	         ctype:match("^application/x%-www%-form%-urlencoded%s*;") )
	then

		return urldecode_message_body( source, msg, filecb )

	end

	-- Unhandled encoding
	local sink

	-- If we have a file callback then feed it
	if type(filecb) == "function" then
		sink = filecb

	-- ... else append to .content
	else
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- nil marks the end of the stream, nothing to store
			if chunk == nil then
				return true
			end

			if msg.content_length + #chunk <= HTTP_MAX_CONTENT then

				msg.content        = msg.content        .. chunk
				msg.content_length = msg.content_length + #chunk

				return true
			else
				return nil, "POST data exceeds maximum allowed length"
			end
		end
	end

	-- Pump data until the source is exhausted or the sink gives up
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok and err then
			return nil, err
		elseif not ok then
			return true
		end
	end
end