08e8ba25603c6de7b2bb5550b85e36a17ea91bcd
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 require("ltn12")
19 require("luci.util")
20 require("luci.http.protocol.filter")
21
22 HTTP_MAX_CONTENT      = 1024*4          -- 4 kB maximum content size
23 HTTP_URLENC_MAXKEYLEN = 1024            -- maximum allowed size of urlencoded parameter names
24
25
-- Decode an urlencoded string.
-- Every "+" becomes a space and every "%XX" hex escape becomes the byte it
-- encodes. Arguments that are not strings are handed back untouched.
-- Returns the decoded value.
function urldecode( str )

	-- Nothing to decode for non-string input
	if type(str) ~= "string" then
		return str
	end

	-- Plus signs first, so that an escaped "%2B" survives as a literal "+"
	local spaced  = string.gsub( str, "%+", " " )

	-- Then expand the two-digit hex escapes
	local decoded = string.gsub( spaced, "%%(%x%x)", function( hex )
		return string.char( tonumber( hex, 16 ) )
	end )

	return decoded
end
40
41
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url.
--
-- url: either a complete url (everything up to and including the last "?"
--      is discarded) or a bare query string without any "?".
-- tbl: optional table the parameters are merged into.
--
-- A key that occurs more than once is collected into an array of values,
-- a key without a value is stored as an empty string.
-- Returns a table value with urldecoded values.
function urldecode_params( url, tbl )

	local params = tbl or { }

	-- Nothing to parse
	if type(url) ~= "string" then
		return params
	end

	-- Cut off the path. The capture may be empty ("/path?") and the path may
	-- be empty as well ("?a=1"); in neither case must the path or the "?"
	-- end up inside a parameter name.
	local query = url:match( "^.*%?(.*)$" ) or url

	-- Walk over all non-empty pieces between the separators
	for pair in query:gmatch( "[^&;]+" ) do

		-- find key and value
		local key = urldecode( pair:match("^([^=]+)")     )
		local val = urldecode( pair:match("^[^=]+=(.+)$") )

		-- store
		if type(key) == "string" and #key > 0 then
			if type(val) ~= "string" then val = "" end

			local current = params[key]

			if current == nil then
				params[key] = val
			elseif type(current) ~= "table" then
				-- second occurrence, promote to a list
				params[key] = { current, val }
			else
				current[#current + 1] = val
			end
		end
	end

	return params
end
74
75
-- Encode given string in urlencoded format.
-- Every byte outside of the set a-z A-Z 0-9 $ _ - . + ! * ' ( ) , is
-- replaced by "%" followed by its two-digit lowercase hex code. Arguments
-- that are not strings are handed back untouched.
-- Returns the encoded string.
function urlencode( str )

	-- Only strings can be encoded
	if type(str) ~= "string" then
		return str
	end

	local encoded = string.gsub(
		str, "([^a-zA-Z0-9$_%-%.+!*'(),])",
		function( chr )
			return string.format( "%%%02x", chr:byte() )
		end
	)

	return encoded
end
95
96
-- Encode given table to urlencoded string.
--
-- tbl: table of parameter names and values. A value may itself be an array
--      of values (the shape urldecode_params() produces for repeated keys);
--      such a key is emitted once per array entry. Keys and values are
--      converted with tostring() before encoding.
--
-- The pairs are joined with "&" without a leading separator. The order of
-- the pairs follows pairs() and is therefore unspecified.
-- Returns the encoded string, an empty string for an empty table.
function urlencode_params( tbl )
	local parts = { }

	-- Append a single "key=value" pair
	local function add( key, value )
		parts[#parts + 1] =
			urlencode( tostring(key) ) .. "=" ..
			urlencode( tostring(value) )
	end

	for k, v in pairs(tbl) do
		if type(v) == "table" then
			for _, item in ipairs(v) do
				add( k, item )
			end
		else
			add( k, v )
		end
	end

	return table.concat( parts, "&" )
end
110
111
-- Table of our process states
local process_states = { }

-- Extract "magic", the first line of a http message.
-- Recognizes either a request line ("<METHOD> <uri> HTTP/<version>") or a
-- status line ("HTTP/<version> <code> <message>") and records the findings
-- in msg: type, http_version and an empty headers table, plus
-- request_method (lowercased) and request_uri for requests or status_code
-- and status_message for responses.
-- Returns true and the header parsing state on success, nil and an error
-- message if the line is neither of both.
process_states['magic'] = function( msg, chunk )

	-- Without a first line there is nothing to classify
	if chunk == nil then
		return nil, "Invalid HTTP message magic"
	end

	-- Whatever kind of message it is, header parsing comes next
	local function parse_headers( line )
		return process_states['headers']( msg, line )
	end

	-- Is it a request?
	local verb, target, request_version =
		chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

	if verb then
		msg.type           = "request"
		msg.request_method = verb:lower()
		msg.request_uri    = target
		msg.http_version   = tonumber( request_version )
		msg.headers        = { }

		return true, parse_headers
	end

	-- Is it a response?
	local response_version, status, reason =
		chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

	if status then
		msg.type           = "response"
		msg.status_code    = status
		msg.status_message = reason
		msg.http_version   = tonumber( response_version )
		msg.headers        = { }

		return true, parse_headers
	end

	-- Can't handle it
	return nil, "Invalid HTTP message magic"
end
164
165
-- Extract headers from given string.
-- Expects exactly one header line per call (without line terminator) and
-- stores a valid "Name: value" pair in msg.headers. Whitespace between the
-- colon and the value is optional, a line like "Host:example.org" is valid
-- HTTP and accepted as well.
-- Returns true for a stored header, false for the empty line terminating
-- the header section and nil plus an error message for anything else.
process_states['headers'] = function( msg, chunk )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Empty line, we won't accept data anymore
	if #chunk == 0 then
		return false, nil
	end

	-- Look for a valid header format. The value has to start with a
	-- non-whitespace character so that it can never be blank.
	local hdr, val = chunk:match( "^([A-Z][A-Za-z0-9%-_]+):[ \t]*(%S.*)$" )

	if hdr then
		msg.headers[hdr] = val

		-- Valid header line, proceed
		return true, nil
	end

	-- Junk data
	return nil, "Invalid HTTP header received"
end
193
194
-- Find first MIME boundary
-- The body has to start with the delimiter line "--<boundary>\r\n". Input
-- is collected in msg._mimebuffer until enough bytes are available to
-- decide; everything following the delimiter is left in msg._mimebuffer
-- for the next state.
-- Returns true (plus the mime-headers state once the delimiter was seen) or
-- nil and an error message.
process_states['mime-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	local boundary = msg.mime_boundary

	-- RFC 2046 restricts boundaries to 1..70 characters. Enforcing this also
	-- guarantees that the bytes buffered below stay well under the fill level
	-- at which the throttling source in mimedecode_message_body() stops
	-- delivering fresh data.
	if type(boundary) ~= "string" or #boundary == 0 or #boundary > 70 then
		return nil, "Invalid MIME boundary"
	end

	local delimiter = "--" .. boundary .. "\r\n"
	local buffer    = ( msg._mimebuffer or "" ) .. chunk

	-- Not enough data for a decision yet
	if #buffer < #delimiter then

		-- ... unless what we have so far can't become the delimiter anymore
		if delimiter:sub( 1, #buffer ) ~= buffer then
			return nil, "Invalid MIME boundary"
		end

		-- Remember the partial delimiter and keep feeding
		msg._mimebuffer = buffer
		return true
	end

	if buffer:sub( 1, #delimiter ) ~= delimiter then
		return nil, "Invalid MIME boundary"
	end

	-- Store remaining data in buffer
	msg._mimebuffer = buffer:sub( #delimiter + 1 )

	-- Switch to header processing state
	return true, function( chunk )
		return process_states['mime-headers']( msg, chunk, filecb )
	end
end
221
222
-- Read MIME part headers
-- Consumes "Name: value\r\n" lines from the front of the buffered data
-- until the empty line ending the header section is found, then sets up
-- msg._mimecallback as the receiver for the part's data:
--   * file fields (a filename is given and filecb is set) are passed to
--     filecb( { name, file, headers }, chunk, eof )
--   * everything else is appended to msg.params[<field name>]
-- Returns true (plus the mime-data state once the headers are complete) or
-- nil and an error message.
process_states['mime-headers'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	chunk = msg._mimebuffer .. chunk

	-- Headers are collected across calls until the section is complete
	msg._mimeheaders = msg._mimeheaders or { }

	local function __storehdr( k, v )
		msg._mimeheaders[k] = v
		return ""
	end

	-- Read all header lines
	local ok, count = 1, 0
	while ok > 0 do
		chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
		count = count + ok
	end

	-- Headers processed, check for empty line
	chunk, ok = chunk:gsub( "^\r\n", "" )

	-- Store remaining buffer contents
	msg._mimebuffer = chunk

	-- No end of headers yet
	if ok == 0 then
		-- We made progress or the buffer is still small
		if count > 0 or #chunk < 128 then
			-- Keep feeding me with chunks
			return true, nil
		end

		-- Buffer looks like garbage
		return nil, "Malformed MIME section header"
	end

	-- End of headers. Detach the collected headers from msg so that the next
	-- part starts with a clean table instead of inheriting e.g. the
	-- Content-Type of this part.
	local headers = msg._mimeheaders
	msg._mimeheaders = nil

	-- When no Content-Type header is given assume text/plain
	if not headers['Content-Type'] then
		headers['Content-Type'] = 'text/plain'
	end

	-- Check Content-Disposition
	local disposition = headers['Content-Disposition']

	if not disposition then
		-- Content-Disposition is required, abort without
		return nil, "Missing Content-Disposition MIME section header"
	end

	-- Check for "form-data" token
	if not disposition:match("^form%-data; ") then
		-- Unknown Content-Disposition, abort
		return nil, "Unexpected Content-Disposition MIME section header"
	end

	-- Check for field name, filename. The leading separator keeps name="..."
	-- from matching inside filename="...".
	local field = disposition:match('[;%s]name="(.-)"')
	local file  = disposition:match('filename="(.+)"$')

	if not field then
		-- Without a name there is no key to store the value under
		return nil, "Missing field name in Content-Disposition MIME section header"
	end

	-- Is a file field and we have a callback
	if file and filecb then
		msg.params[field] = file
		msg._mimecallback = function(chunk,eof)
			filecb( {
				name    = field;
				file    = file;
				headers = headers
			}, chunk, eof )
		end

	-- Treat as form field
	else
		msg.params[field] = ""
		msg._mimecallback = function(chunk,eof)
			msg.params[field] = msg.params[field] .. chunk
		end
	end

	-- Header was valid, continue with mime-data
	return true, function( chunk )
		return process_states['mime-data']( msg, chunk, filecb )
	end
end
313
314
-- Read MIME part data
-- Forwards the content of the current part to msg._mimecallback( data, eof )
-- and watches for the next delimiter:
--   "\r\n--<boundary>\r\n"  ends the part, another part follows
--   "\r\n--<boundary>--"    ends the part and the whole body; the CRLF
--                           after the closing delimiter is optional
-- Returns true while more data is wanted (plus the mime-headers state when
-- a new part starts), false after the closing delimiter and nil plus an
-- error message on premature end of input.
process_states['mime-data'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Combine look-behind buffer with current chunk
	local buffer    = msg._mimebuffer .. chunk
	local delimiter = "\r\n--" .. msg.mime_boundary

	-- Look for MIME boundary
	local spos, epos = buffer:find( delimiter .. "\r\n", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- Store remainder
		msg._mimebuffer = buffer:sub( epos + 1 )

		-- Next state is mime-header processing
		return true, function( chunk )
			return process_states['mime-headers']( msg, chunk, filecb )
		end
	end

	-- Look for EOF?
	spos = buffer:find( delimiter .. "--", 1, true )

	if spos then
		-- Content data
		msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

		-- We processed the final MIME boundary, cleanup
		msg._mimebuffer   = nil
		msg._mimeheaders  = nil
		msg._mimecallback = nil

		-- We won't accept data anymore
		return false
	end

	-- We're somewhere within a data section. The end of the buffer may hold
	-- the beginning of a delimiter that gets completed by a later chunk, so
	-- only the bytes that can't belong to one anymore are flushed. Both
	-- delimiter forms are #delimiter + 2 bytes long, hence at most
	-- #delimiter + 1 bytes need to be held back. This also applies to empty
	-- chunks handed in by a throttling source.
	local keep = #delimiter + 1

	-- Never hold back more than the 256 bytes at which the throttling source
	-- in mimedecode_message_body() stops reading, otherwise an oversized
	-- boundary would stall the pump
	if keep > 256 then
		keep = 256
	end

	if #buffer > keep then
		-- Flush buffered data
		msg._mimecallback( buffer:sub( 1, #buffer - keep ), false )

		-- Store possible delimiter fragment
		buffer = buffer:sub( #buffer - keep + 1 )
	end

	msg._mimebuffer = buffer

	-- Keep feeding me
	return true
end
374
375
-- Init urldecoding stream
-- Validates the announced Content-Length against HTTP_MAX_CONTENT, stores
-- it as msg.content_length and primes the look-behind buffer with the first
-- chunk.
-- Returns true and the urldecode-key state or nil and an error message.
process_states['urldecode-init'] = function( msg, chunk, filecb )

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for Content-Length
	if not msg.env.CONTENT_LENGTH then
		return nil, "Missing Content-Length header"
	end

	-- Only plain decimal numbers are valid; anything else would make the
	-- size comparison below raise an error instead of rejecting the request
	local digits = tostring( msg.env.CONTENT_LENGTH ):match( "^%s*(%d+)%s*$" )

	if not digits then
		return nil, "Invalid Content-Length header"
	end

	msg.content_length = tonumber( digits )

	if msg.content_length > HTTP_MAX_CONTENT then
		return nil, "Request exceeds maximum allowed size"
	end

	-- Initialize buffer
	msg._urldecbuffer = chunk
	msg._urldeclength = 0

	-- Switch to urldecode-key state
	return true, function(chunk)
		return process_states['urldecode-key']( msg, chunk, filecb )
	end
end
404
405
-- Process urldecoding stream, read and validate parameter key
-- Looks for the "=" terminating a parameter name in the buffered data,
-- creates the (empty) entry in msg.params and installs msg._urldeccallback
-- as receiver for the value data:
--   * with a file callback, the raw value data is passed on as
--     filecb( { name = <key> }, chunk, eof )
--   * without, the data is appended to msg.params[<key>] and urldecoded
--     once eof is signalled
-- Returns true and the urldecode-value state, false when the body was
-- consumed completely (e.g. it ended with a separator) or nil and an error
-- message.
process_states['urldecode-key'] = function( msg, chunk, filecb )

	-- Combine look-behind buffer with current chunk, a nil chunk (end of
	-- input) contributes nothing
	local buffer = msg._urldecbuffer .. ( chunk or "" )

	-- Nothing left to read and all announced bytes have been processed:
	-- the body is complete
	if #buffer == 0 and msg._urldeclength == msg.content_length then
		-- Cleanup
		msg._urldeclength   = nil
		msg._urldecbuffer   = nil
		msg._urldeccallback = nil

		-- We won't accept data anymore
		return false
	end

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Prevent oversized requests
	if msg._urldeclength >= msg.content_length then
		return nil, "Request exceeds maximum allowed size"
	end

	local spos, epos = buffer:find( "=", 1, true )

	-- No param found
	if not spos then
		return nil, "POST data exceeds maximum allowed length"
	end

	-- Check that key doesn't exceed maximum allowed key length
	if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
		return nil, "POST parameter exceeds maximum allowed length"
	end

	local key = urldecode( buffer:sub( 1, spos - 1 ) )

	-- Prepare buffers
	msg.params[key]     = ""
	msg._urldeclength   = msg._urldeclength + epos
	msg._urldecbuffer   = buffer:sub( epos + 1 )

	-- Use file callback or store values inside msg.params
	if filecb then
		-- Same descriptor layout as used for MIME parts
		local field = { name = key }

		msg._urldeccallback = function( chunk, eof )
			filecb( field, chunk, eof )
		end
	else
		msg._urldeccallback = function( chunk, eof )
			msg.params[key] = msg.params[key] .. chunk

			-- FIXME: Use a filter
			if eof then
				msg.params[key] = urldecode( msg.params[key] )
			end
		end
	end

	-- Proceed with urldecode-value state
	return true, function( chunk )
		return process_states['urldecode-value']( msg, chunk, filecb )
	end
end
461
462
-- Process urldecoding stream, read parameter value
-- Passes value data to msg._urldeccallback( data, eof ) until a separator
-- ("&" or ";") or the end of the input is reached and keeps track of the
-- number of processed bytes in msg._urldeclength.
-- Returns true while more data is wanted (plus the urldecode-key state
-- after a separator), false when the body is complete and nil plus an error
-- message otherwise.
process_states['urldecode-value'] = function( msg, chunk, filecb )

	-- Combine look-behind buffer with current chunk, a nil chunk (end of
	-- input) contributes nothing
	local buffer = msg._urldecbuffer .. ( chunk or "" )

	-- Check for EOF: nothing is buffered and nothing new arrived
	if #buffer == 0 then

		-- Compare processed length
		if msg._urldeclength == msg.content_length then

			-- The last value has no trailing separator, so it has not seen
			-- an eof yet; signal it now to get the value finalized
			msg._urldeccallback( "", true )

			-- Cleanup
			msg._urldeclength   = nil
			msg._urldecbuffer   = nil
			msg._urldeccallback = nil

			-- We won't accept data anymore
			return false
		elseif chunk == nil then
			return nil, "Unexpected EOF"
		else
			return nil, "Content-Length mismatch"
		end
	end

	if chunk == nil then
		return nil, "Unexpected EOF"
	end

	-- Check for end of value
	local spos, epos = buffer:find("[&;]")

	if spos then
		-- Flush buffer, send eof
		msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
		msg._urldecbuffer = buffer:sub( epos + 1 )
		msg._urldeclength = msg._urldeclength + epos

		-- Back to urldecode-key state
		return true, function( chunk )
			return process_states['urldecode-key']( msg, chunk, filecb )
		end
	end

	-- We're somewhere within a data section and our buffer is full
	if #buffer > #chunk then
		local flushed = #buffer - #chunk

		-- Flush buffered data
		msg._urldeccallback( buffer:sub( 1, flushed ), false )

		-- Store new data
		msg._urldeclength = msg._urldeclength + flushed
		msg._urldecbuffer = buffer:sub( flushed + 1 )

	-- Buffer is not full yet, append new data
	else
		msg._urldecbuffer = buffer
	end

	-- Keep feeding me
	return true
end
522
523
-- Decode MIME encoded data.
--
-- source: LTN12 source delivering the message body
-- msg:    message table; the boundary is taken from msg.env.CONTENT_TYPE
--         (quoted and unquoted form) and stored in msg.mime_boundary. A
--         msg.mime_boundary preset by the caller is used if no content type
--         is available.
-- filecb: optional callback receiving the data of file fields
--
-- Returns true once the body has been processed or nil and an error message.
function mimedecode_message_body( source, msg, filecb )

	if type(msg) ~= "table" then
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Find mime boundary
	local ctype = msg.env and msg.env.CONTENT_TYPE

	if ctype then
		local bound

		-- The boundary is one of the parameters following the media type
		local ctparams = ctype:match("^multipart/form%-data(;.*)$")

		if ctparams then
			bound = ctparams:match('[;%s]boundary="([^"]+)"')
			     or ctparams:match('[;%s]boundary=([^;%s"]+)')
		end

		if bound then
			msg.mime_boundary = bound
		else
			return nil, "No MIME boundary found or invalid content type given"
		end

	elseif not msg.mime_boundary then
		-- Neither a content type nor a preset boundary, can't parse anything
		return nil, "No MIME boundary found or invalid content type given"
	end

	-- Create an initial LTN12 sink
	-- The whole MIME parsing process is implemented as a fancy sink; sinks
	-- replace themselves depending on the current processing state (init,
	-- header, data). Start with the initial state.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['mime-init']( msg, chunk, filecb )
		end
	)

	-- Create a throttling LTN12 source
	-- Frequent state switching in the mime parsing process leads to unwanted
	-- buffer aggregation. This source checks whether there's still data in
	-- our internal read buffer and returns an empty string if there's already
	-- enough data in the processing queue. If the internal buffer runs empty
	-- the original source is called to get the next chunk of data.
	local tsrc = function()

		-- XXX: we should probably keep the maximum buffer size in sync with
		--      the blocksize of our original source... but doesn't really matter
		if msg._mimebuffer ~= nil and #msg._mimebuffer > 256 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
579
580
-- Decode urlencoded data.
--
-- source: LTN12 source delivering the message body
-- msg:    message table, the decoded parameters end up in msg.params
--
-- No file callback is handed to the processing states, values are always
-- collected in msg.params.
-- Returns true once the body has been processed or nil and an error message.
function urldecode_message_body( source, msg )

	-- Create an initial LTN12 sink
	-- Start with the initial state, the sink replaces itself on state changes.
	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['urldecode-init']( msg, chunk )
		end
	)

	-- Create a throttling LTN12 source
	-- See explanation in mimedecode_message_body(). As long as there is
	-- buffered data left to process, no new data is read from the original
	-- source.
	local tsrc = function()
		if msg._urldecbuffer ~= nil and #msg._urldecbuffer > 0 then
			return ""
		else
			return source()
		end
	end

	-- Pump input data...
	while true do
		-- get data
		local ok, err = ltn12.pump.step( tsrc, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end
617
618
-- Parse a http message
--
-- data:   input handed to _linereader()
-- filecb: optional callback passed on to parse_message_body()
--
-- Returns the message table. If the header can't be parsed, nil and the
-- error message are returned. If only the body fails, the message parsed so
-- far is still returned, followed by the error message as second value.
--
-- NOTE(review): _linereader and HTTP_MAX_READBUF are not defined in the
-- visible part of this file - confirm they are provided elsewhere.
function parse_message( data, filecb )

	local reader = _linereader( data, HTTP_MAX_READBUF )

	local message, err = parse_message_header( reader )

	if not message then
		return nil, err
	end

	local ok, body_err = parse_message_body( reader, message, filecb )

	if not ok then
		return message, body_err
	end

	return message
end
631
632
-- Parse a http message header
--
-- source: LTN12 source delivering the message header line by line
--
-- Feeds the first line and the header lines through the processing states
-- and afterwards populates msg.params (from the query string of GET and
-- POST requests) and the CGI-like msg.env table. For responses the
-- request specific env entries are left unset.
-- Returns the message table or nil and an error message.
function parse_message_header( source )

	local msg  = { }

	local sink = ltn12.sink.simplify(
		function( chunk )
			return process_states['magic']( msg, chunk )
		end
	)

	-- Pump input data until the sink refuses further input
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		if not ok then
			-- error
			if err then
				return nil, err
			end

			-- eof, the header section is complete
			break
		end
	end

	local method = msg.request_method
	local uri    = msg.request_uri

	-- Process get parameters
	if ( method == "get" or method == "post" ) and uri:find( "?", 1, true ) then
		msg.params = urldecode_params( uri )
	else
		msg.params = { }
	end

	-- Populate common environment variables
	msg.env = {
		CONTENT_LENGTH    = msg.headers['Content-Length'];
		CONTENT_TYPE      = msg.headers['Content-Type'];
		-- not available for responses
		REQUEST_METHOD    = method and method:upper();
		REQUEST_URI       = uri;
		-- uri without query string; "and" also cuts gsub() down to one value
		SCRIPT_NAME       = uri and uri:gsub( "%?.*$", "" );
		SCRIPT_FILENAME   = "";         -- XXX implement me
		-- http_version is a number, format it explicitly so that 1.0 does
		-- not degrade to "HTTP/1"
		SERVER_PROTOCOL   = string.format( "HTTP/%.1f", msg.http_version )
	}

	-- Populate HTTP_* environment variables
	for i, hdr in ipairs( {
		'Accept',
		'Accept-Charset',
		'Accept-Encoding',
		'Accept-Language',
		'Connection',
		'Cookie',
		'Host',
		'Referer',
		'User-Agent',
	} ) do
		local var = 'HTTP_' .. hdr:upper():gsub("%-","_")
		local val = msg.headers[hdr]

		msg.env[var] = val
	end

	return msg
end
700
701
-- Parse a http message body
--
-- source: LTN12 source delivering the message body
-- msg:    message table as returned by parse_message_header()
-- filecb: optional callback; receives file fields of multipart bodies or,
--         for unhandled encodings, is used directly as sink for the body
--
-- Without a callback the body of an unhandled encoding is collected in
-- msg.content (limited to HTTP_MAX_CONTENT bytes) and its size is stored in
-- msg.content_length.
-- Returns true once the body has been processed or nil and an error message.
function parse_message_body( source, msg, filecb )

	-- Install an additional filter if we're operating on chunked transfer
	-- coding and client is HTTP/1.1 capable
	if msg.http_version == 1.1 and
	   msg.headers['Transfer-Encoding'] and
	   msg.headers['Transfer-Encoding']:find("chunked")
	then
		source = ltn12.source.chain(
			source, luci.http.protocol.filter.decode_chunked
		)
	end

	local is_post = ( msg.env.REQUEST_METHOD == "POST" )
	local ctype   = msg.env.CONTENT_TYPE

	-- Is it multipart/mime ?
	if is_post and ctype and ctype:match("^multipart/form%-data") then

		return mimedecode_message_body( source, msg, filecb )

	-- Is it application/x-www-form-urlencoded ?
	-- The media type may be followed by parameters like "; charset=UTF-8"
	elseif is_post and ctype and
	       ctype:match("^application/x%-www%-form%-urlencoded")
	then
		return urldecode_message_body( source, msg, filecb )
	end

	-- Unhandled encoding
	-- If a file callback is given then feed it with the raw chunks, else
	-- store whole buffer in message.content
	local sink

	-- If we have a file callback then feed it
	if type(filecb) == "function" then
		sink = filecb

	-- ... else append to .content
	else
		msg.content = ""
		msg.content_length = 0

		sink = function( chunk )
			-- A nil chunk signals the end of the data, nothing to store
			if chunk == nil then
				return true
			end

			if ( msg.content_length + #chunk ) > HTTP_MAX_CONTENT then
				return nil, "POST data exceeds maximum allowed length"
			end

			msg.content        = msg.content        .. chunk
			msg.content_length = msg.content_length + #chunk

			return true
		end
	end

	-- Pump data until the source is exhausted
	while true do
		local ok, err = ltn12.pump.step( source, sink )

		-- error
		if not ok and err then
			return nil, err

		-- eof
		elseif not ok then
			return true
		end
	end
end