b8e9624495fca3f6bbd78b51d7b0bc07f3d25495
[project/luci.git] / libs / http / luasrc / http / protocol.lua
1 --[[
2
3 HTTP protocol implementation for LuCI
4 (c) 2008 Freifunk Leipzig / Jo-Philipp Wich <xm@leipzig.freifunk.net>
5
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10         http://www.apache.org/licenses/LICENSE-2.0
11
12 $Id$
13
14 ]]--
15
16 module("luci.http.protocol", package.seeall)
17
18 local ltn12 = require("luci.ltn12")
19
HTTP_MAX_CONTENT      = 4 * 1024        -- maximum accepted content size in bytes (4 kB)
HTTP_URLENC_MAXKEYLEN = 1024            -- maximum allowed length of urlencoded parameter names
22
23
-- Decode an urlencoded string.
-- Unless no_plus is set, every "+" is turned into a space first; afterwards
-- each "%XX" escape (two hex digits) is replaced by the byte it encodes.
-- Returns the decoded string. Non-string arguments are returned unchanged.
function urldecode( str, no_plus )

        -- Nothing to decode
        if type(str) ~= "string" then
                return str
        end

        local decoded = str

        if not no_plus then
                decoded = decoded:gsub( "+", " " )
        end

        -- The assignment keeps only the first gsub() result (the string)
        decoded = decoded:gsub( "%%([a-fA-F0-9][a-fA-F0-9])", function( hex )
                return string.char( tonumber( hex, 16 ) )
        end )

        return decoded
end
42
43
-- Extract and split urlencoded data pairs, separated by either "&" or ";",
-- from the given url.
--   url: either a full url (everything up to and including the last "?" is
--        discarded) or a bare query string
--   tbl: optional table to store the parameters in; a new one is created
--        when omitted
-- Returns the table with urldecoded keys and values. A key that occurs
-- several times gets a table holding all of its values in order of
-- appearance; a key without a value gets the empty string.
function urldecode_params( url, tbl )

        local params = tbl or { }

        -- Strip the path part. Matching ".*" instead of ".+" and not requiring
        -- any text behind the "?" also covers "/path?" and "?a=b", which would
        -- otherwise leave the path inside the first parameter name.
        url = url:gsub( "^.*%?", "" )

        for pair in url:gmatch( "[^&;]+" ) do

                -- find key and value
                local key = urldecode( pair:match("^([^=]+)")     )
                local val = urldecode( pair:match("^[^=]+=(.+)$") )

                -- store
                if type(key) == "string" and key:len() > 0 then
                        if type(val) ~= "string" then val = "" end

                        local existing = params[key]

                        if not existing then
                                params[key] = val
                        elseif type(existing) ~= "table" then
                                -- second occurrence: promote to a list
                                params[key] = { existing, val }
                        else
                                existing[#existing + 1] = val
                        end
                end
        end

        return params
end
76
77
-- Encode given string in urlencoded format.
-- Every byte that is not a letter, a digit or one of $ _ - . + ! * ' ( ) ,
-- is replaced by "%" followed by its two digit lowercase hex code.
-- Returns the encoded string. Non-string arguments are returned unchanged.
function urlencode( str )

        -- Nothing to encode
        if type(str) ~= "string" then
                return str
        end

        -- Only the first gsub() result (the string) is kept
        local encoded = str:gsub( "([^a-zA-Z0-9$_%-%.%+!*'(),])", function( chr )
                return string.format( "%%%02x", string.byte( chr ) )
        end )

        return encoded
end
97
98
-- Encode given table to urlencoded string.
--   tbl: table mapping parameter names to values; a value may itself be a
--        list, in which case one "name=value" pair is written per entry
-- Returns the "name=value" pairs joined by "&" without a leading separator
-- (an empty table yields ""). The order of the pairs follows pairs() and is
-- therefore unspecified.
function urlencode_params( tbl )
        local parts = { }

        local function add( k, v )
                parts[#parts + 1] = urlencode(k) .. "=" .. urlencode(v)
        end

        for k, v in pairs(tbl) do
                if type(v) == "table" then
                        -- multi-valued parameter
                        for _, item in ipairs(v) do
                                add( k, item )
                        end
                else
                        add( k, v )
                end
        end

        -- Joining once avoids both the stray leading "&" and the repeated
        -- string concatenation of a running accumulator
        return table.concat( parts, "&" )
end
112
113
114 -- Table of our process states
115 local process_states = { }
116
-- Parse the "magic", i.e. the first line of a http message.
-- A request line fills msg.type ("request"), msg.request_method (lowercased),
-- msg.request_uri and msg.http_version; a status line fills msg.type
-- ("response"), msg.status_code (kept as string), msg.status_message and
-- msg.http_version. In both cases msg.headers is initialized to an empty
-- table and the header parsing state is returned as next state.
-- Empty lines in front of the message are skipped.
-- Returns nil plus an error message if the line is neither of both.
process_states['magic'] = function( msg, chunk, err )

        -- No line at all
        if chunk == nil then
                return nil, "Invalid HTTP message magic"
        end

        -- ignore empty lines before request
        if #chunk == 0 then
                return true, nil
        end

        -- Requests and responses both continue with header parsing
        local function parse_headers( line )
                return process_states['headers']( msg, line )
        end

        -- Is it a request?
        local method, uri, req_ver = chunk:match("^([A-Z]+) ([^ ]+) HTTP/([01]%.[019])$")

        if method then
                msg.type           = "request"
                msg.request_method = method:lower()
                msg.request_uri    = uri
                msg.http_version   = tonumber( req_ver )
                msg.headers        = { }

                return true, parse_headers
        end

        -- Is it a response?
        local res_ver, code, message = chunk:match("^HTTP/([01]%.[019]) ([0-9]+) ([^\r\n]+)$")

        if code then
                msg.type           = "response"
                msg.status_code    = code
                msg.status_message = message
                msg.http_version   = tonumber( res_ver )
                msg.headers        = { }

                return true, parse_headers
        end

        -- Can't handle it
        return nil, "Invalid HTTP message magic"
end
170
171
-- Parse a single header line and store it in msg.headers.
-- Returns true for a valid "Name: value" line, false for the empty line that
-- terminates the header block (no more data is accepted) and nil plus an
-- error message for anything else, including a missing line (EOF).
process_states['headers'] = function( msg, chunk )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Look for a valid header format
        local name, value = chunk:match( "^([A-Z][A-Za-z0-9%-_]+): +(.+)$" )

        if type(name)  == "string" and #name  > 0 and
           type(value) == "string" and #value > 0
        then
                -- Valid header line, proceed
                msg.headers[name] = value
                return true, nil
        end

        -- Empty line, we won't accept data anymore
        if #chunk == 0 then
                return false, nil
        end

        -- Junk data
        return nil, "Invalid HTTP header received"
end
199
200
-- Find first MIME boundary.
-- The body has to start with "--" .. msg.mime_boundary .. "\r\n". Input is
-- collected in msg._mimeinit until enough bytes are available to decide, so
-- an opening boundary that arrives split over several chunks is recognized
-- as well. Everything behind the boundary is stored in msg._mimebuffer and
-- the MIME header state is returned as next state.
-- Returns nil plus an error message on EOF or if the data does not start
-- with the boundary.
process_states['mime-init'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        local opening = "--" .. msg.mime_boundary .. "\r\n"
        local buffer  = ( msg._mimeinit or "" ) .. chunk

        if #buffer < #opening then
                -- Bail out early if what we have so far cannot become the boundary
                if opening:sub( 1, #buffer ) ~= buffer then
                        msg._mimeinit = nil
                        return nil, "Invalid MIME boundary"
                end

                -- Incomplete boundary, keep the bytes and wait for more
                msg._mimeinit = buffer
                return true
        end

        msg._mimeinit = nil

        if buffer:sub( 1, #opening ) ~= opening then
                return nil, "Invalid MIME boundary"
        end

        -- Store remaining data in buffer
        msg._mimebuffer = buffer:sub( #opening + 1 )

        -- Switch to header processing state
        return true, function( chunk )
                return process_states['mime-headers']( msg, chunk, filecb )
        end
end
227
228
-- Read MIME part headers.
-- Header lines are consumed from msg._mimebuffer plus the new chunk and
-- collected in msg._mimeheaders until the empty line ending the header block
-- is seen. Then the Content-Disposition header decides how the part data is
-- handled: parts with a filename are passed to filecb (if given), all other
-- parts are appended to msg.params[<field name>]. The handler is stored in
-- msg._mimecallback and the MIME data state is returned as next state.
-- Every part gets its own header table, headers of a previous part are not
-- carried over.
-- Returns true while more header data is needed and nil plus an error
-- message on EOF or malformed headers.
process_states['mime-headers'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Combine look-behind buffer with current chunk
        chunk = msg._mimebuffer .. chunk

        -- Headers may arrive spread over several calls, so the table is kept
        -- in msg until the header block is complete
        if not msg._mimeheaders then
                msg._mimeheaders = { }
        end

        local function __storehdr( k, v )
                msg._mimeheaders[k] = v
                return ""
        end

        -- Read all header lines
        local ok, count = 1, 0
        while ok > 0 do
                chunk, ok = chunk:gsub( "^([A-Z][A-Za-z0-9%-_]+): +([^\r\n]+)\r\n", __storehdr )
                count = count + ok
        end

        -- Headers processed, check for empty line
        chunk, ok = chunk:gsub( "^\r\n", "" )

        -- Store remaining buffer contents
        msg._mimebuffer = chunk

        -- Header block not finished yet
        if ok == 0 then
                -- We parsed some headers or the buffer is almost empty
                if count > 0 or #chunk < 128 then
                        -- Keep feeding me with chunks
                        return true, nil
                end

                -- Buffer looks like garbage
                return nil, "Malformed MIME section header"
        end

        -- End of headers: detach the header set so that the next part starts
        -- with a fresh table
        local headers = msg._mimeheaders
        msg._mimeheaders = nil

        -- When no Content-Type header is given assume text/plain
        if not headers['Content-Type'] then
                headers['Content-Type'] = 'text/plain'
        end

        -- Content-Disposition is required, abort without
        local disposition = headers['Content-Disposition']
        if not disposition then
                return nil, "Missing Content-Disposition MIME section header"
        end

        -- Check for "form-data" token
        if not disposition:match("^form%-data; ") then
                -- Unknown Content-Disposition, abort
                return nil, "Unexpected Content-Disposition MIME section header"
        end

        -- Check for field name, filename
        local field = disposition:match('name="(.-)"')
        local file  = disposition:match('filename="(.+)"$')

        -- Without a name the value could not be stored (nil table index)
        if not field then
                return nil, "Missing field name in Content-Disposition MIME section header"
        end

        if file and filecb then
                -- Is a file field and we have a callback
                msg.params[field] = file
                msg._mimecallback = function(chunk,eof)
                        filecb( {
                                name    = field;
                                file    = file;
                                headers = headers
                        }, chunk, eof )
                end
        else
                -- Treat as form field
                msg.params[field] = ""
                msg._mimecallback = function(chunk,eof)
                        msg.params[field] = msg.params[field] .. chunk
                end
        end

        -- Header was valid, continue with mime-data
        return true, function( chunk )
                return process_states['mime-data']( msg, chunk, filecb )
        end
end
319
320
-- Read MIME part data.
-- Data is passed to msg._mimecallback( data, eof ). When the next part
-- boundary is found the callback gets the remaining data with eof = true and
-- the MIME header state is returned as next state. When the final boundary
-- ("--" appended) is found the internal MIME fields of msg are removed and
-- false is returned, no more data is accepted.
-- While no boundary is in sight, all data is flushed except for a tail that
-- could still be the beginning of a boundary; the tail stays in
-- msg._mimebuffer so that a boundary split over several chunks is found.
-- Returns nil plus an error message on EOF.
process_states['mime-data'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Combine look-behind buffer with current chunk
        local buffer = msg._mimebuffer .. chunk
        local delim  = "\r\n--" .. msg.mime_boundary

        -- Look for MIME boundary
        local spos, epos = buffer:find( delim .. "\r\n", 1, true )

        if spos then
                -- Content data
                msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

                -- Store remainder
                msg._mimebuffer = buffer:sub( epos + 1 )

                -- Next state is mime-header processing
                return true, function( chunk )
                        return process_states['mime-headers']( msg, chunk, filecb )
                end
        end

        -- Look for the final boundary
        spos = buffer:find( delim .. "--\r\n", 1, true )

        if spos then
                -- Content data
                msg._mimecallback( buffer:sub( 1, spos - 1 ), true )

                -- We processed the final MIME boundary, cleanup
                msg._mimebuffer   = nil
                msg._mimeheaders  = nil
                msg._mimecallback = nil

                -- We won't accept data anymore
                return false
        end

        -- We're somewhere within a data section. The longest boundary prefix
        -- that can sit at the end of the buffer is the final boundary minus
        -- its last byte (#delim + 3 bytes). The tail is capped at 256 bytes
        -- because the throttling source in mimedecode_message_body() only
        -- fetches new data while the buffer holds at most 256 bytes.
        local keep = math.min( #delim + 3, 256 )

        if #buffer > keep then
                -- Flush everything that cannot belong to a boundary
                msg._mimecallback( buffer:sub( 1, #buffer - keep ), false )
                buffer = buffer:sub( #buffer - keep + 1 )
        end

        msg._mimebuffer = buffer

        -- Keep feeding me
        return true
end
380
381
-- Init urldecoding stream.
-- Reads the expected body size from msg.env.CONTENT_LENGTH into
-- msg.content_length, stores the first chunk in msg._urldecbuffer, resets
-- the processed length msg._urldeclength and returns the key parsing state
-- as next state.
-- Returns nil plus an error message on EOF, if Content-Length is missing or
-- not a non-negative number, or if it exceeds HTTP_MAX_CONTENT.
process_states['urldecode-init'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Check for Content-Length
        if not msg.env.CONTENT_LENGTH then
                return nil, "Missing Content-Length header"
        end

        -- tonumber() yields nil for garbage, which must not reach the
        -- comparison below
        local length = tonumber( msg.env.CONTENT_LENGTH )

        if not length or length < 0 then
                return nil, "Invalid Content-Length header"
        end

        msg.content_length = length

        if length > HTTP_MAX_CONTENT then
                return nil, "Request exceeds maximum allowed size"
        end

        -- Initialize buffer
        msg._urldecbuffer = chunk
        msg._urldeclength = 0

        -- Switch to urldecode-key state
        return true, function(chunk)
                return process_states['urldecode-key']( msg, chunk, filecb )
        end
end
410
411
-- Process urldecoding stream, read and validate parameter key.
-- Looks for "=" in msg._urldecbuffer plus the new chunk. The text in front
-- of it is urldecoded and used as parameter name, msg.params[name] is reset
-- to "" and msg._urldeccallback is set up to receive the value data: either
-- filecb( { name = <name> }, data, eof ) if a file callback is given, or a
-- handler appending to msg.params[name] and urldecoding it on eof.
-- Returns the value parsing state as next state.
-- Returns nil plus an error message on EOF, if the processed length already
-- reached msg.content_length, if no "=" is found in the buffered data or if
-- the key is longer than HTTP_URLENC_MAXKEYLEN.
-- NOTE: the complete key including "=" has to be available in the buffer,
-- a key split over two chunks is reported as error.
process_states['urldecode-key'] = function( msg, chunk, filecb )

        if chunk == nil then
                return nil, "Unexpected EOF"
        end

        -- Prevent oversized requests
        if msg._urldeclength >= msg.content_length then
                return nil, "Request exceeds maximum allowed size"
        end

        -- Combine look-behind buffer with current chunk
        local buffer = msg._urldecbuffer .. chunk
        local spos, epos = buffer:find( "=", 1, true )

        -- No param found
        if not spos then
                return nil, "POST data exceeds maximum allowed length"
        end

        -- Check that key doesn't exceed maximum allowed key length
        if ( spos - 1 ) > HTTP_URLENC_MAXKEYLEN then
                return nil, "POST parameter exceeds maximum allowed length"
        end

        local key = urldecode( buffer:sub( 1, spos - 1 ) )

        -- Prepare buffers
        msg.params[key]     = ""
        msg._urldeclength   = msg._urldeclength + epos
        msg._urldecbuffer   = buffer:sub( epos + 1 )

        -- Use file callback or store values inside msg.params
        if filecb then
                -- Same descriptor shape as used for MIME parts: the field
                -- name is passed in the "name" member
                local descriptor = { name = key }

                msg._urldeccallback = function( chunk, eof )
                        filecb( descriptor, chunk, eof )
                end
        else
                msg._urldeccallback = function( chunk, eof )
                        msg.params[key] = msg.params[key] .. chunk

                        -- FIXME: Use a filter
                        if eof then
                                msg.params[key] = urldecode( msg.params[key] )
                        end
                end
        end

        -- Proceed with urldecode-value state
        return true, function( chunk )
                return process_states['urldecode-value']( msg, chunk, filecb )
        end
end
467
468
-- Process urldecoding stream, read parameter value.
-- Value data is passed to msg._urldeccallback( data, eof ). A "&" or ";"
-- ends the value and switches back to the key parsing state. An empty
-- buffer ends the stream if the processed length matches
-- msg.content_length, otherwise an error is returned. On EOF (nil chunk)
-- the callback is told that the value is complete and false is returned.
process_states['urldecode-value'] = function( msg, chunk, filecb )

        -- Source ran dry: send EOF to the value handler and stop
        if chunk == nil then
                msg._urldeccallback( "", true )
                return false
        end

        -- Combine look-behind buffer with current chunk
        local buffer = msg._urldecbuffer .. chunk

        -- Nothing left to process
        if #buffer == 0 then
                -- Compare processed length
                if msg._urldeclength ~= msg.content_length then
                        return nil, "Content-Length mismatch"
                end

                -- Cleanup
                msg._urldeclength   = nil
                msg._urldecbuffer   = nil
                msg._urldeccallback = nil

                -- We won't accept data anymore
                return false
        end

        -- Check for end of value
        local spos, epos = buffer:find("[&;]")

        if spos then
                -- Flush buffer, send eof
                msg._urldeccallback( buffer:sub( 1, spos - 1 ), true )
                msg._urldecbuffer = buffer:sub( epos + 1, #buffer )
                msg._urldeclength = msg._urldeclength + epos

                -- Back to urldecode-key state
                return true, function( chunk )
                        return process_states['urldecode-key']( msg, chunk, filecb )
                end
        end

        -- Number of bytes that were already buffered before this chunk
        local pending = #buffer - #chunk

        if pending > 0 then
                -- Flush the previously buffered data, keep only the new chunk
                msg._urldeccallback( buffer:sub( 1, pending ), false )

                msg._urldeclength = msg._urldeclength + pending
                msg._urldecbuffer = buffer:sub( pending + 1, #buffer )
        else
                -- Nothing was buffered before, store the new data
                msg._urldecbuffer = buffer
        end

        -- Keep feeding me
        return true
end
530
531
-- Creates a header source from a given socket.
-- The returned LTN12 source yields one line per call, read with
-- sock:receive("*l"), with a trailing CR removed.
-- If no line can be read the source returns nil plus an error message: the
-- socket error itself for "timeout", "Line exceeds maximum allowed length"
-- if partial data was received and "Unexpected EOF" otherwise.
function header_source( sock )
        return ltn12.source.simplify( function()

                local chunk, err, part = sock:receive("*l")

                -- Line ok
                if chunk ~= nil then
                        -- Strip trailing CR (assignment drops the gsub count)
                        chunk = chunk:gsub("\r$","")

                        return chunk, nil
                end

                if err == "timeout" then
                        return nil, err
                end

                -- NOTE(review): LuaSocket style sockets hand back an empty
                -- string as partial result when nothing was received, which
                -- is truthy in Lua. Only non-empty partial data counts as an
                -- overlong line - confirm for the socket type used here.
                if part and part ~= "" then
                        return nil, "Line exceeds maximum allowed length"
                end

                return nil, "Unexpected EOF"
        end )
end
558
559
-- Decode MIME encoded data.
--   source: LTN12 source delivering the message body
--   msg:    message table; msg.env.CONTENT_TYPE has to carry the boundary
--           ("multipart/form-data; boundary=..."), alternatively
--           msg.mime_boundary may be set by the caller
--   filecb: optional callback receiving the data of file fields
-- Returns true when the source is exhausted or the parser stopped accepting
-- data, nil plus an error message otherwise.
function mimedecode_message_body( source, msg, filecb )

        -- Find mime boundary
        local ctype = msg and msg.env and msg.env.CONTENT_TYPE

        if ctype then
                local bound = ctype:match("^multipart/form%-data; boundary=(.+)")

                if not bound then
                        return nil, "No MIME boundary found or invalid content type given"
                end

                -- The boundary parameter may be given as quoted string
                msg.mime_boundary = bound:match('^"(.+)"$') or bound

        elseif not ( msg and msg.mime_boundary ) then
                -- Without a boundary the parser states cannot work
                return nil, "No MIME boundary found or invalid content type given"
        end

        -- Create an initial LTN12 sink
        -- The whole MIME parsing process is implemented as fancy sink, sinks replace themselves
        -- depending on current processing state (init, header, data). Return the initial state.
        local sink = ltn12.sink.simplify(
                function( chunk )
                        return process_states['mime-init']( msg, chunk, filecb )
                end
        )

        -- Create a throttling LTN12 source
        -- Frequent state switching in the mime parsing process leads to unwanted buffer aggregation.
        -- This source checks whether there is still data in the internal read buffer and returns
        -- an empty string while more than 256 bytes are queued. Otherwise the original source is
        -- called to get the next chunk of data.
        local tsrc = function()
                if msg._mimebuffer ~= nil and #msg._mimebuffer > 256 then
                        return ""
                else
                        return source()
                end
        end

        -- Pump input data...
        while true do
                -- get data
                local ok, err = ltn12.pump.step( tsrc, sink )

                -- error
                if not ok and err then
                        return nil, err

                -- eof
                elseif not ok then
                        return true
                end
        end
end
615
616
-- Decode urlencoded data.
--   source: LTN12 source delivering the message body
--   msg:    message table; the urldecode states read msg.env.CONTENT_LENGTH
--           and store the parameters in msg.params
-- Returns true when the source is exhausted or the parser stopped accepting
-- data, nil plus an error message otherwise.
function urldecode_message_body( source, msg )

        -- Create an initial LTN12 sink
        -- Return the initial state.
        local sink = ltn12.sink.simplify(
                function( chunk )
                        return process_states['urldecode-init']( msg, chunk )
                end
        )

        -- Create a throttling LTN12 source
        -- As long as there is unprocessed data in the internal buffer an empty
        -- string is returned, so the states work on the buffer first. Only an
        -- empty (or missing) buffer makes us fetch the next chunk.
        local tsrc = function()
                if msg._urldecbuffer ~= nil and #msg._urldecbuffer > 0 then
                        return ""
                else
                        return source()
                end
        end

        -- Pump input data...
        while true do
                -- get data
                local ok, err = ltn12.pump.step( tsrc, sink )

                -- error
                if not ok and err then
                        return nil, err

                -- eof
                elseif not ok then
                        return true
                end
        end
end
653
654
-- Parse a http message header.
--   source: LTN12 source delivering the message line by line
-- Returns the message table on success. Besides the fields set by the magic
-- and header states it contains:
--   params: urldecoded query parameters of the request uri (get and post
--           requests only, otherwise an empty table)
--   env:    CGI like environment (CONTENT_LENGTH, CONTENT_TYPE,
--           REQUEST_METHOD, REQUEST_URI, SCRIPT_NAME, SCRIPT_FILENAME,
--           SERVER_PROTOCOL and HTTP_* for a fixed set of headers)
-- For response messages REQUEST_METHOD, REQUEST_URI and SCRIPT_NAME are nil.
-- Returns nil plus an error message if source or parser report an error.
function parse_message_header( source )

        local ok, err = true, nil
        local msg     = { }

        local sink = ltn12.sink.simplify(
                function( chunk )
                        return process_states['magic']( msg, chunk )
                end
        )

        -- Pump input data...
        while ok do

                -- get data
                ok, err = ltn12.pump.step( source, sink )

                -- error
                if not ok and err then
                        return nil, err

                -- eof
                elseif not ok then

                        local method  = msg.request_method
                        local uri     = msg.request_uri
                        local headers = msg.headers or { }

                        -- Process get parameters
                        if ( method == "get" or method == "post" ) and
                           uri:find( "?", 1, true )
                        then
                                msg.params = urldecode_params( uri )
                        else
                                msg.params = { }
                        end

                        -- Populate common environment variables
                        -- (method and uri are not set for responses; the
                        -- parentheses around gsub() drop its second result)
                        msg.env = {
                                CONTENT_LENGTH    = headers['Content-Length'];
                                CONTENT_TYPE      = headers['Content-Type'];
                                REQUEST_METHOD    = method and method:upper();
                                REQUEST_URI       = uri;
                                SCRIPT_NAME       = uri and ( uri:gsub( "%?.*$", "" ) );
                                SCRIPT_FILENAME   = "";         -- XXX implement me
                                SERVER_PROTOCOL   = "HTTP/" .. string.format("%.1f", msg.http_version)
                        }

                        -- Populate HTTP_* environment variables
                        for _, hdr in ipairs( {
                                'Accept',
                                'Accept-Charset',
                                'Accept-Encoding',
                                'Accept-Language',
                                'Connection',
                                'Cookie',
                                'Host',
                                'Referer',
                                'User-Agent',
                        } ) do
                                local var = 'HTTP_' .. hdr:upper():gsub("%-","_")

                                msg.env[var] = headers[hdr]
                        end
                end
        end

        return msg
end
722
723
-- Parse a http message body.
--   source: LTN12 source delivering the message body
--   msg:    message table as returned by parse_message_header()
--   filecb: optional callback; used for file fields of multipart/form-data
--           bodies and as plain LTN12 sink for unhandled encodings
-- POST bodies of type multipart/form-data and
-- application/x-www-form-urlencoded are handed to the matching decoder.
-- Any other body is pumped completely into filecb or, without callback,
-- collected in msg.content (msg.content_length holds its size) up to
-- HTTP_MAX_CONTENT bytes.
-- Returns true on success, nil plus an error message otherwise.
function parse_message_body( source, msg, filecb )

        local method = msg.env.REQUEST_METHOD
        local ctype  = msg.env.CONTENT_TYPE

        -- Is it multipart/mime ?
        if method == "POST" and ctype and
           ctype:match("^multipart/form%-data")
        then
                return mimedecode_message_body( source, msg, filecb )

        -- Is it application/x-www-form-urlencoded ?
        elseif method == "POST" and ctype and
               ctype == "application/x-www-form-urlencoded"
        then
                return urldecode_message_body( source, msg, filecb )
        end

        -- Unhandled encoding
        -- If a file callback is given then feed it chunk by chunk, else
        -- store whole buffer in message.content
        local sink

        if type(filecb) == "function" then
                sink = filecb
        else
                msg.content = ""
                msg.content_length = 0

                sink = function( chunk )
                        -- The pump passes nil to signal the end of data
                        if chunk == nil then
                                return true
                        end

                        if ( msg.content_length + #chunk ) <= HTTP_MAX_CONTENT then
                                msg.content        = msg.content        .. chunk
                                msg.content_length = msg.content_length + #chunk

                                return true
                        else
                                return nil, "POST data exceeds maximum allowed length"
                        end
                end
        end

        -- Pump data until the source is exhausted. A successful step returns
        -- no error either, so only a failed step may end the loop.
        while true do
                local ok, err = ltn12.pump.step( source, sink )

                if not ok and err then
                        return nil, err
                elseif not ok then
                        return true
                end
        end
end
781
-- Status codes
-- Maps the numeric HTTP status codes known to this module to their
-- standard reason phrases.
statusmsg = {
        [200] = "OK",
        [301] = "Moved Permanently",
        [304] = "Not Modified",
        [400] = "Bad Request",
        [403] = "Forbidden",
        [404] = "Not Found",
        [405] = "Method Not Allowed",
        [411] = "Length Required",
        [412] = "Precondition Failed",
        [500] = "Internal Server Error",
        [503] = "Service Unavailable",  -- reason phrase as defined by the HTTP specification
}