libs/json: add proper support for decoding unicode escapes
[project/luci.git] / libs / json / luasrc / json.lua
index 0d38ed4..8dbaf91 100644 (file)
@@ -11,28 +11,253 @@ You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 
 $Id$
+
+Decoder:
+       Info:
+               null will be decoded to luci.json.null if first parameter of Decoder() is true
+
+       Example:
+               decoder = luci.json.Decoder()
+               luci.ltn12.pump.all(luci.ltn12.source.string("decodableJSON"), decoder:sink())
+               luci.util.dumptable(decoder:get())
+
+       Known issues:
+               \uXXXX escapes are decoded to UTF-8 one at a time; UTF-16 surrogate pairs are not combined into one character
+
+
+Encoder:
+       Info:
+               Accepts numbers, strings, nil, booleans as they are
+               Accepts luci.json.null as replacement for nil
+               Accepts full associative and full numerically indexed tables
+               Mixed tables will lose their associative values during conversion
+               Iterator functions will be encoded as an array of their return values
+               Non-iterator functions will probably corrupt the encoder
+
+       Example:
+               encoder = luci.json.Encoder(encodableData)
+               luci.ltn12.pump.all(encoder:source(), luci.ltn12.sink.file(io.open("someFile", "w")))
 ]]--
 
+local nixio     = require "nixio"
 local util      = require "luci.util"
-local ltn12     = require "luci.ltn12"
 local table     = require "table"
+local string    = require "string"
 local coroutine = require "coroutine"
 
 local assert    = assert
 local tonumber  = tonumber
+local tostring  = tostring
 local error     = error
+local type         = type
+local pairs        = pairs
+local ipairs    = ipairs
+local next      = next
+local pcall            = pcall
+
+local band      = nixio.bit.band
+local bor       = nixio.bit.bor
+local rshift    = nixio.bit.rshift
+local char      = string.char
+
+local getmetatable = getmetatable
 
+--- LuCI JSON-Library
+-- @cstyle     instance
 module "luci.json"
 
+
+--- Directly decode a JSON string
+-- Additional arguments are passed on to the ActiveDecoder constructor.
+-- @param json JSON-String
+-- @return Lua object, or nil if decoding raised an error
+function decode(json, ...)
+       -- The source never delivers data; the whole input is preloaded as chunk
+       local a = ActiveDecoder(function() return nil end, ...)
+       a.chunk = json
+       local s, obj = pcall(a.get, a)
+
+       -- Do not write "s and obj or nil" here: the and/or idiom would turn a
+       -- successfully decoded JSON "false" into nil.
+       if s then
+               return obj
+       end
+       return nil
+end
+
+
+--- Directly encode a Lua object into a JSON string.
+-- Additional arguments are handed to the Encoder constructor after the
+-- block size.
+-- @param obj Lua Object
+-- @return JSON string, or nil if the encoder source reported an error
+function encode(obj, ...)
+       local pull = Encoder(obj, 1, ...):source()
+       local parts = {}
+
+       -- Drain the source until it stops delivering blocks
+       local piece, err = pull()
+       while piece do
+               parts[#parts + 1] = piece
+               piece, err = pull()
+       end
+
+       if err then
+               return nil
+       end
+       return table.concat(parts)
+end
+
+
 --- Null replacement function
+-- The function returns itself; it is used as a marker value standing in for JSON null.
 -- @return null
 function null()
        return null
 end
 
+--- Create a new JSON-Encoder.
+-- @class      function
+-- @name       Encoder
+-- @param data                 Lua-Object to be encoded.
+-- @param buffersize   Blocksize of returned data source.
+-- @param fastescape   Use non-standard escaping (don't escape control chars)
+-- @return JSON-Encoder
+Encoder = util.class()
+
+function Encoder.__init__(self, data, buffersize, fastescape)
+       -- Calling an encoder object is routed to Encoder.source
+       getmetatable(self).__call = Encoder.source
+
+       self.fastescape = fastescape
+       self.buffer     = ""
+       -- Block size defaults to 512 when none is given
+       self.buffersize = buffersize or 512
+       self.data       = data
+end
+
+--- Create an LTN12 source providing the encoded JSON-Data.
+-- Every call of the returned function resumes the encoding coroutine once.
+-- @return LTN12 source; it returns the next block of data, or nil plus the
+--         error value when resuming the coroutine failed
+function Encoder.source(self)
+       local worker = coroutine.create(self.dispatch)
+
+       return function()
+               local ok, payload = coroutine.resume(worker, self, self.data, true)
+               if not ok then
+                       -- On failure the second result of resume is the error value
+                       return nil, payload
+               end
+               return payload
+       end
+end
+
+--- Encode one value and, for the outermost call, flush the remaining output.
+-- Runs inside the coroutine created by Encoder.source; encoded data reaches
+-- the consumer through coroutine.yield (here and in Encoder.put).
+-- @param data         Value to encode
+-- @param start        True for the outermost call issued by the source
+function Encoder.dispatch(self, data, start)
+       local parser = self.parsers[type(data)]
+       if not parser then
+               -- Types without an entry in Encoder.parsers cannot be encoded
+               error("Cannot encode value of type '" .. type(data) .. "'")
+       end
+
+       parser(self, data)
+
+       if start then
+               if #self.buffer > 0 then
+                       -- Clear the buffer before handing it out; otherwise another
+                       -- source() on this encoder would emit the stale tail again.
+                       local rest = self.buffer
+                       self.buffer = ""
+                       coroutine.yield(rest)
+               end
+
+               -- Yield nothing to signal the end of the data
+               coroutine.yield()
+       end
+end
+
+--- Queue a piece of encoded output and yield blocks to the consumer.
+-- With a buffersize below 2 every chunk is yielded immediately; otherwise
+-- output is collected in self.buffer and yielded in blocks once buffer plus
+-- chunk would exceed self.buffersize.
+-- @param chunk        String to append to the output
+function Encoder.put(self, chunk)
+       if self.buffersize < 2 then
+               coroutine.yield(chunk)
+       else
+               if #self.buffer + #chunk > self.buffersize then
+                       local written = 0
+                       -- Number of bytes of chunk that fill the buffer up to buffersize
+                       local fbuffer = self.buffersize - #self.buffer
+
+                       coroutine.yield(self.buffer .. chunk:sub(written + 1, fbuffer))
+                       written = fbuffer
+
+                       -- Emit further full blocks while more than one block is left
+                       while #chunk - written > self.buffersize do
+                               fbuffer = written + self.buffersize
+                               coroutine.yield(chunk:sub(written + 1, fbuffer))
+                               written = fbuffer
+                       end
+
+                       -- The remainder stays buffered
+                       self.buffer = chunk:sub(written + 1)
+               else
+                       self.buffer = self.buffer .. chunk
+               end
+       end
+end
+
+--- Emit the JSON literal null.
+function Encoder.parse_nil(self)
+       self:put("null")
+end
+
+--- Emit a boolean as the JSON literal true or false.
+-- @param obj  Value to encode; anything truthy is written as true
+function Encoder.parse_bool(self, obj)
+       if obj then
+               self:put("true")
+       else
+               self:put("false")
+       end
+end
+
+--- Emit a number.
+-- NaN and +/-infinity have no JSON representation; tostring() would print
+-- platform dependent text for them and make the document undecodable, so
+-- they are written as null instead.
+-- @param obj  Number to encode
+function Encoder.parse_number(self, obj)
+       -- obj ~= obj is only true for NaN
+       if obj ~= obj or obj == 1/0 or obj == -1/0 then
+               self:put("null")
+       else
+               self:put(tostring(obj))
+       end
+end
+
+--- Emit a string as a quoted JSON string.
+-- With fastescape set only backslash and double quote are escaped with a
+-- leading backslash; otherwise control characters, backslash and double
+-- quote are written as \u00XX sequences.
+-- @param obj  String to encode
+function Encoder.parse_string(self, obj)
+       local escaped
+
+       if not self.fastescape then
+               escaped = obj:gsub('[%c\\"]', function(c)
+                       return '\\u00%02x' % c:byte()
+               end)
+       else
+               -- Backslashes first, so the ones added for quotes stay single
+               escaped = obj:gsub('\\', '\\\\')
+               escaped = escaped:gsub('"', '\\"')
+       end
+
+       self:put('"' .. escaped .. '"')
+end
+
+--- Emit a table or an iterator function.
+-- The null marker is written as null. A table with no sequence part but at
+-- least one key becomes a JSON object; any other table becomes an array of
+-- its entries 1 .. #obj; a function is iterated and its values become an array.
+-- @param obj  Table or iterator function to encode
+function Encoder.parse_iter(self, obj)
+       if obj == null then
+               return self:put("null")
+       end
+
+       local is_table = type(obj) == "table"
+
+       if is_table and #obj == 0 and next(obj) then
+               self:put("{")
+               local need_sep = false
+
+               for key, entry in pairs(obj) do
+                       if need_sep then
+                               self:put(",")
+                       end
+                       need_sep = true
+
+                       -- Keys are always written as strings
+                       self:parse_string(tostring(key))
+                       self:put(":")
+                       self:dispatch(entry)
+               end
+
+               self:put("}")
+               return
+       end
+
+       self:put("[")
+       local need_sep = false
+
+       if is_table then
+               for i = 1, #obj do
+                       if need_sep then
+                               self:put(",")
+                       end
+                       need_sep = true
+                       self:dispatch(obj[i])
+               end
+       else
+               for entry in obj do
+                       if need_sep then
+                               self:put(",")
+                       end
+                       need_sep = true
+                       self:dispatch(entry)
+               end
+       end
+
+       self:put("]")
+end
+
+-- Lookup table used by Encoder.dispatch: type() name -> encoding function
+Encoder.parsers = {
+       boolean      = Encoder.parse_bool,
+       number       = Encoder.parse_number,
+       string       = Encoder.parse_string,
+       table        = Encoder.parse_iter,
+       ['function'] = Encoder.parse_iter,
+       ['nil']      = Encoder.parse_nil
+}
+
+
+--- Create a new JSON-Decoder.
+-- @class      function
+-- @name       Decoder
+-- @param customnull Use luci.json.null instead of nil for decoding null
+-- @return JSON-Decoder
 Decoder = util.class()
 
---- Create an LTN12 sink from the decoder object
+--- Initialise a decoder instance.
+-- @param customnull   If set, JSON null is decoded to luci.json.null
+function Decoder.__init__(self, customnull)
+       self.cnull = customnull
+       -- Calling a decoder object is routed to Decoder.sink
+       getmetatable(self).__call = Decoder.sink
+end
+
+--- Create an LTN12 sink from the decoder object which accepts the JSON-Data.
 -- @return LTN12 sink
 function Decoder.sink(self)
        local sink = coroutine.create(self.dispatch)
@@ -42,68 +267,47 @@ function Decoder.sink(self)
 end
 
 
---- Get the decoded data packets
+--- Get the decoded data packets after the rawdata has been sent to the sink.
 -- @return Decoded data
 function Decoder.get(self)
        return self.data
 end
 
-
 function Decoder.dispatch(self, chunk, src_err, strict)
        local robject, object
-        
+       local oset = false
+
        while chunk do
-               if #chunk < 1 then
+               while chunk and #chunk < 1 do
                        chunk = self:fetch()
                end
-               
+
                assert(not strict or chunk, "Unexpected EOS")
-               if not chunk then
-                       break
-               end
-               
-               local parser = nil
+               if not chunk then break end
+
                local char   = chunk:sub(1, 1)
-               
-               if char == '"' then
-                       parser = self.parse_string
-               elseif char == 't' then
-                       parser = self.parse_true
-               elseif char == 'f' then
-                       parser = self.parse_false
-               elseif char == 'n' then
-                       parser = self.parse_null
-               elseif char == '[' then
-                       parser = self.parse_array
-               elseif char == '{' then
-                       parser = self.parse_object
-               elseif char:match("%s") then
-                       parser = self.parse_space
-               elseif char:match("[0-9-]") then
-                       parser = self.parse_number
-               end
-               
-               if parser then
-                       chunk, robject = parser(self, chunk)
-                       
-                       if robject ~= nil then
-                               assert(object == nil, "Scope violation: Too many objects")
-                               object = robject
-                       end
-                       
-                       if strict and object ~= nil then
+               local parser = self.parsers[char]
+                or (char:match("%s")     and self.parse_space)
+                or (char:match("[0-9-]") and self.parse_number)
+                or error("Unexpected char '%s'" % char)
+
+               chunk, robject = parser(self, chunk)
+
+               if parser ~= self.parse_space then
+                       assert(not oset, "Scope violation: Too many objects")
+                       object = robject
+                       oset = true
+
+                       if strict then
                                return chunk, object
                        end
-               else
-                       error("Unexpected char '%s'" % char)
                end
        end
-       
+
        assert(not src_err, src_err)
-       assert(object ~= nil, "Unexpected EOS")
-       
+       assert(oset, "Unexpected EOS")
+
        self.data = object
-       return chunk, object
 end
 
 
@@ -120,7 +324,7 @@ function Decoder.fetch_atleast(self, chunk, bytes)
                assert(nchunk, "Unexpected EOS")
                chunk = chunk .. nchunk
        end
-       
+
        return chunk
 end
 
@@ -141,7 +345,7 @@ end
 
 function Decoder.parse_space(self, chunk)
        local start = chunk:find("[^%s]")
-       
+
        while not start do
                chunk = self:fetch()
                if not chunk then
@@ -149,20 +353,20 @@ function Decoder.parse_space(self, chunk)
                end
                start = chunk:find("[^%s]")
        end
-       
+
        return chunk:sub(start)
 end
 
 
 function Decoder.parse_literal(self, chunk, literal, value)
-       chunk = self:fetch_atleast(chunk, #literal)     
+       chunk = self:fetch_atleast(chunk, #literal)
        assert(chunk:sub(1, #literal) == literal, "Invalid character sequence")
        return chunk:sub(#literal + 1), value
 end
 
 
+--- Parse the literal null.
+-- @param chunk        Input starting at the literal
+-- @return Remaining input, and the null marker if customnull was set (nil otherwise)
 function Decoder.parse_null(self, chunk)
-       return self:parse_literal(chunk, "null", null)
+       -- "or nil" keeps an explicit customnull of false from leaking into the
+       -- result as boolean false.
+       return self:parse_literal(chunk, "null", self.cnull and null or nil)
 end
 
 
@@ -194,7 +398,7 @@ function Decoder.parse_string(self, chunk)
                local spos = chunk:find('[\\"]')
                if spos then
                        str = str .. chunk:sub(1, spos - 1)
-                       
+
                        local char = chunk:sub(spos, spos)
                        if char == '"' then                             -- String end
                                chunk = chunk:sub(spos + 1)
@@ -206,7 +410,7 @@ function Decoder.parse_string(self, chunk)
                else
                        str = str .. chunk
                        chunk = self:fetch()
-                       assert(chunk, "Unexpected EOS while parsing a string")          
+                       assert(chunk, "Unexpected EOS while parsing a string")
                end
        end
 
@@ -214,16 +418,52 @@ function Decoder.parse_string(self, chunk)
 end
 
 
+--- Convert a code point given as two numbers into its UTF-8 byte sequence.
+-- The code point is computed as s1 * 256 + s2.
+-- @param s1   Upper part of the code point
+-- @param s2   Lower part of the code point
+-- @return UTF-8 encoded string, or "?" if the value is outside 0 .. 0x10FFFF
+function Decoder.utf8_encode(self, s1, s2)
+       local cp = s1 * 256 + s2
+
+       if not (cp >= 0 and cp <= 0x10FFFF) then
+               return "?"
+       end
+
+       if cp <= 0x7F then
+               return char(cp)
+       end
+
+       -- Trailing bytes carry six bits each below the marker 0x80
+       local tail1 = bor(band(cp, 0x3F), 0x80)
+       if cp <= 0x7FF then
+               return char(bor(band(rshift(cp, 6), 0x1F), 0xC0), tail1)
+       end
+
+       local tail2 = bor(band(rshift(cp, 6), 0x3F), 0x80)
+       if cp <= 0xFFFF then
+               return char(bor(band(rshift(cp, 12), 0x0F), 0xE0), tail2, tail1)
+       end
+
+       local tail3 = bor(band(rshift(cp, 12), 0x3F), 0x80)
+       return char(bor(band(rshift(cp, 18), 0x07), 0xF0), tail3, tail2, tail1)
+end
+
+
 function Decoder.parse_escape(self, chunk)
        local str = ""
        chunk = self:fetch_atleast(chunk:sub(2), 1)
        local char = chunk:sub(1, 1)
        chunk = chunk:sub(2)
-       
+
        if char == '"' then
                return chunk, '"'
        elseif char == "\\" then
                return chunk, "\\"
+       elseif char == "u" then
+               chunk = self:fetch_atleast(chunk, 4)
+               local s1, s2 = chunk:sub(1, 2), chunk:sub(3, 4)
+               s1, s2 = tonumber(s1, 16), tonumber(s2, 16)
+               assert(s1 and s2, "Invalid Unicode character")
+
+               return chunk:sub(5), self:utf8_encode(s1, s2)
        elseif char == "/" then
                return chunk, "/"
        elseif char == "b" then
@@ -236,14 +476,6 @@ function Decoder.parse_escape(self, chunk)
                return chunk, "\r"
        elseif char == "t" then
                return chunk, "\t"
-       elseif char == "u" then
-               chunk = self:fetch_atleast(chunk, 4)
-               local s1, s2 = chunk:sub(1, 4):match("^([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])$")
-               assert(s1 and s2, "Invalid Unicode character 'U+%s%s'" % {s1, s2})
-               s1, s2 = tonumber(s1, 16), tonumber(s2, 16)
-               
-               -- ToDo: Unicode support
-               return chunk:sub(5), s1 == 0 and s2 or ""
        else
                error("Unexpected escaping sequence '\\%s'" % char)
        end
@@ -253,17 +485,19 @@ end
 function Decoder.parse_array(self, chunk)
        chunk = chunk:sub(2)
        local array = {}
-       
+       local nextp = 1
+
        local chunk, object = self:parse_delimiter(chunk, "%]")
-       
+
        if object then
                return chunk, array
        end
-       
+
        repeat
                chunk, object = self:dispatch(chunk, nil, true)
-               table.insert(array, object)
-               
+               table.insert(array, nextp, object)
+               nextp = nextp + 1
+
                chunk, object = self:parse_delimiter(chunk, ",%]")
                assert(object, "Delimiter expected")
        until object == "]"
@@ -286,12 +520,12 @@ function Decoder.parse_object(self, chunk)
        repeat
                chunk = self:parse_space(chunk)
                assert(chunk, "Unexpected EOS")
-               
+
                chunk, name   = self:parse_string(chunk)
-               
+
                chunk, object = self:parse_delimiter(chunk, ":")
                assert(object, "Separator expected")
-               
+
                chunk, object = self:dispatch(chunk, nil, true)
                array[name] = object
 
@@ -316,4 +550,51 @@ function Decoder.parse_delimiter(self, chunk, delimiter)
                        return chunk, nil
                end
        end
-end
\ No newline at end of file
+end
+
+
+-- Lookup table used by Decoder.dispatch: first character of a value -> parser.
+-- Whitespace and numbers are matched by pattern in dispatch itself.
+Decoder.parsers = {
+       t     = Decoder.parse_true,
+       f     = Decoder.parse_false,
+       n     = Decoder.parse_null,
+       ['"'] = Decoder.parse_string,
+       ['{'] = Decoder.parse_object,
+       ['['] = Decoder.parse_array
+}
+
+
+--- Create a new Active JSON-Decoder.
+-- @class      function
+-- @name       ActiveDecoder
+-- @param   customnull Use luci.json.null instead of nil for decoding null
+-- @return  Active JSON-Decoder
+ActiveDecoder = util.class(Decoder)
+
+--- Initialise an active decoder which pulls its input from a source function.
+-- @param source       Function called without arguments to obtain the next chunk
+-- @param customnull   Passed on to Decoder.__init__
+function ActiveDecoder.__init__(self, source, customnull)
+       Decoder.__init__(self, customnull)
+       self.source = source
+       -- Input carried over between get() calls; nothing buffered initially
+       self.chunk = nil
+       -- Calling an active decoder object is routed to its get method
+       getmetatable(self).__call = self.get
+end
+
+
+--- Fetches one JSON-object from given source
+-- Input remaining from an earlier call is used first; the source is only
+-- asked for data when nothing is buffered.
+-- @return Decoded object
+function ActiveDecoder.get(self)
+       local input, src_err = self.chunk, nil
+       if not input then
+               input, src_err = self.source()
+       end
+
+       -- Keep whatever dispatch did not consume for the next call
+       local rest, object = self:dispatch(input, src_err, true)
+       self.chunk = rest
+       return object
+end
+
+
+--- Pull the next chunk of input from the source function.
+-- @return Next chunk; nil if the source returned nil without an error
+function ActiveDecoder.fetch(self)
+       local chunk, src_err = self.source()
+       -- Fail only when the source reported an error instead of data
+       assert(chunk or not src_err, src_err)
+       return chunk
+end