add json decoder
git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2985 fd906abe-77d9-0310-91a1-e0d9ade77398
This commit is contained in:
parent
f627a50942
commit
16d4e0df7d
3 changed files with 396 additions and 1 deletions
352
util/json.py
352
util/json.py
|
@ -5,7 +5,11 @@ JSON (JavaScript Object Notation) <http://json.org> is a subset of
|
|||
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
|
||||
interchange format."""
|
||||
|
||||
import re
|
||||
# This is a stripped-down version of simplejson
|
||||
# by Bob Ippolito, http://undefined.org/python/
|
||||
|
||||
|
||||
import re, sys
|
||||
|
||||
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
|
||||
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
|
||||
|
@ -277,6 +281,334 @@ _default_encoder = JSONEncoder(
|
|||
)
|
||||
|
||||
|
||||
"""
|
||||
Implementation of JSONDecoder
|
||||
"""
|
||||
|
||||
import sre_parse
|
||||
import sre_compile
|
||||
import sre_constants
|
||||
from sre_constants import BRANCH, SUBPATTERN
|
||||
|
||||
FLAGS = (re.VERBOSE | re.MULTILINE | re.DOTALL)
|
||||
|
||||
class Scanner(object):
    """Regex-driven tokenizer that dispatches each match to an action.

    ``lexicon`` is a sequence of callables, each carrying a ``pattern``
    attribute holding its regular expression source.  The patterns are
    merged into a single alternation in which the N-th phrase becomes
    capture group N, so the ``lastindex`` of a match selects the callable
    to invoke.
    """

    def __init__(self, lexicon, flags=FLAGS):
        # Slot 0 stays empty because regex group numbers start at 1.
        self.actions = [None]
        # Combine phrases into a compound pattern
        state = sre_parse.Pattern()
        state.flags = flags
        branches = []
        for group, token in enumerate(lexicon):
            parsed = sre_parse.parse(token.pattern, flags)
            branches.append(
                sre_parse.SubPattern(state, [(SUBPATTERN, (group + 1, parsed))]))
            self.actions.append(token)

        state.groups = len(branches) + 1 # NOTE(guido): Added to make SRE validation work
        combined = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(combined)

    def iterscan(self, string, idx=0, context=None):
        """
        Yield (value, end_idx) for each match, starting at ``idx``.

        Each matching action is called as ``action(match, context)`` and
        must return ``(value, next_pos)``.  When ``next_pos`` is not None
        and differs from the end of the regex match, scanning resumes from
        ``next_pos`` instead.
        """
        actions = self.actions
        match = self.scanner.scanner(string, idx).match
        previous_end = idx
        while True:
            m = match()
            if m is None:
                return
            current_end = m.end()
            if current_end == previous_end:
                # No progress since the previous match: stop scanning.
                return
            action = actions[m.lastindex]
            if action is not None:
                value, jump_to = action(m, context)
                if jump_to is not None and jump_to != current_end:
                    # "fast forward" the scanner
                    current_end = jump_to
                    match = self.scanner.scanner(string, current_end).match
                yield value, current_end
            previous_end = current_end
|
||||
|
||||
|
||||
def pattern(pattern, flags=FLAGS):
    """Return a decorator that tags a callable with its token regex.

    The decorated callable gets two attributes: ``pattern`` (the regex
    source, as given) and ``regex`` (that source compiled with ``flags``).
    The callable itself is returned unchanged.
    """
    def attach(action):
        action.pattern = pattern
        action.regex = re.compile(pattern, flags)
        return action
    return attach
|
||||
|
||||
|
||||
def _floatconstants():
|
||||
import struct
|
||||
import sys
|
||||
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
|
||||
if sys.byteorder != 'big':
|
||||
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
|
||||
nan, inf = struct.unpack('dd', _BYTES)
|
||||
return nan, inf, -inf
|
||||
|
||||
NaN, PosInf, NegInf = _floatconstants()
|
||||
|
||||
|
||||
def linecol(doc, pos):
    """Translate a character offset into a ``(line, column)`` pair.

    ``doc`` is the document text and ``pos`` an index into it.  Both the
    line number and the column number are 1-based, on every line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind yields -1 when no newline precedes pos, so the first line gets
    # 1-based columns just like every later line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
|
||||
|
||||
|
||||
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location it refers to in ``doc``.

    With only ``pos`` the message names one line/column and character
    offset.  When ``end`` is given as well, the message names the range
    from ``pos`` to ``end``.
    """
    start = linecol(doc, pos)
    if end is not None:
        stop = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % ((msg,) + start + stop + (pos, end))
    return '%s: line %d column %d (char %d)' % ((msg,) + start + (pos,))
|
||||
|
||||
|
||||
_CONSTANTS = {
|
||||
'-Infinity': NegInf,
|
||||
'Infinity': PosInf,
|
||||
'NaN': NaN,
|
||||
'true': True,
|
||||
'false': False,
|
||||
'null': None,
|
||||
}
|
||||
|
||||
def JSONConstant(match, context, c=_CONSTANTS):
    """Scanner action for the literal tokens matched by its pattern.

    If ``context`` has a non-None ``parse_constant`` attribute, that
    callable receives the matched text and its result is used.  Otherwise
    the text is looked up in ``c``.  Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(literal), None
    return c[literal], None
|
||||
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
|
||||
|
||||
|
||||
def JSONNumber(match, context):
    """Scanner action that converts a numeric token.

    The matched span is matched again with this action's own regex to
    split it into integer, fraction and exponent parts.  A token with a
    fraction or exponent goes through ``context.parse_float`` (default
    ``float``); any other token goes through ``context.parse_int``
    (default ``int``).  Returns ``(value, None)``.
    """
    start, stop = match.span()
    parts = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
|
||||
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
|
||||
|
||||
|
||||
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
|
||||
BACKSLASH = {
|
||||
'"': u'"', '\\': u'\\', '/': u'/',
|
||||
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
|
||||
}
|
||||
|
||||
DEFAULT_ENCODING = "utf-8"
|
||||
|
||||
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string literal whose body starts at index ``end``.

    ``s`` is the whole document and ``end`` the index just past the
    opening double quote.  Byte-string content is decoded to unicode with
    ``encoding`` (``DEFAULT_ENCODING`` when None).  With ``strict`` true a
    raw control character inside the literal is an error; otherwise it is
    copied through unchanged.

    Returns ``(text, end)``: the decoded unicode string and the index just
    past the closing quote.

    Raises ValueError for an unterminated string, an unknown backslash
    escape, a malformed u-escape or surrogate pair, or (in strict mode) a
    control character.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        # content: literal text; terminator: quote, backslash or control char
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                # Interpolate the offending character into the message.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            _append(terminator)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Single-character escape
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            # Four-hex-digit unicode escape
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate: a low surrogate escape must follow.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # Reject a second half that is not a low surrogate
                    # instead of combining it into a bogus code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
|
||||
|
||||
|
||||
scanstring = py_scanstring
|
||||
|
||||
def JSONString(match, context):
    """Scanner action for a string token (the match is the opening quote).

    Reads ``encoding`` (default None) and ``strict`` (default True) from
    ``context`` and hands the rest of the literal to ``scanstring``, whose
    result is returned as is.
    """
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    document = match.string
    body_start = match.end()
    return scanstring(document, body_start, encoding, strict)
|
||||
pattern(r'"')(JSONString)
|
||||
|
||||
|
||||
WHITESPACE = re.compile(r'\s*', FLAGS)
|
||||
|
||||
def JSONObject(match, context, _w=WHITESPACE.match):
    """Scanner action for an object (the match is the opening brace).

    Parses ``"key": value`` pairs separated by commas up to the closing
    brace.  Keys are read with ``scanstring`` and values with the module
    level ``JSONScanner``.  If ``context`` has a non-None ``object_hook``,
    the resulting dict is passed through it, and this includes the empty
    object.

    Returns ``(pairs, end)`` where ``end`` is the index just past the
    closing brace.  Raises ValueError on malformed input.
    """
    pairs = {}
    s = match.string
    object_hook = getattr(context, 'object_hook', None)
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    # Trivial empty object
    if nextchar == '}':
        # Apply the hook here as well so that {} is treated like any
        # other decoded object.
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # end was already advanced past the character just examined
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
|
||||
pattern(r'{')(JSONObject)
|
||||
|
||||
|
||||
def JSONArray(match, context, _w=WHITESPACE.match):
    """Scanner action for an array (the match is the opening bracket).

    Parses comma-separated values up to the closing bracket, reading each
    value with the module level ``JSONScanner``.

    Returns ``(values, end)`` where ``end`` is the index just past the
    closing bracket.  Raises ValueError when a value or a comma delimiter
    is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # end was already advanced past the character just examined,
            # so report the position of that character itself.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
|
||||
pattern(r'\[')(JSONArray)
|
||||
|
||||
|
||||
ANYTHING = [
|
||||
JSONObject,
|
||||
JSONArray,
|
||||
JSONString,
|
||||
JSONConstant,
|
||||
JSONNumber,
|
||||
]
|
||||
|
||||
JSONScanner = Scanner(ANYTHING)
|
||||
|
||||
|
||||
class JSONDecoder(object):
    """Decoder that turns JSON text into Python objects.

    The options given to the constructor are stored as attributes.  The
    decoder passes itself to the scanner as ``context``, and the token
    actions read those attributes from it.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True):
        """Remember the decoding options on the instance."""
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python value for the JSON document ``s``.

        Whitespace before and after the value is skipped.  Raises
        ValueError ("Extra data") if anything else follows the value.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """Decode the first JSON value in ``s``.

        Keyword arguments are forwarded to the scanner's ``iterscan``;
        ``context`` defaults to this decoder.  Returns ``(obj, end)`` where
        ``end`` is the index at which the value stopped.  Raises ValueError
        if the scanner produces no value.
        """
        kw.setdefault('context', self)
        results = self._scanner.iterscan(s, **kw)
        try:
            obj, end = results.next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end
|
||||
|
||||
|
||||
_default_decoder = JSONDecoder(encoding=None, object_hook=None)
|
||||
|
||||
|
||||
# public functions
|
||||
|
||||
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
|
||||
allow_nan=True, cls=None, indent=None, separators=None,
|
||||
encoding='utf-8', default=None, **kw):
|
||||
|
@ -293,3 +625,21 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
|
|||
separators=separators, encoding=encoding, default=default,
|
||||
**kw).encode(obj)
|
||||
|
||||
|
||||
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
          parse_int=None, parse_constant=None, **kw):
    """Deserialize the JSON document ``s`` to a Python object.

    With no options at all the shared default decoder is used.  Otherwise
    a decoder is built from ``cls`` (``JSONDecoder`` when None), receiving
    ``encoding``, every hook that is not None, and any extra keyword
    arguments; its ``decode`` result is returned.
    """
    hooks = (
        ('object_hook', object_hook),
        ('parse_float', parse_float),
        ('parse_int', parse_int),
        ('parse_constant', parse_constant),
    )
    for name, hook in hooks:
        if hook is not None:
            kw[name] = hook
    if cls is None:
        # kw is empty here only if no hook and no extra option was given.
        if encoding is None and not kw:
            return _default_decoder.decode(s)
        cls = JSONDecoder
    return cls(encoding=encoding, **kw).decode(s)
|
||||
|
|
44
util/json.txt
Normal file
44
util/json.txt
Normal file
|
@ -0,0 +1,44 @@
|
|||
==========================
|
||||
JSON Encoding and Decoding
|
||||
==========================
|
||||
|
||||
$Id$
|
||||
|
||||
This is a stripped-down version of simplejson
|
||||
by Bob Ippolito, http://undefined.org/python/
|
||||
|
||||
>>> from cybertools.util.json import dumps, loads
|
||||
|
||||
Encoding basic Python object hierarchies::
|
||||
|
||||
>>> dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
|
||||
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
|
||||
>>> print dumps("\"foo\bar")
|
||||
"\"foo\bar"
|
||||
>>> print dumps(u'\u1234')
|
||||
"\u1234"
|
||||
>>> print dumps('\\')
|
||||
"\\"
|
||||
>>> print dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
|
||||
{"a": 0, "b": 0, "c": 0}
|
||||
|
||||
Compact encoding::
|
||||
|
||||
>>> dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
|
||||
'[1,2,3,{"4":5,"6":7}]'
|
||||
|
||||
Pretty printing::
|
||||
|
||||
>>> print dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
|
||||
{
|
||||
"4": 5,
|
||||
"6": 7
|
||||
}
|
||||
|
||||
Decoding JSON::
|
||||
|
||||
>>> loads('["foo", {"bar":["baz", null, 1.0, 2]}]')
|
||||
[u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
|
||||
>>> loads('"\\"foo\\bar"')
|
||||
u'"foo\x08ar'
|
||||
|
|
@ -25,6 +25,7 @@ def test_suite():
|
|||
doctest.DocFileSuite('format.txt', optionflags=flags),
|
||||
doctest.DocFileSuite('multikey.txt', optionflags=flags),
|
||||
doctest.DocFileSuite('property.txt', optionflags=flags),
|
||||
doctest.DocFileSuite('json.txt', optionflags=flags),
|
||||
doctest.DocFileSuite('jeep.txt', optionflags=flags),
|
||||
doctest.DocFileSuite('randomname.txt', optionflags=flags),
|
||||
))
|
||||
|
|
Loading…
Add table
Reference in a new issue