From 16d4e0df7d98a95e3d82a547bdeddd2e1598b887 Mon Sep 17 00:00:00 2001 From: helmutm Date: Fri, 14 Nov 2008 09:02:30 +0000 Subject: [PATCH] add json decoder git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2985 fd906abe-77d9-0310-91a1-e0d9ade77398 --- util/json.py | 352 +++++++++++++++++++++++++++++++++++++++++++++++++- util/json.txt | 44 +++++++ util/tests.py | 1 + 3 files changed, 396 insertions(+), 1 deletion(-) create mode 100644 util/json.txt diff --git a/util/json.py b/util/json.py index 8604f6e..786045b 100644 --- a/util/json.py +++ b/util/json.py @@ -5,7 +5,11 @@ JSON (JavaScript Object Notation) is a subset of JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data interchange format.""" -import re +# This is a stripped-down version of simplejson +# by Bob Ippolito, http://undefined.org/python/ + + +import re, sys ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') @@ -277,6 +281,334 @@ _default_encoder = JSONEncoder( ) +""" +Implementation of JSONDecoder +""" + +import sre_parse +import sre_compile +import sre_constants +from sre_constants import BRANCH, SUBPATTERN + +FLAGS = (re.VERBOSE | re.MULTILINE | re.DOTALL) + +class Scanner(object): + def __init__(self, lexicon, flags=FLAGS): + self.actions = [None] + # Combine phrases into a compound pattern + s = sre_parse.Pattern() + s.flags = flags + p = [] + for idx, token in enumerate(lexicon): + phrase = token.pattern + try: + subpattern = sre_parse.SubPattern(s, + [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) + except sre_constants.error: + raise + p.append(subpattern) + self.actions.append(token) + + s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work + p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) + self.scanner = sre_compile.compile(p) + + def iterscan(self, string, idx=0, context=None): + """ + Yield match, end_idx for each match + """ + match = self.scanner.scanner(string, idx).match + actions = self.actions + lastend = idx + end = len(string) + while True: + m = match() + if m is None: + break + matchbegin, matchend = m.span() + if lastend == matchend: + break + action = actions[m.lastindex] + if action is not None: + rval, next_pos = action(m, context) + if next_pos is not None and next_pos != matchend: + # "fast forward" the scanner + matchend = next_pos + match = self.scanner.scanner(string, matchend).match + yield rval, matchend + lastend = matchend + + +def pattern(pattern, flags=FLAGS): + def decorator(fn): + fn.pattern = pattern + fn.regex = re.compile(pattern, flags) + return fn + return decorator + + +def _floatconstants(): + import struct + import sys + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + if end is None: + return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( + msg, lineno, colno, endlineno, endcolno, pos, end) + + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, + 'true': True, + 'false': False, + 'null': None, +} + +def JSONConstant(match, context, c=_CONSTANTS): + s = match.group(0) + fn = getattr(context, 'parse_constant', None) + if fn is None: + rval = c[s] + else: + rval = fn(s) + return rval, None +pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + + +def JSONNumber(match, context): + match = JSONNumber.regex.match(match.string, *match.span()) + integer, frac, exp = match.groups() + if frac or exp: + fn = getattr(context, 'parse_float', None) or float + res = fn(integer + (frac or '') + (exp or '')) + else: + fn = getattr(context, 'parse_int', None) or int + res = fn(integer) + return res, None +pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) + + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + if terminator == '"': + break + elif terminator != '\\': + if strict: + raise ValueError(errmsg("Invalid control character %r at", s, end)) + else: + _append(terminator) + continue + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + if esc != 'u': + try: + m = _b[esc] + except KeyError: + raise ValueError( + errmsg("Invalid \\escape: %r" % (esc,), s, end)) + end += 1 + else: + esc = s[end + 1:end + 5] + next_end = end + 5 + msg = "Invalid \\uXXXX escape" + try: + if len(esc) != 4: + raise ValueError + uni = int(esc, 16) + if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise ValueError + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise ValueError + uni2 = int(esc2, 16) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + m = unichr(uni) + except ValueError: + raise ValueError(errmsg(msg, s, end)) + end = next_end + _append(m) + return u''.join(chunks), end + + +scanstring = py_scanstring + +def JSONString(match, context): + encoding = getattr(context, 'encoding', None) + strict = getattr(context, 'strict', True) + return scanstring(match.string, match.end(), encoding, strict) +pattern(r'"')(JSONString) + + +WHITESPACE = re.compile(r'\s*', FLAGS) + +def JSONObject(match, context, _w=WHITESPACE.match): + pairs = {} + s = match.string + end = _w(s, match.end()).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + encoding = getattr(context, 'encoding', None) + strict = getattr(context, 'strict', True) + iterscan = JSONScanner.iterscan + while True: + key, end = scanstring(s, end, encoding, strict) + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + end = _w(s, end + 1).end() + try: + value, end = iterscan(s, idx=end, context=context).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == '}': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + object_hook = getattr(context, 'object_hook', None) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end +pattern(r'{')(JSONObject) + + +def JSONArray(match, context, _w=WHITESPACE.match): + values = [] + s = match.string + end = _w(s, match.end()).end() + # Look-ahead for trivial empty array + nextchar = s[end:end + 1] + if nextchar == ']': + return values, end + 1 + iterscan = JSONScanner.iterscan + while True: + try: + value, end = iterscan(s, idx=end, context=context).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + values.append(value) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + end = _w(s, end).end() + return values, end +pattern(r'\[')(JSONArray) + + +ANYTHING = [ + JSONObject, + JSONArray, + JSONString, + JSONConstant, + JSONNumber, +] + +JSONScanner = Scanner(ANYTHING) + + +class JSONDecoder(object): + + _scanner = Scanner(ANYTHING) + __all__ = ['__init__', 'decode', 'raw_decode'] + + def __init__(self, encoding=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, strict=True): + self.encoding = encoding + self.object_hook = object_hook + self.parse_float = parse_float + self.parse_int = parse_int + self.parse_constant = parse_constant + self.strict = strict + + def decode(self, s, _w=WHITESPACE.match): + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, **kw): + kw.setdefault('context', self) + try: + obj, end = self._scanner.iterscan(s, **kw).next() + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end + + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + + +# public functions + def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, encoding='utf-8', default=None, **kw): @@ -293,3 +625,21 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, separators=separators, encoding=encoding, default=default, **kw).encode(obj) + +def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, + parse_int=None, parse_constant=None, **kw): + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + return cls(encoding=encoding, **kw).decode(s) diff --git a/util/json.txt b/util/json.txt new file mode 100644 index 0000000..5687eee --- /dev/null +++ b/util/json.txt @@ -0,0 +1,44 @@ +========================== +JSON Endoding and Decoding +========================== + +$Id$ + +This is a stripped-down version of simplejson +by Bob Ippolito, http://undefined.org/python/ + + >>> from cybertools.util.json import dumps, loads + +Encoding basic Python object hierarchies:: + + >>> dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print dumps("\"foo\bar") + "\"foo\bar" + >>> print dumps(u'\u1234') + "\u1234" + >>> print dumps('\\') + "\\" + >>> print dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + +Compact encoding:: + + >>> dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> print dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> loads('["foo", {"bar":["baz", null, 1.0, 2]}]') + [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> loads('"\\"foo\\bar"') + u'"foo\x08ar' + diff --git a/util/tests.py b/util/tests.py index a4d50d4..5fd3764 100755 --- a/util/tests.py +++ b/util/tests.py @@ -25,6 +25,7 @@ def test_suite(): doctest.DocFileSuite('format.txt', optionflags=flags), doctest.DocFileSuite('multikey.txt', optionflags=flags), doctest.DocFileSuite('property.txt', optionflags=flags), + doctest.DocFileSuite('json.txt', optionflags=flags), doctest.DocFileSuite('jeep.txt', optionflags=flags), doctest.DocFileSuite('randomname.txt', optionflags=flags), ))