From d5a98c921d221cbff3dafced788e7127c05428a2 Mon Sep 17 00:00:00 2001 From: helmutm Date: Sat, 16 Aug 2008 09:57:28 +0000 Subject: [PATCH] add an extract of simplejson for JSON rendering only git-svn-id: svn://svn.cy55.de/Zope3/src/cybertools/trunk@2817 fd906abe-77d9-0310-91a1-e0d9ade77398 --- util/json.py | 295 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 util/json.py diff --git a/util/json.py b/util/json.py new file mode 100644 index 0000000..8604f6e --- /dev/null +++ b/util/json.py @@ -0,0 +1,295 @@ +r""" +A simple, fast, extensible JSON encoder + +JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format.""" + +import re + +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +# Assume this produces an infinity on all machines (probably not guaranteed) +INFINITY = float('1e66666') +FLOAT_REPR = repr + + +def floatstr(o, allow_nan=True): + if o != o: + text = 'NaN' + elif o == INFINITY: + text = 'Infinity' + elif o == -INFINITY: + text = '-Infinity' + else: + return FLOAT_REPR(o) + if not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + return text + + +def encode_basestring(s): + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + + +def py_encode_basestring_ascii(s): + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + +encode_basestring_ascii = py_encode_basestring_ascii + + +class JSONEncoder(object): + + item_separator = ', ' + key_separator = ': ' + + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None): + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + self.current_indent_level = 0 + if separators is not None: + self.item_separator, self.key_separator = separators + if default is not None: + self.default = default + self.encoding = encoding + + def _newline_indent(self): + return '\n' + (' ' * (self.indent * self.current_indent_level)) + + def _iterencode_list(self, lst, markers=None): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + yield '[' + if self.indent is not None: + self.current_indent_level += 1 + newline_indent = self._newline_indent() + separator = self.item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + separator = self.item_separator + first = True + for value in lst: + if first: + first = False + else: + yield separator + for chunk in self._iterencode(value, markers): + yield chunk + if newline_indent is not None: + self.current_indent_level -= 1 + yield self._newline_indent() + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(self, dct, markers=None): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + key_separator = self.key_separator + if self.indent is not None: + self.current_indent_level += 1 + newline_indent = self._newline_indent() + item_separator = self.item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = self.item_separator + first = True + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + allow_nan = self.allow_nan + if self.sort_keys: + keys = dct.keys() + keys.sort() + items = [(k, dct[k]) for k in keys] + else: + items = dct.iteritems() + _encoding = self.encoding + _do_decode = (_encoding is not None + and not (_encoding == 'utf-8')) + for key, value in items: + if isinstance(key, str): + if _do_decode: + key = key.decode(_encoding) + elif isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = floatstr(key, allow_nan) + elif isinstance(key, (int, long)): + key = str(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif self.skipkeys: + continue + else: + raise TypeError("key %r is not a string" % (key,)) + if first: + first = False + else: + yield item_separator + yield encoder(key) + yield key_separator + for chunk in self._iterencode(value, markers): + yield chunk + if newline_indent is not None: + self.current_indent_level -= 1 + yield self._newline_indent() + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(self, o, markers=None): + if isinstance(o, basestring): + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + _encoding = self.encoding + if (_encoding is not None and isinstance(o, str) + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + yield encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield floatstr(o, self.allow_nan) + elif isinstance(o, (list, tuple)): + for chunk in self._iterencode_list(o, markers): + yield chunk + elif isinstance(o, dict): + for chunk in self._iterencode_dict(o, markers): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + for chunk in self._iterencode_default(o, markers): + yield chunk + if markers is not None: + del markers[markerid] + + def _iterencode_default(self, o, markers=None): + newobj = self.default(o) + return self._iterencode(newobj, markers) + + def default(self, o): + raise TypeError("%r is not JSON serializable" % (o,)) + + def encode(self, o): + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + chunks = list(self.iterencode(o)) + return ''.join(chunks) + + def iterencode(self, o): + if self.check_circular: + markers = {} + else: + markers = None + return self._iterencode(o, markers) + + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, +) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, **kw): + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and default is None and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, default=default, + **kw).encode(obj) +