r""" A simple, fast, extensible JSON encoder JSON (JavaScript Object Notation) is a subset of JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data interchange format.""" import re ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { '\\': '\\\\', '"': '\\"', '\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', } for i in range(0x20): ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) # Assume this produces an infinity on all machines (probably not guaranteed) INFINITY = float('1e66666') FLOAT_REPR = repr def floatstr(o, allow_nan=True): if o != o: text = 'NaN' elif o == INFINITY: text = 'Infinity' elif o == -INFINITY: text = '-Infinity' else: return FLOAT_REPR(o) if not allow_nan: raise ValueError("Out of range float values are not JSON compliant: %r" % (o,)) return text def encode_basestring(s): def replace(match): return ESCAPE_DCT[match.group(0)] return '"' + ESCAPE.sub(replace, s) + '"' def py_encode_basestring_ascii(s): if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = s.decode('utf-8') def replace(match): s = match.group(0) try: return ESCAPE_DCT[s] except KeyError: n = ord(s) if n < 0x10000: return '\\u%04x' % (n,) else: # surrogate pair n -= 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) s2 = 0xdc00 | (n & 0x3ff) return '\\u%04x\\u%04x' % (s1, s2) return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' encode_basestring_ascii = py_encode_basestring_ascii class JSONEncoder(object): item_separator = ', ' key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, encoding='utf-8', default=None): self.skipkeys = skipkeys self.ensure_ascii = ensure_ascii self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys self.indent = indent self.current_indent_level = 0 if separators is not None: self.item_separator, self.key_separator = separators if default is not None: self.default = default self.encoding = encoding def _newline_indent(self): return '\n' + (' ' * (self.indent * self.current_indent_level)) def _iterencode_list(self, lst, markers=None): if not lst: yield '[]' return if markers is not None: markerid = id(lst) if markerid in markers: raise ValueError("Circular reference detected") markers[markerid] = lst yield '[' if self.indent is not None: self.current_indent_level += 1 newline_indent = self._newline_indent() separator = self.item_separator + newline_indent yield newline_indent else: newline_indent = None separator = self.item_separator first = True for value in lst: if first: first = False else: yield separator for chunk in self._iterencode(value, markers): yield chunk if newline_indent is not None: self.current_indent_level -= 1 yield self._newline_indent() yield ']' if markers is not None: del markers[markerid] def _iterencode_dict(self, dct, markers=None): if not dct: yield '{}' return if markers is not None: markerid = id(dct) if markerid in markers: raise ValueError("Circular reference detected") markers[markerid] = dct yield '{' key_separator = self.key_separator if self.indent is not None: self.current_indent_level += 1 newline_indent = self._newline_indent() item_separator = self.item_separator + newline_indent yield newline_indent else: newline_indent = None item_separator = self.item_separator first = True if self.ensure_ascii: encoder = encode_basestring_ascii else: encoder = encode_basestring allow_nan = self.allow_nan if self.sort_keys: keys = dct.keys() keys.sort() items = [(k, dct[k]) for k in keys] else: items = dct.iteritems() _encoding = self.encoding _do_decode = (_encoding is not None and not (_encoding == 'utf-8')) for key, value in items: if isinstance(key, str): if _do_decode: key = key.decode(_encoding) elif isinstance(key, basestring): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. elif isinstance(key, float): key = floatstr(key, allow_nan) elif isinstance(key, (int, long)): key = str(key) elif key is True: key = 'true' elif key is False: key = 'false' elif key is None: key = 'null' elif self.skipkeys: continue else: raise TypeError("key %r is not a string" % (key,)) if first: first = False else: yield item_separator yield encoder(key) yield key_separator for chunk in self._iterencode(value, markers): yield chunk if newline_indent is not None: self.current_indent_level -= 1 yield self._newline_indent() yield '}' if markers is not None: del markers[markerid] def _iterencode(self, o, markers=None): if isinstance(o, basestring): if self.ensure_ascii: encoder = encode_basestring_ascii else: encoder = encode_basestring _encoding = self.encoding if (_encoding is not None and isinstance(o, str) and not (_encoding == 'utf-8')): o = o.decode(_encoding) yield encoder(o) elif o is None: yield 'null' elif o is True: yield 'true' elif o is False: yield 'false' elif isinstance(o, (int, long)): yield str(o) elif isinstance(o, float): yield floatstr(o, self.allow_nan) elif isinstance(o, (list, tuple)): for chunk in self._iterencode_list(o, markers): yield chunk elif isinstance(o, dict): for chunk in self._iterencode_dict(o, markers): yield chunk else: if markers is not None: markerid = id(o) if markerid in markers: raise ValueError("Circular reference detected") markers[markerid] = o for chunk in self._iterencode_default(o, markers): yield chunk if markers is not None: del markers[markerid] def _iterencode_default(self, o, markers=None): newobj = self.default(o) return self._iterencode(newobj, markers) def default(self, o): raise TypeError("%r is not JSON serializable" % (o,)) def encode(self, o): if isinstance(o, basestring): if isinstance(o, str): _encoding = self.encoding if (_encoding is not None and not (_encoding == 'utf-8')): o = o.decode(_encoding) if self.ensure_ascii: return encode_basestring_ascii(o) else: return encode_basestring(o) chunks = list(self.iterencode(o)) return ''.join(chunks) def iterencode(self, o): if self.check_circular: markers = {} else: markers = None return self._iterencode(o, markers) _default_encoder = JSONEncoder( skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, indent=None, separators=None, encoding='utf-8', default=None, ) def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, encoding='utf-8', default=None, **kw): if (skipkeys is False and ensure_ascii is True and check_circular is True and allow_nan is True and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder return cls( skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, **kw).encode(obj)