jsonもどきであればいい (LooseJSONDecoder、一応完成品)

jsonもどきであればいい (LooseJSONDecoder事始め)を整えましたる。

loose_json_decoder.py
  1 # -*- coding: utf-8 -*-
  2 r"""
  3 Decoding JSON::
  4 
  5     >>> import loose_json_decoder as loose_json
  6     >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
  7     >>> loose_json.loads('["foo", {"bar": ["baz", null, 1.0, 2, ], }, ]') == obj
  8     True
  9     >>> loose_json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
 10     True
 11     >>> from StringIO import StringIO
 12     >>> io = StringIO('["streaming API",]')
 13     >>> loose_json.load(io)[0] == 'streaming API'
 14     True
 15 
 16 Specializing JSON object decoding::
 17 
 18     >>> import loose_json_decoder as loose_json
 19     >>> def as_complex(dct):
 20     ...     if '__complex__' in dct:
 21     ...         return complex(dct['real'], dct['imag'])
 22     ...     return dct
 23     ...
 24     >>> loose_json.loads('{"__complex__": true, "real": 1, "imag": 2, }',
 25     ...     object_hook=as_complex)
 26     (1+2j)
 27     >>> from decimal import Decimal
 28     >>> loose_json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
 29     True
 30 """
 31 import json
 32 import re
 33 
 34 __all__ = ['LooseJSONDecoder', 'ParseObjectProxy', 'ParseArrayProxy', 'load', 'loads']
 35 
 36 
 37 # expose original json.decoder module publics
 38 from json import scanner
 39 from json.decoder import FLAGS
 40 from json.decoder import NaN, PosInf, NegInf
 41 from json.decoder import linecol, errmsg
 42 from json.decoder import STRINGCHUNK
 43 from json.decoder import BACKSLASH
 44 from json.decoder import DEFAULT_ENCODING
 45 from json.decoder import py_scanstring, scanstring
 46 from json.decoder import WHITESPACE, WHITESPACE_STR
 47 #
 48 
 49 
 50 _RGX_EXPECT_PROPNAME = re.compile(
 51     r'Expecting property name enclosed in double quotes: line ([0-9]+) column ([0-9]+) \(char ([0-9]+)\)')
 52 
 53 
 54 _RGX_EXPECT_OBJECT = re.compile(
 55     r'Expecting object: line ([0-9]+) column ([0-9]+) \(char ([0-9]+)\)')
 56 
 57 
 58 #
 59 # a JsonObject parser which allows extra comma.
 60 #
 61 class ParseObjectProxy(object):
 62     def __init__(self, orig_parser):
 63         self.orig_parser = orig_parser
 64 
 65     def __call__(self,
 66                  s_and_end, encoding, strict, scan_once,
 67                  object_hook, object_pairs_hook,
 68                  _w=WHITESPACE.match,
 69                  _ws=WHITESPACE_STR):
 70 
 71         try:
 72             return self.orig_parser(
 73                 s_and_end, encoding, strict, scan_once,
 74                 object_hook, object_pairs_hook, _w, _ws)
 75         except ValueError as e:
 76             m = _RGX_EXPECT_PROPNAME.match(str(e))
 77             if not m:
 78                 raise
 79             s, end = s_and_end
 80             maybe_brace = int(m.group(3))
 81             if s[maybe_brace] == '}':
 82                 for i in range(maybe_brace - 1, end, -1):
 83                     if s[i].isspace():
 84                         continue
 85                     if s[i] == ',':
 86                         s = "%s %s" % (s[:i], s[i+1:])
 87                         return self.orig_parser(
 88                             (s, end), encoding, strict, scan_once,
 89                             object_hook, object_pairs_hook, _w, _ws)
 90                     raise
 91             raise
 92 
 93 
 94 #
 95 # a JsonArray parser which allows extra comma.
 96 #
 97 class ParseArrayProxy(object):
 98     def __init__(self, orig_parser):
 99         self.orig_parser = orig_parser
100 
101     def __call__(self,
102                  s_and_end, scan_once,
103                  _w=WHITESPACE.match,
104                  _ws=WHITESPACE_STR):
105 
106         try:
107             return self.orig_parser(s_and_end, scan_once, _w, _ws)
108         except ValueError as e:
109             m = _RGX_EXPECT_OBJECT.match(str(e))
110             if not m:
111                 raise
112             s, end = s_and_end
113             maybe_bracket = int(m.group(3))
114             if s[maybe_bracket] == ']':
115                 for i in range(maybe_bracket - 1, end, -1):
116                     if s[i].isspace():
117                         continue
118                     if s[i] == ',':
119                         s = "%s %s" % (s[:i], s[i+1:])
120                         return self.orig_parser(
121                             (s, end), scan_once, _w, _ws)
122                     raise
123             raise
124 
125 
126 #
127 # `json-like' decoder
128 #
class LooseJSONDecoder(json.JSONDecoder):
    '''A ``json.JSONDecoder`` whose array and object parsers are wrapped in
    ``ParseArrayProxy`` / ``ParseObjectProxy``, so a trailing comma before
    ``]`` or ``}`` is tolerated.

    >>> decoder = LooseJSONDecoder()
    >>> decoder.decode("""
    ... {"a": [1, 2, 3, ], "b": ["x", "y", "z",],}
    ... """)
    {u'a': [1, 2, 3], u'b': [u'x', u'y', u'z']}
    >>> decoder.decode("""
    ... {
    ...     "a": [
    ...         1,
    ...         2,
    ...         3,
    ...     ],
    ...     "b": [
    ...         "x",
    ...         "y",
    ...         "z",
    ...     ],
    ... }
    ... """)
    {u'a': [1, 2, 3], u'b': [u'x', u'y', u'z']}
    '''
    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        # Every option is forwarded to the stock decoder unchanged.
        json.JSONDecoder.__init__(
            self,
            encoding=encoding,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            strict=strict,
            object_pairs_hook=object_pairs_hook)

        # Keep the parsers installed by the base class and put the
        # comma-tolerant proxies in front of them.
        strict_object_parser = self.parse_object
        strict_array_parser = self.parse_array
        self.parse_object = ParseObjectProxy(strict_object_parser)
        self.parse_array = ParseArrayProxy(strict_array_parser)

        # Rebuild the scanner from this instance after swapping the parsers.
        # NOTE(review): presumably the pure-Python scanner is chosen because
        # it picks up parse_object/parse_array from the decoder -- confirm
        # against json.scanner.
        self.scan_once = scanner.py_make_scanner(self)
165 
166 
def load(fp, encoding=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None):
    """Read a JSON-like document from ``fp`` and decode it with ``loads()``.

    ``fp`` is any object with a ``.read()`` method; its whole content is read
    in one call and every other argument is forwarded to ``loads()`` as is.

    ``encoding`` names the encoding of the data when it is an ASCII based
    encoding other than utf-8 (e.g. latin-1).  Encodings that are not ASCII
    based (such as UCS-2) are not allowed; wrap the file with
    ``codecs.getreader(encoding)(fp)`` instead, or decode the data to a
    ``unicode`` object and pass that to ``loads()``.

    ``object_hook``, if given, is called with the ``dict`` produced for each
    object literal, and its return value is used in place of that ``dict``
    (useful for custom decoders such as JSON-RPC class hinting).

    ``object_pairs_hook``, if given, is called with the ordered list of
    key/value pairs of each object literal, and its return value is used in
    place of the ``dict``.  This suits decoders that depend on the order in
    which pairs are decoded (e.g. ``collections.OrderedDict``).  It takes
    priority over ``object_hook`` when both are given.

    ``parse_float``, ``parse_int`` and ``parse_constant`` are passed through
    to ``loads()``.
    """
    document = fp.read()
    return loads(
        document,
        encoding=encoding,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        object_pairs_hook=object_pairs_hook)
197 
198 
def loads(s, encoding=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None):
    """Decode ``s`` (a ``str`` or ``unicode`` JSON-like document) to a Python
    object by calling ``json.loads()`` with ``cls=LooseJSONDecoder``.

    ``encoding`` must be given when ``s`` is a ``str`` in an ASCII based
    encoding other than utf-8 (e.g. latin-1).  Encodings that are not ASCII
    based (such as UCS-2) are not allowed; decode such data to ``unicode``
    first.

    ``object_hook``, if given, is called with the ``dict`` produced for each
    object literal, and its return value is used in place of that ``dict``
    (useful for custom decoders such as JSON-RPC class hinting).

    ``object_pairs_hook``, if given, is called with the ordered list of
    key/value pairs of each object literal, and its return value is used in
    place of the ``dict``.  This suits decoders that depend on the order in
    which pairs are decoded (e.g. ``collections.OrderedDict``).  It takes
    priority over ``object_hook`` when both are given.

    ``parse_float``, if given, is called with the string of every JSON float;
    the default is equivalent to ``float(num_str)``.  Use it to get another
    datatype or parser for floats (e.g. ``decimal.Decimal``).

    ``parse_int``, if given, is called with the string of every JSON int; the
    default is equivalent to ``int(num_str)``.  Use it to get another
    datatype or parser for integers (e.g. ``float``).

    ``parse_constant``, if given, is called with the name of a JSON constant
    such as -Infinity, Infinity or NaN, and can be used to raise an exception
    when invalid JSON numbers are encountered.
    """
    # Gather the pass-through options once; only ``cls`` is fixed here.
    decoder_options = dict(
        encoding=encoding,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        object_pairs_hook=object_pairs_hook)
    return json.loads(s, cls=LooseJSONDecoder, **decoder_options)
242 
243 
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()

説明はいらんよね、docstring に書いてあるから。

パッケージ化するか、単一モジュールにするか、設計決断ポイントはいくつかはあったけど、ま、これでひとまず十分でしょ。(**kwの扱いもな。オリジナルの json モジュールの cls 引数を固定化しちゃうんで、**kwが意味なくなっちゃうのよね。)