jsonもどきであればいい (LooseJSONDecoder事始め)を整えましたる。
1 # -*- coding: utf-8 -*-
2 r"""
3 Decoding JSON::
4
5 >>> import loose_json_decoder as loose_json
6 >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
7 >>> loose_json.loads('["foo", {"bar": ["baz", null, 1.0, 2, ], }, ]') == obj
8 True
9 >>> loose_json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
10 True
11 >>> from StringIO import StringIO
12 >>> io = StringIO('["streaming API",]')
13 >>> loose_json.load(io)[0] == 'streaming API'
14 True
15
16 Specializing JSON object decoding::
17
18 >>> import loose_json_decoder as loose_json
19 >>> def as_complex(dct):
20 ... if '__complex__' in dct:
21 ... return complex(dct['real'], dct['imag'])
22 ... return dct
23 ...
24 >>> loose_json.loads('{"__complex__": true, "real": 1, "imag": 2, }',
25 ... object_hook=as_complex)
26 (1+2j)
27 >>> from decimal import Decimal
28 >>> loose_json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
29 True
30 """
31 import json
32 import re
33
34 __all__ = ['LooseJSONDecoder', 'ParseObjectProxy', 'ParseArrayProxy', 'load', 'loads']
35
36
37 # expose original json.decoder module publics
38 from json import scanner
39 from json.decoder import FLAGS
40 from json.decoder import NaN, PosInf, NegInf
41 from json.decoder import linecol, errmsg
42 from json.decoder import STRINGCHUNK
43 from json.decoder import BACKSLASH
44 from json.decoder import DEFAULT_ENCODING
45 from json.decoder import py_scanstring, scanstring
46 from json.decoder import WHITESPACE, WHITESPACE_STR
47 #
48
49
# Matched by ParseObjectProxy against the text of a ValueError raised by the
# wrapped object parser.  Groups 1-3 capture the line, column and character
# offset quoted in the message; only the character offset (group 3) is used.
_RGX_EXPECT_PROPNAME = re.compile(
    r'Expecting property name enclosed in double quotes: line ([0-9]+) column ([0-9]+) \(char ([0-9]+)\)')


# Matched by ParseArrayProxy against the text of a ValueError raised by the
# wrapped array parser.  Same capture-group layout as _RGX_EXPECT_PROPNAME.
_RGX_EXPECT_OBJECT = re.compile(
    r'Expecting object: line ([0-9]+) column ([0-9]+) \(char ([0-9]+)\)')
56
57
58 #
59 # a JsonObject parser which allows extra comma.
60 #
class ParseObjectProxy(object):
    """Callable wrapper around a JSON object parser that tolerates one
    trailing comma before the closing ``}``.

    The wrapped parser is tried first.  If it fails with the "Expecting
    property name" error and the offending character is ``}``, the comma
    that precedes it (ignoring whitespace) is replaced by a space and the
    wrapped parser is run once more on the patched text.  Because the
    replacement keeps the text length unchanged, the end index returned by
    the wrapped parser is still valid for the original text.
    """

    def __init__(self, orig_parser):
        """Remember ``orig_parser``, the strict parser to delegate to."""
        self.orig_parser = orig_parser

    def __call__(self,
                 s_and_end, encoding, strict, scan_once,
                 object_hook, object_pairs_hook,
                 _w=WHITESPACE.match,
                 _ws=WHITESPACE_STR):
        """Parse a JSON object, retrying once without a trailing comma.

        All arguments are forwarded unchanged to the wrapped parser;
        ``s_and_end`` is a ``(text, index)`` pair.

        Returns whatever the wrapped parser returns.

        Raises ``ValueError`` (the wrapped parser's own error) when the
        failure is not a trailing comma, or when the retry fails too.
        """
        try:
            return self.orig_parser(
                s_and_end, encoding, strict, scan_once,
                object_hook, object_pairs_hook, _w, _ws)
        except ValueError as e:
            m = _RGX_EXPECT_PROPNAME.match(str(e))
            if not m:
                raise
            s, end = s_and_end
            maybe_brace = int(m.group(3))
            # Compare a slice, not s[maybe_brace]: for truncated input the
            # reported offset can equal len(s), and indexing would replace
            # the parser's ValueError with an IndexError.
            if s[maybe_brace:maybe_brace + 1] == '}':
                # Walk backwards from the brace to the nearest
                # non-whitespace character.
                for i in range(maybe_brace - 1, end, -1):
                    if s[i].isspace():
                        continue
                    if s[i] == ',':
                        # Blank out the comma, keeping every offset stable.
                        s = "%s %s" % (s[:i], s[i+1:])
                        return self.orig_parser(
                            (s, end), encoding, strict, scan_once,
                            object_hook, object_pairs_hook, _w, _ws)
                    # Something other than a comma precedes the brace.
                    raise
            raise
92
93
94 #
95 # a JsonArray parser which allows extra comma.
96 #
class ParseArrayProxy(object):
    """Callable wrapper around a JSON array parser that tolerates one
    trailing comma before the closing ``]``.

    The wrapped parser is tried first.  If it fails with the "Expecting
    object" error and the offending character is ``]``, the comma that
    precedes it (ignoring whitespace) is replaced by a space and the
    wrapped parser is run once more on the patched text.  Because the
    replacement keeps the text length unchanged, the end index returned by
    the wrapped parser is still valid for the original text.
    """

    def __init__(self, orig_parser):
        """Remember ``orig_parser``, the strict parser to delegate to."""
        self.orig_parser = orig_parser

    def __call__(self,
                 s_and_end, scan_once,
                 _w=WHITESPACE.match,
                 _ws=WHITESPACE_STR):
        """Parse a JSON array, retrying once without a trailing comma.

        All arguments are forwarded unchanged to the wrapped parser;
        ``s_and_end`` is a ``(text, index)`` pair.

        Returns whatever the wrapped parser returns.

        Raises ``ValueError`` (the wrapped parser's own error) when the
        failure is not a trailing comma, or when the retry fails too.
        """
        try:
            return self.orig_parser(s_and_end, scan_once, _w, _ws)
        except ValueError as e:
            m = _RGX_EXPECT_OBJECT.match(str(e))
            if not m:
                raise
            s, end = s_and_end
            maybe_bracket = int(m.group(3))
            # Compare a slice, not s[maybe_bracket]: for truncated input the
            # reported offset can equal len(s), and indexing would replace
            # the parser's ValueError with an IndexError.
            if s[maybe_bracket:maybe_bracket + 1] == ']':
                # Walk backwards from the bracket to the nearest
                # non-whitespace character.
                for i in range(maybe_bracket - 1, end, -1):
                    if s[i].isspace():
                        continue
                    if s[i] == ',':
                        # Blank out the comma, keeping every offset stable.
                        s = "%s %s" % (s[:i], s[i+1:])
                        return self.orig_parser(
                            (s, end), scan_once, _w, _ws)
                    # Something other than a comma precedes the bracket.
                    raise
            raise
124
125
126 #
127 # `json-like' decoder
128 #
class LooseJSONDecoder(json.JSONDecoder):
    '''
    A ``json.JSONDecoder`` whose object and array parsers are wrapped in
    ``ParseObjectProxy`` / ``ParseArrayProxy``, so a trailing comma before
    ``}`` or ``]`` is accepted.

    >>> decoder = LooseJSONDecoder()
    >>> decoder.decode("""
    ... {"a": [1, 2, 3, ], "b": ["x", "y", "z",],}
    ... """)
    {u'a': [1, 2, 3], u'b': [u'x', u'y', u'z']}
    >>> decoder.decode("""
    ... {
    ...     "a": [
    ...         1,
    ...         2,
    ...         3,
    ...     ],
    ...     "b": [
    ...         "x",
    ...         "y",
    ...         "z",
    ...     ],
    ... }
    ... """)
    {u'a': [1, 2, 3], u'b': [u'x', u'y', u'z']}
    '''

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """Initialise the base decoder, then install the lenient parsers.

        Every argument is handed to ``json.JSONDecoder.__init__`` unchanged.
        """
        super(LooseJSONDecoder, self).__init__(
            encoding=encoding,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            strict=strict,
            object_pairs_hook=object_pairs_hook)

        strict_object_parser = self.parse_object
        strict_array_parser = self.parse_array
        self.parse_object = ParseObjectProxy(strict_object_parser)
        self.parse_array = ParseArrayProxy(strict_array_parser)

        # Rebuild the scanner with the pure-Python factory after swapping the
        # parsers -- presumably so that it picks up the proxies installed
        # above (confirm against json.scanner).
        self.scan_once = scanner.py_make_scanner(self)
165
166
def load(fp, encoding=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None):
    """Read a JSON-like document from ``fp`` and decode it.

    ``fp`` is any object with a ``.read()`` method.  Its whole content is
    read and passed to ``loads()`` together with every other argument.

    ``encoding`` names the ASCII-based encoding of the data when it is not
    utf-8 (e.g. latin-1).  Encodings that are not ASCII based (such as
    UCS-2) are not allowed; wrap the stream with
    ``codecs.getreader(encoding)(fp)``, or decode the data to ``unicode``
    and call ``loads()`` instead.

    ``object_hook``, if given, is called with each decoded object literal
    (a ``dict``) and its return value is used in place of the ``dict``.

    ``object_pairs_hook``, if given, is called with each decoded object
    literal as an ordered list of pairs and its return value is used in
    place of the ``dict``.  It takes priority over ``object_hook``.

    ``parse_float``, ``parse_int`` and ``parse_constant`` are forwarded to
    ``loads()`` unchanged.
    """
    document = fp.read()
    return loads(
        document,
        encoding=encoding,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        object_pairs_hook=object_pairs_hook)
197
198
def loads(s, encoding=None, object_hook=None, parse_float=None,
          parse_int=None, parse_constant=None, object_pairs_hook=None):
    """Decode ``s``, a ``str`` or ``unicode`` JSON-like document.

    The work is delegated to ``json.loads`` with ``cls`` fixed to
    ``LooseJSONDecoder``; every other argument is forwarded as given.

    ``encoding`` names the ASCII-based encoding of a ``str`` input when it
    is not utf-8 (e.g. latin-1).  Encodings that are not ASCII based (such
    as UCS-2) are not allowed; decode to ``unicode`` first.

    ``object_hook``, if given, is called with each decoded object literal
    (a ``dict``) and its return value is used in place of the ``dict``.

    ``object_pairs_hook``, if given, is called with each decoded object
    literal as an ordered list of pairs and its return value is used in
    place of the ``dict``.  It takes priority over ``object_hook``.

    ``parse_float``, if given, is called with the string of every JSON
    float; the default is equivalent to ``float(num_str)``.

    ``parse_int``, if given, is called with the string of every JSON
    integer; the default is equivalent to ``int(num_str)``.

    ``parse_constant``, if given, is called with the strings
    ``-Infinity``, ``Infinity`` and ``NaN``; it can be used to raise an
    exception when such invalid JSON numbers are encountered.
    """
    decoder_options = dict(
        encoding=encoding,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        object_pairs_hook=object_pairs_hook)
    return json.loads(s, cls=LooseJSONDecoder, **decoder_options)
242
243
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in the docstrings.
    from doctest import testmod
    testmod()
説明はいらんよね、docstring に書いてあるから。
パッケージ化するか、単一モジュールにするか、設計決断ポイントはいくつかあったけど、ま、これでひとまず十分でしょ。(**kw の扱いもな。オリジナルの json モジュールの cls 引数を固定化しちゃうんで、**kw が意味なくなっちゃうのよね。)