Hmm**2.
Contents
- 1 Hmm… pyparsing
- 2 13:30 update: Oh yeah, ply is nice
- 3 15:20 update: Nice, really nice, ply
- 4 2017-11-08 9:00 update: Fast, so fast
- 5 2017-11-08 11:15 update: Ugh, ply
- 6 2017-11-09 10:15 update: Hmm, ply
- 7 2017-11-09 16:40 update: Thought it was a no-go, but maybe not, ply
- 8 2017-11-09 18:00 update: What's a no-go is (especially) lex, ply
- 9 2017-11-08 18:30 update: Ugh, ply is definitely out, so rply?
- 10 2017-11-08 19:20 update: A first rply hello world
- 11 2017-11-10 01:20 update: This is it, rply it is
- 12 2017-11-10 06:30 update: Say you want to add per-token actions to the rply lexer…
- 13 2017-11-11 11:00 update: First version of the MSBuild expression evaluator written with rply is done
- 14 2017-11-12 19:40 update: I misstated the rply limitation
- 15 2017-11-15 21:00 update: A savior: state can be used
Hmm… pyparsing
I just could not manage to write an OR combination of rules in pyparsing. And that's not all: for simple stuff it's quite comfortable, but once things got a little more complex there was no sign that the result would stay maintainable. Being able to write rule and token definitions "Pythonically" turns out not to be a plus, at least for me.
It would be a waste to just throw away the work in progress, but it's not something worth keeping in my own repository either, so I'll paste it here as-is, a "failed attempt, frozen mid trial-and-error":
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import sys
8 import re
9 from functools import partial
10 import types
11
12 from pyparsing import *
13
14
15 def _ParseResults_asListWithNames(self):
16 names = []
17
18 namedItems = dict(
19 (v[1], k)
20 for (k, vlist) in self._ParseResults__tokdict.items()
21 for v in vlist)
22 for i, res in enumerate(self._ParseResults__toklist):
23 #if isinstance(res, ParseResults):
24 # pass
25 if i in namedItems:
26 names.append(namedItems[i])
27 else:
28 names.append("ITEM")
29
30 return self._ParseResults__toklist, names
31
32
33 def wrap_ParseResults(toks):
34 toks.asListWithName = types.MethodType(_ParseResults_asListWithNames, toks)
35 return toks
36
37
38 class ExpressionEvaluator(object):
39 def __init__(self):
40 self._stack = []
41 self._props = {}
42
43 ref_start = (Literal("$(") | Literal("%(")).setResultsName("REF_START")
44 lparen = Literal("(").setResultsName("LPAR")
45 rparen = Literal(")").setResultsName("RPAR")
46 squote = Literal("'").setResultsName("SQ")
47 comma = Literal(",").suppress()
48 #
49 point = Literal(".")
50 e = CaselessLiteral("E")
51 fnumber = Combine(
52 Word("+-" + nums, nums) + \
53 Optional(point + Optional(Word(nums))) + \
54 Optional(e + Word("+-" + nums, nums))).setResultsName("NUMBER")
55
56 # `xxx`
57 msbuild_string = QuotedString("`", unquoteResults=False)
58
59 #
60 id_component = Word(alphas + "_", alphas + nums + "_").setResultsName("ID_COMPONENT")
61 prop_name = Word(".", alphas + nums + "_").setResultsName("PROP_NAME")
62 #id_component.setParseAction(
63 # partial(self._reduce, context="ROOTKEY"))
64
65 #
66 ref = Forward()
67
68 # (..., )
69 arg = ref | fnumber | msbuild_string
70 args = arg + ZeroOrMore(comma + arg)
71
72 #func_args = Forward()
73 #func_args << lparen + Optional(args) + rparen
74 func_args = lparen + Optional(args) + rparen
75 #func_args.setParseAction(
76 # partial(self._reduce, context="FUNC_ARGS"))
77
78 #
79 #prop = Forward()
80 #prop << prop_name + Optional(func_args)
81 prop = prop_name + Optional(func_args)
82 prop.setResultsName("PROP")
83 prop.setParseAction(
84 partial(self._reduce, context="PROP"))
85 #
86 #ref_content = Forward()
87 ref_content = id_component + ZeroOrMore(prop)
88 #ref1 = (squote + ref_start + ref_content + rparen + squote)
89 #ref2 = (ref_start + ref_content + rparen)
90 ref << (ref_start + ref_content + rparen)
91 #ref |= (squote + ref + squote)
92 #ref << ref1 | ref2
93 #ref = ref | (squote + ref + squote)
94 ref.setParseAction(
95 partial(self._reduce, context="REF"))
96
97 #
98 #grammar = Forward()
99 #grammar << ref
100 grammar = ref
101 #grammar.setParseAction(
102 # partial(self._reduce, context="WHOLE"))
103 self._bnf = grammar
104
105 def evaluate(self, s, props):
106 self._props = props
107 parsed = self._bnf.parseString(s)
108 #print(parsed.asXML())
109 #print("\n".join([str(t) for t in self._stack]))
110
111 def _reduce(self, strg, loc, toks, context=""):
112 wr_toks = wrap_ParseResults(toks)
113 #print(context, dir(toks))
114 #if context == "REF":
115 # if "(" not in toks[1:-1]:
116 # key = "".join(toks[1:-1])
117 # value = self._props.get(key, "")
118 # return value #"".join(toks)
119 #if context in ("WHOLE", ): #"PROP"):
120 if context == "REF":
121 tl = wr_toks.asListWithName()
122 if "LPAR" not in tl[1][1:]:
123 #key = "".join(toks[1:-1])
124 #value = self._props.get(key, "")
125 #return value #"".join(toks)
126 #print("!!!", type(toks))
127 return ParseResults("".join(toks), "REF")
128 #pass
129 #print(context, toks.asList())#.dump()) #dir(toks))
130 #print(context, dir(toks))
131 #print(context, list(toks.values()))
132 #print(context, toks.asDict())
133 #print(context, toks.asXML())
134 #pass
135 #print(context, [(tok, type(tok)) for tok in toks])
136 #print(context, toks._ParseResults__toklist, type(toks))#.dump())
137 #namedItems = dict(
138 # (v[1], k)
139 # for (k, vlist) in toks._ParseResults__tokdict.items()
140 # for v in vlist)
141 #worklist = toks._ParseResults__toklist
142 #for i, res in enumerate(worklist):
143 # #print(isinstance(res, ParseResults))
144 # if i in namedItems:
145 # print((namedItems[i], res))
146 print(tl[0])
147 print(tl[1])
148 print("")
149 #self._stack.append((context, toks))
150 #toks_list = toks.asList()
151 #if context == "REF":
152 # if "(" not in toks_list[1:-1]:
153 # return "".join(toks)
154 #if context == "FUNC_ARGS":
155 # #print("!!!", toks_list[1:-1])
156 # self._stack.append((context, toks_list[1:-1]))
157 # #return []
158 #elif context == "PROP":
159 # args = []
160 # if self._stack:
161 # args = self._stack.pop()[1]
162 # self._stack.append((context, toks_list[0], args))
163 #else:
164 # self._stack.append((context, toks.asList()))
165 #self._stack.append((toks_list, context))
166 #if context == "FUNC_ARGS":
167 # #print("!!!", toks.asList()[1:-1])
168 # self._stack.append(toks)
169 # return []
170 #else:
171 # self._stack.append((context, toks.asList()))
172 #if context == "REF":
173 # if "(" not in toks[1:-1]:
174 # key = "".join(toks[1:-1])
175 # value = self._props[key]
176 # #self._stack.append((context, value))
177 # return [value]
178 # self._stack.append((context, toks))
179 # return []
180 ##if context == "ROOTKEY":
181 ## self._stack.append((context, "".join(toks)))
182 # return []
183 #if context == "WHOLE":
184 # print(" ".join(toks))
185
186 ExpressionEvaluator().evaluate(
187 """%(AAA.YYY.Contains($(CCC.Replace(`a`, `b`).Replace($(X), $(Y)))))""",
188 props={"X": "x", "Y": "y", "AAA": "zzz", "CCC": "aaa"})
For the part that's written it does work nicely, but when I tried to let the `$(X)` part also accept `'$(X)'`, that is, in terms of the grammar definition:
ref = ref_start, ref_content, rparen
    | squote, ref_start, ref_content, rparen, squote
    ;
is what I wanted to write, but the behavior of `|` seemed somehow suspect.
And I don't think the script leaves such a poor impression merely because it's mid-trial-and-error garbage. It's hard to split the responsibilities apart, so it's hard to write and hard to read. To repeat: for very simple uses I do think pyparsing is "extremely handy", but I came away thinking it's not suited to anything complex.
So, with no better option, I'm now trying ply as "something more proper". And "proper" is an understatement: it apparently aims at full compatibility with Unix C's lex/yacc, and from the little I've tried, it feels like you can write it in "almost" the same spirit as writing lex/yacc.
Which means it's hard to explain to anyone who doesn't know lex/yacc one bit, but assuming you do, here's a "hello world"-ish example:
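Roughly, a minimal sketch of that kind of hello world: a single `name` rule, with token and rule names that are just for illustration.

# -*- coding: utf-8 -*-
# Minimal ply sketch: one token, one grammar rule.
import ply.lex as lex
import ply.yacc as yacc

tokens = ("NAME",)                     # the token list ply requires

t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"     # "t_" prefix: token definition

def t_error(t):
    raise SyntaxError("Illegal character {!r}".format(t.value[0]))

def p_name(p):                         # "p_" prefix: parse rule
    """
    name : NAME
    """
    # the BNF lives in the docstring; the body is the action run on reduce
    p[0] = p[1]
    print("name / p[1]={!r}".format(p[1]))

def p_error(p):
    raise SyntaxError("Syntax error")

if __name__ == "__main__":
    lex.lex()      # build the lexer
    yacc.yacc()    # build the parser
    yacc.parse("hello")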
There's no real "grammar" to speak of, just a single rule called `name`, but I think it's a sufficient sample for learning where to write what, and how.
You write the BNF in the docstring. Also, names starting with `t_` are token definitions, names starting with `p_` are parse-rule definitions, and so on; you write things following a handful of fixed conventions like that.
Being able to write real BNF is something I'm grateful for. Performance-wise this is probably exactly why it's not so hot, I suspect.
All I can do now is pray that I can write everything I want to write with it.
And I've made a tiny start on "the thing I actually want to do":
I got briefly stuck because I'd forgotten how sensitive the ordering of rules is, but ah, how straightforward it all is. Being able to write the reduce step "as is" is what makes it. That's what yacc always was, after all.
13:30 update: Oh yeah, ply is nice
Oh yeah, ply is nice. It's still "grammar only", but I managed to get this far in one sitting:
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import ply.lex as lex
8 import ply.yacc as yacc
9
10
11 # Lexing Rules
12 tokens = (
13 "SQUOTE",
14 "NUMBER", "MSBUILDSTR",
15 "REF_START",
16 "RPAREN", "LPAREN", "ARGSEP",
17 "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
18 "NAME", "PROP",
19 )
20
21 t_SQUOTE = r"'"
22 t_NUMBER = r"[+-]?(\d+(\.\d*)?|\.\d+)([eEfF][+-]?\d+)?"
23 t_MSBUILDSTR = r"`[^`]*`"
24
25 t_REF_START = r"[$%]\("
26 t_LPAREN = r'\('
27 t_RPAREN = r'\)'
28 t_LSQBRACKET = r'\['
29 t_RSQBRACKET = r'\]'
30 t_DBLCOLON = r'::'
31 t_ARGSEP = r'\s*,\s*'
32 t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
33 t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
34
35 # Parsing Rules
36 def p_ref(p):
37 """
38 ref : REF_START refcontent RPAREN
39 | SQUOTE ref SQUOTE
40 """
41 print("ref / p[1:]='{}'".format(p[1:]))
42
43
44 def p_refcontent(p):
45 """
46 refcontent : propcallchain
47 | nsref propcallchain
48 """
49 print("refcontent / p[:]='{}'".format(p[:]))
50 #p[0] = "".join([k for k in p[:] if k])
51
52
53 def p_propcallchain(p):
54 """
55 propcallchain : NAME
56 | NAME propcall
57 | propcallchain propcall
58 """
59 print("propcallchain / p[:]='{}'".format(p[:]))
60 #p[0] = "".join([k for k in p[:] if k])
61
62
63 def p_nsref(p):
64 """
65 nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
66 | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
67 """
68 print("nsref / p[:]='{}'".format(p[:]))
69
70
71 def p_prop(p):
72 """
73 prop : PROP
74 | prop prop
75 """
76 print("prop / p[:]='{}'".format(p[:]))
77 #p[0] = "".join([k for k in p[:] if k])
78
79
80 def p_propcall(p):
81 """
82 propcall : PROP
83 | PROP call
84 | propcall propcall
85 """
86 print("propcall / p[:]='{}'".format(p[:]))
87 #p[0] = "".join([k for k in p[:] if k])
88
89
90 def p_call(p):
91 """
92 call : LPAREN RPAREN
93 | LPAREN args RPAREN
94 """
95 print("call / p[:]='{}'".format(p[:]))
96 #p[0] = "".join([k for k in p[:] if k])
97
98
99 def p_args(p):
100 """
101 args : arg
102 | arg ARGSEP arg
103 | args ARGSEP arg
104 """
105 print("args / p[:]='{}'".format(p[:]))
106 #p[0] = "".join([k for k in p[:] if k])
107
108
109 def p_arg(p):
110 """
111 arg : ref
112 | NUMBER
113 | MSBUILDSTR
114 """
115 print("args / p[:]='{}'".format(p[:]))
116 #p[0] = "".join([k for k in p[:] if k])
117
118
119 #
120 if __name__ == '__main__':
121 _DEBUG = False
122 lex.lex(debug=_DEBUG) # Build the lexer
123 yacc.yacc(debug=_DEBUG) # Build the parser
124
125 # parse
126 yacc.parse("$(Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
127 print("")
128 yacc.parse("$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
129 print("")
130 yacc.parse("HasTrailingSlash($([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length))")
Hardly any struggle at all.
So far this covers only "(MSBuild) macro references", but making it also interpret "expressions" (for example `'$(A)' == 'zzz'`) is probably easy, "as far as the grammar definition goes". The action parts will be fiddly, I'm sure; it won't go as smoothly as a simple calculator.
15:20 update: Nice, really nice, ply
Less than two hours after the previous update…
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import ply.lex as lex
8 import ply.yacc as yacc
9
10
11 # Lexing Rules
12 tokens = (
13 "SQUOTE", "DQUOTE",
14 "NUMBER", "MSBUILD_LITERAL",
15 "SQ_LITERAL", "DQ_LITERAL",
16 "REF_START",
17 "RPAREN", "LPAREN", "ARGSEP",
18 "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
19 "NAME", "PROP",
20 "CMP_EQ", "CMP_NE", "CMP_LT",
21 "CMP_LE", "CMP_GE", "CMP_GT",
22 "NEGATE",
23 "LOGICAL_AND", "LOGICAL_OR",
24 )
25
26 t_SQUOTE = r"'"
27 t_DQUOTE = r'"'
28 t_NUMBER = r"[+-]?(\d+(\.\d*)?|\.\d+)([eEfF][+-]?\d+)?"
29 t_MSBUILD_LITERAL = r"`[^`]*`"
30
31 t_REF_START = r"[$%]\("
32 t_LPAREN = r'\('
33 t_RPAREN = r'\)'
34 t_LSQBRACKET = r'\['
35 t_RSQBRACKET = r'\]'
36 t_DBLCOLON = r'::'
37 t_ARGSEP = r'\s*,\s*'
38 t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
39 t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
40
41 t_SQ_LITERAL = r"'[^']*'"
42 t_DQ_LITERAL = r'"[^"]*"'
43
44 t_CMP_EQ = r"\s*==\s*"
45 t_CMP_NE = r"\s*!=\s*"
46 t_CMP_LT = r"\s*<\s*"
47 t_CMP_LE = r"\s*<=\s*"
48 t_CMP_GE = r"\s*>=\s*"
49 t_CMP_GT = r"\s*>\s*"
50
51 t_NEGATE = r"\s*!\s*"
52
53 t_LOGICAL_AND = r"(?<!\w)\s*[aA][nN][dD]\s*(?!\w)"
54 t_LOGICAL_OR = r"(?<!\w)\s*[oO][rR]\s*(?!\w)"
55
56 # Parsing Rules
57 precedence = (
58 ('left', 'LOGICAL_AND'),
59 ('left', 'LOGICAL_OR'),
60 ('right', 'NEGATE'),
61 )
62
63 def p_cmp_expression(p):
64 """
65 expression : expression CMP_EQ expression
66 | expression CMP_NE expression
67 | expression CMP_LT expression
68 | expression CMP_LE expression
69 | expression CMP_GE expression
70 | expression CMP_GT expression
71 """
72 print("cmp_expression / p[1:]='{}'".format(p[1:]))
73
74
75 def p_landor_expression(p):
76 """
77 expression : expression LOGICAL_AND expression %prec LOGICAL_AND
78 | expression LOGICAL_OR expression %prec LOGICAL_OR
79 """
80 print("cmp_expression / p[1:]='{}'".format(p[1:]))
81
82
83 def p_negate_expression(p):
84 """
85 expression : NEGATE expression %prec NEGATE
86 """
87 print("negate_expression / p[1:]='{}'".format(p[1:]))
88
89
90 def p_expression_group(p):
91 """
92 expression : LPAREN expression RPAREN
93 """
94 print("expression_group / p[1:]='{}'".format(p[1:]))
95 #p[0] = p[2]
96
97
98 def p_value_expression(p):
99 """
100 expression : value
101 """
102 print("value_expression / p[1:]='{}'".format(p[1:]))
103
104
105 def p_ref(p):
106 """
107 ref : REF_START refcontent RPAREN
108 | SQUOTE ref SQUOTE
109 | DQUOTE ref DQUOTE
110 """
111 print("ref / p[1:]='{}'".format(p[1:]))
112
113
114 def p_refcontent(p):
115 """
116 refcontent : propcallchain
117 | nsref propcallchain
118 """
119 print("refcontent / p[:]='{}'".format(p[:]))
120 #p[0] = "".join([k for k in p[:] if k])
121
122
123 def p_propcallchain(p):
124 """
125 propcallchain : NAME
126 | NAME propcall
127 | propcallchain propcall
128 """
129 print("propcallchain / p[:]='{}'".format(p[:]))
130 #p[0] = "".join([k for k in p[:] if k])
131
132
133 def p_nsref(p):
134 """
135 nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
136 | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
137 """
138 print("nsref / p[:]='{}'".format(p[:]))
139
140
141 def p_prop(p):
142 """
143 prop : PROP
144 | prop prop
145 """
146 print("prop / p[:]='{}'".format(p[:]))
147 #p[0] = "".join([k for k in p[:] if k])
148
149
150 def p_propcall(p):
151 """
152 propcall : PROP
153 | PROP call
154 | propcall propcall
155 """
156 print("propcall / p[:]='{}'".format(p[:]))
157 #p[0] = "".join([k for k in p[:] if k])
158
159
160 def p_funccall(p):
161 """
162 funccall : NAME call
163 """
164 print("funccall / p[:]='{}'".format(p[:]))
165 #p[0] = "".join([k for k in p[:] if k])
166
167
168 def p_call(p):
169 """
170 call : LPAREN RPAREN
171 | LPAREN args RPAREN
172 """
173 print("call / p[:]='{}'".format(p[:]))
174 #p[0] = "".join([k for k in p[:] if k])
175
176
177 def p_args(p):
178 """
179 args : value
180 | value ARGSEP value
181 | args ARGSEP value
182 """
183 print("args / p[:]='{}'".format(p[:]))
184 #p[0] = "".join([k for k in p[:] if k])
185
186
187 def p_value(p):
188 """
189 value : ref
190 | funccall
191 | NUMBER
192 | MSBUILD_LITERAL
193 | SQ_LITERAL
194 | DQ_LITERAL
195 """
196 print("value / p[:]='{}'".format(p[:]))
197 #p[0] = "".join([k for k in p[:] if k])
198
199
200 class ExpressionSyntaxError(Exception):
201 pass
202
203
204 def p_error(p):
205 if p:
206 raise ExpressionSyntaxError(
207 "Syntax error at '{}'".format(p.value))
208 else:
209 raise ExpressionSyntaxError(
210 "Syntax error at EOF")
211
212
213 #
214 if __name__ == '__main__':
215 _DEBUG = False
216 lex.lex(debug=_DEBUG) # Build the lexer
217 yacc.yacc(debug=_DEBUG) # Build the parser
218
219 # parse
220 yacc.parse("$(Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
221 print("")
222 yacc.parse("$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
223 print("")
224 yacc.parse("`xx` == $([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
225 print("")
226 yacc.parse('''`xx` == "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
227 print("")
228 yacc.parse('''"xx" != "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
229 print("")
230 yacc.parse('''!$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Contains("zzz"))''')
231 print("")
232 yacc.parse('''$(Abcde.Contains("zzz")) or !$(Abcde.Contains("yyy"))''')
233 print("")
234 yacc.parse('''HasTrailingSlash($(Abcde.Contains("zzz"))) or !Exists($(Abcde.Contains("yyy")))''')
I want back the full day I wasted agonizing over pyparsing. This is so much easier to keep organized.
It also deals with right/left associativity (precedence, `%prec`) and adds behavior for parser errors, so it might be of some small use to somebody, or then again maybe not. Just be warned that I can't yet guarantee any of it is "correct". (For one thing the actions are all still empty, so it's probably not that instructive. Also, I plan to turn it into a class in the end; if I'm "exposing it for other people", you could fairly say I should have done that from the start.)
The grammar is actually a bit lame: the lexer rules swallow single- and double-quoted strings as whole tokens, so as it stands I'd have to parse the inside of the quotes recursively. Is there a clever way around this…? Would it be solved if the lexer could do state transitions the way flex does? Or not? Who knows.
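For reference, ply itself documents flex-style start conditions ("conditional lexing"): you declare lexer states and prefix state-specific rules with the state name. A tiny sketch of just that mechanism, with made-up token names; whether it cleanly solves the quoting problem above is another question.

# -*- coding: utf-8 -*-
# Sketch of ply's conditional lexing (flex-like start conditions).
# Token and state names are illustrative only.
import ply.lex as lex

tokens = ("NAME", "SQUOTE_OPEN", "SQUOTE_CLOSE", "INNER")

states = (("sq", "exclusive"),)       # declare an exclusive lexer state

t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
t_ignore = ""
t_sq_ignore = ""

def t_SQUOTE_OPEN(t):
    r"'"
    t.lexer.begin("sq")               # enter the 'sq' state
    return t

def t_sq_SQUOTE_CLOSE(t):
    r"'"
    t.lexer.begin("INITIAL")          # back to the default state
    return t

t_sq_INNER = r"[^']+"                 # rules prefixed "t_sq_" apply only in 'sq'

def t_error(t):
    t.lexer.skip(1)

def t_sq_error(t):
    t.lexer.skip(1)

if __name__ == "__main__":
    lexer = lex.lex()
    lexer.input("x'a$(B)c'y")
    for tok in lexer:
        print(tok.type, tok.value)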
By the way, those in the know have surely figured out already that this is a parser for evaluating the expressions you can write in Visual Studio project files. Which means those even more in the know may also see that "that other thing is still missing". There's plenty more, yes. I haven't worked out what it means yet, but apparently you can write things like `^$(ProjectPath);@(GenerateSatelliteAssembliesOutputs->FullPath()->Distinct())`, and also things like `@(Inf->'')`. I can't support what I don't understand, but sooner or later I'll have to.
2017-11-08 9:00 update: Fast, so fast
The previous update took less than half a day of actual work, probably.
If I pushed through in one go I could reach a "finished first version", but the real thing will grow long and stop being useful as a reference for other people, so I'm showing an in-between version as an "entry point" (in other words, I'm already in the TDD phase). Writing the actions may well turn out to be much simpler than you imagine. If you've been assuming that writing a BNF-based parser is beyond you, I especially want you to read this. Once you get the knack, that evaluation is nothing but "reducing rules, over and over", I'm sure you can do it too:
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import os
8 import sys
9
10 import six
11 import ply.lex as lex
12 import ply.yacc as yacc
13
14
15 if sys.version[0] == '2':
16 str = unicode
17
18
19 # ----------------------------------------------------------------
20 #
21 # Non-Python Types Emulation
22 #
23 class String(str):
24 r"""
25 >>> s = String("abc")
26 >>> isinstance(s, (str,))
27 True
28 >>> isinstance(s, six.string_types)
29 True
30 """
31 def __new__(cls, value):
32 obj = str.__new__(cls, value)
33 return obj
34
35 def __getattr__(self, name):
36 # Unfortunately, Microsoft always ignores its case...
37 from types import FunctionType
38 for attr in [
39 x for x, y in String.__dict__.items()]:
40
41 if attr.lower() == name.lower():
42 return getattr(self, attr)
43
44 #def Clone(self, *args, **kwargs):
45 # r"""
46 # Object Clone()
47 #
48 # """
49 # raise NotImplementedError()
50
51 def CompareTo(self, value):
52 r"""
53 int CompareTo(Object value)
54 int CompareTo(string strB)
55
56 >>> tab = [
57 ... # (lhs, rhs, expected)
58 ... # same length
59 ... ("A", "A", 0),
60 ... ("A", "a", 1),
61 ... ("a", "A", -1),
62 ... ("AAA", "AAA", 0),
63 ... ("AAA", "aAA", 1),
64 ... ("aAA", "AAA", -1),
65 ... #
66 ... # len(self) > len(rhs)
67 ... ("AAAa", "AAA", 1),
68 ... ("AAAa", "aAA", 1),
69 ... ("aAAa", "AAA", 1),
70 ... ("aAAaa", "AAA", 1),
71 ... #
72 ... # len(self) < len(rhs)
73 ... ("AAA", "AAAA", -1),
74 ... ("AAA", "aAAA", -1),
75 ... ("aAA", "AAAA", -1),
76 ... ("aAA", "AAAAA", -1),
77 ... ]
78 >>> for lhs, rhs, expected in tab:
79 ... result = String(lhs).CompareTo(rhs)
80 ... assert result == expected, str((lhs, rhs, expected, result))
81 ... #from ._powershell import exec_single_command
82 ... #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
83 ... #assert expected == expected2, str((lhs, rhs, expected2, expected))
84 ... #assert result == expected2, str((lhs, rhs, expected2, result))
85 >>>
86 """
87 lendiff = len(self) - len(value)
88 if lendiff == 0:
89 # this logic is the same as python2's cmp
90 return (value > self) - (value < self)
91 return lendiff / abs(lendiff)
92
93 def Contains(self, value):
94 r"""
95 bool Contains(string value)
96
97 >>> String("AAA").Contains("AAA")
98 True
99 >>> String(" AAA ").Contains("AAA")
100 True
101 >>> String("BBB").Contains("AAA")
102 False
103 >>> String("AAA").Contains(" AAA ")
104 False
105 """
106 return value in self
107
108 #def CopyTo(self, *args, **kwargs):
109 # r"""
110 # Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
111 #
112 # """
113 # raise NotImplementedError()
114
115 def EndsWith(self, *args, **kwargs):
116 r"""
117 bool EndsWith(string value)
118 bool EndsWith(string value, StringComparison comparisonType)
119 bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
120
121 """
122 raise NotImplementedError()
123
124 def Equals(self, *args, **kwargs):
125 r"""
126 bool Equals(Object obj)
127 bool Equals(string value)
128 bool Equals(string value, StringComparison comparisonType)
129
130 """
131 raise NotImplementedError()
132
133 #def GetEnumerator(self, *args, **kwargs):
134 # r"""
135 # CharEnumerator GetEnumerator()
136 #
137 # """
138 # raise NotImplementedError()
139
140 #def GetHashCode(self, *args, **kwargs):
141 # r"""
142 # int GetHashCode()
143 #
144 # """
145 # raise NotImplementedError()
146
147 #def GetType(self, *args, **kwargs):
148 # r"""
149 # type GetType()
150 #
151 # """
152 # raise NotImplementedError()
153
154 #def GetTypeCode(self, *args, **kwargs):
155 # r"""
156 # TypeCode GetTypeCode()
157 #
158 # """
159 # raise NotImplementedError()
160
161 def IndexOf(self, *args, **kwargs):
162 r"""
163 int IndexOf(char value)
164 int IndexOf(char value, int startIndex)
165 int IndexOf(char value, int startIndex, int count)
166 int IndexOf(string value)
167 int IndexOf(string val...
168
169 """
170 raise NotImplementedError()
171
172 def IndexOfAny(self, *args, **kwargs):
173 r"""
174 int IndexOfAny(char[] anyOf)
175 int IndexOfAny(char[] anyOf, int startIndex)
176 int IndexOfAny(char[] anyOf, int startIndex, int count)
177
178 """
179 raise NotImplementedError()
180
181 def Insert(self, *args, **kwargs):
182 r"""
183 string Insert(int startIndex, string value)
184
185 """
186 raise NotImplementedError()
187
188 #def IsNormalized(self, *args, **kwargs):
189 # r"""
190 # bool IsNormalized()
191 # bool IsNormalized(Text.NormalizationForm normalizationForm)
192 #
193 # """
194 # raise NotImplementedError()
195
196 def LastIndexOf(self, *args, **kwargs):
197 r"""
198 int LastIndexOf(char value)
199 int LastIndexOf(char value, int startIndex)
200 int LastIndexOf(char value, int startIndex, int count)
201 int LastIndexOf(string value)
202 int La...
203
204 """
205 raise NotImplementedError()
206
207 def LastIndexOfAny(self, *args, **kwargs):
208 r"""
209 int LastIndexOfAny(char[] anyOf)
210 int LastIndexOfAny(char[] anyOf, int startIndex)
211 int LastIndexOfAny(char[] anyOf, int startIndex, int count)
212
213 """
214 raise NotImplementedError()
215
216 #def Normalize(self, *args, **kwargs):
217 # r"""
218 # string Normalize()
219 # string Normalize(Text.NormalizationForm normalizationForm)
220 #
221 # """
222 # raise NotImplementedError()
223
224 def PadLeft(self, *args, **kwargs):
225 r"""
226 string PadLeft(int totalWidth)
227 string PadLeft(int totalWidth, char paddingChar)
228
229 """
230 raise NotImplementedError()
231
232 def PadRight(self, *args, **kwargs):
233 r"""
234 string PadRight(int totalWidth)
235 string PadRight(int totalWidth, char paddingChar)
236
237 """
238 raise NotImplementedError()
239
240 def Remove(self, *args, **kwargs):
241 r"""
242 string Remove(int startIndex, int count)
243 string Remove(int startIndex)
244
245 """
246 raise NotImplementedError()
247
248 def Replace(self, oldValue, newValue):
249 r"""
250 string Replace(char oldChar, char newChar)
251 string Replace(string oldValue, string newValue)
252
253 >>> print(String("AAA").Replace("AAA", "BBB"))
254 BBB
255 """
256 return String(self.replace(oldValue, newValue))
257
258 def Split(self, *args, **kwargs):
259 r"""
260 string[] Split(Params char[] separator)
261 string[] Split(char[] separator, int count)
262 string[] Split(char[] separator, StringSplitOptions options)
263 string[] Spl...
264
265 """
266 raise NotImplementedError()
267
268 def StartsWith(self, *args, **kwargs):
269 r"""
270 bool StartsWith(string value)
271 bool StartsWith(string value, StringComparison comparisonType)
272 bool StartsWith(string value, bool ignoreCase, Globalizati...
273
274 """
275 raise NotImplementedError()
276
277 def Substring(self, startIndex, length=-1):
278 r"""
279 string Substring(int startIndex)
280 string Substring(int startIndex, int length)
281
282 >>> print(String("abc").Substring(0))
283 abc
284 >>> print(String("abc").Substring(0, 1))
285 a
286 >>> print(String("abc").Substring(1))
287 bc
288 >>> print(String("abc").Substring(1, 1))
289 b
290 >>> print(String("abc").Substring(1, 2))
291 bc
292 >>> print(String("abc").SubString(1, 2))
293 bc
294 """
295 # TODO: "System.String.Substring" raise Exception if
296 # length is larger than actual length.
297 if length >= 0:
298 return String(self[startIndex:][:length])
299 return String(self[startIndex:])
300
301 #def ToCharArray(self, *args, **kwargs):
302 # r"""
303 # char[] ToCharArray()
304 # char[] ToCharArray(int startIndex, int length)
305 #
306 # """
307 # raise NotImplementedError()
308
309 def ToLower(self, *args, **kwargs):
310 r"""
311 string ToLower()
312 string ToLower(Globalization.CultureInfo culture)
313
314 """
315 raise NotImplementedError()
316
317 #def ToLowerInvariant(self, *args, **kwargs):
318 # r"""
319 # string ToLowerInvariant()
320 #
321 # """
322 # raise NotImplementedError()
323
324 #def ToString(self, *args, **kwargs):
325 # r"""
326 # string ToString()
327 # string ToString(IFormatProvider provider)
328 #
329 # """
330 # raise NotImplementedError()
331
332 def ToUpper(self, *args, **kwargs):
333 r"""
334 string ToUpper()
335 string ToUpper(Globalization.CultureInfo culture)
336
337 """
338 raise NotImplementedError()
339
340 #def ToUpperInvariant(self, *args, **kwargs):
341 # r"""
342 # string ToUpperInvariant()
343 #
344 # """
345 # raise NotImplementedError()
346
347 def Trim(self, *args, **kwargs):
348 r"""
349 string Trim(Params char[] trimChars)
350 string Trim()
351
352 """
353 raise NotImplementedError()
354
355 def TrimEnd(self, *args, **kwargs):
356 r"""
357 string TrimEnd(Params char[] trimChars)
358
359 """
360 raise NotImplementedError()
361
362 def TrimStart(self, *args, **kwargs):
363 r"""
364 string TrimStart(Params char[] trimChars)
365
366 """
367 raise NotImplementedError()
368
369 #def Chars(self, *args, **kwargs):
370 # r"""
371 # char Chars(int index) {get;}
372 #
373 # """
374 # raise NotImplementedError()
375
376 @property
377 def Length(self):
378 r"""
379 Int32 Length {get;}
380
381 >>> s = String("abc")
382 >>> s.Length
383 3
384 """
385 return len(self)
386
387
388 # ----------------------------------------------------------------
389 #
390 # Expression Evaluator
391 #
392 class ExpressionSyntaxError(Exception):
393 pass
394
395
396 class _Parser(object):
397 """
398 Base class for a lexer/parser that has the rules defined as methods
399 """
400 tokens = ()
401 precedence = ()
402
403 def __init__(self, **kw):
404 self.debug = kw.get('debug', 0)
405 self.names = {}
406 try:
407 modname = os.path.split(os.path.splitext(__file__)[0])[
408 1] + "_" + self.__class__.__name__
409 except:
410 modname = "parser" + "_" + self.__class__.__name__
411 self.debugfile = modname + ".dbg"
412 self.tabmodule = modname + "_" + "parsetab"
413 # print self.debugfile, self.tabmodule
414
415 # Build the lexer and parser
416 lex.lex(module=self, debug=self.debug)
417 yacc.yacc(module=self,
418 debug=self.debug,
419 debugfile=self.debugfile,
420 tabmodule=self.tabmodule)
421
422 def _parse(self, s):
423 yacc.parse(s)
424
425
426 class MSBuildExpressionEvaluator(_Parser):
427 r"""
428 >>> parser = MSBuildExpressionEvaluator()
429 >>> print(parser.evaluate("$(X)", {"X": "xxx"}))
430 xxx
431 >>> print(parser.evaluate("$(X.Y)", {"X": {"Y": "xxx"}}))
432 xxx
433 >>> parser.evaluate("$(X.Y.Length)", {"X": {"Y": "xxx"}})
434 3
435 >>> print(parser.evaluate("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
436 yz
437 """
438 def __init__(self, **kwargs):
439 _Parser.__init__(self, **kwargs)
440 self._props = {}
441 self._result = None
442
443 # Lexing Rules
444 tokens = (
445 "SQUOTE", "DQUOTE",
446 "NUMBER", "MSBUILD_LITERAL",
447 "SQ_LITERAL", "DQ_LITERAL",
448 "REF_START",
449 "RPAREN", "LPAREN", "ARGSEP",
450 "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
451 "NAME", "PROP",
452 "CMP_EQ", "CMP_NE", "CMP_LT",
453 "CMP_LE", "CMP_GE", "CMP_GT",
454 "NEGATE",
455 "LOGICAL_AND", "LOGICAL_OR",
456 )
457
458 t_SQUOTE = r"'"
459 t_DQUOTE = r'"'
460
461 def t_NUMBER(self, t):
462 r"""[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?"""
463 if "." not in t.value and "e" not in t.value:
464 t.value = int(t.value)
465 else:
466 t.value = float(t.value)
467 return t
468
469 t_MSBUILD_LITERAL = r"`[^`]*`"
470
471 t_REF_START = r"[$%]\("
472 t_LPAREN = r'\('
473 t_RPAREN = r'\)'
474 t_LSQBRACKET = r'\['
475 t_RSQBRACKET = r'\]'
476 t_DBLCOLON = r'::'
477 t_ARGSEP = r'\s*,\s*'
478 t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
479 t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
480
481 t_SQ_LITERAL = r"'[^']*'"
482 t_DQ_LITERAL = r'"[^"]*"'
483
484 t_CMP_EQ = r"\s*==\s*"
485 t_CMP_NE = r"\s*!=\s*"
486 t_CMP_LT = r"\s*<\s*"
487 t_CMP_LE = r"\s*<=\s*"
488 t_CMP_GE = r"\s*>=\s*"
489 t_CMP_GT = r"\s*>\s*"
490
491 t_NEGATE = r"\s*!\s*"
492
493 t_LOGICAL_AND = r"(?<!\w)\s*[aA][nN][dD]\s*(?!\w)"
494 t_LOGICAL_OR = r"(?<!\w)\s*[oO][rR]\s*(?!\w)"
495
496 # Parsing Rules
497 precedence = (
498 ('left', 'LOGICAL_AND'),
499 ('left', 'LOGICAL_OR'),
500 ('right', 'NEGATE'),
501 )
502
503 def p_cmp_expression(self, p):
504 """
505 expression : expression CMP_EQ expression
506 | expression CMP_NE expression
507 | expression CMP_LT expression
508 | expression CMP_LE expression
509 | expression CMP_GE expression
510 | expression CMP_GT expression
511 """
512 op = p[2].strip()
513 if op == "==":
514 p[0] = (p[1] == p[3])
515 elif op == "!=":
516 p[0] = (p[1] != p[3])
517 elif op == ">=":
518 p[0] = (p[1] >= p[3])
519 elif op == "<=":
520 p[0] = (p[1] <= p[3])
521 elif op == ">":
522 p[0] = (p[1] > p[3])
523 elif op == "<":
524 p[0] = (p[1] < p[3])
525 #print("cmp_expression / p[:]='{}'".format(p[:]), file=sys.stderr)
526
527 def p_landor_expression(self, p):
528 """
529 expression : expression LOGICAL_AND expression %prec LOGICAL_AND
530 | expression LOGICAL_OR expression %prec LOGICAL_OR
531 """
532 op = p[2].strip()
533 if op == "and":
534 p[0] = p[1] and p[3]
535 else:
536 p[0] = p[1] or p[3]
537 #print("cmp_expression / p[:]='{}'".format(p[:]), file=sys.stderr)
538
539 def p_negate_expression(self, p):
540 """
541 expression : NEGATE expression %prec NEGATE
542 """
543 p[0] = not p[2]
544 #print("negate_expression / p[:]='{}'".format(p[:]), file=sys.stderr)
545
546 def p_expression_group(self, p):
547 """
548 expression : LPAREN expression RPAREN
549 """
550 p[0] = p[2]
551 #print("expression_group / p[1:]='{}'".format(p[1:]), file=sys.stderr)
552
553 def p_value_expression(self, p):
554 """
555 expression : value
556 """
557 p[0] = p[1]
558 self._result = p[0]
559 #print("value_expression / p[0]={}".format(p[0]), file=sys.stderr)
560
561 def p_ref(self, p):
562 """
563 ref : REF_START refcontent RPAREN
564 | SQUOTE ref SQUOTE
565 | DQUOTE ref DQUOTE
566 """
567 p[0] = p[2]
568 #print("ref / p[0]='{}'".format(p[0]), file=sys.stderr)
569
570 def p_refcontent(self, p):
571 """
572 refcontent : propcallchain
573 | nsref propcallchain
574 """
575 #print("refcontent / p[:]='{}'".format(p[:]), file=sys.stderr)
576 #p[0] = "".join([k for k in p[:] if k])
577 if len(p) == 2:
578 p[0] = p[1]
579 # TODO: with nsref
580
581 def p_propcallchain(self, p):
582 """
583 propcallchain : NAME
584 | NAME propcall
585 | propcallchain propcall
586 """
587 res = self._props
588 res = res[p[1]] # rootkey
589 if isinstance(res, six.string_types):
590 res = String(res)
591 if len(p) == 3:
592 if not isinstance(p[2][0], (tuple,)):
593 p[2] = (p[2],)
594 for name, args in p[2]:
595 n = name[1:]
596 if n in res:
597 res = res[n]
598 if isinstance(res, six.string_types):
599 res = String(res)
600 elif hasattr(res, n):
601 res = getattr(res, n)
602 if args is not None: # callable
603 res = res(*args)
604 p[0] = res
605 #print("propcallchain / p[:]='{}'".format(p[:]), file=sys.stderr)
606
607 def p_nsref(self, p):
608 """
609 nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
610 | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
611 """
612 p[0] = "".join([s for s in p[1:] if s])
613 #print("nsref / p[:]='{}'".format(p[:]), file=sys.stderr)
614
615 def p_prop(self, p):
616 """
617 prop : PROP
618 | prop prop
619 """
620 p[0] = "".join([s for s in p[1:] if s])
621 #print("prop / p[:]='{}'".format(p[:]), file=sys.stderr)
622 #p[0] = "".join([k for k in p[:] if k])
623
624 def p_propcall(self, p):
625 """
626 propcall : PROP
627 | PROP call
628 | propcall propcall
629 """
630 if len(p) == 2:
631 p[0] = ((p[1], None))
632 else:
633 p[0] = tuple(p[1:])
634 #print("propcall / p[0]={}".format(p[0]), file=sys.stderr)
635 #p[0] = "".join([k for k in p[:] if k])
636
637 def p_funccall(self, p):
638 """
639 funccall : NAME call
640 """
641 #print("funccall / p[:]='{}'".format(p[:]), file=sys.stderr)
642 #p[0] = "".join([k for k in p[:] if k])
643
644 def p_call(self, p):
645 """
646 call : LPAREN RPAREN
647 | LPAREN args RPAREN
648 """
649 if len(p) == 4:
650 p[0] = p[2]
651 else:
652 p[0] = []
653 #print("call / p[0]={}".format(p[0]), file=sys.stderr)
654
655 def p_args(self, p):
656 """
657 args : value
658 | value ARGSEP value
659 | args ARGSEP value
660 """
661 p[0] = p[1::2]
662 #print("args / p[0]={}".format(p[0]), file=sys.stderr)
663 #p[0] = "".join([k for k in p[:] if k])
664
665 def p_value(self, p):
666 """
667 value : ref
668 | funccall
669 | literal
670 | number
671 """
672 p[0] = p[1]
673 #print("value / p[0]={}".format(p[0]), file=sys.stderr)
674 #p[0] = "".join([k for k in p[:] if k])
675
676 def p_literal(self, p):
677 """
678 literal : MSBUILD_LITERAL
679 | SQ_LITERAL
680 | DQ_LITERAL
681 """
682 if p[1][0] == "`" and p[1][-1] == "`":
683 p[0] = p[1][1:-1]
684 else:
685 p[0] = eval(p[1])
686 #print("literal / p[0]={}".format(p[0]), file=sys.stderr)
687
688 def p_number(self, p):
689 """
690 number : NUMBER
691 """
692 p[0] = p[1]
693 #print("number / p[0]={}".format(p[0]), file=sys.stderr)
694
695 def p_error(self, p):
696 if p:
697 raise ExpressionSyntaxError(
698 "Syntax error at '{}'".format(p.value))
699 else:
700 raise ExpressionSyntaxError(
701 "Syntax error at EOF")
702
703 def evaluate(self, s, props={}):
704 self._props = props
705 self._result = None
706 self._parse(s)
707 return self._result
708
709 #
710 if __name__ == '__main__':
711 import doctest
712 doctest.testmod()
713 #_DEBUG = False
714 #parser = MSBuildExpressionEvaluator(debug=_DEBUG)
715
716 # parse
717 #parser.evaluate("$(X.Y('zzz').Z(`ttt`).Length)")
718 #parser.evaluate("$(Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
719 #print("", file=sys.stderr)
720 #parser.evaluate("$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
721 #print("", file=sys.stderr)
722 #parser.evaluate("`xx` == $([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
723 #print("", file=sys.stderr)
724 #parser.evaluate('''`xx` == "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
725 #print("", file=sys.stderr)
726 #parser.evaluate('''"xx" != "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
727 #print("", file=sys.stderr)
728 #parser.evaluate('''!$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Contains("zzz"))''')
729 #print("", file=sys.stderr)
730 #parser.evaluate('''$(Abcde.Contains("zzz")) or !$(Abcde.Contains("yyy"))''')
731 #print("", file=sys.stderr)
732 #parser.evaluate('''HasTrailingSlash($(Abcde.Contains("zzz"))) or !Exists($(Abcde.Contains("yyy")))''')
2017-11-08 11:15 update: Ugh, ply
I was tempted to stop at the previous update, but I ran into one more gotcha, so for the record:
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import os
8 import sys
9 import logging
10
11 import six
12 import ply.lex as lex
13 import ply.yacc as yacc
14
15
16 if sys.version[0] == '2':
17 str = unicode
18
19
20 _logger = logging.getLogger(__name__)
21
22
23 # ----------------------------------------------------------------
24 #
25 # Non-Python Types Emulation
26 #
27 class String(str):
28 r"""
29 >>> s = String("abc")
30 >>> isinstance(s, (str,))
31 True
32 >>> isinstance(s, six.string_types)
33 True
34 """
35 def __new__(cls, value):
36 obj = str.__new__(cls, value)
37 return obj
38
39 def __getattr__(self, name):
40 # Unfortunately, Microsoft always ignores its case...
41 from types import FunctionType
42 for attr in [
43 x for x, y in String.__dict__.items()]:
44
45 if attr.lower() == name.lower():
46 return getattr(self, attr)
47
48 #def Clone(self, *args, **kwargs):
49 # r"""
50 # Object Clone()
51 #
52 # """
53 # raise NotImplementedError()
54
55 def CompareTo(self, value):
56 r"""
57 int CompareTo(Object value)
58 int CompareTo(string strB)
59
60 >>> tab = [
61 ... # (lhs, rhs, expected)
62 ... # same length
63 ... ("A", "A", 0),
64 ... ("A", "a", 1),
65 ... ("a", "A", -1),
66 ... ("AAA", "AAA", 0),
67 ... ("AAA", "aAA", 1),
68 ... ("aAA", "AAA", -1),
69 ... #
70 ... # len(self) > len(rhs)
71 ... ("AAAa", "AAA", 1),
72 ... ("AAAa", "aAA", 1),
73 ... ("aAAa", "AAA", 1),
74 ... ("aAAaa", "AAA", 1),
75 ... #
76 ... # len(self) < len(rhs)
77 ... ("AAA", "AAAA", -1),
78 ... ("AAA", "aAAA", -1),
79 ... ("aAA", "AAAA", -1),
80 ... ("aAA", "AAAAA", -1),
81 ... ]
82 >>> for lhs, rhs, expected in tab:
83 ... result = String(lhs).CompareTo(rhs)
84 ... assert result == expected, str((lhs, rhs, expected, result))
85 ... #from ._powershell import exec_single_command
86 ... #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
87 ... #assert expected == expected2, str((lhs, rhs, expected2, expected))
88 ... #assert result == expected2, str((lhs, rhs, expected2, result))
89 >>>
90 """
91 lendiff = len(self) - len(value)
92 if lendiff == 0:
93 # this logic is the same as python2's cmp
94 return (value > self) - (value < self)
95 return lendiff / abs(lendiff)
96
97 def Contains(self, value):
98 r"""
99 bool Contains(string value)
100
101 >>> String("AAA").Contains("AAA")
102 True
103 >>> String(" AAA ").Contains("AAA")
104 True
105 >>> String("BBB").Contains("AAA")
106 False
107 >>> String("AAA").Contains(" AAA ")
108 False
109 """
110 return value in self
111
112 #def CopyTo(self, *args, **kwargs):
113 # r"""
114 # Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
115 #
116 # """
117 # raise NotImplementedError()
118
119 def EndsWith(self, *args, **kwargs):
120 r"""
121 bool EndsWith(string value)
122 bool EndsWith(string value, StringComparison comparisonType)
123 bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
124
125 """
126 raise NotImplementedError()
127
128 def Equals(self, *args, **kwargs):
129 r"""
130 bool Equals(Object obj)
131 bool Equals(string value)
132 bool Equals(string value, StringComparison comparisonType)
133
134 """
135 raise NotImplementedError()
136
137 #def GetEnumerator(self, *args, **kwargs):
138 # r"""
139 # CharEnumerator GetEnumerator()
140 #
141 # """
142 # raise NotImplementedError()
143
144 #def GetHashCode(self, *args, **kwargs):
145 # r"""
146 # int GetHashCode()
147 #
148 # """
149 # raise NotImplementedError()
150
151 #def GetType(self, *args, **kwargs):
152 # r"""
153 # type GetType()
154 #
155 # """
156 # raise NotImplementedError()
157
158 #def GetTypeCode(self, *args, **kwargs):
159 # r"""
160 # TypeCode GetTypeCode()
161 #
162 # """
163 # raise NotImplementedError()
164
165 def IndexOf(self, *args, **kwargs):
166 r"""
167 int IndexOf(char value)
168 int IndexOf(char value, int startIndex)
169 int IndexOf(char value, int startIndex, int count)
170 int IndexOf(string value)
171 int IndexOf(string val...
172
173 """
174 raise NotImplementedError()
175
176 def IndexOfAny(self, *args, **kwargs):
177 r"""
178 int IndexOfAny(char[] anyOf)
179 int IndexOfAny(char[] anyOf, int startIndex)
180 int IndexOfAny(char[] anyOf, int startIndex, int count)
181
182 """
183 raise NotImplementedError()
184
185 def Insert(self, *args, **kwargs):
186 r"""
187 string Insert(int startIndex, string value)
188
189 """
190 raise NotImplementedError()
191
192 #def IsNormalized(self, *args, **kwargs):
193 # r"""
194 # bool IsNormalized()
195 # bool IsNormalized(Text.NormalizationForm normalizationForm)
196 #
197 # """
198 # raise NotImplementedError()
199
200 def LastIndexOf(self, *args, **kwargs):
201 r"""
202 int LastIndexOf(char value)
203 int LastIndexOf(char value, int startIndex)
204 int LastIndexOf(char value, int startIndex, int count)
205 int LastIndexOf(string value)
206 int La...
207
208 """
209 raise NotImplementedError()
210
211 def LastIndexOfAny(self, *args, **kwargs):
212 r"""
213 int LastIndexOfAny(char[] anyOf)
214 int LastIndexOfAny(char[] anyOf, int startIndex)
215 int LastIndexOfAny(char[] anyOf, int startIndex, int count)
216
217 """
218 raise NotImplementedError()
219
220 #def Normalize(self, *args, **kwargs):
221 # r"""
222 # string Normalize()
223 # string Normalize(Text.NormalizationForm normalizationForm)
224 #
225 # """
226 # raise NotImplementedError()
227
228 def PadLeft(self, *args, **kwargs):
229 r"""
230 string PadLeft(int totalWidth)
231 string PadLeft(int totalWidth, char paddingChar)
232
233 """
234 raise NotImplementedError()
235
236 def PadRight(self, *args, **kwargs):
237 r"""
238 string PadRight(int totalWidth)
239 string PadRight(int totalWidth, char paddingChar)
240
241 """
242 raise NotImplementedError()
243
244 def Remove(self, *args, **kwargs):
245 r"""
246 string Remove(int startIndex, int count)
247 string Remove(int startIndex)
248
249 """
250 raise NotImplementedError()
251
252 def Replace(self, oldValue, newValue):
253 r"""
254 string Replace(char oldChar, char newChar)
255 string Replace(string oldValue, string newValue)
256
257 >>> print(String("AAA").Replace("AAA", "BBB"))
258 BBB
259 """
260 return String(self.replace(oldValue, newValue))
261
262 def Split(self, *args, **kwargs):
263 r"""
264 string[] Split(Params char[] separator)
265 string[] Split(char[] separator, int count)
266 string[] Split(char[] separator, StringSplitOptions options)
267 string[] Spl...
268
269 """
270 raise NotImplementedError()
271
272 def StartsWith(self, *args, **kwargs):
273 r"""
274 bool StartsWith(string value)
275 bool StartsWith(string value, StringComparison comparisonType)
276 bool StartsWith(string value, bool ignoreCase, Globalizati...
277
278 """
279 raise NotImplementedError()
280
281 def Substring(self, startIndex, length=-1):
282 r"""
283 string Substring(int startIndex)
284 string Substring(int startIndex, int length)
285
286 >>> print(String("abc").Substring(0))
287 abc
288 >>> print(String("abc").Substring(0, 1))
289 a
290 >>> print(String("abc").Substring(1))
291 bc
292 >>> print(String("abc").Substring(1, 1))
293 b
294 >>> print(String("abc").Substring(1, 2))
295 bc
296 >>> print(String("abc").SubString(1, 2))
297 bc
298 """
299 # TODO: "System.String.Substring" raise Exception if
300 # length is larger than actual length.
301 if length >= 0:
302 return String(self[startIndex:][:length])
303 return String(self[startIndex:])
304
305 #def ToCharArray(self, *args, **kwargs):
306 # r"""
307 # char[] ToCharArray()
308 # char[] ToCharArray(int startIndex, int length)
309 #
310 # """
311 # raise NotImplementedError()
312
313 def ToLower(self, *args, **kwargs):
314 r"""
315 string ToLower()
316 string ToLower(Globalization.CultureInfo culture)
317
318 """
319 raise NotImplementedError()
320
321 #def ToLowerInvariant(self, *args, **kwargs):
322 # r"""
323 # string ToLowerInvariant()
324 #
325 # """
326 # raise NotImplementedError()
327
328 #def ToString(self, *args, **kwargs):
329 # r"""
330 # string ToString()
331 # string ToString(IFormatProvider provider)
332 #
333 # """
334 # raise NotImplementedError()
335
336 def ToUpper(self, *args, **kwargs):
337 r"""
338 string ToUpper()
339 string ToUpper(Globalization.CultureInfo culture)
340
341 """
342 raise NotImplementedError()
343
344 #def ToUpperInvariant(self, *args, **kwargs):
345 # r"""
346 # string ToUpperInvariant()
347 #
348 # """
349 # raise NotImplementedError()
350
351 def Trim(self, *args, **kwargs):
352 r"""
353 string Trim(Params char[] trimChars)
354 string Trim()
355
356 """
357 raise NotImplementedError()
358
359 def TrimEnd(self, *args, **kwargs):
360 r"""
361 string TrimEnd(Params char[] trimChars)
362
363 """
364 raise NotImplementedError()
365
366 def TrimStart(self, *args, **kwargs):
367 r"""
368 string TrimStart(Params char[] trimChars)
369
370 """
371 raise NotImplementedError()
372
373 #def Chars(self, *args, **kwargs):
374 # r"""
375 # char Chars(int index) {get;}
376 #
377 # """
378 # raise NotImplementedError()
379
380 @property
381 def Length(self):
382 r"""
383 Int32 Length {get;}
384
385 >>> s = String("abc")
386 >>> s.Length
387 3
388 """
389 return len(self)
390
391
392 # ----------------------------------------------------------------
393 #
394 # Expression Evaluator
395 #
396 class ExpressionError(Exception):
397 pass
398
399 class ExpressionInvalidCharacterError(ExpressionError):
400 pass
401
402 class ExpressionSyntaxError(ExpressionError):
403 pass
404
405
406 class _Parser(object):
407 """
408 Base class for a lexer/parser that has the rules defined as methods
409 """
410 tokens = ()
411 precedence = ()
412
413 def __init__(self, **kw):
414 self.debug = kw.get('debug', 0)
415 self.names = {}
416 try:
417 modname = os.path.split(os.path.splitext(__file__)[0])[
418 1] + "_" + self.__class__.__name__
419 except:
420 modname = "parser" + "_" + self.__class__.__name__
421 self.debugfile = modname + ".dbg"
422 self.tabmodule = modname + "_" + "parsetab"
423 # print self.debugfile, self.tabmodule
424
425 # Build the lexer and parser
426 lex.lex(module=self, debug=self.debug)
427 yacc.yacc(module=self,
428 debug=self.debug,
429 debugfile=self.debugfile,
430 tabmodule=self.tabmodule)
431
432 def _parse(self, s):
433 yacc.parse(s)
434
435
436 #
437 # TODO: To improve the performance, we should provide the method
438 # which can reset all parser and lexer's states.
439 #
440
441
442 class MSBuildExpressionEvaluator(_Parser):
443 r"""
444 >>> parser = MSBuildExpressionEvaluator()
445 >>> parser.evaluate("1", {})
446 1
447 >>> parser = MSBuildExpressionEvaluator()
448 >>> print(parser.evaluate("$(X)", {"X": "xxx"}))
449 xxx
450 >>> parser = MSBuildExpressionEvaluator()
451 >>> print(parser.evaluate("$(X.Y)", {"X": {"Y": "xxx"}}))
452 xxx
453 >>> parser = MSBuildExpressionEvaluator()
454 >>> parser.evaluate("$(X.Y.Length)", {"X": {"Y": "xxx"}})
455 3
456 >>> parser = MSBuildExpressionEvaluator()
457 >>> print(parser.evaluate("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
458 yz
459 >>> parser = MSBuildExpressionEvaluator()
460 >>> print(parser.evaluate("'$(X.Y)'", {"X": {"Y": "xyz"}}))
461 xyz
462 >>> parser = MSBuildExpressionEvaluator()
463 >>> parser.evaluate("1", {})
464 1
465 >>> parser = MSBuildExpressionEvaluator()
466 >>> parser.evaluate("'xyz'", {}) == 'xyz'
467 True
468 >>> parser = MSBuildExpressionEvaluator()
469 >>> parser.evaluate("'xyz' != 'zzz'", {})
470 True
471 """
472 def __init__(self, **kwargs):
473 _Parser.__init__(self, **kwargs)
474 self._kwargs = dict(**kwargs)
475 self._props = {}
476 self._result = None
477
478 # Lexing Rules
479 tokens = (
480 "SQUOTE", "DQUOTE",
481 "NUMBER", "MSBUILD_LITERAL",
482 "SQ_LITERAL", "DQ_LITERAL",
483 "REF_START",
484 "RPAREN", "LPAREN", "ARGSEP",
485 "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
486 "NAME", "PROP",
487 "CMP_EQ", "CMP_NE", "CMP_LT",
488 "CMP_LE", "CMP_GE", "CMP_GT",
489 "NEGATE",
490 "LOGICAL_AND", "LOGICAL_OR",
491 )
492
493 t_SQUOTE = r"'"
494 t_DQUOTE = r'"'
495
496 def t_NUMBER(self, t):
497 r"""[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?"""
498 if "." not in t.value and "e" not in t.value:
499 t.value = int(t.value)
500 else:
501 t.value = float(t.value)
502 return t
503
504 t_MSBUILD_LITERAL = r"`[^`]*`"
505
506 t_REF_START = r"[$%]\("
507 t_LPAREN = r'\('
508 t_RPAREN = r'\)'
509 t_LSQBRACKET = r'\['
510 t_RSQBRACKET = r'\]'
511 t_DBLCOLON = r'::'
512 t_ARGSEP = r'\s*,\s*'
513 t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
514 t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
515
516 t_SQ_LITERAL = r"'[^']*'"
517 t_DQ_LITERAL = r'"[^"]*"'
518
519 t_CMP_EQ = r"\s*==\s*"
520 t_CMP_NE = r"\s*!=\s*"
521 t_CMP_LT = r"\s*<\s*"
522 t_CMP_LE = r"\s*<=\s*"
523 t_CMP_GE = r"\s*>=\s*"
524 t_CMP_GT = r"\s*>\s*"
525
526 t_NEGATE = r"\s*!\s*"
527
528 t_LOGICAL_AND = r"(?<!\w)\s*[aA][nN][dD]\s*(?!\w)"
529 t_LOGICAL_OR = r"(?<!\w)\s*[oO][rR]\s*(?!\w)"
530
531 def t_error(self, t):
532 #t.lexer.skip(1)
533 raise ExpressionInvalidCharacterError(
534 "Illegal character {}".format(repr(t.value[0])))
535
536 # Parsing Rules
537 precedence = (
538 ('left', 'LOGICAL_AND'),
539 ('left', 'LOGICAL_OR'),
540 ('right', 'NEGATE'),
541 )
542
543 def p_cmp_expression(self, p):
544 """
545 expression : expression CMP_EQ expression
546 | expression CMP_NE expression
547 | expression CMP_LT expression
548 | expression CMP_LE expression
549 | expression CMP_GE expression
550 | expression CMP_GT expression
551 """
552 op = p[2].strip()
553 if op == "==":
554 p[0] = (p[1] == p[3])
555 elif op == "!=":
556 p[0] = (p[1] != p[3])
557 elif op == ">=":
558 p[0] = (p[1] >= p[3])
559 elif op == "<=":
560 p[0] = (p[1] <= p[3])
561 elif op == ">":
562 p[0] = (p[1] > p[3])
563 elif op == "<":
564 p[0] = (p[1] < p[3])
565 self._result = p[0]
566 _logger.debug("result=%r", self._result)
567
568 def p_landor_expression(self, p):
569 """
570 expression : expression LOGICAL_AND expression %prec LOGICAL_AND
571 | expression LOGICAL_OR expression %prec LOGICAL_OR
572 """
573 op = p[2].strip()
574 if op == "and":
575 p[0] = p[1] and p[3]
576 else:
577 p[0] = p[1] or p[3]
578 self._result = p[0]
579
580 def p_negate_expression(self, p):
581 """
582 expression : NEGATE expression %prec NEGATE
583 """
584 p[0] = not p[2]
585 self._result = p[0]
586
587 def p_expression_group(self, p):
588 """
589 expression : LPAREN expression RPAREN
590 """
591 p[0] = p[2]
592 self._result = p[0]
593
594 def p_value_expression(self, p):
595 """
596 expression : value
597 """
598 p[0] = p[1]
599 self._result = p[0]
600
601 def p_ref(self, p):
602 """
603 ref : REF_START refcontent RPAREN
604 | SQUOTE ref SQUOTE
605 | DQUOTE ref DQUOTE
606 """
607 p[0] = p[2]
608 self._result = p[0]
609
610 def p_refcontent(self, p):
611 """
612 refcontent : propcallchain
613 | nsref propcallchain
614 """
615 if len(p) == 2:
616 p[0] = p[1]
617 self._result = p[0]
618 # TODO: with nsref
619
620 def p_propcallchain(self, p):
621 """
622 propcallchain : NAME
623 | NAME propcall
624 | propcallchain propcall
625 """
626 res = self._props
627 res = res[p[1]] # rootkey
628 if isinstance(res, six.string_types):
629 res = String(res)
630 if len(p) == 3:
631 if not isinstance(p[2][0], (tuple,)):
632 p[2] = (p[2],)
633 for name, args in p[2]:
634 n = name[1:]
635 if n in res:
636 res = res[n]
637 if isinstance(res, six.string_types):
638 res = String(res)
639 elif hasattr(res, n):
640 res = getattr(res, n)
641 if args is not None: # callable
642 res = res(*args)
643 p[0] = res
644 self._result = p[0]
645
646 def p_nsref(self, p):
647 """
648 nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
649 | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
650 """
651 p[0] = "".join([s for s in p[1:] if s])
652 self._result = p[0]
653
654 def p_prop(self, p):
655 """
656 prop : PROP
657 | prop prop
658 """
659 p[0] = "".join([s for s in p[1:] if s])
660 self._result = p[0]
661
662 def p_propcall(self, p):
663 """
664 propcall : PROP
665 | PROP call
666 | propcall propcall
667 """
668 if len(p) == 2:
669 p[0] = ((p[1], None))
670 else:
671 p[0] = tuple(p[1:])
672 self._result = p[0]
673
674 def p_funccall(self, p):
675 """
676 funccall : NAME call
677 """
678 self._result = p[0]
679
680 def p_call(self, p):
681 """
682 call : LPAREN RPAREN
683 | LPAREN args RPAREN
684 """
685 if len(p) == 4:
686 p[0] = p[2]
687 else:
688 p[0] = []
689 self._result = p[0]
690
691 def p_args(self, p):
692 """
693 args : value
694 | value ARGSEP value
695 | args ARGSEP value
696 """
697 p[0] = p[1::2]
698 self._result = p[0]
699
700 def p_value(self, p):
701 """
702 value : ref
703 | funccall
704 | literal
705 | number
706 """
707 p[0] = p[1]
708 self._result = p[0]
709
710 def p_literal(self, p):
711 """
712 literal : MSBUILD_LITERAL
713 | SQ_LITERAL
714 | DQ_LITERAL
715 """
716 if p[1][0] == "`" and p[1][-1] == "`":
717 p[0] = p[1][1:-1]
718 else:
719 p[0] = eval(p[1])
720 if "$(" in p[0] or "%(" in p[0]: # contains macro reference
721 innerparser = MSBuildExpressionEvaluator(
722 **self._kwargs)
723 p[0] = innerparser.evaluate(p[0], props=self._props)
724 self._result = p[0]
725
726 def p_number(self, p):
727 """
728 number : NUMBER
729 """
730 p[0] = p[1]
731 self._result = p[0]
732
733 def p_error(self, p):
734 if p:
735 raise ExpressionSyntaxError(
736 "Syntax error at '{}'".format(p.value))
737 else:
738 raise ExpressionSyntaxError(
739 "Syntax error at EOF")
740
741 #
742 def evaluate(self, s, props={}):
743 self._props = props
744 self._result = None
745 self._parse(s)
746 return self._result
747
748 #
749 if __name__ == '__main__':
750 logging.basicConfig(
751 stream=sys.stderr,
752 level=logging.DEBUG,
753 format='%(levelname)s:%(name)s:%(funcName)s:%(message)s')
754
755 import doctest
756 doctest.testmod()
There is some progress in the implementation, but what I want you to look at are the tests and the TODO I added. In this particular case it's the nested parser that causes the trouble, but put more generally: "I want a way to reset the parser without rebuilding the instance every time, and I haven't found one." Rebuilding the parser on every call really does make things feel a bit sluggish.
2017-11-09 10:15 update: Hmm, ply
Right now I'm stuck, spectacularly. ply is fine when you're parsing one big, complex grammar, but because, as a rule, you can only have one "global" parser at a time, I'm now in trouble.
Here's what's going on.
I hadn't noticed this at the time of the previous update, but MSBuild expressions can, for example, look like this:
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
  <ConfigurationType>DynamicLibrary</ConfigurationType>
  <PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
With the definition I sketched above, `|` is simply "unknown". In other words, as far as "my definition" is concerned this looks like free-form text where "anything goes". And of course you can also do this:
<MyName>c:/hoge/$([System.IO.Path]::GetFileName('%(Link.WindowsMetadataFile)')).hoge</MyName>
On top of that, the handling has to differ between what's inside a `Condition` and what isn't: inside a property that is not a `Condition`, things like `==` must not be evaluated.
So what's needed boils down to:
- 1. separating dereferencing (macro expansion) from evaluation (`==` and the like)
- 2. and, to make that possible, a step that extracts the parts subject to dereferencing
The trouble is that item 2 means preparing a separate lexer, and item 1 means writing two parsers, and this kind of multiple-lexer, multiple-parser setup is very awkward in ply. At the very least, "multiple kinds of lexer/parser must not be active at the same time", i.e. a structure where parser A calls parser B is a no-go, because everything is global. Hmm, so close…
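Just to make the intended split concrete, a toy, regex-only sketch of the two phases; it has nothing to do with the real grammar, and every name in it is made up.

# -*- coding: utf-8 -*-
# Toy sketch of the two-phase idea: phase 1 only expands $()/%() references,
# phase 2 evaluates the already-expanded text (here only a bare '==').
import re

_REF = re.compile(r"[$%]\(([A-Za-z_][A-Za-z0-9_]*)\)")

def expand(s, props):
    # dereference (macro expansion) only; no evaluation happens here
    return _REF.sub(lambda m: str(props.get(m.group(1), "")), s)

def evaluate_condition(s, props):
    # evaluation only; used solely for Condition attributes
    lhs, _, rhs = expand(s, props).partition("==")
    return lhs.strip().strip("'") == rhs.strip().strip("'")

print(expand("c:/hoge/$(Name).hoge", {"Name": "lib"}))
# -> c:/hoge/lib.hoge
print(evaluate_condition("'$(Configuration)|$(Platform)'=='Release|Win32'",
                         {"Configuration": "Release", "Platform": "Win32"}))
# -> True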
Apparently an issue about multi-parser support was raised against ply at some point, but it seems to have gone nowhere, for reasons along the lines of "it would probably require large-scale API changes, and after fifteen years we can't change the API now".
While keeping half an eye out for something else to switch to, for now I'll try to limp along with it.
2017-11-09 16:40 update: Thought it was a no-go, but maybe not, ply
The reason I thought multiple parsers/lexers couldn't coexist is that I'd seen `global` and module-level variables in the ply.yacc code, but reading it more carefully, at least the things declared `global` don't actually seem to be used. A leftover from older days, maybe?
I'll dig a bit more.
2017-11-09 18:00 update: What's a no-go is (especially) lex, ply
Got it… What rules out multiple instances is, above all, the `lex` function in the ply.lex module. It leans on globals so heavily that it can't be built more than once.
Passing `optimize=True` does at least dodge the `SyntaxError("Can't build lexer")`, but that's not the point; running with the wrong behavior is worse than not running.
And on the yacc side, even though I go to the trouble of passing `module=self`, I get warnings that suggest it's scanning the module flat anyway. Which tells me "yacc is most likely a no-go as well".
So… I have only two options left. One is "switch to something else". The other is "figure out how to get by with a single definition". (Splitting things across multiprocessing is surely out of the question in this case.)
2017-11-08 18:30 update: Ugh, ply is definitely out, so rply?
Skimming the documentation, it looks like it can do what I want, and the multiple-instance problem that ply had doesn't seem to exist, at least going by the surface of the API.
Let's switch. I have a feeling the migration will take about one full day. The way you write the action parts differs a bit, so there'll be some work there, but the BNF part is basically the same (only the way you write OR is slightly different). There's a small worry about what to do if this one doesn't work out either, but at least "on the surface" it looks good.
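To show what "the way you write OR is slightly different" means: in ply the alternatives share one docstring (`value : NUMBER` with `| NAME` on the next line), while in rply, as far as I can tell, each alternative becomes its own `@production` decorator stacked on the same function. A tiny runnable sketch with made-up tokens:

# -*- coding: utf-8 -*-
# rply: one function, two stacked @production decorators = two alternatives.
from rply import LexerGenerator, ParserGenerator

lg = LexerGenerator()
lg.add("NUMBER", r"\d+")
lg.add("NAME", r"[A-Za-z_][A-Za-z0-9_]*")
lexer = lg.build()

pg = ParserGenerator(["NUMBER", "NAME"])

@pg.production("value : NUMBER")
@pg.production("value : NAME")
def value(p):
    return p[0].getstr()

parser = pg.build()
print(parser.parse(lexer.lex("42")))    # -> 42
print(parser.parse(lexer.lex("abc")))   # -> abc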
2017-11-08 19:20 update: A first rply hello world
Something like this.
1 # -*- coding: utf-8 -*-
2 from rply import LexerGenerator
3 from rply import ParserGenerator
4 from rply.token import BaseBox
5
6 #
7 class Value(BaseBox):
8 def __init__(self, value):
9 self.value = value
10
11 def eval(self):
12 return self.value
13
14 # -------- 1st lexer/parser
15 _lg_deref = LexerGenerator()
16 _lg_deref.add('NUMBER', r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?')
17 _lexer_deref = _lg_deref.build()
18
19 #
20 _pg_deref = ParserGenerator(
21 ['NUMBER', ],
22
23 precedence=[
24 #('left', ['PLUS', 'MINUS']),
25 #('left', ['MUL', 'DIV'])
26 ]
27 )
28
29 @_pg_deref.production('expression : NUMBER')
30 def expression_number(p):
31 return Value(float(p[0].getstr()))
32
33 _parser_deref = _pg_deref.build()
34 print(_parser_deref.parse(_lexer_deref.lex('1.5e-3')).eval())
35
36 # -------- 2nd lexer/parser
37 _lg_evaluator = LexerGenerator()
38 _lg_evaluator.add('NUMBER', r'\d+')
39 _lexer_evaluator = _lg_evaluator.build()
40
41 #
42 _pg_evaluator = ParserGenerator(
43 ['NUMBER', ],
44 )
45
46 @_pg_evaluator.production('expression : NUMBER')
47 def expression_number2(p):
48 return Value(int(p[0].getstr()))
49
50 _parser_evaluator = _pg_evaluator.build()
51 print(_parser_evaluator.parse(_lexer_evaluator.lex('1')).eval())
Multiple lexers/parsers work side by side with no problem.
Except… I don't think you can use these decorators on a class (i.e. on methods). The code I've written so far assumes a class, so I'll have to change the approach. Well, the instance members in my current code are the kind that would be fine as module globals anyway, so it's tolerable. (Concretely: I made the emulation definitions of external functions such as `Exists` and `HasTrailingSlash` swappable.)
Hmm, all right, let's go with this.
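A minimal sketch of that "module globals instead of instance members" arrangement, with a swappable function table; every name here is illustrative, not the real grammar.

# -*- coding: utf-8 -*-
# rply production functions are module-level, so the externally replaceable
# pieces live in a module-global table that callers may overwrite.
from rply import LexerGenerator, ParserGenerator

FUNCS = {
    # swappable external-function emulation (illustrative)
    "hastrailingslash": lambda s: s.endswith(("/", "\\")),
}

_lg = LexerGenerator()
_lg.add("NAME", r"[A-Za-z_][A-Za-z0-9_]*")
_lg.add("LPAREN", r"\(")
_lg.add("RPAREN", r"\)")
_lg.add("SQ_LITERAL", r"'[^']*'")
_lexer = _lg.build()

_pg = ParserGenerator(["NAME", "LPAREN", "RPAREN", "SQ_LITERAL"])

@_pg.production("expr : NAME LPAREN SQ_LITERAL RPAREN")
def funccall(p):
    name = p[0].getstr().lower()
    arg = p[2].getstr()[1:-1]          # strip the quotes
    return FUNCS[name](arg)            # dispatch through the swappable table

_parser = _pg.build()
print(_parser.parse(_lexer.lex("HasTrailingSlash('c:/tmp/')")))   # -> True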
2017-11-10 01:20 addendum: this is it, rply it is
It's still unfinished, but porting the (equally unfinished) ply version over completely was done in no time. It has already gotten long, and the real finished product will be roughly three to five times this size, so right about now is the limit of what's worth posting "for someone's reference":
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import os
8 import sys
9 import re
10 import logging
11 import warnings
12
13 import six
14 from rply import LexerGenerator
15 from rply import ParserGenerator
16 #from rply.token import BaseBox # it seems basically for RPython
17
18
19 if sys.version[0] == '2':
20 str = unicode
21 else:
22 from functools import reduce
23
24
25 _logger = logging.getLogger(__name__)
26
27
28 # ----------------------------------------------------------------
29 #
30 # Internal Helpers of this module
31 #
32 def _simple_tokenize(s, tokdef):
33 """
34 Very simple lexer that is used for lexical analysis
35 where ply usage is overkill.
36
37 tokdef must be like this:
38 >>> tokdef = {
39 ... "root": [ # state id ("root" is mandatory state.)
40 ... (
41 ... re.compile(r"'"), # regexp
42 ... "SQ", # token id
43 ... "sq" # transition state
44 ... ),
45 ... # ...
46 ... ],
47 ... "sq" : [
48 ... (re.compile(r"."), "", "#pop"),
49 ... ]
50 ... }
51
52 if you were defined "tokdef", now you can use this:
53 >>> for tokid, s in _simple_tokenize("'a", tokdef):
54 ... # do something
55 ... pass
56 """
57 states = ["root"]
58 state = states[0]
59 while s:
60 for rgx, tok, trans in tokdef[state]:
61 if hasattr(rgx, "match"):
62 m = rgx.match(s)
63 else:
64 m = re.match(rgx, s)
65 if m:
66 yield tok, m.group(0)
67 if trans:
68 if trans == "#pop":
69 states.pop(-1)
70 state = states[-1]
71 else:
72 states.append(trans)
73 state = trans
74 s = s[m.span()[1]:]
75 break
76
77
78 # ----------------------------------------------------------------
79 #
80 # Non-Python Types and Functions Emulation
81 #
82 _GLOBAL_FUNCS = {
83 "hastrailingslash": lambda *args: args[0][-1] in ("/", "\\"),
84 "exists": lambda *args: os.path.exists(args[0]),
85
86 # TODO: more?
87 "[system.io.path]::combine": lambda *args: "/".join(args),
88
89 #
90 "[msbuild]::makerelative": lambda *args: os.path.relpath(args[1], args[0]),
91 "[msbuild]::add": lambda *args: sum(args),
92 "[msbuild]::multiply": lambda *args: reduce(lambda x, y: x * y, args),
93 "[msbuild]::bitwiseor": lambda *args: reduce(lambda x, y: x | y, args),
94 "[msbuild]::bitwiseand": lambda *args: reduce(lambda x, y: x & y, args),
95 # TODO: https://msdn.microsoft.com/en-us/library/dd633440.aspx
96 # Subtract
97 # Divide
98 # Modulo
99 # Escape
100 # Unescape
101 # BitwiseXor
102 # BitwiseNot
103 # DoesTaskHostExist # maybe we can't implement
104 # GetDirectoryNameOfFileAbove
105 # GetRegistryValue
106 # GetRegistryValueFromView
107 # ValueOrDefault
108 }
109
110
111 class _DotNetLoader(object):
112 def __init__(self):
113 self._clr = None
114 self._loaded_assemblies = {} # value: success or not
115
116 # value: imported assembly (as python module)
117 self._imported_assemblies = {}
118
119 def _load_assembly(self, asmn):
120 if asmn not in self._loaded_assemblies:
121 try:
122 self._clr.AddReference(asmn)
123 self._loaded_assemblies[asmn] = True
124 except Exception as e:
125 # actually it should be "System.IO.FileNotFoundException",
126 # but we can't assume it when no assemblies are loaded.
127 if "'System.IO.FileNotFoundException'" not in str(type(e)):
128 raise
129 # this nsref is not assembly (maybe module fullname)
130 self._loaded_assemblies[asmn] = False
131 return self._loaded_assemblies[asmn]
132
133 def _import_assembly(self, asmn):
134 if asmn not in self._imported_assemblies:
135 try:
136 exec("import " + asmn)
137 except ImportError:
138 raise # what should we do?
139 self._imported_assemblies[asmn] = eval(asmn)
140 return self._imported_assemblies[asmn]
141
142 def get_function(self, name_with_ns):
143 # name_with_ns: like "[System.IO.Path]::Add"
144 if self._clr is None:
145 import clr # Python for .NET (pythonnet)
146 self._clr = clr
147 ns, name = name_with_ns.split("]::")
148 ns_spl = ns[1:].split(".") # like "System", "IO", "Path"
149 asmn, modn = None, None
150 for i in range(len(ns_spl), 1, -1):
151 asmn = ".".join(ns_spl[:i - 1])
152 modn = ".".join(ns_spl[i - 1:])
153 if self._load_assembly(asmn):
154 break
155 asm = self._import_assembly(asmn)
156 return getattr(getattr(asm, modn), name)
157
158
159 _dotnetloader = _DotNetLoader()
160
161
162 class String(str):
163 r"""
164 >>> s = String("abc")
165 >>> isinstance(s, (str,))
166 True
167 >>> isinstance(s, six.string_types)
168 True
169 """
170 def __new__(cls, value):
171 obj = str.__new__(cls, value)
172 return obj
173
174 def __getattr__(self, name):
175 # Unfortunatelly, Microsoft always ignores its case...
176 from types import FunctionType
177 for attr in [
178 x for x, y in String.__dict__.items()]:
179
180 if attr.lower() == name.lower():
181 return getattr(self, attr)
182
183 #def Clone(self, *args, **kwargs):
184 # r"""
185 # Object Clone()
186 #
187 # """
188 # raise NotImplementedError()
189
190 def CompareTo(self, value):
191 r"""
192 int CompareTo(Object value)
193 int CompareTo(string strB)
194
195 >>> tab = [
196 ... # (lhs, rhs, expected)
197 ... # same length
198 ... ("A", "A", 0),
199 ... ("A", "a", 1),
200 ... ("a", "A", -1),
201 ... ("AAA", "AAA", 0),
202 ... ("AAA", "aAA", 1),
203 ... ("aAA", "AAA", -1),
204 ... #
205 ... # len(self) > len(rhs)
206 ... ("AAAa", "AAA", 1),
207 ... ("AAAa", "aAA", 1),
208 ... ("aAAa", "AAA", 1),
209 ... ("aAAaa", "AAA", 1),
210 ... #
211 ... # len(self) < len(rhs)
212 ... ("AAA", "AAAA", -1),
213 ... ("AAA", "aAAA", -1),
214 ... ("aAA", "AAAA", -1),
215 ... ("aAA", "AAAAA", -1),
216 ... ]
217 >>> for lhs, rhs, expected in tab:
218 ... result = String(lhs).CompareTo(rhs)
219 ... assert result == expected, str((lhs, rhs, expected, result))
220 ... #from ._powershell import exec_single_command
221 ... #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
222 ... #assert expected == expected2, str((lhs, rhs, expected2, expected))
223 ... #assert result == expected2, str((lhs, rhs, expected2, result))
224 >>>
225 """
226 lendiff = len(self) - len(value)
227 if lendiff == 0:
228 # this logic is the same as python2's cmp
229 return (value > self) - (value < self)
230 return lendiff / abs(lendiff)
231
232 def Contains(self, value):
233 r"""
234 bool Contains(string value)
235
236 >>> String("AAA").Contains("AAA")
237 True
238 >>> String(" AAA ").Contains("AAA")
239 True
240 >>> String("BBB").Contains("AAA")
241 False
242 >>> String("AAA").Contains(" AAA ")
243 False
244 """
245 return value in self
246
247 #def CopyTo(self, *args, **kwargs):
248 # r"""
249 # Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
250 #
251 # """
252 # raise NotImplementedError()
253
254 def EndsWith(self, *args, **kwargs):
255 r"""
256 bool EndsWith(string value)
257 bool EndsWith(string value, StringComparison comparisonType)
258 bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
259
260 """
261 raise NotImplementedError()
262
263 def Equals(self, *args, **kwargs):
264 r"""
265 bool Equals(Object obj)
266 bool Equals(string value)
267 bool Equals(string value, StringComparison comparisonType)
268
269 """
270 raise NotImplementedError()
271
272 #def GetEnumerator(self, *args, **kwargs):
273 # r"""
274 # CharEnumerator GetEnumerator()
275 #
276 # """
277 # raise NotImplementedError()
278
279 #def GetHashCode(self, *args, **kwargs):
280 # r"""
281 # int GetHashCode()
282 #
283 # """
284 # raise NotImplementedError()
285
286 #def GetType(self, *args, **kwargs):
287 # r"""
288 # type GetType()
289 #
290 # """
291 # raise NotImplementedError()
292
293 #def GetTypeCode(self, *args, **kwargs):
294 # r"""
295 # TypeCode GetTypeCode()
296 #
297 # """
298 # raise NotImplementedError()
299
300 def IndexOf(self, *args, **kwargs):
301 r"""
302 int IndexOf(char value)
303 int IndexOf(char value, int startIndex)
304 int IndexOf(char value, int startIndex, int count)
305 int IndexOf(string value)
306 int IndexOf(string val...
307
308 """
309 raise NotImplementedError()
310
311 def IndexOfAny(self, *args, **kwargs):
312 r"""
313 int IndexOfAny(char[] anyOf)
314 int IndexOfAny(char[] anyOf, int startIndex)
315 int IndexOfAny(char[] anyOf, int startIndex, int count)
316
317 """
318 raise NotImplementedError()
319
320 def Insert(self, *args, **kwargs):
321 r"""
322 string Insert(int startIndex, string value)
323
324 """
325 raise NotImplementedError()
326
327 #def IsNormalized(self, *args, **kwargs):
328 # r"""
329 # bool IsNormalized()
330 # bool IsNormalized(Text.NormalizationForm normalizationForm)
331 #
332 # """
333 # raise NotImplementedError()
334
335 def LastIndexOf(self, *args, **kwargs):
336 r"""
337 int LastIndexOf(char value)
338 int LastIndexOf(char value, int startIndex)
339 int LastIndexOf(char value, int startIndex, int count)
340 int LastIndexOf(string value)
341 int La...
342
343 """
344 raise NotImplementedError()
345
346 def LastIndexOfAny(self, *args, **kwargs):
347 r"""
348 int LastIndexOfAny(char[] anyOf)
349 int LastIndexOfAny(char[] anyOf, int startIndex)
350 int LastIndexOfAny(char[] anyOf, int startIndex, int count)
351
352 """
353 raise NotImplementedError()
354
355 #def Normalize(self, *args, **kwargs):
356 # r"""
357 # string Normalize()
358 # string Normalize(Text.NormalizationForm normalizationForm)
359 #
360 # """
361 # raise NotImplementedError()
362
363 def PadLeft(self, *args, **kwargs):
364 r"""
365 string PadLeft(int totalWidth)
366 string PadLeft(int totalWidth, char paddingChar)
367
368 """
369 raise NotImplementedError()
370
371 def PadRight(self, *args, **kwargs):
372 r"""
373 string PadRight(int totalWidth)
374 string PadRight(int totalWidth, char paddingChar)
375
376 """
377 raise NotImplementedError()
378
379 def Remove(self, *args, **kwargs):
380 r"""
381 string Remove(int startIndex, int count)
382 string Remove(int startIndex)
383
384 """
385 raise NotImplementedError()
386
387 def Replace(self, oldValue, newValue):
388 r"""
389 string Replace(char oldChar, char newChar)
390 string Replace(string oldValue, string newValue)
391
392 >>> print(String("AAA").Replace("AAA", "BBB"))
393 BBB
394 """
395 return String(self.replace(oldValue, newValue))
396
397 def Split(self, *args, **kwargs):
398 r"""
399 string[] Split(Params char[] separator)
400 string[] Split(char[] separator, int count)
401 string[] Split(char[] separator, StringSplitOptions options)
402 string[] Spl...
403
404 """
405 raise NotImplementedError()
406
407 def StartsWith(self, *args, **kwargs):
408 r"""
409 bool StartsWith(string value)
410 bool StartsWith(string value, StringComparison comparisonType)
411 bool StartsWith(string value, bool ignoreCase, Globalizati...
412
413 """
414 raise NotImplementedError()
415
416 def Substring(self, startIndex, length=-1):
417 r"""
418 string Substring(int startIndex)
419 string Substring(int startIndex, int length)
420
421 >>> print(String("abc").Substring(0))
422 abc
423 >>> print(String("abc").Substring(0, 1))
424 a
425 >>> print(String("abc").Substring(1))
426 bc
427 >>> print(String("abc").Substring(1, 1))
428 b
429 >>> print(String("abc").Substring(1, 2))
430 bc
431 >>> print(String("abc").SubString(1, 2))
432 bc
433 """
434 # TODO: "System.String.Substring" raise Exception if
435 # length is larger than actual length.
436 if length >= 0:
437 return String(self[startIndex:][:length])
438 return String(self[startIndex:])
439
440 #def ToCharArray(self, *args, **kwargs):
441 # r"""
442 # char[] ToCharArray()
443 # char[] ToCharArray(int startIndex, int length)
444 #
445 # """
446 # raise NotImplementedError()
447
448 def ToLower(self, *args, **kwargs):
449 r"""
450 string ToLower()
451 string ToLower(Globalization.CultureInfo culture)
452
453 """
454 raise NotImplementedError()
455
456 #def ToLowerInvariant(self, *args, **kwargs):
457 # r"""
458 # string ToLowerInvariant()
459 #
460 # """
461 # raise NotImplementedError()
462
463 #def ToString(self, *args, **kwargs):
464 # r"""
465 # string ToString()
466 # string ToString(IFormatProvider provider)
467 #
468 # """
469 # raise NotImplementedError()
470
471 def ToUpper(self, *args, **kwargs):
472 r"""
473 string ToUpper()
474 string ToUpper(Globalization.CultureInfo culture)
475
476 """
477 raise NotImplementedError()
478
479 #def ToUpperInvariant(self, *args, **kwargs):
480 # r"""
481 # string ToUpperInvariant()
482 #
483 # """
484 # raise NotImplementedError()
485
486 def Trim(self, *args, **kwargs):
487 r"""
488 string Trim(Params char[] trimChars)
489 string Trim()
490
491 """
492 raise NotImplementedError()
493
494 def TrimEnd(self, *args, **kwargs):
495 r"""
496 string TrimEnd(Params char[] trimChars)
497
498 """
499 raise NotImplementedError()
500
501 def TrimStart(self, *args, **kwargs):
502 r"""
503 string TrimStart(Params char[] trimChars)
504
505 """
506 raise NotImplementedError()
507
508 #def Chars(self, *args, **kwargs):
509 # r"""
510 # char Chars(int index) {get;}
511 #
512 # """
513 # raise NotImplementedError()
514
515 @property
516 def Length(self):
517 r"""
518 Int32 Length {get;}
519
520 >>> s = String("abc")
521 >>> s.Length
522 3
523 """
524 return len(self)
525
526
527 # ----------------------------------------------------------------
528 #
529 # Public module configurations
530 #
531
532
533 # ----------------------------------------------------------------
534 #
535 # Common definitions, etc. for our two different evaluators.
536 #
537
538 # ------------------
539 # utilities
540 def _tostr(p):
541 if hasattr(p, "getstr"):
542 return p.getstr()
543 return str(p)
544
545
546 # ------------------
547 # wrappers for token
548
549
550
551 # ------------------
552 # common regexp
553 _t_NUMBER = ("NUMBER", r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?')
554 _t_LPAREN = ("LPAREN", r'\s*\(\s*')
555 _t_RPAREN = ("RPAREN", r'\s*\)\s*')
556 _t_NAME = ("NAME", r"[a-zA-Z_][a-zA-Z0-9_]*")
557 _t_REF_START = ("REF_START", r"[$%]\(")
558
559 # ----------------------------------------------------------------
560 #
561 # The evaluator for the purpose of dereferencing the macros.
562 # (Basically, this is not public.)
563 #
564
565 # ------------------
566 #
567 class PropCallChain(object):
568
569 def __init__(self, data, kind):
570 self.data = data
571 self._kind = kind
572
573 def eval(self, props={}, ctx="", functions={}):
574 def _flatten_data(data, flattened):
575 for d in data:
576 if d is not None and isinstance(d[0], (tuple,)):
577 _flatten_data(d, flattened)
578 else:
579 flattened.append(d)
580
581 if self._kind == "p":
582 flattened = []
583 _flatten_data(self.data, flattened)
584 return self._eval_propcallchain(flattened, props, ctx)
585 elif self._kind == "f":
586 return self._eval_funccall(self.data, functions)
587
588 def _eval_propcallchain(self, clist, props, ctx):
589 res = props
590 def _icasesearch(d, sk, nf2blank=True):
591 targk = sk.lower()
592 for k in d.keys():
593 if k.lower() == targk:
594 return d[k]
595 if nf2blank:
596 # MSBuild deals with N/F as blank.
597 return ""
598
599 # resolve root key
600 t = _icasesearch(res, clist[0], nf2blank=False)
601 if t is not None:
602 # if input="$(X.Y)" and props has "X",
603 # now res=props["X"].
604 res = t
605 elif ctx:
606 t = _icasesearch(res, ctx, nf2blank=False)
607 if t is not None:
608 # if input="$(Y)", props has "X",
609 # and ctx is "X",
610 # now res=props["X"].
611 res = t
612 # search "Y" in props["X"]
613 res = _icasesearch(res, clist[0])
614 if t is None:
615 # MSBuild deals with N/F as blank.
616 res = ""
617
618 # nested props
619 if isinstance(res, six.string_types):
620 res = String(res)
621 if len(clist) > 1:
622 for name, args in clist[1:]:
623 n = name[1:]
624 if hasattr(res, n):
625 res = getattr(res, n)
626 if args is not None: # callable
627 res = res(*args)
628 else:
629 res = _icasesearch(res, n)
630 if isinstance(res, six.string_types):
631 res = String(res)
632 return res
633
634 def _eval_funccall(self, clist, functions):
635 if len(clist) == 2:
636 funcname = clist[0]
637 funcargs = clist[1]
638 else:
639 funcname = "".join(clist[:2])
640 funcargs = clist[2]
641 fn = functions.get(funcname.lower())
642 if not fn:
643 fn = _dotnetloader.get_function(funcname)
644 #_logger.debug("%s, %r, %r", funcname, fn, funcargs)
645 if fn:
646 return fn(*funcargs)
647
648 # ------------------
649 # lexer
650 _lg_deref = LexerGenerator()
651 _lg_deref.add(*_t_NUMBER)
652 _lg_deref.add("MSBUILD_LITERAL", r"`[^`]*`")
653 _lg_deref.add(*_t_REF_START)
654 _lg_deref.add(*_t_LPAREN)
655 _lg_deref.add(*_t_RPAREN)
656 _lg_deref.add("LSQBRACKET", r'\[')
657 _lg_deref.add("NS_END", r'\]::')
658 _lg_deref.add("ARGSEP", r'\s*,\s*')
659 _lg_deref.add(*_t_NAME)
660 _lg_deref.add("PROP", r"\.[a-zA-Z_][a-zA-Z0-9_]*")
661 _lexer_deref = _lg_deref.build()
662
663 # ------------------
664 # parser
665 _pg_deref = ParserGenerator(
666 [
667 "NUMBER", "MSBUILD_LITERAL",
668 "REF_START",
669 "RPAREN", "LPAREN", "ARGSEP",
670 "LSQBRACKET", "NS_END",
671 "NAME", "PROP",
672 ],
673 )
674
675
676 @_pg_deref.production("body : ref")
677 @_pg_deref.production("body : value")
678 def _deref_p_body(p):
679 return p[0]
680
681 @_pg_deref.production("ref : REF_START refcontent RPAREN")
682 def _deref_p_ref(p):
683 return p[1]
684
685 @_pg_deref.production("refcontent : propcallchain")
686 @_pg_deref.production("refcontent : funccall")
687 def _deref_p_refcontent(p):
688 return p[0]
689
690 @_pg_deref.production("propcallchain : NAME")
691 def _deref_p_propcallchain_1(p):
692 return PropCallChain((_tostr(p[0]),), "p")
693
694 @_pg_deref.production("propcallchain : NAME propcall")
695 def _deref_p_propcallchain_2(p):
696 return PropCallChain((_tostr(p[0]), p[1]), "p")
697
698 @_pg_deref.production("propcallchain : propcallchain propcall")
699 def _deref_p_propcallchain_3(p):
700 return PropCallChain(p, "p")
701
702 @_pg_deref.production("prop : PROP")
703 @_pg_deref.production("prop : prop prop")
704 def _deref_p_prop(p):
705 return "".join([_tostr(s) for s in p])
706
707 @_pg_deref.production("propcall : PROP")
708 def _deref_p_propcall_1(p):
709 return (_tostr(p[0]), None)
710
711 @_pg_deref.production("propcall : PROP call")
712 def _deref_p_propcall_2(p):
713 return (_tostr(p[0]), p[1])
714
715 @_pg_deref.production("propcall : propcall propcall")
716 def _deref_p_propcall_3(p):
717 return tuple(p)
718
719 @_pg_deref.production("funccall : name call")
720 def _deref_p_funccall(p):
721 return PropCallChain(p, "f")
722
723 @_pg_deref.production("name : NAME")
724 @_pg_deref.production("name : nsref NAME")
725 def _deref_p_name(p):
726 if len(p) == 2:
727 return "".join([p[0], _tostr(p[1])])
728 else:
729 return p[0]
730
731 @_pg_deref.production("nsref : LSQBRACKET NAME NS_END")
732 @_pg_deref.production("nsref : LSQBRACKET NAME prop NS_END")
733 def _deref_p_nsref(p):
734 return "".join([_tostr(s) for s in p if s])
735
736 @_pg_deref.production("call : LPAREN RPAREN")
737 @_pg_deref.production("call : LPAREN args RPAREN")
738 def _deref_p_call(p):
739 if len(p) == 3:
740 return p[1]
741 else:
742 return []
743
744 @_pg_deref.production("args : value")
745 @_pg_deref.production("args : value ARGSEP value")
746 @_pg_deref.production("args : args ARGSEP value")
747 def _deref_p_args(p):
748 return p[0::2] # ('x', ',', 'y', ',', 'z')
749
750 @_pg_deref.production("value : ref")
751 @_pg_deref.production("value : funccall")
752 @_pg_deref.production("value : literal")
753 @_pg_deref.production("value : number")
754 def _deref_p_value(p):
755 return p[0]
756
757 @_pg_deref.production("literal : MSBUILD_LITERAL")
758 def _deref_p_literal(p):
759 return _tostr(p[0])[1:-1]
760
761 @_pg_deref.production("number : NUMBER")
762 def _deref_p_number(p):
763 s = _tostr(p[0])
764 if "." in s or "e" in s:
765 return float(s)
766 else:
767 return int(s)
768
769 with warnings.catch_warnings():
770 # "shift/reduce conflicts" warnings are useful for me
771 # as developper, but this warnings is meaningless for
772 # all users of our library.
773 warnings.simplefilter("ignore")
774
775 _parser_deref = _pg_deref.build()
776
777
778 class _DerefTest(object):
779 r"""
780 # this class is not for use.
781 # just for holding _parser_deref's doctest.
782
783 >>> lexer = _lexer_deref
784 >>> parser = _parser_deref
785 >>> parser.parse(lexer.lex("1.2e-1"))
786 0.12
787 >>> print(parser.parse(lexer.lex("`aaa`")))
788 aaa
789 >>> print(parser.parse(lexer.lex("$(X)")).eval(props={"X": "y"}))
790 y
791 >>> parser.parse(lexer.lex("$(X.Length)")).eval(props={"X": "yz"})
792 2
793 """
794
795
796 # ----------------------------------------------------------------
797 #
798 # The evaluator for the purpose of evaluating the condition.
799 # (Basically, this is not public.)
800 #
801
802 # ------------------
803 # lexer
804 _lg_evaluator = LexerGenerator()
805 _lg_evaluator.add(*_t_NUMBER)
806 _lexer_evaluator = _lg_evaluator.build()
807
808 # ------------------
809 # parser
810 _pg_evaluator = ParserGenerator(
811 [
812 'NUMBER',
813 ],
814 precedence=[
815 #('left', ['AND', 'OR',]),
816 #('right', ['NEGATE',])
817 ]
818 )
819
820 @_pg_evaluator.production("number : NUMBER")
821 def _evaluator_p_number(p):
822 s = _tostr(p[0])
823 if "." in s or "e" in s:
824 return float(s)
825 else:
826 return int(s)
827
828 with warnings.catch_warnings():
829 # "shift/reduce conflicts" warnings are useful for me
830 # as developper, but this warnings is meaningless for
831 # all users of our library.
832 warnings.simplefilter("ignore")
833
834 _parser_evaluator = _pg_evaluator.build()
835
836
837 class _EvaluatorTest(object):
838 r"""
839 # this class is not for use.
840 # just for holding _parser_evaluator's doctest.
841
842 >>> lexer = _lexer_evaluator
843 >>> parser = _parser_evaluator
844 """
845
846
847 # ----------------------------------------------------------------
848 #
849 # Public APIs
850 #
851
852 #
853 # The evaluator for the purpose of dereferencing the macros.
854 #
855 class _MSBuildExpressionEvaluator(object):
856 r"""
857 >>> parser = _MSBuildExpressionEvaluator()
858 >>> print(parser.deref("$(X))", {"X": "xxx"}))
859 xxx)
860 >>> print(parser.deref("'$(X)'", {"X": "xxx"}))
861 'xxx'
862 >>> print(parser.deref("'$(X)|$(Y)'", {"X": "xxx", "Y": "yyy"}))
863 'xxx|yyy'
864 >>> print(parser.deref('"$(X)|$(Y)"', {"X": "xxx", "Y": "yyy"}))
865 "xxx|yyy"
866 >>> print(parser.deref("'$(X)abc | def$(Y)'", {"X": "xxx", "Y": "yyy"}))
867 'xxxabc | defyyy'
868 >>> print(parser.deref("$(X)", {"X": "xxx"}))
869 xxx
870 >>> print(parser.deref("$(X.Y)", {"X": {"Y": "xxx"}}))
871 xxx
872 >>> print(parser.deref("$(X.Y.Length)", {"X": {"Y": "xxx"}}))
873 3
874 >>> print(parser.deref("$(X.Y.Length)", {"x": {"Y": "xxx"}}))
875 3
876 >>> print(parser.deref("$(X.Y.Length)", {"X": {"y": "xxx"}}))
877 3
878 >>> print(parser.deref("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
879 yz
880 >>> print(parser.deref("$(X.Y.substring(1))", {"X": {"Y": "xyz"}}))
881 yz
882 >>> print(parser.deref("$(X.Y.substring ( 1 ) )", {"X": {"Y": "xyz"}}))
883 yz
884 >>> print(parser.deref("$([msbuild]::Add(1, 2))"))
885 3
886 >>> print(parser.deref("$([msbuild]::Add(1, $([msbuild]::Multiply(2, 3))))"))
887 7
888 >>> print(parser.deref("$([msbuild]::BitwiseOr(2, 4))"))
889 6
890 >>> print(parser.deref("$([msbuild]::BitwiseAnd(1, 3))"))
891 1
892 >>> print(parser.deref("$([System.IO.Path]::Combine(`a`, `b`))"))
893 a/b
894 >>> print(parser.deref("$([System.IO.Path]::GetFileName(`aaa/bbb.c`))"))
895 bbb.c
896 >>> r = parser.deref("$([MSBuild]::MakeRelative(`c:/users`, `c:/users/username`))")
897 >>> print(r.replace("\\", "/"))
898 username
899 >>> r = parser.deref("$([MSBuild]::MakeRelative(`c:/users/username`, `c:/users`))")
900 >>> print(r.replace("\\", "/"))
901 ..
902 """
903 def __init__(
904 self,
905 functions={},
906 # TODO: functions_merge: merge_pu, merge_up, dont_p (but notimpl)
907 functions_merge="merge_pu",
908 **kwargs):
909 self._functions = dict(_GLOBAL_FUNCS)
910 self._functions.update(functions)
911
912 def deref(self, s, props={}, ctx=""):
913 # for searching RPAR coressponding to REF_START
914 tokdef = {
915 "root": [
916 (r"`", "STR_S", "str"),
917 (r"\(", "LPAR", ""),
918 (r"\)", "RPAR", ""),
919 (r"[^`()]+", "OTH", ""),
920 ],
921 "str": [
922 (r"`", "STR_E", "#pop"),
923 (r"[^`]+", "STR", ""),
924 ],
925 }
926
927 lexer = _lexer_deref
928 parser = _parser_deref
929
930 result = s
931 rs_rgx = re.compile(_t_REF_START[1])
932 while rs_rgx.search(result):
933 # search last REF_START (i.e. deepest)
934 m = list(reversed(list(rs_rgx.finditer(result))))[0]
935 start, end = m.span()
936
937 # search RPAR coressponding to this REF_START
938 lpars, end = 1, start + 2
939 for tok, s in _simple_tokenize(result[end:], tokdef):
940 end += len(s)
941 if tok == "LPAR":
942 lpars += 1
943 elif tok == "RPAR":
944 lpars -= 1
945 if lpars == 0:
946 break
947
948 # parse using my parser
949 partial_res = parser.parse(lexer.lex(result[start:end])).eval(
950 props=props, ctx=ctx, functions=self._functions)
951 result = "{}{}{}".format(
952 result[:start], partial_res, result[end:])
953 return result
954
955
956
957
958 #
959 # The evaluator for the purpose of evaluating the condition.
960 #
961
962
963 #
964 if __name__ == '__main__':
965 logging.basicConfig(
966 stream=sys.stderr,
967 level=logging.DEBUG,
968 format='%(levelname)s:%(name)s:%(funcName)s:%(lineno)s:%(message)s')
969
970 import doctest
971 doctest.testmod()
Some pointers for anyone who wants to use this as a reference. First, about rply:
- Reductions in actions are not written ply-style as "p[0] = ..."; instead you return the value for the left-hand side. (A small side-by-side sketch follows this list.)
- The official samples make it look as though there were rules about what you are allowed to return, but in practice it can be anything; return whatever is convenient for you.
- For that reason, even though (as noted earlier) you indeed can't apply the decorators to a class, this is not much of a limitation as long as you shape the data carried by the reduction rules so that it is easy to evaluate at the end.
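A minimal illustration of that first point, with the ply flavor shown only as comments (written from memory of ply's documented style, so treat it as approximate):

from rply import LexerGenerator, ParserGenerator

lg = LexerGenerator()
lg.add('NUMBER', r'\d+')
lexer = lg.build()

pg = ParserGenerator(['NUMBER'])

# In ply you would write, roughly:
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = int(p[1])     # the result is assigned into p[0]
#
# In rply the action simply returns the value for the left-hand side:
@pg.production('expr : NUMBER')
def expr_number(p):
    return int(p[0].getstr())

parser = pg.build()
print(parser.parse(lexer.lex('42')))  # -> 42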
Points unrelated to rply:
- To evaluate .NET functions such as $([System.IO.Path]::GetFileName(`aaa/bbb.c`)) I'm using Python for .NET (pythonnet). Easy to use; a lifesaver, really. (A minimal usage sketch follows this list.)
- I'm torn about replacing my hand-written "String" with Python for .NET as well. (The decision is really about whether things should still mostly work for people who don't have it installed.)
- The dereferencer fundamentally has to pull the embedded references out of what is otherwise free-form, grammar-less text first; otherwise everything becomes a parse error. So the somewhat tedious "extract, then evaluate" dance is unavoidable.
- Why am I also using a homemade lexer when I'm already using rply? Partly for the reason written in the code comment, and partly because before the switch to rply it was the only means I had. It could probably be written with rply's lexer too, I suspect.
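And for reference, a minimal pythonnet sketch along the lines of what _DotNetLoader above is doing, assuming pythonnet (the clr module) and a .NET runtime are installed:

import clr                        # Python for .NET (pythonnet)
clr.AddReference("System")        # make the assembly available to the importer

from System.IO import Path        # .NET namespaces become importable modules

print(Path.GetFileName("aaa/bbb.c"))   # -> bbb.c
print(Path.Combine("a", "b"))          # separator depends on the platform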
As for how "incomplete" the code above is, there's honestly a lot to it and I can't write it all down here. If the question is what fraction of the Visual Studio projects out there it could handle against the real, official files, the answer is 0%. But if you don't need to read the standard property files installed with the system, then (assuming I also write the condition evaluator, not just deref, over the next day or so) this is probably enough for about 80% of projects. That's roughly where it stands. Once it's reasonably complete I would actually like to publish it, but I honestly have no idea when that will be; next year at the earliest, I suspect.
2017-11-10 06:30 addendum: so, suppose I want per-token actions in the rply lexer...
Actually, the code above knowingly keeps dodging one thing: quote handling. Think of C/C++ comments and you'll see why: quotes appear inside quotes, there are escapes, and so on, so this kind of thing never works out with a single one-shot regular expression. "Works for about 80% of cases" is about right, but the remaining 20% is hardly what you'd call rare.
In other words this is about executing an action per token, which also means managing transition states. The homemade lexer in the code above already does this; the reason I didn't do it on the ply side is, of course, that I didn't know how.
GNU flex, in the Unix C tradition, lets you write exactly these state transitions as part of the definitions; I forget the exact syntax, but I believe you prefix a rule with something like "<state_a>". I was hoping rply had something similar, but from a quick look at its source code, I don't think it does.
And then, while reading the rply source, it hit me: "oh, right, the lexer lives outside the parser, so...". The rply source (lexergenerator.py) has this docstring:
1 >>> from rply import LexerGenerator
2 >>> lg = LexerGenerator()
3
4 # You can then build a lexer with which you can lex a string to produce an
5 # iterator yielding tokens:
6
7 >>> lexer = lg.build()
8 >>> iterator = lexer.lex('1 + 1')
9 >>> iterator.next()
10 Token('NUMBER', '1')
11 >>> iterator.next()
12 Token('ADD', '+')
13 >>> iterator.next()
14 Token('NUMBER', '1')
15 >>> iterator.next()
16 Traceback (most recent call last):
17 ...
18 StopIteration
That's the trick. Write your own lex() that wraps the lexer built by this LexerGenerator and returns your own iterator. I can't produce it off the top of my head so I'll hold off on pasting code, but it shouldn't be hard. Once you can write that, you can express "we just entered a quote" and "we just left the quote".
2017-11-10 18:00 addendum: digging a little deeper, though I won't actually try it
rply/lexer.py
Excerpted, it looks roughly like this:
1 from rply.errors import LexingError
2 from rply.token import SourcePosition, Token
3
4 class Lexer(object):
5 def __init__(self, rules, ignore_rules):
6 # ... (snip) ...
7
8 def lex(self, s):
9 return LexerStream(self, s)
10
11
12 class LexerStream(object):
13 def __init__(self, lexer, s):
14 self.lexer = lexer
15 # ... (snip) ...
16
17 def __iter__(self):
18 return self
19
20 # ... (snip) ...
21
22 def next(self):
23 # ... (snip) ...
24 for rule in self.lexer.rules:
25 match = rule.matches(self.s, self.idx)
26 if match:
27 # ... (snip) ...
28 return token
29 # ... (snip) ...
30
31 # ... (snip) ...
So, roughly, the structure of a do-it-yourself Lexer/LexerStream would look something like this:
1 from rply.errors import LexingError
2 from rply.token import SourcePosition, Token
3 from rply.lexer import Lexer, LexerStream
4
5 class _OretekiLexer(object):
6 def __init__(self, real_lexer):
7 self._real_lexer = real_lexer
8
9 def lex(self, s):
10 return _OrahonoLexerStream(
11 self._real_lexer,
12 s)
13
14
15 class _OrahonoLexerStream(LexerStream):
16 #def __init__(self, lexer, s):
17 # super(_OrahonoLexerStream, self).__init__(lexer, s)
18
19 def next(self):
20 tok = super(_OrahonoLexerStream, self).next()
21 # inspect the token kind and do stuff here
22 return tok
The "inspect the token kind and do stuff" part is the slightly tedious bit: from the moment you transition into the "we're inside a quote" state until you leave it, you obviously have to keep concatenating and accumulating the token strings, and you also have to manufacture a correct source_pos yourself (the position of the token that started the transition can be reused). A rough sketch of this appears right after the list below.
I once tried to describe in plain words how you would actually write it, but I couldn't find a clear way to put it, so I gave up. The readability of the code takes a real hit. The points that will feel nasty to a reader of the code:
1. tokens appear that the parser never sees; they are consumed internally by the do-it-yourself LexerStream
2. tokens appear that the real lexer would never produce (manufactured by the do-it-yourself LexerStream)
3. the tokens from 2. are what actually propagate to the parser
It becomes this multi-stage affair. The correspondences are pretty hard to follow on a first reading, and there is a real risk of touching the wrong spot during maintenance. Granted, the same thing happens with tools that let you attach an action to each regex, but it's worse here because you also have to write your own Lexer/LexerStream (and the logic gets stuffed into it).
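For what it's worth, here is a rough, untested sketch of the kind of accumulation I mean, wrapping an rply LexerStream. The token names (SQ, STRING) are made up, and the exact Token/source_pos handling is an assumption I haven't verified against rply:

from rply.token import Token

class QuoteMergingStream(object):
    # hypothetical wrapper: merges everything between two SQ tokens into one STRING token
    def __init__(self, stream):
        self._stream = stream            # a real rply LexerStream

    def __iter__(self):
        return self

    def next(self):
        tok = self._stream.next()
        if tok.gettokentype() != "SQ":   # outside quotes: pass tokens through
            return tok
        start_pos = tok.getsourcepos()   # reuse the opening quote's position
        parts = []
        while True:                      # consume until the closing quote...
            tok = self._stream.next()    # (StopIteration if the quote is unterminated)
            if tok.gettokentype() == "SQ":
                break
            parts.append(tok.getstr())   # ...accumulating the raw strings
        # a token the real lexer never produces, manufactured here
        return Token("STRING", "".join(parts), start_pos)

    __next__ = next                      # Python 3 iterator protocol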
So why "won't actually try it"? The only time this machinery becomes necessary is for Unix/C-style notation like this:
1 "I'm a \"PERFECT\" human."
where escaping lets you put a double quote inside a double-quoted string. The problem is that I don't yet understand MSBuild's escaping rules in that area, so it's postponed for now. Microsoft's handling of quotes has always been its own idiosyncratic thing, off the mainstream, which is why I'm being cautious.
2017-11-11 11:00 addendum: first version of the MSBuild expression evaluator written with rply is done
I was going to stop posting code after the "2017-11-10 01:20 addendum", but there was one more bit of drama, so here it is. This is the last one.
1 # -*- coding: utf-8 -*-
2 #
3 from __future__ import absolute_import
4 from __future__ import unicode_literals
5 from __future__ import print_function
6
7 import os
8 import sys
9 import re
10 import logging
11 import warnings
12
13 import six
14 from rply import LexerGenerator
15 from rply import ParserGenerator
16 from rply.token import BaseBox
17
18
19 if sys.version[0] == '2':
20 str = unicode
21 else:
22 from functools import reduce
23
24
25 _logger = logging.getLogger(__name__)
26
27
28 # ----------------------------------------------------------------
29 #
30 # Internal Helpers of this module
31 #
32 def _simple_tokenize(s, tokdef):
33 """
34 Very simple lexer that is used for lexical analysis
35 where ply usage is overkill.
36
37 tokdef must be like this:
38 >>> tokdef = {
39 ... "root": [ # state id ("root" is mandatory state.)
40 ... (
41 ... re.compile(r"'"), # regexp
42 ... "SQ", # token id
43 ... "sq" # transition state
44 ... ),
45 ... # ...
46 ... ],
47 ... "sq" : [
48 ... (re.compile(r"."), "", "#pop"),
49 ... ]
50 ... }
51
52 if you were defined "tokdef", now you can use this:
53 >>> for tokid, s in _simple_tokenize("'a", tokdef):
54 ... # do something
55 ... pass
56 """
57 states = ["root"]
58 state = states[0]
59 while s:
60 for rgx, tok, trans in tokdef[state]:
61 if hasattr(rgx, "match"):
62 m = rgx.match(s)
63 else:
64 m = re.match(rgx, s)
65 if m:
66 yield tok, m.group(0)
67 if trans:
68 if trans == "#pop":
69 states.pop(-1)
70 state = states[-1]
71 else:
72 states.append(trans)
73 state = trans
74 s = s[m.span()[1]:]
75 break
76
77
78 # ----------------------------------------------------------------
79 #
80 # Non-Python Types and Functions Emulation
81 #
82 _GLOBAL_FUNCS = {
83 "hastrailingslash": lambda *args: args[0][-1] in ("/", "\\"),
84 "exists": lambda *args: os.path.exists(args[0]),
85
86 # TODO: more?
87 "[system.io.path]::combine": lambda *args: "/".join(args),
88
89 #
90 "[msbuild]::makerelative": lambda *args: os.path.relpath(args[1], args[0]),
91 "[msbuild]::add": lambda *args: sum(args),
92 "[msbuild]::multiply": lambda *args: reduce(lambda x, y: x * y, args),
93 "[msbuild]::bitwiseor": lambda *args: reduce(lambda x, y: x | y, args),
94 "[msbuild]::bitwiseand": lambda *args: reduce(lambda x, y: x & y, args),
95 # TODO: https://msdn.microsoft.com/en-us/library/dd633440.aspx
96 # Subtract
97 # Divide
98 # Modulo
99 # Escape
100 # Unescape
101 # BitwiseXor
102 # BitwiseNot
103 # DoesTaskHostExist # maybe we can't implement
104 # GetDirectoryNameOfFileAbove
105 # GetRegistryValue
106 # GetRegistryValueFromView
107 # ValueOrDefault
108 }
109
110
111 class _DotNetLoader(object):
112 def __init__(self):
113 self._clr = None
114 self._loaded_assemblies = {} # value: success or not
115
116 # value: imported assembly (as python module)
117 self._imported_assemblies = {}
118
119 def _load_assembly(self, asmn):
120 if asmn not in self._loaded_assemblies:
121 try:
122 self._clr.AddReference(asmn)
123 self._loaded_assemblies[asmn] = True
124 except Exception as e:
125 # actually it should be "System.IO.FileNotFoundException",
126 # but we can't assume it when no assemblies are loaded.
127 if "'System.IO.FileNotFoundException'" not in str(type(e)):
128 raise
129 # this nsref is not assembly (maybe module fullname)
130 self._loaded_assemblies[asmn] = False
131 return self._loaded_assemblies[asmn]
132
133 def _import_assembly(self, asmn):
134 if asmn not in self._imported_assemblies:
135 try:
136 exec("import " + asmn)
137 except ImportError:
138 raise # what should we do?
139 self._imported_assemblies[asmn] = eval(asmn)
140 return self._imported_assemblies[asmn]
141
142 def get_function(self, name_with_ns):
143 # name_with_ns: like "[System.IO.Path]::Add"
144 if self._clr is None:
145 import clr # Python for .NET (pythonnet)
146 self._clr = clr
147 ns, name = name_with_ns.split("]::")
148 ns_spl = ns[1:].split(".") # like "System", "IO", "Path"
149 asmn, modn = None, None
150 for i in range(len(ns_spl), 1, -1):
151 asmn = ".".join(ns_spl[:i - 1])
152 modn = ".".join(ns_spl[i - 1:])
153 if self._load_assembly(asmn):
154 break
155 asm = self._import_assembly(asmn)
156 return getattr(getattr(asm, modn), name)
157
158
159 _dotnetloader = _DotNetLoader()
160
161
162 class String(str):
163 r"""
164 >>> s = String("abc")
165 >>> isinstance(s, (str,))
166 True
167 >>> isinstance(s, six.string_types)
168 True
169 """
170 def __new__(cls, value):
171 obj = str.__new__(cls, value)
172 return obj
173
174 def __getattr__(self, name):
175 # Unfortunatelly, Microsoft always ignores its case...
176 from types import FunctionType
177 for attr in [
178 x for x, y in String.__dict__.items()]:
179
180 if attr.lower() == name.lower():
181 return getattr(self, attr)
182
183 #def Clone(self, *args, **kwargs):
184 # r"""
185 # Object Clone()
186 #
187 # """
188 # raise NotImplementedError()
189
190 def CompareTo(self, value):
191 r"""
192 int CompareTo(Object value)
193 int CompareTo(string strB)
194
195 >>> tab = [
196 ... # (lhs, rhs, expected)
197 ... # same length
198 ... ("A", "A", 0),
199 ... ("A", "a", 1),
200 ... ("a", "A", -1),
201 ... ("AAA", "AAA", 0),
202 ... ("AAA", "aAA", 1),
203 ... ("aAA", "AAA", -1),
204 ... #
205 ... # len(self) > len(rhs)
206 ... ("AAAa", "AAA", 1),
207 ... ("AAAa", "aAA", 1),
208 ... ("aAAa", "AAA", 1),
209 ... ("aAAaa", "AAA", 1),
210 ... #
211 ... # len(self) < len(rhs)
212 ... ("AAA", "AAAA", -1),
213 ... ("AAA", "aAAA", -1),
214 ... ("aAA", "AAAA", -1),
215 ... ("aAA", "AAAAA", -1),
216 ... ]
217 >>> for lhs, rhs, expected in tab:
218 ... result = String(lhs).CompareTo(rhs)
219 ... assert result == expected, str((lhs, rhs, expected, result))
220 ... #from ._powershell import exec_single_command
221 ... #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
222 ... #assert expected == expected2, str((lhs, rhs, expected2, expected))
223 ... #assert result == expected2, str((lhs, rhs, expected2, result))
224 >>>
225 """
226 lendiff = len(self) - len(value)
227 if lendiff == 0:
228 # this logic is the same as python2's cmp
229 return (value > self) - (value < self)
230 return lendiff / abs(lendiff)
231
232 def Contains(self, value):
233 r"""
234 bool Contains(string value)
235
236 >>> String("AAA").Contains("AAA")
237 True
238 >>> String(" AAA ").Contains("AAA")
239 True
240 >>> String("BBB").Contains("AAA")
241 False
242 >>> String("AAA").Contains(" AAA ")
243 False
244 """
245 return value in self
246
247 #def CopyTo(self, *args, **kwargs):
248 # r"""
249 # Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
250 #
251 # """
252 # raise NotImplementedError()
253
254 def EndsWith(self, *args, **kwargs):
255 r"""
256 bool EndsWith(string value)
257 bool EndsWith(string value, StringComparison comparisonType)
258 bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
259
260 """
261 raise NotImplementedError()
262
263 def Equals(self, *args, **kwargs):
264 r"""
265 bool Equals(Object obj)
266 bool Equals(string value)
267 bool Equals(string value, StringComparison comparisonType)
268
269 """
270 raise NotImplementedError()
271
272 #def GetEnumerator(self, *args, **kwargs):
273 # r"""
274 # CharEnumerator GetEnumerator()
275 #
276 # """
277 # raise NotImplementedError()
278
279 #def GetHashCode(self, *args, **kwargs):
280 # r"""
281 # int GetHashCode()
282 #
283 # """
284 # raise NotImplementedError()
285
286 #def GetType(self, *args, **kwargs):
287 # r"""
288 # type GetType()
289 #
290 # """
291 # raise NotImplementedError()
292
293 #def GetTypeCode(self, *args, **kwargs):
294 # r"""
295 # TypeCode GetTypeCode()
296 #
297 # """
298 # raise NotImplementedError()
299
300 def IndexOf(self, *args, **kwargs):
301 r"""
302 int IndexOf(char value)
303 int IndexOf(char value, int startIndex)
304 int IndexOf(char value, int startIndex, int count)
305 int IndexOf(string value)
306 int IndexOf(string val...
307
308 """
309 raise NotImplementedError()
310
311 def IndexOfAny(self, *args, **kwargs):
312 r"""
313 int IndexOfAny(char[] anyOf)
314 int IndexOfAny(char[] anyOf, int startIndex)
315 int IndexOfAny(char[] anyOf, int startIndex, int count)
316
317 """
318 raise NotImplementedError()
319
320 def Insert(self, *args, **kwargs):
321 r"""
322 string Insert(int startIndex, string value)
323
324 """
325 raise NotImplementedError()
326
327 #def IsNormalized(self, *args, **kwargs):
328 # r"""
329 # bool IsNormalized()
330 # bool IsNormalized(Text.NormalizationForm normalizationForm)
331 #
332 # """
333 # raise NotImplementedError()
334
335 def LastIndexOf(self, *args, **kwargs):
336 r"""
337 int LastIndexOf(char value)
338 int LastIndexOf(char value, int startIndex)
339 int LastIndexOf(char value, int startIndex, int count)
340 int LastIndexOf(string value)
341 int La...
342
343 """
344 raise NotImplementedError()
345
346 def LastIndexOfAny(self, *args, **kwargs):
347 r"""
348 int LastIndexOfAny(char[] anyOf)
349 int LastIndexOfAny(char[] anyOf, int startIndex)
350 int LastIndexOfAny(char[] anyOf, int startIndex, int count)
351
352 """
353 raise NotImplementedError()
354
355 #def Normalize(self, *args, **kwargs):
356 # r"""
357 # string Normalize()
358 # string Normalize(Text.NormalizationForm normalizationForm)
359 #
360 # """
361 # raise NotImplementedError()
362
363 def PadLeft(self, *args, **kwargs):
364 r"""
365 string PadLeft(int totalWidth)
366 string PadLeft(int totalWidth, char paddingChar)
367
368 """
369 raise NotImplementedError()
370
371 def PadRight(self, *args, **kwargs):
372 r"""
373 string PadRight(int totalWidth)
374 string PadRight(int totalWidth, char paddingChar)
375
376 """
377 raise NotImplementedError()
378
379 def Remove(self, *args, **kwargs):
380 r"""
381 string Remove(int startIndex, int count)
382 string Remove(int startIndex)
383
384 """
385 raise NotImplementedError()
386
387 def Replace(self, oldValue, newValue):
388 r"""
389 string Replace(char oldChar, char newChar)
390 string Replace(string oldValue, string newValue)
391
392 >>> print(String("AAA").Replace("AAA", "BBB"))
393 BBB
394 """
395 return String(self.replace(oldValue, newValue))
396
397 def Split(self, *args, **kwargs):
398 r"""
399 string[] Split(Params char[] separator)
400 string[] Split(char[] separator, int count)
401 string[] Split(char[] separator, StringSplitOptions options)
402 string[] Spl...
403
404 """
405 raise NotImplementedError()
406
407 def StartsWith(self, *args, **kwargs):
408 r"""
409 bool StartsWith(string value)
410 bool StartsWith(string value, StringComparison comparisonType)
411 bool StartsWith(string value, bool ignoreCase, Globalizati...
412
413 """
414 raise NotImplementedError()
415
416 def Substring(self, startIndex, length=-1):
417 r"""
418 string Substring(int startIndex)
419 string Substring(int startIndex, int length)
420
421 >>> print(String("abc").Substring(0))
422 abc
423 >>> print(String("abc").Substring(0, 1))
424 a
425 >>> print(String("abc").Substring(1))
426 bc
427 >>> print(String("abc").Substring(1, 1))
428 b
429 >>> print(String("abc").Substring(1, 2))
430 bc
431 >>> print(String("abc").SubString(1, 2))
432 bc
433 """
434 # TODO: "System.String.Substring" raise Exception if
435 # length is larger than actual length.
436 if length >= 0:
437 return String(self[startIndex:][:length])
438 return String(self[startIndex:])
439
440 #def ToCharArray(self, *args, **kwargs):
441 # r"""
442 # char[] ToCharArray()
443 # char[] ToCharArray(int startIndex, int length)
444 #
445 # """
446 # raise NotImplementedError()
447
448 def ToLower(self, *args, **kwargs):
449 r"""
450 string ToLower()
451 string ToLower(Globalization.CultureInfo culture)
452
453 """
454 raise NotImplementedError()
455
456 #def ToLowerInvariant(self, *args, **kwargs):
457 # r"""
458 # string ToLowerInvariant()
459 #
460 # """
461 # raise NotImplementedError()
462
463 #def ToString(self, *args, **kwargs):
464 # r"""
465 # string ToString()
466 # string ToString(IFormatProvider provider)
467 #
468 # """
469 # raise NotImplementedError()
470
471 def ToUpper(self, *args, **kwargs):
472 r"""
473 string ToUpper()
474 string ToUpper(Globalization.CultureInfo culture)
475
476 """
477 raise NotImplementedError()
478
479 #def ToUpperInvariant(self, *args, **kwargs):
480 # r"""
481 # string ToUpperInvariant()
482 #
483 # """
484 # raise NotImplementedError()
485
486 def Trim(self, *args, **kwargs):
487 r"""
488 string Trim(Params char[] trimChars)
489 string Trim()
490
491 """
492 raise NotImplementedError()
493
494 def TrimEnd(self, *args, **kwargs):
495 r"""
496 string TrimEnd(Params char[] trimChars)
497
498 """
499 raise NotImplementedError()
500
501 def TrimStart(self, *args, **kwargs):
502 r"""
503 string TrimStart(Params char[] trimChars)
504
505 """
506 raise NotImplementedError()
507
508 #def Chars(self, *args, **kwargs):
509 # r"""
510 # char Chars(int index) {get;}
511 #
512 # """
513 # raise NotImplementedError()
514
515 @property
516 def Length(self):
517 r"""
518 Int32 Length {get;}
519
520 >>> s = String("abc")
521 >>> s.Length
522 3
523 """
524 return len(self)
525
526
527 # ----------------------------------------------------------------
528 #
529 # Common definitions, etc. for our two different evaluators.
530 #
531
532 # ------------------
533 # utilities
534 def _tostr(p):
535 if hasattr(p, "getstr"):
536 return p.getstr()
537 return str(p)
538
539
540 # ------------------
541 # wrappers for token
542
543
544
545 # ------------------
546 # common regexp
547 _t_NUMBER = ("NUMBER", r'\s*[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?\s*')
548 _t_LPAREN = ("LPAREN", r'\s*\(\s*')
549 _t_RPAREN = ("RPAREN", r'\s*\)\s*')
550 _t_NAME = ("NAME", r"[a-zA-Z_][a-zA-Z0-9_]*")
551 _t_REF_START = ("REF_START", r"[$%]\(")
552 _t_ARGSEP = ("ARGSEP", r'\s*,\s*')
553 # ----------------------------------------------------------------
554 #
555 # The evaluator for the purpose of dereferencing the macros.
556 # (Basically, this is not public.)
557 #
558
559 # ------------------
560 #
561 class PropCallChain(object):
562
563 def __init__(self, data, kind):
564 self.data = data
565 self._kind = kind
566
567 def eval(self, props={}, ctx=""):
568 def _flatten_data(data, flattened):
569 for d in data:
570 if d is not None and isinstance(d[0], (tuple,)):
571 _flatten_data(d, flattened)
572 else:
573 flattened.append(d)
574
575 if self._kind == "p":
576 flattened = []
577 _flatten_data(self.data, flattened)
578 return self._eval_propcallchain(flattened, props, ctx)
579 elif self._kind == "f":
580 return self._eval_funccall(self.data)
581
582 def _eval_propcallchain(self, clist, props, ctx):
583 res = props
584 def _icasesearch(d, sk, nf2blank=True):
585 targk = sk.lower()
586 for k in d.keys():
587 if k.lower() == targk:
588 return d[k]
589 if nf2blank:
590 # MSBuild deals with N/F as blank.
591 return ""
592
593 # resolve root key
594 t = _icasesearch(res, clist[0], nf2blank=False)
595 if t is not None:
596 # if input="$(X.Y)" and props has "X",
597 # now res=props["X"].
598 res = t
599 elif ctx:
600 t = _icasesearch(res, ctx, nf2blank=False)
601 if t is not None:
602 # if input="$(Y)", props has "X",
603 # and ctx is "X",
604 # now res=props["X"].
605 res = t
606 # search "Y" in props["X"]
607 res = _icasesearch(res, clist[0])
608 if t is None:
609 # MSBuild deals with N/F as blank.
610 res = ""
611
612 # nested props
613 if isinstance(res, six.string_types):
614 res = String(res)
615 if len(clist) > 1:
616 for name, args in clist[1:]:
617 n = name[1:]
618 if hasattr(res, n):
619 res = getattr(res, n)
620 if args is not None: # callable
621 res = res(*args)
622 else:
623 res = _icasesearch(res, n)
624 if isinstance(res, six.string_types):
625 res = String(res)
626 return res
627
628 def _eval_funccall(self, clist):
629 if len(clist) == 2:
630 funcname = clist[0]
631 funcargs = clist[1]
632 else:
633 funcname = "".join(clist[:2])
634 funcargs = clist[2]
635 fn = _external_functions_table.get(funcname.lower())
636 if not fn:
637 fn = _dotnetloader.get_function(funcname)
638 #_logger.debug("%s, %r, %r", funcname, fn, funcargs)
639 if fn:
640 return fn(*funcargs)
641
642 # ------------------
643 # lexer
644 _lg_deref = LexerGenerator()
645 _lg_deref.add(*_t_NUMBER)
646 _lg_deref.add("MSBUILD_LITERAL", r"`[^`]*`")
647 _lg_deref.add(*_t_REF_START)
648 _lg_deref.add(*_t_LPAREN)
649 _lg_deref.add(*_t_RPAREN)
650 _lg_deref.add("LSQBRACKET", r'\[')
651 _lg_deref.add("NS_END", r'\]::')
652 _lg_deref.add(*_t_ARGSEP)
653 _lg_deref.add(*_t_NAME)
654 _lg_deref.add("PROP", r"\.[a-zA-Z_][a-zA-Z0-9_]*")
655 _lexer_deref = _lg_deref.build()
656
657 # ------------------
658 # parser
659 _pg_deref = ParserGenerator(
660 [
661 "NUMBER", "MSBUILD_LITERAL",
662 "REF_START",
663 "RPAREN", "LPAREN", "ARGSEP",
664 "LSQBRACKET", "NS_END",
665 "NAME", "PROP",
666 ],
667 )
668
669
670 @_pg_deref.production("body : ref")
671 @_pg_deref.production("body : value")
672 def _deref_p_body(p):
673 return p[0]
674
675 @_pg_deref.production("ref : REF_START refcontent RPAREN")
676 def _deref_p_ref(p):
677 return p[1]
678
679 @_pg_deref.production("refcontent : propcallchain")
680 @_pg_deref.production("refcontent : funccall")
681 def _deref_p_refcontent(p):
682 return p[0]
683
684 @_pg_deref.production("propcallchain : NAME")
685 def _deref_p_propcallchain_1(p):
686 return PropCallChain((_tostr(p[0]),), "p")
687
688 @_pg_deref.production("propcallchain : NAME propcall")
689 def _deref_p_propcallchain_2(p):
690 return PropCallChain((_tostr(p[0]), p[1]), "p")
691
692 @_pg_deref.production("propcallchain : propcallchain propcall")
693 def _deref_p_propcallchain_3(p):
694 return PropCallChain(p, "p")
695
696 @_pg_deref.production("prop : PROP")
697 @_pg_deref.production("prop : prop prop")
698 def _deref_p_prop(p):
699 return "".join([_tostr(s) for s in p])
700
701 @_pg_deref.production("propcall : PROP")
702 def _deref_p_propcall_1(p):
703 return (_tostr(p[0]), None)
704
705 @_pg_deref.production("propcall : PROP call")
706 def _deref_p_propcall_2(p):
707 return (_tostr(p[0]), p[1])
708
709 @_pg_deref.production("propcall : propcall propcall")
710 def _deref_p_propcall_3(p):
711 return tuple(p)
712
713 @_pg_deref.production("funccall : name call")
714 def _deref_p_funccall(p):
715 return PropCallChain(p, "f")
716
717 @_pg_deref.production("name : NAME")
718 @_pg_deref.production("name : nsref NAME")
719 def _deref_p_name(p):
720 if len(p) == 2:
721 return "".join([p[0], _tostr(p[1])])
722 else:
723 return p[0]
724
725 @_pg_deref.production("nsref : LSQBRACKET NAME NS_END")
726 @_pg_deref.production("nsref : LSQBRACKET NAME prop NS_END")
727 def _deref_p_nsref(p):
728 return "".join([_tostr(s) for s in p if s])
729
730 @_pg_deref.production("call : LPAREN RPAREN")
731 @_pg_deref.production("call : LPAREN args RPAREN")
732 def _deref_p_call(p):
733 if len(p) == 3:
734 return p[1]
735 else:
736 return []
737
738 @_pg_deref.production("args : value")
739 @_pg_deref.production("args : value ARGSEP value")
740 @_pg_deref.production("args : args ARGSEP value")
741 def _deref_p_args(p):
742 return p[0::2] # ('x', ',', 'y', ',', 'z')
743
744 @_pg_deref.production("value : ref")
745 @_pg_deref.production("value : funccall")
746 @_pg_deref.production("value : literal")
747 @_pg_deref.production("value : number")
748 def _deref_p_value(p):
749 return p[0]
750
751 @_pg_deref.production("literal : MSBUILD_LITERAL")
752 def _deref_p_literal(p):
753 return _tostr(p[0])[1:-1]
754
755 @_pg_deref.production("number : NUMBER")
756 def _deref_p_number(p):
757 s = _tostr(p[0]).strip()
758 if "." in s or "e" in s:
759 return float(s)
760 else:
761 return int(s)
762
763 with warnings.catch_warnings():
764 # "shift/reduce conflicts" warnings are useful for me
765 # as developper, but this warnings is meaningless for
766 # all users of our library.
767 warnings.simplefilter("ignore")
768
769 _parser_deref = _pg_deref.build()
770
771
772 class _DerefTest(object):
773 r"""
774 # this class is not for use.
775 # just for holding _parser_deref's doctest.
776
777 >>> lexer = _lexer_deref
778 >>> parser = _parser_deref
779 >>> parser.parse(lexer.lex("1.2e-1"))
780 0.12
781 >>> print(parser.parse(lexer.lex("`aaa`")))
782 aaa
783 >>> print(parser.parse(lexer.lex("$(X)")).eval(props={"X": "y"}))
784 y
785 >>> parser.parse(lexer.lex("$(X.Length)")).eval(props={"X": "yz"})
786 2
787 """
788
789
790 # ----------------------------------------------------------------
791 #
792 # The evaluator for the purpose of evaluating the condition.
793 # (Basically, this is not public.)
794 #
795
796 # ------------------
797 # lexer
798 _lg_evaluator = LexerGenerator()
799 _lg_evaluator.add(*_t_NUMBER)
800 _lg_evaluator.add("SQ_LITERAL", r"\s*'[^']*'\s*")
801 _lg_evaluator.add("DQ_LITERAL", r'\s*"[^"]*"\s*')
802 _lg_evaluator.add(*_t_LPAREN)
803 _lg_evaluator.add(*_t_RPAREN)
804 _lg_evaluator.add("CMP_EQ", "\s*==\s*")
805 _lg_evaluator.add("CMP_NE", "\s*!=\s*")
806 _lg_evaluator.add("CMP_LT", "\s*<\s*")
807 _lg_evaluator.add("CMP_LE", "\s*<=\s*")
808 _lg_evaluator.add("CMP_GE", "\s*>=\s*")
809 _lg_evaluator.add("CMP_GT", "\s*>\s*")
810 _lg_evaluator.add("NEGATE", "\s*!\s*")
811 _lg_evaluator.add("LOGICAL_AND", "(?<!\w)\s*and\s*(?!\w)", flags=re.I)
812 _lg_evaluator.add("LOGICAL_OR", "(?<!\w)\s*or\s*(?!\w)", flags=re.I)
813 _lg_evaluator.add(*_t_ARGSEP)
814 _lg_evaluator.add(*_t_NAME)
815
816 _lexer_evaluator = _lg_evaluator.build()
817
818 # ------------------
819 # parser
820 _pg_evaluator = ParserGenerator(
821 [
822 'NUMBER',
823 "SQ_LITERAL", "DQ_LITERAL",
824 "LPAREN", "RPAREN",
825 "ARGSEP",
826 "CMP_EQ", "CMP_NE", "CMP_LT",
827 "CMP_LE", "CMP_GE", "CMP_GT",
828 "NEGATE",
829 "LOGICAL_AND", "LOGICAL_OR",
830 "NAME",
831 ],
832 precedence=[
833 ('left', ['LOGICAL_AND', 'LOGICAL_OR',]),
834 ('right', ['NEGATE',])
835 ]
836 )
837
838 @_pg_evaluator.production("expression : LPAREN expression RPAREN")
839 def _evaluator_p_expression_group(p):
840 return p[1]
841
842 @_pg_evaluator.production("expression : expression CMP_EQ expression")
843 @_pg_evaluator.production("expression : expression CMP_NE expression")
844 @_pg_evaluator.production("expression : expression CMP_LT expression")
845 @_pg_evaluator.production("expression : expression CMP_LE expression")
846 @_pg_evaluator.production("expression : expression CMP_GE expression")
847 @_pg_evaluator.production("expression : expression CMP_GT expression")
848 def _evaluator_p_cmp_expression(p):
849 op = _tostr(p[1]).strip()
850
851 # Microsoft always ignore case...
852 if isinstance(p[0], six.string_types):
853 p[0] = p[0].lower()
854 if isinstance(p[2], six.string_types):
855 p[2] = p[2].lower()
856
857 #
858 if op == "==":
859 return (p[0] == p[2])
860 elif op == "!=":
861 return (p[0] != p[2])
862 elif op == ">=":
863 return (p[0] >= p[2])
864 elif op == "<=":
865 return (p[0] <= p[2])
866 elif op == ">":
867 return (p[0] > p[2])
868 elif op == "<":
869 return (p[0] < p[2])
870
871 @_pg_evaluator.production("expression : expression LOGICAL_AND expression")
872 @_pg_evaluator.production("expression : expression LOGICAL_OR expression")
873 def _evaluator_p_landor_expression(p):
874 op = _tostr(p[1]).strip()
875 if op == "and":
876 return p[0] and p[2]
877 else:
878 return p[0] or p[2]
879
880 @_pg_evaluator.production("expression : NEGATE expression")
881 def _evaluator_p_negate_expression(p):
882 return not p[1]
883
884 @_pg_evaluator.production("expression : value")
885 def _evaluator_p_value_expression(p):
886 return p[0]
887
888 @_pg_evaluator.production("funccall : name call")
889 def _evaluator_p_funccall(p):
890 return PropCallChain(p, "f").eval()
891
892 @_pg_evaluator.production("name : NAME")
893 def _evaluator_p_name(p):
894 return _tostr(p[0])
895
896 @_pg_evaluator.production("call : LPAREN RPAREN")
897 @_pg_evaluator.production("call : LPAREN args RPAREN")
898 def _evaluator_p_call(p):
899 if len(p) == 3:
900 return p[1]
901 else:
902 return []
903
904 @_pg_evaluator.production("args : value")
905 @_pg_evaluator.production("args : value ARGSEP value")
906 @_pg_evaluator.production("args : args ARGSEP value")
907 def _evaluator_p_args(p):
908 return p[0::2] # ('x', ',', 'y', ',', 'z')
909
910 @_pg_evaluator.production("value : funccall")
911 @_pg_evaluator.production("value : literal")
912 @_pg_evaluator.production("value : number")
913 def _evaluator_p_value(p):
914 return p[0]
915
916 @_pg_evaluator.production("literal : SQ_LITERAL")
917 @_pg_evaluator.production("literal : DQ_LITERAL")
918 def _evaluator_p_literal(p):
919 return _tostr(p[0]).strip()[1:-1]
920
921 @_pg_evaluator.production("number : NUMBER")
922 def _evaluator_p_number(p):
923 s = _tostr(p[0]).strip()
924 if "." in s or "e" in s:
925 return float(s)
926 else:
927 return int(s)
928
929 with warnings.catch_warnings():
930 # "shift/reduce conflicts" warnings are useful for me
931 # as developper, but this warnings is meaningless for
932 # all users of our library.
933 warnings.simplefilter("ignore")
934
935 _parser_evaluator = _pg_evaluator.build()
936
937
938 class _EvaluatorTest(object):
939 r"""
940 # this class is not for use.
941 # just for holding _parser_evaluator's doctest.
942
943 >>> lexer = _lexer_evaluator
944 >>> parser = _parser_evaluator
945 >>> parser.parse(lexer.lex("1"))
946 1
947 >>> parser.parse(lexer.lex("1 == 1"))
948 True
949 >>> parser.parse(lexer.lex("1 != 1"))
950 False
951 >>> parser.parse(lexer.lex("'1' == '1'"))
952 True
953 >>> parser.parse(lexer.lex("!('1' == '1')"))
954 False
955 >>> parser.parse(lexer.lex("! !('1' == '1')"))
956 True
957 >>> parser.parse(lexer.lex("(1 != 1) and (2 == 2) or (1 == 1)"))
958 True
959 >>> parser.parse(lexer.lex("((1 != 1) and (2 == 2)) or (1 == 1)"))
960 True
961 >>> parser.parse(lexer.lex("(1 != 1) and ((2 == 2) or (1 == 1))"))
962 False
963 >>> parser.parse(lexer.lex("1 != 1 and ((2 == 2) or (1 == 1))"))
964 False
965 >>> parser.parse(lexer.lex(" 1!= 1 and((2 ==2) or(1 ==1))"))
966 False
967 >>> parser.parse(lexer.lex(" 1!= 1and((2 ==2) or(1 ==1))"))
968 Traceback (most recent call last):
969 ...
970 rply.errors.ParsingError: (None, SourcePosition(idx=6, lineno=1, colno=7))
971 >>> parser.parse(lexer.lex("HasTrailingSlash('abc/')"))
972 True
973 >>> parser.parse(lexer.lex("!HasTrailingSlash('abc/')"))
974 False
975 >>> parser.parse(lexer.lex("!HasTrailingSlash('abc/') or !exists('c:/(^_^)')"))
976 True
977 """
978
979
980 # ----------------------------------------------------------------
981 #
982 # Public APIs
983 #
984
985 # ------------------------------
986 #
987 # Public module configurations
988 #
989
990 # _external_functions_table = _GLOBAL_FUNCS + user_defind
991 _external_functions_table = {}
992 def overide_external_functions_table(**overides):
993 """
994 """
995 global _external_functions_table
996 _external_functions_table.update(dict(**overides))
997
998 def set_default_external_functions_table():
999 """
1000 """
1001 global _external_functions_table
1002 _external_functions_table = dict(_GLOBAL_FUNCS)
1003
1004 set_default_external_functions_table()
1005
1006
1007
1008 #
1009 # The evaluator for the purpose of dereferencing the macros.
1010 #
1011 def dereference(s, props={}, ctx=""):
1012 r"""
1013 >>> print(dereference("$(X))", {"X": "xxx"}))
1014 xxx)
1015 >>> print(dereference("'$(X)'", {"X": "xxx"}))
1016 'xxx'
1017 >>> print(dereference("'$(X)|$(Y)'", {"X": "xxx", "Y": "yyy"}))
1018 'xxx|yyy'
1019 >>> print(dereference('"$(X)|$(Y)"', {"X": "xxx", "Y": "yyy"}))
1020 "xxx|yyy"
1021 >>> print(dereference("'$(X)abc | def$(Y)'", {"X": "xxx", "Y": "yyy"}))
1022 'xxxabc | defyyy'
1023 >>> print(dereference("$(X)", {"X": "xxx"}))
1024 xxx
1025 >>> print(dereference("$(X.Y)", {"X": {"Y": "xxx"}}))
1026 xxx
1027 >>> print(dereference("$(X.Y.Length)", {"X": {"Y": "xxx"}}))
1028 3
1029 >>> print(dereference("$(X.Y.Length)", {"x": {"Y": "xxx"}}))
1030 3
1031 >>> print(dereference("$(X.Y.Length)", {"X": {"y": "xxx"}}))
1032 3
1033 >>> print(dereference("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
1034 yz
1035 >>> print(dereference("$(X.Y.substring(1))", {"X": {"Y": "xyz"}}))
1036 yz
1037 >>> print(dereference("$(X.Y.substring ( 1 ) )", {"X": {"Y": "xyz"}}))
1038 yz
1039 >>> print(dereference("$([msbuild]::Add(1, 2))"))
1040 3
1041 >>> print(dereference("$([msbuild]::Add(1, $([msbuild]::Multiply(2, 3))))"))
1042 7
1043 >>> print(dereference("$([msbuild]::BitwiseOr(2, 4))"))
1044 6
1045 >>> print(dereference("$([msbuild]::BitwiseAnd(1, 3))"))
1046 1
1047 >>> print(dereference("$([System.IO.Path]::Combine(`a`, `b`))"))
1048 a/b
1049 >>> print(dereference("$([System.IO.Path]::GetFileName(`aaa/bbb.c`))"))
1050 bbb.c
1051 >>> r = dereference("$([MSBuild]::MakeRelative(`c:/users`, `c:/users/username`))")
1052 >>> print(r.replace("\\", "/"))
1053 username
1054 >>> r = dereference("$([MSBuild]::MakeRelative(`c:/users/username`, `c:/users`))")
1055 >>> print(r.replace("\\", "/"))
1056 ..
1057 """
1058
1059 # for searching the RPAR corresponding to REF_START
1060 tokdef = {
1061 "root": [
1062 (r"`", "STR_S", "str"),
1063 (r"\(", "LPAR", ""),
1064 (r"\)", "RPAR", ""),
1065 (r"[^`()]+", "OTH", ""),
1066 ],
1067 "str": [
1068 (r"`", "STR_E", "#pop"),
1069 (r"[^`]+", "STR", ""),
1070 ],
1071 }
1072
1073 lexer = _lexer_deref
1074 parser = _parser_deref
1075
1076 result = s
1077 rs_rgx = re.compile(_t_REF_START[1])
1078 while rs_rgx.search(result):
1079 # search last REF_START (i.e. deepest)
1080 m = list(reversed(list(rs_rgx.finditer(result))))[0]
1081 start, end = m.span()
1082
1083 # search the RPAR corresponding to this REF_START
1084 lpars, end = 1, start + 2
1085 for tok, s in _simple_tokenize(result[end:], tokdef):
1086 end += len(s)
1087 if tok == "LPAR":
1088 lpars += 1
1089 elif tok == "RPAR":
1090 lpars -= 1
1091 if lpars == 0:
1092 break
1093
1094 # parse using my parser
1095 partial_res = parser.parse(lexer.lex(result[start:end])).eval(
1096 props=props, ctx=ctx)
1097 result = "{}{}{}".format(
1098 result[:start], partial_res, result[end:])
1099 return result
1100
1101 #
1102 # The evaluator for the purpose of evaluating the condition.
1103 #
1104 def evaluate(s, props={}, ctx=""):
1105 r"""
1106 >>> print(evaluate("$(X) == 2", {"X": "2"}))
1107 True
1108 >>> evaluate("'xyz' != 'zzz'", {})
1109 True
1110 >>>
1111 >>> # Microsoft always ignore case...
1112 >>> evaluate("!!('aBc' == 'aBc')")
1113 True
1114 >>> evaluate("'%(Aaa.Bbb)' == 'xxx'", {'Aaa': {'Bbb': 'Xxx'}})
1115 True
1116 >>> evaluate("!('$(Y)' == 'xyz')", {"X": {"y": "xyz"}}, ctx="x")
1117 False
1118 >>> evaluate("HasTrailingSlash('abc/')")
1119 True
1120 >>> evaluate("!Exists('$(D)')", {"D": "a a a.txt"})
1121 True
1122 >>> overide_external_functions_table(exists=lambda *args: not not args[0])
1123 >>> evaluate("exists('abc/')")
1124 True
1125 >>> overide_external_functions_table(exists=lambda *args: not args[0])
1126 >>> evaluate("exists('abc/')")
1127 False
1128 >>> set_default_external_functions_table()
1129 """
1130 lexer = _lexer_evaluator
1131 parser = _parser_evaluator
1132 return parser.parse(lexer.lex(dereference(s, props=props, ctx=ctx)))
1133
1134
1135 #
1136 if __name__ == '__main__':
1137 logging.basicConfig(
1138 stream=sys.stderr,
1139 level=logging.DEBUG,
1140 format='%(levelname)s:%(name)s:%(funcName)s:%(lineno)s:%(message)s')
1141
1142 import doctest
1143 doctest.testmod()
The "you can't decorate a class, so you can't put classes to use" problem I mentioned above, the one I thought would probably not matter much, did in fact show up in practice.
What ran into it was _external_functions_table, which provides "user replacement of the external functions" (in the version as of the "2017-11-10 01:20" addendum this was the functions argument passed to a method).
In the end it is a choice between two options: stick to the orthodox spirit of "the parser only builds the resulting parse tree and does no semantic interpretation" (that is, defer evaluation until parsing has finished), or keep evaluating during the parse. The former felt like it would wear me down, so I went with the latter. Does that make sense?
Put in terms of the actual program: the "former" means taking evaluations such as expression == expression and, just like what PropCallChain already does, putting them onto a lazy-evaluation structure of the PropCallChain kind, then calling eval(...) once parsing has finished. The posted code, the "latter", evaluates expression == expression on the spot. Do that and you end up having to compute "a PropCallChain instance == 'aaa'", so the PropCallChain's own eval(...) is forced to run on the spot too. And at that point the "class-level self._functions" way of thinking is out.
That is how "user replacement of the external functions" ended up as a module-level global rather than something held per class; once it did, the half-written _MSBuildExpressionEvaluator class itself became dead weight, and as a result the two module-level functions dereference and evaluate became the public API. (Although, to be fair, the goal I had pictured back in the _MSBuildExpressionEvaluator days was essentially this anyway, so in a sense it all worked out.)
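Just to make the "former" flavor concrete, here is a minimal sketch of what a deferred-evaluation node could look like. LazyBinOp and its eval signature are made up for illustration; they are not part of the code posted above.

import operator

class LazyBinOp(object):
    # hypothetical lazy node; a production would return this instead of a bool
    _OPS = {"==": operator.eq, "!=": operator.ne}

    def __init__(self, op, lhs, rhs):
        self._op, self._lhs, self._rhs = op, lhs, rhs

    def eval(self, functions=None):
        # everything is resolved here, after parsing has finished, so
        # per-instance state such as `functions` can still be injected
        lhs = self._lhs.eval(functions) if hasattr(self._lhs, "eval") else self._lhs
        rhs = self._rhs.eval(functions) if hasattr(self._rhs, "eval") else self._rhs
        return self._OPS[self._op](lhs, rhs)

# a production for "expression EQ expression" would then only build the node:
#     return LazyBinOp("==", p[0], p[2])
# and the caller runs .eval(...) on the finished tree.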
2017-11-11 19:45 Addendum: for the record... "first version done" was the wrong way to put it
I said "first version done", but what I really meant was only "it is now properly on a TDD development cycle and has become manageable", which is not what people normally mean by that. The posted code had plenty of problems, and I have fixed a lot of them, but pasting code here would never end, so I will stop. I expect to publish it properly someday.
As for this post's actual topic, the "lex/yacc library", there is nothing more to say. The broken parts all fall into the category of plain "application-level mistakes". Still, a few of them are things that come up in this kind of program in general, so let me just list what was wrong:
- In MSBuild even an expression like this is allowed:
1 $(A) == ""
Because evaluate is structured to call dereference, dereference expands this into 1 == "", which is out of the grammar dereference knows. To handle it, either evaluate itself has to take part in the dereferencing, or dereference has to add quotes where needed; there is no third way. I took the latter. (Adding quotes on its own is not acceptable outside a condition expression, though, so dereference now carries an execution mode.)
- I got the binding of the binary operators wrong. The following did not come out as expected (see the sketch right after this list):
1 '' == '' and '' != 'true' and
2 ('false' == 'false' or 'false' == '') and
3 'DynamicLibrary' == 'DynamicLibrary'
This has to evaluate to true. A precedence mistake, that is.
- When a property has no such key, MSBuild treats it as blank. Through a gap in handling this I was returning "None" instead.
- dereference has to expand recursively when a property value itself contains references, but that is not done yet.
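For the precedence mistake, the fix comes down to declaring the boolean operators' precedence when constructing the ParserGenerator. A hedged sketch of the idea; the token names here (OR_OP, AND_OP, NOT_OP, EQ, NEQ) are placeholders and not necessarily the ones used in the code above:

from rply import ParserGenerator

_pg = ParserGenerator(
    ['NUMBER', 'SQ_LITERAL', 'EQ', 'NEQ', 'AND_OP', 'OR_OP', 'NOT_OP',
     'LPAREN', 'RPAREN'],
    # listed from loosest-binding to tightest-binding, as in ply/yacc
    precedence=[
        ('left', ['OR_OP']),      # 'or' binds loosest
        ('left', ['AND_OP']),     # 'and' binds tighter than 'or'
        ('left', ['EQ', 'NEQ']),  # comparisons tighter still
        ('right', ['NOT_OP']),    # unary '!' binds tightest
    ],
)

The productions for the boolean expressions would then use these tokens, and the generated parser resolves "a and b or c" the way MSBuild expects.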
That is about all of the mistakes, I think. In any case, the code posted above is barely past the trailhead, so please do not feel tempted to use it. I only put it up in the hope that it might serve someone as an rply example.
2017-11-12 19:40 Addendum: I put the rply limitation the wrong way
Rather than "put it the wrong way", it would be more honest to say I had simply misunderstood. Staring at the very first, simplest example, it finally dawned on me.
I think the explanation I am about to give is the crux of whether or not you will come to like rply.
In one sentence, it was merely that "the parser generator and the parser are different things" (and likewise for the lexer). That is:
1 from rply import ParserGenerator
2
3 _pg_deref = ParserGenerator(  # the parser GENERATOR
4     ['NUMBER', ],
5
6     precedence=[
7         #('left', ['PLUS', 'MINUS']),
8         #('left', ['MUL', 'DIV'])
9     ]
10 )
11
12 @_pg_deref.production('expression : NUMBER')  # this is only a definition the generator
13 def expression_number(p):                     # uses to generate the parser; it does not
14     return Value(float(p[0].getstr()))        # define the parser itself. Confusing, but you follow?
15
16 _parser_deref = \
17     _pg_deref.build()  # you cannot do what you like with this; the parser is a ready-made one that rply decides.
Unlike ply there is no constraint on the number of lexer/parser instances, but a roll-your-own parser that freely manages its own state and does whatever it pleases is something you can absolutely never have. The "can't decorate a class" thing I was grumbling about above is in fact no restriction at all: even if you could decorate one, being able to would mean nothing whatsoever.
Incidentally, if your only reason is "having it at module top level is hard to read and feels unpleasant", then something like this is enough:
1 _pg_deref = ParserGenerator(
2     ['NUMBER', ],
3
4     precedence=[
5         #('left', ['PLUS', 'MINUS']),
6         #('left', ['MUL', 'DIV'])
7     ]
8 )
9
10 class _MyHogeGrammar(object):
11     @classmethod  # or staticmethod, either is fine
12     @_pg_deref.production('expression : NUMBER')
13     def expression_number(p):
14         return Value(float(p[0].getstr()))
15
16     # the point of it being a class is almost the same as a C++ namespace:
17     # it is just a box (holding the grammar definitions and production rules).
18
19 _parser_deref = _pg_deref.build()
In any case, as long as you can accept building things in the spirit of "the parser just has to produce the resulting syntax tree", rply poses no problem whatsoever. If anything, that way may even be more flexible for cases like "same grammar, completely different things to do with it". Carelessly doing everything inside the parser tends to produce something hard to reuse anyway.
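As a hypothetical illustration of that reuse angle (the node shapes and function names here are my own invention, not taken from the code above): if the productions only ever build plain tuples, the same parse result can be handed to completely different consumers afterwards.

# assume the productions build nested tuples such as
# ("==", ("num", 1), ("num", 1)); purely structural, no evaluation

def to_text(node):
    # one consumer: pretty-print the tree
    kind = node[0]
    if kind in ("num", "str"):
        return repr(node[1])
    return "({} {} {})".format(to_text(node[1]), kind, to_text(node[2]))

def to_value(node):
    # another consumer: actually evaluate it
    kind = node[0]
    if kind in ("num", "str"):
        return node[1]
    if kind == "==":
        return to_value(node[1]) == to_value(node[2])
    raise ValueError(kind)

tree = ("==", ("num", 1), ("num", 1))
print(to_text(tree))   # (1 == 1)
print(to_value(tree))  # True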
2017-11-15 21:00 Addendum: a savior, you can use state
A feature that caught my eye even on first reading, though I did not immediately notice its value: state.
1 # -*- coding: utf-8 -*-
2 from rply import LexerGenerator
3 from rply import ParserGenerator
4
5 # -------- lexer/parser
6 _lg_evaluator = LexerGenerator()
7 _lg_evaluator.add('NUMBER', r'\d+')
8 _lexer_evaluator = _lg_evaluator.build()
9
10 #
11 _pg_evaluator = ParserGenerator(
12     ['NUMBER', ],
13 )
14
15 @_pg_evaluator.production('expression : NUMBER')
16 def expression_number(state, p):
17     # (2) the state (an arbitrary object) passed to parse() of the parser
18     #     built by the ParserGenerator is funneled straight through to here
19     import sys
20     print(state, file=sys.stderr)
21     return int(p[0].getstr())
22
23 _parser_evaluator = _pg_evaluator.build()
24 # (1) you can pass a state (an arbitrary object) to parse() of the parser built by the ParserGenerator
25 print(
26     _parser_evaluator.parse(
27         _lexer_evaluator.lex('1'),
28         {"key": "value"}  # it is arbitrary, so anything at all will do
29     ))
"Anything goes" really does mean anything goes: use it to give the parser a mode, or let an object process the data carried through the productions; truly anything.
The PropCallChain I was struggling with a bit above had me stuck because, depending on where it appears, I could not settle on the arguments to pass to eval, but put those onto this state object and there is nothing to it.
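For instance, a minimal sketch along those lines; the EvalState name and its fields are my own invention, not part of the code above:

class EvalState(object):
    # hypothetical bag for everything the productions need at eval time
    def __init__(self, props=None, ctx="", functions=None):
        self.props = props or {}
        self.ctx = ctx
        self.functions = functions or {}

# inside a production, the bag simply arrives as the first argument:
#
#     @_pg_evaluator.production("funccall : name call")
#     def _evaluator_p_funccall(state, p):
#         return PropCallChain(p, "f").eval(
#             props=state.props, ctx=state.ctx)
#
# and the caller supplies it once:
#
#     _parser_evaluator.parse(
#         _lexer_evaluator.lex(s), EvalState(props=props, ctx=ctx))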