ううむ pyparsing… ので ply

むむむ**2。

むむむ…pyparsing

pyparsing でルールの OR 結合がどうしても書けず。だけじゃなくて、シンプルなヤツだとかなり楽でいいんだがなぁ…、ちょっと複雑になってきたら、とてもじゃないが保守可能なものになりそうな気配がしなかった。やっぱルール記述やトークン記述などは「Python 的に」書けても嬉しくない。

やりかけたヤツ、ただ捨てるのはもったいないけれど自分のレポジトリ的な場所に管理するもんでもないので、「試行錯誤真っ最中、だった失敗作」のまま貼り付けてみる:

  1 # -*- coding: utf-8 -*-
  2 #
  3 from __future__ import absolute_import
  4 from __future__ import unicode_literals
  5 from __future__ import print_function
  6 
  7 import sys
  8 import re
  9 from functools import partial
 10 import types
 11 
 12 from pyparsing import *
 13 
 14 
 15 def _ParseResults_asListWithNames(self):
 16     names = []
 17 
 18     namedItems = dict(
 19         (v[1], k)
 20         for (k, vlist) in self._ParseResults__tokdict.items()
 21         for v in vlist)
 22     for i, res in enumerate(self._ParseResults__toklist):
 23         #if isinstance(res, ParseResults):
 24         #    pass
 25         if i in namedItems:
 26             names.append(namedItems[i])
 27         else:
 28             names.append("ITEM")
 29 
 30     return self._ParseResults__toklist, names
 31 
 32 
 33 def wrap_ParseResults(toks):
 34     toks.asListWithName = types.MethodType(_ParseResults_asListWithNames, toks)
 35     return toks
 36 
 37 
 38 class ExpressionEvaluator(object):
 39     def __init__(self):
 40         self._stack = []
 41         self._props = {}
 42 
 43         ref_start = (Literal("$(") | Literal("%(")).setResultsName("REF_START")
 44         lparen = Literal("(").setResultsName("LPAR")
 45         rparen = Literal(")").setResultsName("RPAR")
 46         squote = Literal("'").setResultsName("SQ")
 47         comma = Literal(",").suppress()
 48         #
 49         point = Literal(".")
 50         e = CaselessLiteral("E")
 51         fnumber = Combine(
 52             Word("+-" + nums, nums) + \
 53                 Optional(point + Optional(Word(nums))) + \
 54                 Optional(e + Word("+-" + nums, nums))).setResultsName("NUMBER")
 55 
 56         # `xxx`
 57         msbuild_string = QuotedString("`", unquoteResults=False)
 58 
 59         #
 60         id_component = Word(alphas + "_", alphas + nums + "_").setResultsName("ID_COMPONENT")
 61         prop_name = Word(".", alphas + nums + "_").setResultsName("PROP_NAME")
 62         #id_component.setParseAction(
 63         #    partial(self._reduce, context="ROOTKEY"))
 64 
 65         #
 66         ref = Forward()
 67 
 68         # (..., )
 69         arg = ref | fnumber | msbuild_string
 70         args = arg + ZeroOrMore(comma + arg)
 71 
 72         #func_args = Forward()
 73         #func_args << lparen + Optional(args) + rparen
 74         func_args = lparen + Optional(args) + rparen
 75         #func_args.setParseAction(
 76         #    partial(self._reduce, context="FUNC_ARGS"))
 77 
 78         #
 79         #prop = Forward()
 80         #prop << prop_name + Optional(func_args)
 81         prop = prop_name + Optional(func_args)
 82         prop.setResultsName("PROP")
 83         prop.setParseAction(
 84             partial(self._reduce, context="PROP"))
 85         #
 86         #ref_content = Forward()
 87         ref_content = id_component + ZeroOrMore(prop)
 88         #ref1 = (squote + ref_start + ref_content + rparen + squote)
 89         #ref2 = (ref_start + ref_content + rparen)
 90         ref << (ref_start + ref_content + rparen)
 91         #ref |= (squote + ref + squote)
 92         #ref << ref1 | ref2
 93         #ref = ref | (squote + ref + squote)
 94         ref.setParseAction(
 95             partial(self._reduce, context="REF"))
 96 
 97         #
 98         #grammar = Forward()
 99         #grammar << ref
100         grammar = ref
101         #grammar.setParseAction(
102         #    partial(self._reduce, context="WHOLE"))
103         self._bnf = grammar
104 
105     def evaluate(self, s, props):
106         self._props = props
107         parsed = self._bnf.parseString(s)
108         #print(parsed.asXML())
109         #print("\n".join([str(t) for t in self._stack]))
110 
111     def _reduce(self, strg, loc, toks, context=""):
112         wr_toks = wrap_ParseResults(toks)
113         #print(context, dir(toks))
114         #if context == "REF":
115         #    if "(" not in toks[1:-1]:
116         #        key = "".join(toks[1:-1])
117         #        value = self._props.get(key, "")
118         #        return value  #"".join(toks)
119         #if context in ("WHOLE", ):  #"PROP"):
120         if context == "REF":
121             tl = wr_toks.asListWithName()
122             if "LPAR" not in tl[1][1:]:
123                 #key = "".join(toks[1:-1])
124                 #value = self._props.get(key, "")
125                 #return value  #"".join(toks)
126                 #print("!!!", type(toks))
127                 return ParseResults("".join(toks), "REF")
128                 #pass
129             #print(context, toks.asList())#.dump())  #dir(toks))
130             #print(context, dir(toks))
131             #print(context, list(toks.values()))
132             #print(context, toks.asDict())
133             #print(context, toks.asXML())
134             #pass
135             #print(context, [(tok, type(tok)) for tok in toks])
136             #print(context, toks._ParseResults__toklist, type(toks))#.dump())
137             #namedItems = dict(
138             #    (v[1], k)
139             #    for (k, vlist) in toks._ParseResults__tokdict.items()
140             #    for v in vlist)
141             #worklist = toks._ParseResults__toklist
142             #for i, res in enumerate(worklist):
143             #    #print(isinstance(res, ParseResults))
144             #    if i in namedItems:
145             #        print((namedItems[i], res))
146             print(tl[0])
147             print(tl[1])
148             print("")
149         #self._stack.append((context, toks))
150         #toks_list = toks.asList()
151         #if context == "REF":
152         #    if "(" not in toks_list[1:-1]:
153         #        return "".join(toks)
154         #if context == "FUNC_ARGS":
155         #    #print("!!!", toks_list[1:-1])
156         #    self._stack.append((context, toks_list[1:-1]))
157         #    #return []
158         #elif context == "PROP":
159         #    args = []
160         #    if self._stack:
161         #        args = self._stack.pop()[1]
162         #    self._stack.append((context, toks_list[0], args))
163         #else:
164         #    self._stack.append((context, toks.asList()))
165         #self._stack.append((toks_list, context))
166         #if context == "FUNC_ARGS":
167         #    #print("!!!", toks.asList()[1:-1])
168         #    self._stack.append(toks)
169         #    return []
170         #else:
171         #    self._stack.append((context, toks.asList()))
172         #if context == "REF":
173         #    if "(" not in toks[1:-1]:
174         #        key = "".join(toks[1:-1])
175         #        value = self._props[key]
176         #        #self._stack.append((context, value))
177         #        return [value]
178         #    self._stack.append((context, toks))
179         #    return []
180         ##if context == "ROOTKEY":
181         ##    self._stack.append((context, "".join(toks)))
182         #    return []
183         #if context == "WHOLE":
184         #    print(" ".join(toks))
185 
186 ExpressionEvaluator().evaluate(
187     """%(AAA.YYY.Contains($(CCC.Replace(`a`, `b`).Replace($(X), $(Y)))))""",
188     props={"X": "x", "Y": "y", "AAA": "zzz", "CCC": "aaa"})

書かれてる部分では上手に動いてはいるんだけれど、「$(X)」部分を「'$(X)'」も許容しようとして、つまり文法定義的には:

1 ref = ref_start, ref_content, rparen
2      | squote, ref_start, ref_content, rparen, squote
3      ;

としたいわけなのだが、なんだか '|' の振る舞いが疑わしい。

スクリプトの印象が頗る良くないのは「試行錯誤中のゴミだから」というだけではないと思うのよ。責務の分解がしづらくてな、書きにくいし読みにくい、と。繰り返すけど「非常に単純な用途ならめっちゃ便利」と思う、pyparsing。けど複雑なものには向かないんじゃなかろうか、と思った。

で、仕方ないので「もっとちゃんとしたの」と、ply を試し中。なんつーか「ちゃんとした」どころか、まぁこれは Unix C の「lex/yacc」の完全互換を目指している模様、少しやってみた感触も、lex/yacc を書いてるのと「ほぼ」同じノリで書けそうだ。

というわけで lex/yacc を 1bit も知らない人には説明しずれーんだけれど、知ってる前提で「hello world」的なヤツ:

「文法」もクソもない、「name」という文法規則しかない例だけれど、「どこに何をどのように書いていくのか」を知るには十分なサンプルと思う。

docstring に BNF を書くのな。あと「t_」はじまりはトークン定義、「p_」はじまりは解析ルール定義、などいくつかの固定のお約束に従って書く。

本物の BNF を書けるのはアタシにはありがたいな。多分性能的にはこれのせいでいけてない可能性はあるんだとは思うけれども。

こやつで書きたいものが書ききれたらいいなぁ、と、今は祈るのみ。

ちょっとだけ始めてみた、「やりたいこと」:

ルールの記述順が繊細なのを忘れてて一瞬ハマった、けど、あぁ、なんて素直。reduce (還元) を「そのまま」書けるのがやっぱいい。だって yacc ってそういうもんだったでしょ、って思う。

13:30 追記: いやぁ、いいね、ply

いやぁ、いいね、ply。「文法」だけだけど、一気にここまで書けちゃった:

  1 # -*- coding: utf-8 -*-
  2 #
  3 from __future__ import absolute_import
  4 from __future__ import unicode_literals
  5 from __future__ import print_function
  6 
  7 import ply.lex as lex
  8 import ply.yacc as yacc
  9 
 10 
 11 # Lexing Rules
 12 tokens = (
 13     "SQUOTE",
 14     "NUMBER", "MSBUILDSTR",
 15     "REF_START",
 16     "RPAREN", "LPAREN", "ARGSEP",
 17     "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
 18     "NAME", "PROP",
 19     )
 20 
 21 t_SQUOTE = r"'"
 22 t_NUMBER = r"[+-]?(\d+(\.\d*)?|\.\d+)([eEfF][+-]?\d+)?"
 23 t_MSBUILDSTR = r"`[^`]*`"
 24 
 25 t_REF_START = r"[$%]\("
 26 t_LPAREN = r'\('
 27 t_RPAREN = r'\)'
 28 t_LSQBRACKET = r'\['
 29 t_RSQBRACKET = r'\]'
 30 t_DBLCOLON = r'::'
 31 t_ARGSEP = r'\s*,\s*'
 32 t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
 33 t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
 34 
 35 # Parsing Rules
 36 def p_ref(p):
 37     """
 38     ref : REF_START refcontent RPAREN
 39         | SQUOTE ref SQUOTE
 40     """
 41     print("ref / p[1:]='{}'".format(p[1:]))
 42 
 43 
 44 def p_refcontent(p):
 45     """
 46     refcontent : propcallchain
 47                | nsref propcallchain
 48     """
 49     print("refcontent / p[:]='{}'".format(p[:]))
 50     #p[0] = "".join([k for k in p[:] if k])
 51 
 52 
 53 def p_propcallchain(p):
 54     """
 55     propcallchain : NAME
 56           | NAME propcall
 57           | propcallchain propcall
 58     """
 59     print("propcallchain / p[:]='{}'".format(p[:]))
 60     #p[0] = "".join([k for k in p[:] if k])
 61 
 62 
 63 def p_nsref(p):
 64     """
 65     nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
 66           | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
 67     """
 68     print("nsref / p[:]='{}'".format(p[:]))
 69 
 70 
 71 def p_prop(p):
 72     """
 73     prop : PROP
 74          | prop prop
 75     """
 76     print("prop / p[:]='{}'".format(p[:]))
 77     #p[0] = "".join([k for k in p[:] if k])
 78 
 79 
 80 def p_propcall(p):
 81     """
 82     propcall : PROP
 83              | PROP call
 84              | propcall propcall
 85     """
 86     print("propcall / p[:]='{}'".format(p[:]))
 87     #p[0] = "".join([k for k in p[:] if k])
 88 
 89 
 90 def p_call(p):
 91     """
 92     call : LPAREN RPAREN
 93          | LPAREN args RPAREN
 94     """
 95     print("call / p[:]='{}'".format(p[:]))
 96     #p[0] = "".join([k for k in p[:] if k])
 97 
 98 
 99 def p_args(p):
100     """
101     args : arg
102          | arg ARGSEP arg
103          | args ARGSEP arg
104     """
105     print("args / p[:]='{}'".format(p[:]))
106     #p[0] = "".join([k for k in p[:] if k])
107 
108 
def p_arg(p):
    """
    arg : ref
        | NUMBER
        | MSBUILDSTR
    """
    # NOTE: the docstring above is the grammar production that PLY reads;
    # it is not free-form documentation and must not be reworded.
    #
    # Trace-only action: print the reduced symbols.  The label is "arg"
    # (not "args") so the output can be told apart from p_args.
    # No semantic value is produced yet, so p[0] stays unset.
    print("arg / p[:]='{}'".format(p[:]))
117 
118 
119 #
120 if __name__ == '__main__':
121     _DEBUG = False
122     lex.lex(debug=_DEBUG)  # Build the lexer
123     yacc.yacc(debug=_DEBUG)  # Build the parset
124 
125     # parse
126     yacc.parse("$(Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
127     print("")
128     yacc.parse("$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
129     print("")
130     yacc.parse("HasTrailingSlash($([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length))")

ほとんど苦労してない。

とりあえずここまででは「(MSBuild の)マクロ参照」だけだけれど、これに「式」(例えば「'$(A)' == 'zzz'」)を解釈できるようにするのは「文法定義だけなら」多分簡単。まぁアクション部分はてこずるとは思うけどね。単純な電卓のようにはいかん。

15:20 追記: いいねいいよ ply

一つ前の追記から2時間足らずで…

  1 # -*- coding: utf-8 -*-
  2 #
  3 from __future__ import absolute_import
  4 from __future__ import unicode_literals
  5 from __future__ import print_function
  6 
  7 import ply.lex as lex
  8 import ply.yacc as yacc
  9 
 10 
 11 # Lexing Rules
 12 tokens = (
 13     "SQUOTE", "DQUOTE",
 14     "NUMBER", "MSBUILD_LITERAL",
 15     "SQ_LITERAL", "DQ_LITERAL",
 16     "REF_START",
 17     "RPAREN", "LPAREN", "ARGSEP",
 18     "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
 19     "NAME", "PROP",
 20     "CMP_EQ", "CMP_NE", "CMP_LT",
 21     "CMP_LE", "CMP_GE", "CMP_GT",
 22     "NEGATE",
 23     "LOGICAL_AND", "LOGICAL_OR",
 24     )
 25 
 26 t_SQUOTE = r"'"
 27 t_DQUOTE = r'"'
 28 t_NUMBER = r"[+-]?(\d+(\.\d*)?|\.\d+)([eEfF][+-]?\d+)?"
 29 t_MSBUILD_LITERAL = r"`[^`]*`"
 30 
 31 t_REF_START = r"[$%]\("
 32 t_LPAREN = r'\('
 33 t_RPAREN = r'\)'
 34 t_LSQBRACKET = r'\['
 35 t_RSQBRACKET = r'\]'
 36 t_DBLCOLON = r'::'
 37 t_ARGSEP = r'\s*,\s*'
 38 t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
 39 t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
 40 
 41 t_SQ_LITERAL = r"'[^']*'"
 42 t_DQ_LITERAL = r'"[^"]*"'
 43 
 44 t_CMP_EQ = r"\s*==\s*"
 45 t_CMP_NE = r"\s*!=\s*"
 46 t_CMP_LT = r"\s*<\s*"
 47 t_CMP_LE = r"\s*<=\s*"
 48 t_CMP_GE = r"\s*>=\s*"
 49 t_CMP_GT = r"\s*>\s*"
 50 
 51 t_NEGATE = r"\s*!\s*"
 52 
 53 t_LOGICAL_AND = r"(?<!\w)\s*[aA][nN][dD]\s*(?!\w)"
 54 t_LOGICAL_OR = r"(?<!\w)\s*[oO][rR]\s*(?!\w)"
 55 
 56 # Parsing Rules
 57 precedence = (
 58     ('left', 'LOGICAL_AND'),
 59     ('left', 'LOGICAL_OR'),
 60     ('right', 'NEGATE'),
 61 )
 62 
 63 def p_cmp_expression(p):
 64     """
 65     expression : expression CMP_EQ expression
 66                | expression CMP_NE expression
 67                | expression CMP_LT expression
 68                | expression CMP_LE expression
 69                | expression CMP_GE expression
 70                | expression CMP_GT expression
 71     """
 72     print("cmp_expression / p[1:]='{}'".format(p[1:]))
 73 
 74 
 75 def p_landor_expression(p):
 76     """
 77     expression : expression LOGICAL_AND expression %prec LOGICAL_AND
 78                | expression LOGICAL_OR expression %prec LOGICAL_OR
 79     """
 80     print("cmp_expression / p[1:]='{}'".format(p[1:]))
 81 
 82 
 83 def p_negate_expression(p):
 84     """
 85     expression : NEGATE expression %prec NEGATE
 86     """
 87     print("negate_expression / p[1:]='{}'".format(p[1:]))
 88 
 89 
 90 def p_expression_group(p):
 91     """
 92     expression : LPAREN expression RPAREN
 93     """
 94     print("expression_group / p[1:]='{}'".format(p[1:]))
 95     #p[0] = p[2]
 96 
 97 
 98 def p_value_expression(p):
 99     """
100     expression : value
101     """
102     print("value_expression / p[1:]='{}'".format(p[1:]))
103 
104 
105 def p_ref(p):
106     """
107     ref : REF_START refcontent RPAREN
108         | SQUOTE ref SQUOTE
109         | DQUOTE ref DQUOTE
110     """
111     print("ref / p[1:]='{}'".format(p[1:]))
112 
113 
114 def p_refcontent(p):
115     """
116     refcontent : propcallchain
117                | nsref propcallchain
118     """
119     print("refcontent / p[:]='{}'".format(p[:]))
120     #p[0] = "".join([k for k in p[:] if k])
121 
122 
123 def p_propcallchain(p):
124     """
125     propcallchain : NAME
126           | NAME propcall
127           | propcallchain propcall
128     """
129     print("propcallchain / p[:]='{}'".format(p[:]))
130     #p[0] = "".join([k for k in p[:] if k])
131 
132 
133 def p_nsref(p):
134     """
135     nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
136           | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
137     """
138     print("nsref / p[:]='{}'".format(p[:]))
139 
140 
141 def p_prop(p):
142     """
143     prop : PROP
144          | prop prop
145     """
146     print("prop / p[:]='{}'".format(p[:]))
147     #p[0] = "".join([k for k in p[:] if k])
148 
149 
150 def p_propcall(p):
151     """
152     propcall : PROP
153              | PROP call
154              | propcall propcall
155     """
156     print("propcall / p[:]='{}'".format(p[:]))
157     #p[0] = "".join([k for k in p[:] if k])
158 
159 
160 def p_funccall(p):
161     """
162     funccall : NAME call
163     """
164     print("funccall / p[:]='{}'".format(p[:]))
165     #p[0] = "".join([k for k in p[:] if k])
166 
167 
168 def p_call(p):
169     """
170     call : LPAREN RPAREN
171          | LPAREN args RPAREN
172     """
173     print("call / p[:]='{}'".format(p[:]))
174     #p[0] = "".join([k for k in p[:] if k])
175 
176 
177 def p_args(p):
178     """
179     args : value
180          | value ARGSEP value
181          | args ARGSEP value
182     """
183     print("args / p[:]='{}'".format(p[:]))
184     #p[0] = "".join([k for k in p[:] if k])
185 
186 
187 def p_value(p):
188     """
189     value : ref
190           | funccall
191           | NUMBER
192           | MSBUILD_LITERAL
193           | SQ_LITERAL
194           | DQ_LITERAL
195     """
196     print("value / p[:]='{}'".format(p[:]))
197     #p[0] = "".join([k for k in p[:] if k])
198 
199 
200 class ExpressionSyntaxError(Exception):
201     pass
202 
203 
204 def p_error(p):
205     if p:
206         raise ExpressionSyntaxError(
207             "Syntax error at '{}'".format(p.value))
208     else:
209         raise ExpressionSyntaxError(
210             "Syntax error at EOF")
211 
212 
213 #
214 if __name__ == '__main__':
215     _DEBUG = False
216     lex.lex(debug=_DEBUG)  # Build the lexer
217     yacc.yacc(debug=_DEBUG)  # Build the parset
218 
219     # parse
220     yacc.parse("$(Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
221     print("")
222     yacc.parse("$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
223     print("")
224     yacc.parse("`xx` == $([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
225     print("")
226     yacc.parse('''`xx` == "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
227     print("")
228     yacc.parse('''"xx" != "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
229     print("")
230     yacc.parse('''!$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Contains("zzz"))''')
231     print("")
232     yacc.parse('''$(Abcde.Contains("zzz")) or !$(Abcde.Contains("yyy"))''')
233     print("")
234     yacc.parse('''HasTrailingSlash($(Abcde.Contains("zzz"))) or !Exists($(Abcde.Contains("yyy")))''')

pyparsing で悩んだ丸一日を返して欲しいわ。非常に整理しやすいしな。

右結合、左結合の扱い(precedence, %prec)なんぞもやっておるし、パーサエラー時の振る舞いを追加したりもしてるんで、少しばかりはヒトサマのためになったりならなかったりするような気もしないでもない。ただしまだなんにも「正しい」保証なんか出来ないことにはご注意ね。(だいたいにしてアクションが全部空だもの、あんまし参考にならん気はする。あと最終的には class にする予定だけど、「人のために曝す」なら最初からそうしとけよっ、とも思うだろうしさ。)

文法は実は少々イケてなくて、一重・二重引用符文字列を丸ごとトークンにしてしまう lexer 規則なので、これだと引用符内を再帰的に解析しないとダメ。うまい方法あるかなぁ…? flex みたいに lexer で状態遷移出来れば解決する? しない? どうだろ?

なお、もうわかってる人はわかってると思うけどこれは Visual Studio プロジェクトファイルに記述出来る expression の評価用パーザ。つまりさらにわかってる人は、「まだあれが足りてない」こともわかるのかも。まだ色々あるの。意味は判明してないが、「^$(ProjectPath);@(GenerateSatelliteAssembliesOutputs->FullPath()->Distinct())」みたいな記述が出来るみたいなのね。ほかにも、「@(Inf->'')」とか。意味わからんことには対応しようにも出来ないわけだけれど、いずれはやらねばならん。

2017-11-08 9:00 追記: 早い早い

一つ前の追記は正味で半日以内かな、たぶん。

一気に書けば「初版完成」まで行けるんだけれど、本物は長大になってしまって、ひとさまが参考にするのに適さなくなるので、「入り口」としてひとまず(つまりもう TDD のフェーズに入ってる)の版をお見せしておく。アクションの記述がひょっとするとあなたの想像よりずっと簡単に思うんじゃないかな、と思う。BNF ベースのパーサを書くなんて畏れ多い、と思い込んでいる人ほど読んでみて欲しい。「規則を還元する、の繰り返し」が評価なのだ、というコツの部分さえ理解出来てしまえば、きっとあなたでも出来るはず…:

  1 # -*- coding: utf-8 -*-
  2 #
  3 from __future__ import absolute_import
  4 from __future__ import unicode_literals
  5 from __future__ import print_function
  6 
  7 import os
  8 import sys
  9 
 10 import six
 11 import ply.lex as lex
 12 import ply.yacc as yacc
 13 
 14 
 15 if sys.version[0] == '2':
 16     str = unicode
 17 
 18 
 19 # ----------------------------------------------------------------
 20 #
 21 # Non-Python Types Emulation
 22 #
 23 class String(str):
 24     r"""
 25     >>> s = String("abc")
 26     >>> isinstance(s, (str,))
 27     True
 28     >>> isinstance(s, six.string_types)
 29     True
 30     """
 31     def __new__(cls, value):
 32         obj = str.__new__(cls, value)
 33         return obj
 34 
 35     def __getattr__(self, name):
 36         # Unfortunatelly, Microsoft always ignores its case...
 37         from types import FunctionType
 38         for attr in [
 39             x for x, y in String.__dict__.items()]:
 40 
 41             if attr.lower() == name.lower():
 42                 return getattr(self, attr)
 43 
 44     #def Clone(self, *args, **kwargs):
 45     #    r"""
 46     #    Object Clone()
 47     #
 48     #    """
 49     #    raise NotImplementedError()
 50 
 51     def CompareTo(self, value):
 52         r"""
 53         int CompareTo(Object value)
 54         int CompareTo(string strB)
 55 
 56         >>> tab = [
 57         ...     # (lhs, rhs, expected)
 58         ...     #     same length
 59         ...     ("A", "A", 0),
 60         ...     ("A", "a", 1),
 61         ...     ("a", "A", -1),
 62         ...     ("AAA", "AAA", 0),
 63         ...     ("AAA", "aAA", 1),
 64         ...     ("aAA", "AAA", -1),
 65         ...     #
 66         ...     #     len(self) > len(rhs)
 67         ...     ("AAAa", "AAA", 1),
 68         ...     ("AAAa", "aAA", 1),
 69         ...     ("aAAa", "AAA", 1),
 70         ...     ("aAAaa", "AAA", 1),
 71         ...     #
 72         ...     #     len(self) < len(rhs)
 73         ...     ("AAA", "AAAA", -1),
 74         ...     ("AAA", "aAAA", -1),
 75         ...     ("aAA", "AAAA", -1),
 76         ...     ("aAA", "AAAAA", -1),
 77         ... ]
 78         >>> for lhs, rhs, expected in tab:
 79         ...     result = String(lhs).CompareTo(rhs)
 80         ...     assert result == expected, str((lhs, rhs, expected, result))
 81         ...     #from ._powershell import exec_single_command
 82         ...     #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
 83         ...     #assert expected == expected2, str((lhs, rhs, expected2, expected))
 84         ...     #assert result == expected2, str((lhs, rhs, expected2, result))
 85         >>> 
 86         """
 87         lendiff = len(self) - len(value)
 88         if lendiff == 0:
 89             # this logic is the same as python2's cmp
 90             return (value > self) - (value < self)
 91         return lendiff / abs(lendiff)
 92 
 93     def Contains(self, value):
 94         r"""
 95         bool Contains(string value)
 96 
 97         >>> String("AAA").Contains("AAA")
 98         True
 99         >>> String("   AAA   ").Contains("AAA")
100         True
101         >>> String("BBB").Contains("AAA")
102         False
103         >>> String("AAA").Contains("   AAA   ")
104         False
105         """
106         return value in self
107 
108     #def CopyTo(self, *args, **kwargs):
109     #    r"""
110     #    Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
111     #
112     #    """
113     #    raise NotImplementedError()
114 
115     def EndsWith(self, *args, **kwargs):
116         r"""
117         bool EndsWith(string value)
118         bool EndsWith(string value, StringComparison comparisonType)
119         bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
120 
121         """
122         raise NotImplementedError()
123 
124     def Equals(self, *args, **kwargs):
125         r"""
126         bool Equals(Object obj)
127         bool Equals(string value)
128         bool Equals(string value, StringComparison comparisonType)
129 
130         """
131         raise NotImplementedError()
132 
133     #def GetEnumerator(self, *args, **kwargs):
134     #    r"""
135     #    CharEnumerator GetEnumerator()
136     #
137     #    """
138     #    raise NotImplementedError()
139 
140     #def GetHashCode(self, *args, **kwargs):
141     #    r"""
142     #    int GetHashCode()
143     #
144     #    """
145     #    raise NotImplementedError()
146 
147     #def GetType(self, *args, **kwargs):
148     #    r"""
149     #    type GetType()
150     #
151     #    """
152     #    raise NotImplementedError()
153 
154     #def GetTypeCode(self, *args, **kwargs):
155     #    r"""
156     #    TypeCode GetTypeCode()
157     #
158     #    """
159     #    raise NotImplementedError()
160 
161     def IndexOf(self, *args, **kwargs):
162         r"""
163         int IndexOf(char value)
164         int IndexOf(char value, int startIndex)
165         int IndexOf(char value, int startIndex, int count)
166         int IndexOf(string value)
167         int IndexOf(string val...
168 
169         """
170         raise NotImplementedError()
171 
172     def IndexOfAny(self, *args, **kwargs):
173         r"""
174         int IndexOfAny(char[] anyOf)
175         int IndexOfAny(char[] anyOf, int startIndex)
176         int IndexOfAny(char[] anyOf, int startIndex, int count)
177 
178         """
179         raise NotImplementedError()
180 
181     def Insert(self, *args, **kwargs):
182         r"""
183         string Insert(int startIndex, string value)
184 
185         """
186         raise NotImplementedError()
187 
188     #def IsNormalized(self, *args, **kwargs):
189     #    r"""
190     #    bool IsNormalized()
191     #    bool IsNormalized(Text.NormalizationForm normalizationForm)
192     #
193     #    """
194     #    raise NotImplementedError()
195 
196     def LastIndexOf(self, *args, **kwargs):
197         r"""
198         int LastIndexOf(char value)
199         int LastIndexOf(char value, int startIndex)
200         int LastIndexOf(char value, int startIndex, int count)
201         int LastIndexOf(string value)
202         int La...
203 
204         """
205         raise NotImplementedError()
206 
207     def LastIndexOfAny(self, *args, **kwargs):
208         r"""
209         int LastIndexOfAny(char[] anyOf)
210         int LastIndexOfAny(char[] anyOf, int startIndex)
211         int LastIndexOfAny(char[] anyOf, int startIndex, int count)
212 
213         """
214         raise NotImplementedError()
215 
216     #def Normalize(self, *args, **kwargs):
217     #    r"""
218     #    string Normalize()
219     #    string Normalize(Text.NormalizationForm normalizationForm)
220     #
221     #    """
222     #    raise NotImplementedError()
223 
224     def PadLeft(self, *args, **kwargs):
225         r"""
226         string PadLeft(int totalWidth)
227         string PadLeft(int totalWidth, char paddingChar)
228 
229         """
230         raise NotImplementedError()
231 
232     def PadRight(self, *args, **kwargs):
233         r"""
234         string PadRight(int totalWidth)
235         string PadRight(int totalWidth, char paddingChar)
236 
237         """
238         raise NotImplementedError()
239 
240     def Remove(self, *args, **kwargs):
241         r"""
242         string Remove(int startIndex, int count)
243         string Remove(int startIndex)
244 
245         """
246         raise NotImplementedError()
247 
248     def Replace(self, oldValue, newValue):
249         r"""
250         string Replace(char oldChar, char newChar)
251         string Replace(string oldValue, string newValue)
252 
253         >>> print(String("AAA").Replace("AAA", "BBB"))
254         BBB
255         """
256         return String(self.replace(oldValue, newValue))
257 
258     def Split(self, *args, **kwargs):
259         r"""
260         string[] Split(Params char[] separator)
261         string[] Split(char[] separator, int count)
262         string[] Split(char[] separator, StringSplitOptions options)
263         string[] Spl...
264 
265         """
266         raise NotImplementedError()
267 
268     def StartsWith(self, *args, **kwargs):
269         r"""
270         bool StartsWith(string value)
271         bool StartsWith(string value, StringComparison comparisonType)
272         bool StartsWith(string value, bool ignoreCase, Globalizati...
273 
274         """
275         raise NotImplementedError()
276 
277     def Substring(self, startIndex, length=-1):
278         r"""
279         string Substring(int startIndex)
280         string Substring(int startIndex, int length)
281 
282         >>> print(String("abc").Substring(0))
283         abc
284         >>> print(String("abc").Substring(0, 1))
285         a
286         >>> print(String("abc").Substring(1))
287         bc
288         >>> print(String("abc").Substring(1, 1))
289         b
290         >>> print(String("abc").Substring(1, 2))
291         bc
292         >>> print(String("abc").SubString(1, 2))
293         bc
294         """
295         # TODO: "System.String.Substring" raise Exception if
296         #       length is larger than actual length.
297         if length >= 0:
298             return String(self[startIndex:][:length])
299         return String(self[startIndex:])
300 
301     #def ToCharArray(self, *args, **kwargs):
302     #    r"""
303     #    char[] ToCharArray()
304     #    char[] ToCharArray(int startIndex, int length)
305     #
306     #    """
307     #    raise NotImplementedError()
308 
309     def ToLower(self, *args, **kwargs):
310         r"""
311         string ToLower()
312         string ToLower(Globalization.CultureInfo culture)
313 
314         """
315         raise NotImplementedError()
316 
317     #def ToLowerInvariant(self, *args, **kwargs):
318     #    r"""
319     #    string ToLowerInvariant()
320     #
321     #    """
322     #    raise NotImplementedError()
323 
324     #def ToString(self, *args, **kwargs):
325     #    r"""
326     #    string ToString()
327     #    string ToString(IFormatProvider provider)
328     #
329     #    """
330     #    raise NotImplementedError()
331 
332     def ToUpper(self, *args, **kwargs):
333         r"""
334         string ToUpper()
335         string ToUpper(Globalization.CultureInfo culture)
336 
337         """
338         raise NotImplementedError()
339 
340     #def ToUpperInvariant(self, *args, **kwargs):
341     #    r"""
342     #    string ToUpperInvariant()
343     #
344     #    """
345     #    raise NotImplementedError()
346 
347     def Trim(self, *args, **kwargs):
348         r"""
349         string Trim(Params char[] trimChars)
350         string Trim()
351 
352         """
353         raise NotImplementedError()
354 
355     def TrimEnd(self, *args, **kwargs):
356         r"""
357         string TrimEnd(Params char[] trimChars)
358 
359         """
360         raise NotImplementedError()
361 
362     def TrimStart(self, *args, **kwargs):
363         r"""
364         string TrimStart(Params char[] trimChars)
365 
366         """
367         raise NotImplementedError()
368 
369     #def Chars(self, *args, **kwargs):
370     #    r"""
371     #    char Chars(int index) {get;}
372     #
373     #    """
374     #    raise NotImplementedError()
375 
376     @property
377     def Length(self):
378         r"""
379         Int32 Length {get;}
380 
381         >>> s = String("abc")
382         >>> s.Length
383         3
384         """
385         return len(self)
386 
387 
388 # ----------------------------------------------------------------
389 #
390 # Expression Evaluator
391 #
392 class ExpressionSyntaxError(Exception):
393     pass
394 
395 
class _Parser(object):
    """
    Base class for a lexer/parser that has the rules defined as methods.

    Subclasses supply ``tokens``, optional ``precedence``, and the PLY
    ``t_*`` / ``p_*`` rules as class attributes and methods.  Constructing
    an instance builds the lexer and the parser from those rules and keeps
    both on the instance as ``self.lexer`` and ``self.parser``.
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        """Build the lexer and parser for this instance.

        Keyword Args:
            debug: forwarded to ``lex.lex`` and ``yacc.yacc`` (default 0).
        """
        self.debug = kw.get('debug', 0)
        self.names = {}
        try:
            stem = os.path.split(os.path.splitext(__file__)[0])[1]
        except NameError:
            # __file__ is not defined (e.g. an interactive session).
            stem = "parser"
        # Debug file and table module are named per module and per class.
        modname = stem + "_" + self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        # Build the lexer and parser.  Keep the objects instead of relying
        # on the "most recently built" ones held in ply's module globals,
        # so that several parser instances can coexist.
        self.lexer = lex.lex(module=self, debug=self.debug)
        self.parser = yacc.yacc(module=self,
                                debug=self.debug,
                                debugfile=self.debugfile,
                                tabmodule=self.tabmodule)

    def _parse(self, s):
        """Parse *s* with this instance's own lexer and parser.

        Returns whatever the parser returns for the start rule.
        """
        return self.parser.parse(s, lexer=self.lexer)
424 
425 
426 class MSBuildExpressionEvaluator(_Parser):
427     r"""
428     >>> parser = MSBuildExpressionEvaluator()
429     >>> print(parser.evaluate("$(X)", {"X": "xxx"}))
430     xxx
431     >>> print(parser.evaluate("$(X.Y)", {"X": {"Y": "xxx"}}))
432     xxx
433     >>> parser.evaluate("$(X.Y.Length)", {"X": {"Y": "xxx"}})
434     3
435     >>> print(parser.evaluate("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
436     yz
437     """
438     def __init__(self, **kwargs):
439         _Parser.__init__(self, **kwargs)
440         self._props = {}
441         self._result = None
442 
443     # Lexing Rules
444     tokens = (
445         "SQUOTE", "DQUOTE",
446         "NUMBER", "MSBUILD_LITERAL",
447         "SQ_LITERAL", "DQ_LITERAL",
448         "REF_START",
449         "RPAREN", "LPAREN", "ARGSEP",
450         "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
451         "NAME", "PROP",
452         "CMP_EQ", "CMP_NE", "CMP_LT",
453         "CMP_LE", "CMP_GE", "CMP_GT",
454         "NEGATE",
455         "LOGICAL_AND", "LOGICAL_OR",
456         )
457 
458     t_SQUOTE = r"'"
459     t_DQUOTE = r'"'
460 
461     def t_NUMBER(self, t):
462         r"""[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?"""
463         if "." not in t.value and "e" not in t.value:
464             t.value = int(t.value)
465         else:
466             t.value = float(t.value)
467         return t
468 
469     t_MSBUILD_LITERAL = r"`[^`]*`"
470 
471     t_REF_START = r"[$%]\("
472     t_LPAREN = r'\('
473     t_RPAREN = r'\)'
474     t_LSQBRACKET = r'\['
475     t_RSQBRACKET = r'\]'
476     t_DBLCOLON = r'::'
477     t_ARGSEP = r'\s*,\s*'
478     t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
479     t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
480 
481     t_SQ_LITERAL = r"'[^']*'"
482     t_DQ_LITERAL = r'"[^"]*"'
483 
484     t_CMP_EQ = r"\s*==\s*"
485     t_CMP_NE = r"\s*!=\s*"
486     t_CMP_LT = r"\s*<\s*"
487     t_CMP_LE = r"\s*<=\s*"
488     t_CMP_GE = r"\s*>=\s*"
489     t_CMP_GT = r"\s*>\s*"
490 
491     t_NEGATE = r"\s*!\s*"
492 
493     t_LOGICAL_AND = r"(?<!\w)\s*[aA][nN][dD]\s*(?!\w)"
494     t_LOGICAL_OR = r"(?<!\w)\s*[oO][rR]\s*(?!\w)"
495 
496     # Parsing Rules
497     precedence = (
498         ('left', 'LOGICAL_AND'),
499         ('left', 'LOGICAL_OR'),
500         ('right', 'NEGATE'),
501     )
502 
503     def p_cmp_expression(self, p):
504         """
505         expression : expression CMP_EQ expression
506                    | expression CMP_NE expression
507                    | expression CMP_LT expression
508                    | expression CMP_LE expression
509                    | expression CMP_GE expression
510                    | expression CMP_GT expression
511         """
512         op = p[2].strip()
513         if op == "==":
514             p[0] = (p[1] == p[3])
515         elif op == "!=":
516             p[0] = (p[1] != p[3])
517         elif op == ">=":
518             p[0] = (p[1] >= p[3])
519         elif op == "<=":
520             p[0] = (p[1] <= p[3])
521         elif op == ">":
522             p[0] = (p[1] > p[3])
523         elif op == "<":
524             p[0] = (p[1] < p[3])
525         #print("cmp_expression / p[:]='{}'".format(p[:]), file=sys.stderr)
526 
527     def p_landor_expression(self, p):
528         """
529         expression : expression LOGICAL_AND expression %prec LOGICAL_AND
530                    | expression LOGICAL_OR expression %prec LOGICAL_OR
531         """
532         op = p[2].strip()
533         if op == "and":
534             p[0] = p[1] and p[3]
535         else:
536             p[0] = p[1] or p[3]
537         #print("cmp_expression / p[:]='{}'".format(p[:]), file=sys.stderr)
538 
539     def p_negate_expression(self, p):
540         """
541         expression : NEGATE expression %prec NEGATE
542         """
543         p[0] = not p[1]
544         #print("negate_expression / p[:]='{}'".format(p[:]), file=sys.stderr)
545 
546     def p_expression_group(self, p):
547         """
548         expression : LPAREN expression RPAREN
549         """
550         p[0] = p[2]
551         #print("expression_group / p[1:]='{}'".format(p[1:]), file=sys.stderr)
552 
553     def p_value_expression(self, p):
554         """
555         expression : value
556         """
557         p[0] = p[1]
558         self._result = p[0]
559         #print("value_expression / p[0]={}".format(p[0]), file=sys.stderr)
560 
561     def p_ref(self, p):
562         """
563         ref : REF_START refcontent RPAREN
564             | SQUOTE ref SQUOTE
565             | DQUOTE ref DQUOTE
566         """
567         p[0] = p[2]
568         #print("ref / p[0]='{}'".format(p[0]), file=sys.stderr)
569 
570     def p_refcontent(self, p):
571         """
572         refcontent : propcallchain
573                    | nsref propcallchain
574         """
575         #print("refcontent / p[:]='{}'".format(p[:]), file=sys.stderr)
576         #p[0] = "".join([k for k in p[:] if k])
577         if len(p) == 2:
578             p[0] = p[1]
579         # TODO: with nsref
580 
581     def p_propcallchain(self, p):
582         """
583         propcallchain : NAME
584               | NAME propcall
585               | propcallchain propcall
586         """
587         res = self._props
588         res = res[p[1]]  # rootkey
589         if isinstance(res, six.string_types):
590             res = String(res)
591         if len(p) == 3:
592             if not isinstance(p[2][0], (tuple,)):
593                 p[2] = (p[2],)
594             for name, args in p[2]:
595                 n = name[1:]
596                 if n in res:
597                     res = res[n]
598                     if isinstance(res, six.string_types):
599                         res = String(res)
600                 elif hasattr(res, n):
601                     res = getattr(res, n)
602                     if args is not None:  # callable
603                         res = res(*args)
604         p[0] = res
605         #print("propcallchain / p[:]='{}'".format(p[:]), file=sys.stderr)
606 
607     def p_nsref(self, p):
608         """
609         nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
610               | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
611         """
612         p[0] = "".join([s for s in p[1:] if s])
613         #print("nsref / p[:]='{}'".format(p[:]), file=sys.stderr)
614 
615     def p_prop(self, p):
616         """
617         prop : PROP
618              | prop prop
619         """
620         p[0] = "".join([s for s in p[1:] if s])
621         #print("prop / p[:]='{}'".format(p[:]), file=sys.stderr)
622         #p[0] = "".join([k for k in p[:] if k])
623 
624     def p_propcall(self, p):
625         """
626         propcall : PROP
627                  | PROP call
628                  | propcall propcall
629         """
630         if len(p) == 2:
631             p[0] = ((p[1], None))
632         else:
633             p[0] = tuple(p[1:])
634         #print("propcall / p[0]={}".format(p[0]), file=sys.stderr)
635         #p[0] = "".join([k for k in p[:] if k])
636 
637     def p_funccall(self, p):
638         """
639         funccall : NAME call
640         """
641         #print("funccall / p[:]='{}'".format(p[:]), file=sys.stderr)
642         #p[0] = "".join([k for k in p[:] if k])
643 
644     def p_call(self, p):
645         """
646         call : LPAREN RPAREN
647              | LPAREN args RPAREN
648         """
649         if len(p) == 4:
650             p[0] = p[2]
651         else:
652             p[0] = []
653         #print("call / p[0]={}".format(p[0]), file=sys.stderr)
654 
655     def p_args(self, p):
656         """
657         args : value
658              | value ARGSEP value
659              | args ARGSEP value
660         """
661         p[0] = p[1::2]
662         #print("args / p[0]={}".format(p[0]), file=sys.stderr)
663         #p[0] = "".join([k for k in p[:] if k])
664 
665     def p_value(self, p):
666         """
667         value : ref
668               | funccall
669               | literal
670               | number
671         """
672         p[0] = p[1]
673         #print("value / p[0]={}".format(p[0]), file=sys.stderr)
674         #p[0] = "".join([k for k in p[:] if k])
675 
676     def p_literal(self, p):
677         """
678         literal : MSBUILD_LITERAL
679                 | SQ_LITERAL
680                 | DQ_LITERAL
681         """
682         if p[1][0] == "`" and p[1][-1] == "`":
683             p[0] = p[1][1:-1]
684         else:
685             p[0] = eval(p[1])
686         #print("literal / p[0]={}".format(p[0]), file=sys.stderr)
687 
688     def p_number(self, p):
689         """
690         number : NUMBER
691         """
692         p[0] = p[1]
693         #print("number / p[0]={}".format(p[0]), file=sys.stderr)
694 
695     def p_error(self, p):
696         if p:
697             raise ExpressionSyntaxError(
698                 "Syntax error at '{}'".format(p.value))
699         else:
700             raise ExpressionSyntaxError(
701                 "Syntax error at EOF")
702 
703     def evaluate(self, s, props={}):
704         self._props = props
705         self._result = None
706         self._parse(s)
707         return self._result
708 
709 #
710 if __name__ == '__main__':
711     import doctest
712     doctest.testmod()
713     #_DEBUG = False
714     #parser = MSBuildExpressionEvaluator(debug=_DEBUG)
715 
716     # parse
717     #parser.evaluate("$(X.Y('zzz').Z(`ttt`).Length)")
718     #parser.evaluate("$(Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
719     #print("", file=sys.stderr)
720     #parser.evaluate("$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
721     #print("", file=sys.stderr)
722     #parser.evaluate("`xx` == $([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)")
723     #print("", file=sys.stderr)
724     #parser.evaluate('''`xx` == "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
725     #print("", file=sys.stderr)
726     #parser.evaluate('''"xx" != "$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Length)"''')
727     #print("", file=sys.stderr)
728     #parser.evaluate('''!$([System.Hoge]::Abcde.Xxx(3.2e-32, `xyz`).Yyy.Zzz($(X), '$(Y)').Contains("zzz"))''')
729     #print("", file=sys.stderr)
730     #parser.evaluate('''$(Abcde.Contains("zzz")) or !$(Abcde.Contains("yyy"))''')
731     #print("", file=sys.stderr)
732     #parser.evaluate('''HasTrailingSlash($(Abcde.Contains("zzz"))) or !Exists($(Abcde.Contains("yyy")))''')

2017-11-08 11:15 追記: うげぇ ply

一つ前の追記で終わらせようかとも思ったんだけれど、ハマりネタを一つ見つけたので一応:

  1 # -*- coding: utf-8 -*-
  2 #
  3 from __future__ import absolute_import
  4 from __future__ import unicode_literals
  5 from __future__ import print_function
  6 
  7 import os
  8 import sys
  9 import logging
 10 
 11 import six
 12 import ply.lex as lex
 13 import ply.yacc as yacc
 14 
 15 
 16 if sys.version[0] == '2':
 17     str = unicode
 18 
 19 
 20 _logger = logging.getLogger(__name__)
 21 
 22 
 23 # ----------------------------------------------------------------
 24 #
 25 # Non-Python Types Emulation
 26 #
 27 class String(str):
 28     r"""
 29     >>> s = String("abc")
 30     >>> isinstance(s, (str,))
 31     True
 32     >>> isinstance(s, six.string_types)
 33     True
 34     """
 35     def __new__(cls, value):
 36         obj = str.__new__(cls, value)
 37         return obj
 38 
 39     def __getattr__(self, name):
 40         # Unfortunatelly, Microsoft always ignores its case...
 41         from types import FunctionType
 42         for attr in [
 43             x for x, y in String.__dict__.items()]:
 44 
 45             if attr.lower() == name.lower():
 46                 return getattr(self, attr)
 47 
 48     #def Clone(self, *args, **kwargs):
 49     #    r"""
 50     #    Object Clone()
 51     #
 52     #    """
 53     #    raise NotImplementedError()
 54 
 55     def CompareTo(self, value):
 56         r"""
 57         int CompareTo(Object value)
 58         int CompareTo(string strB)
 59 
 60         >>> tab = [
 61         ...     # (lhs, rhs, expected)
 62         ...     #     same length
 63         ...     ("A", "A", 0),
 64         ...     ("A", "a", 1),
 65         ...     ("a", "A", -1),
 66         ...     ("AAA", "AAA", 0),
 67         ...     ("AAA", "aAA", 1),
 68         ...     ("aAA", "AAA", -1),
 69         ...     #
 70         ...     #     len(self) > len(rhs)
 71         ...     ("AAAa", "AAA", 1),
 72         ...     ("AAAa", "aAA", 1),
 73         ...     ("aAAa", "AAA", 1),
 74         ...     ("aAAaa", "AAA", 1),
 75         ...     #
 76         ...     #     len(self) < len(rhs)
 77         ...     ("AAA", "AAAA", -1),
 78         ...     ("AAA", "aAAA", -1),
 79         ...     ("aAA", "AAAA", -1),
 80         ...     ("aAA", "AAAAA", -1),
 81         ... ]
 82         >>> for lhs, rhs, expected in tab:
 83         ...     result = String(lhs).CompareTo(rhs)
 84         ...     assert result == expected, str((lhs, rhs, expected, result))
 85         ...     #from ._powershell import exec_single_command
 86         ...     #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
 87         ...     #assert expected == expected2, str((lhs, rhs, expected2, expected))
 88         ...     #assert result == expected2, str((lhs, rhs, expected2, result))
 89         >>> 
 90         """
 91         lendiff = len(self) - len(value)
 92         if lendiff == 0:
 93             # this logic is the same as python2's cmp
 94             return (value > self) - (value < self)
 95         return lendiff / abs(lendiff)
 96 
 97     def Contains(self, value):
 98         r"""
 99         bool Contains(string value)
100 
101         >>> String("AAA").Contains("AAA")
102         True
103         >>> String("   AAA   ").Contains("AAA")
104         True
105         >>> String("BBB").Contains("AAA")
106         False
107         >>> String("AAA").Contains("   AAA   ")
108         False
109         """
110         return value in self
111 
112     #def CopyTo(self, *args, **kwargs):
113     #    r"""
114     #    Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
115     #
116     #    """
117     #    raise NotImplementedError()
118 
119     def EndsWith(self, *args, **kwargs):
120         r"""
121         bool EndsWith(string value)
122         bool EndsWith(string value, StringComparison comparisonType)
123         bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
124 
125         """
126         raise NotImplementedError()
127 
128     def Equals(self, *args, **kwargs):
129         r"""
130         bool Equals(Object obj)
131         bool Equals(string value)
132         bool Equals(string value, StringComparison comparisonType)
133 
134         """
135         raise NotImplementedError()
136 
137     #def GetEnumerator(self, *args, **kwargs):
138     #    r"""
139     #    CharEnumerator GetEnumerator()
140     #
141     #    """
142     #    raise NotImplementedError()
143 
144     #def GetHashCode(self, *args, **kwargs):
145     #    r"""
146     #    int GetHashCode()
147     #
148     #    """
149     #    raise NotImplementedError()
150 
151     #def GetType(self, *args, **kwargs):
152     #    r"""
153     #    type GetType()
154     #
155     #    """
156     #    raise NotImplementedError()
157 
158     #def GetTypeCode(self, *args, **kwargs):
159     #    r"""
160     #    TypeCode GetTypeCode()
161     #
162     #    """
163     #    raise NotImplementedError()
164 
165     def IndexOf(self, *args, **kwargs):
166         r"""
167         int IndexOf(char value)
168         int IndexOf(char value, int startIndex)
169         int IndexOf(char value, int startIndex, int count)
170         int IndexOf(string value)
171         int IndexOf(string val...
172 
173         """
174         raise NotImplementedError()
175 
176     def IndexOfAny(self, *args, **kwargs):
177         r"""
178         int IndexOfAny(char[] anyOf)
179         int IndexOfAny(char[] anyOf, int startIndex)
180         int IndexOfAny(char[] anyOf, int startIndex, int count)
181 
182         """
183         raise NotImplementedError()
184 
185     def Insert(self, *args, **kwargs):
186         r"""
187         string Insert(int startIndex, string value)
188 
189         """
190         raise NotImplementedError()
191 
192     #def IsNormalized(self, *args, **kwargs):
193     #    r"""
194     #    bool IsNormalized()
195     #    bool IsNormalized(Text.NormalizationForm normalizationForm)
196     #
197     #    """
198     #    raise NotImplementedError()
199 
200     def LastIndexOf(self, *args, **kwargs):
201         r"""
202         int LastIndexOf(char value)
203         int LastIndexOf(char value, int startIndex)
204         int LastIndexOf(char value, int startIndex, int count)
205         int LastIndexOf(string value)
206         int La...
207 
208         """
209         raise NotImplementedError()
210 
211     def LastIndexOfAny(self, *args, **kwargs):
212         r"""
213         int LastIndexOfAny(char[] anyOf)
214         int LastIndexOfAny(char[] anyOf, int startIndex)
215         int LastIndexOfAny(char[] anyOf, int startIndex, int count)
216 
217         """
218         raise NotImplementedError()
219 
220     #def Normalize(self, *args, **kwargs):
221     #    r"""
222     #    string Normalize()
223     #    string Normalize(Text.NormalizationForm normalizationForm)
224     #
225     #    """
226     #    raise NotImplementedError()
227 
228     def PadLeft(self, *args, **kwargs):
229         r"""
230         string PadLeft(int totalWidth)
231         string PadLeft(int totalWidth, char paddingChar)
232 
233         """
234         raise NotImplementedError()
235 
236     def PadRight(self, *args, **kwargs):
237         r"""
238         string PadRight(int totalWidth)
239         string PadRight(int totalWidth, char paddingChar)
240 
241         """
242         raise NotImplementedError()
243 
244     def Remove(self, *args, **kwargs):
245         r"""
246         string Remove(int startIndex, int count)
247         string Remove(int startIndex)
248 
249         """
250         raise NotImplementedError()
251 
252     def Replace(self, oldValue, newValue):
253         r"""
254         string Replace(char oldChar, char newChar)
255         string Replace(string oldValue, string newValue)
256 
257         >>> print(String("AAA").Replace("AAA", "BBB"))
258         BBB
259         """
260         return String(self.replace(oldValue, newValue))
261 
262     def Split(self, *args, **kwargs):
263         r"""
264         string[] Split(Params char[] separator)
265         string[] Split(char[] separator, int count)
266         string[] Split(char[] separator, StringSplitOptions options)
267         string[] Spl...
268 
269         """
270         raise NotImplementedError()
271 
272     def StartsWith(self, *args, **kwargs):
273         r"""
274         bool StartsWith(string value)
275         bool StartsWith(string value, StringComparison comparisonType)
276         bool StartsWith(string value, bool ignoreCase, Globalizati...
277 
278         """
279         raise NotImplementedError()
280 
281     def Substring(self, startIndex, length=-1):
282         r"""
283         string Substring(int startIndex)
284         string Substring(int startIndex, int length)
285 
286         >>> print(String("abc").Substring(0))
287         abc
288         >>> print(String("abc").Substring(0, 1))
289         a
290         >>> print(String("abc").Substring(1))
291         bc
292         >>> print(String("abc").Substring(1, 1))
293         b
294         >>> print(String("abc").Substring(1, 2))
295         bc
296         >>> print(String("abc").SubString(1, 2))
297         bc
298         """
299         # TODO: "System.String.Substring" raise Exception if
300         #       length is larger than actual length.
301         if length >= 0:
302             return String(self[startIndex:][:length])
303         return String(self[startIndex:])
304 
305     #def ToCharArray(self, *args, **kwargs):
306     #    r"""
307     #    char[] ToCharArray()
308     #    char[] ToCharArray(int startIndex, int length)
309     #
310     #    """
311     #    raise NotImplementedError()
312 
313     def ToLower(self, *args, **kwargs):
314         r"""
315         string ToLower()
316         string ToLower(Globalization.CultureInfo culture)
317 
318         """
319         raise NotImplementedError()
320 
321     #def ToLowerInvariant(self, *args, **kwargs):
322     #    r"""
323     #    string ToLowerInvariant()
324     #
325     #    """
326     #    raise NotImplementedError()
327 
328     #def ToString(self, *args, **kwargs):
329     #    r"""
330     #    string ToString()
331     #    string ToString(IFormatProvider provider)
332     #
333     #    """
334     #    raise NotImplementedError()
335 
336     def ToUpper(self, *args, **kwargs):
337         r"""
338         string ToUpper()
339         string ToUpper(Globalization.CultureInfo culture)
340 
341         """
342         raise NotImplementedError()
343 
344     #def ToUpperInvariant(self, *args, **kwargs):
345     #    r"""
346     #    string ToUpperInvariant()
347     #
348     #    """
349     #    raise NotImplementedError()
350 
351     def Trim(self, *args, **kwargs):
352         r"""
353         string Trim(Params char[] trimChars)
354         string Trim()
355 
356         """
357         raise NotImplementedError()
358 
359     def TrimEnd(self, *args, **kwargs):
360         r"""
361         string TrimEnd(Params char[] trimChars)
362 
363         """
364         raise NotImplementedError()
365 
366     def TrimStart(self, *args, **kwargs):
367         r"""
368         string TrimStart(Params char[] trimChars)
369 
370         """
371         raise NotImplementedError()
372 
373     #def Chars(self, *args, **kwargs):
374     #    r"""
375     #    char Chars(int index) {get;}
376     #
377     #    """
378     #    raise NotImplementedError()
379 
380     @property
381     def Length(self):
382         r"""
383         Int32 Length {get;}
384 
385         >>> s = String("abc")
386         >>> s.Length
387         3
388         """
389         return len(self)
390 
391 
392 # ----------------------------------------------------------------
393 #
394 # Expression Evaluator
395 #
396 class ExpressionError(Exception):
397     pass
398 
399 class ExpressionInvalidCharacterError(ExpressionError):
400     pass
401 
402 class ExpressionSyntaxError(ExpressionError):
403     pass
404 
405 
class _Parser(object):
    """
    Base class for a lexer/parser that has the rules defined as methods.

    Subclasses provide the PLY token rules (``t_*``), grammar rules
    (``p_*``), ``tokens`` and ``precedence``.  The lexer and the parser
    built from them are kept on the instance (``self.lexer`` and
    ``self.parser``), so several parser instances (including nested ones)
    can coexist instead of sharing PLY's module-global "last built"
    lexer/parser.
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        """
        Build the lexer and the parser for this instance.

        Recognized keyword: ``debug`` (default 0), forwarded to PLY.
        """
        self.debug = kw.get('debug', 0)
        self.names = {}
        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[
                1] + "_" + self.__class__.__name__
        except NameError:
            # __file__ is not defined (e.g. interactive session).
            modname = "parser" + "_" + self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        # Keep the objects PLY returns; relying on the module-level
        # yacc.parse() would always use whichever parser was built last.
        self.lexer = lex.lex(module=self, debug=self.debug)
        self.parser = yacc.yacc(module=self,
                                debug=self.debug,
                                debugfile=self.debugfile,
                                tabmodule=self.tabmodule)

    def _parse(self, s):
        """Parse ``s`` with this instance's own parser and lexer."""
        return self.parser.parse(s, lexer=self.lexer)
434 
435 
436 #
437 # TODO: To improve the performance, we should provide the method
438 # which can reset all parser and lexer's states.
439 #
440 
441 
442 class MSBuildExpressionEvaluator(_Parser):
443     r"""
444     >>> parser = MSBuildExpressionEvaluator()
445     >>> parser.evaluate("1", {})
446     1
447     >>> parser = MSBuildExpressionEvaluator()
448     >>> print(parser.evaluate("$(X)", {"X": "xxx"}))
449     xxx
450     >>> parser = MSBuildExpressionEvaluator()
451     >>> print(parser.evaluate("$(X.Y)", {"X": {"Y": "xxx"}}))
452     xxx
453     >>> parser = MSBuildExpressionEvaluator()
454     >>> parser.evaluate("$(X.Y.Length)", {"X": {"Y": "xxx"}})
455     3
456     >>> parser = MSBuildExpressionEvaluator()
457     >>> print(parser.evaluate("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
458     yz
459     >>> parser = MSBuildExpressionEvaluator()
460     >>> print(parser.evaluate("'$(X.Y)'", {"X": {"Y": "xyz"}}))
461     xyz
462     >>> parser = MSBuildExpressionEvaluator()
463     >>> parser.evaluate("1", {})
464     1
465     >>> parser = MSBuildExpressionEvaluator()
466     >>> parser.evaluate("'xyz'", {}) == 'xyz'
467     True
468     >>> parser = MSBuildExpressionEvaluator()
469     >>> parser.evaluate("'xyz' != 'zzz'", {})
470     True
471     """
472     def __init__(self, **kwargs):
473         _Parser.__init__(self, **kwargs)
474         self._kwargs = dict(**kwargs)
475         self._props = {}
476         self._result = None
477 
478     # Lexing Rules
479     tokens = (
480         "SQUOTE", "DQUOTE",
481         "NUMBER", "MSBUILD_LITERAL",
482         "SQ_LITERAL", "DQ_LITERAL",
483         "REF_START",
484         "RPAREN", "LPAREN", "ARGSEP",
485         "LSQBRACKET", "RSQBRACKET", "DBLCOLON",
486         "NAME", "PROP",
487         "CMP_EQ", "CMP_NE", "CMP_LT",
488         "CMP_LE", "CMP_GE", "CMP_GT",
489         "NEGATE",
490         "LOGICAL_AND", "LOGICAL_OR",
491         )
492 
493     t_SQUOTE = r"'"
494     t_DQUOTE = r'"'
495 
496     def t_NUMBER(self, t):
497         r"""[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?"""
498         if "." not in t.value and "e" not in t.value:
499             t.value = int(t.value)
500         else:
501             t.value = float(t.value)
502         return t
503 
504     t_MSBUILD_LITERAL = r"`[^`]*`"
505 
506     t_REF_START = r"[$%]\("
507     t_LPAREN = r'\('
508     t_RPAREN = r'\)'
509     t_LSQBRACKET = r'\['
510     t_RSQBRACKET = r'\]'
511     t_DBLCOLON = r'::'
512     t_ARGSEP = r'\s*,\s*'
513     t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
514     t_PROP = r"\.[a-zA-Z_][a-zA-Z0-9_]*"
515 
516     t_SQ_LITERAL = r"'[^']*'"
517     t_DQ_LITERAL = r'"[^"]*"'
518 
519     t_CMP_EQ = r"\s*==\s*"
520     t_CMP_NE = r"\s*!=\s*"
521     t_CMP_LT = r"\s*<\s*"
522     t_CMP_LE = r"\s*<=\s*"
523     t_CMP_GE = r"\s*>=\s*"
524     t_CMP_GT = r"\s*>\s*"
525 
526     t_NEGATE = r"\s*!\s*"
527 
528     t_LOGICAL_AND = r"(?<!\w)\s*[aA][nN][dD]\s*(?!\w)"
529     t_LOGICAL_OR = r"(?<!\w)\s*[oO][rR]\s*(?!\w)"
530 
531     def t_error(self, t):
532         #t.lexer.skip(1)
533         raise ExpressionInvalidCharacterError(
534             "Illegal character {}".format(repr(t.value[0])))
535 
536     # Parsing Rules
537     precedence = (
538         ('left', 'LOGICAL_AND'),
539         ('left', 'LOGICAL_OR'),
540         ('right', 'NEGATE'),
541     )
542 
543     def p_cmp_expression(self, p):
544         """
545         expression : expression CMP_EQ expression
546                    | expression CMP_NE expression
547                    | expression CMP_LT expression
548                    | expression CMP_LE expression
549                    | expression CMP_GE expression
550                    | expression CMP_GT expression
551         """
552         op = p[2].strip()
553         if op == "==":
554             p[0] = (p[1] == p[3])
555         elif op == "!=":
556             p[0] = (p[1] != p[3])
557         elif op == ">=":
558             p[0] = (p[1] >= p[3])
559         elif op == "<=":
560             p[0] = (p[1] <= p[3])
561         elif op == ">":
562             p[0] = (p[1] > p[3])
563         elif op == "<":
564             p[0] = (p[1] < p[3])
565         self._result = p[0]
566         _logger.debug("result=%r", self._result)
567 
568     def p_landor_expression(self, p):
569         """
570         expression : expression LOGICAL_AND expression %prec LOGICAL_AND
571                    | expression LOGICAL_OR expression %prec LOGICAL_OR
572         """
573         op = p[2].strip()
574         if op == "and":
575             p[0] = p[1] and p[3]
576         else:
577             p[0] = p[1] or p[3]
578         self._result = p[0]
579 
580     def p_negate_expression(self, p):
581         """
582         expression : NEGATE expression %prec NEGATE
583         """
584         p[0] = not p[1]
585         self._result = p[0]
586 
587     def p_expression_group(self, p):
588         """
589         expression : LPAREN expression RPAREN
590         """
591         p[0] = p[2]
592         self._result = p[0]
593 
594     def p_value_expression(self, p):
595         """
596         expression : value
597         """
598         p[0] = p[1]
599         self._result = p[0]
600 
601     def p_ref(self, p):
602         """
603         ref : REF_START refcontent RPAREN
604             | SQUOTE ref SQUOTE
605             | DQUOTE ref DQUOTE
606         """
607         p[0] = p[2]
608         self._result = p[0]
609 
610     def p_refcontent(self, p):
611         """
612         refcontent : propcallchain
613                    | nsref propcallchain
614         """
615         if len(p) == 2:
616             p[0] = p[1]
617         self._result = p[0]
618         # TODO: with nsref
619 
620     def p_propcallchain(self, p):
621         """
622         propcallchain : NAME
623               | NAME propcall
624               | propcallchain propcall
625         """
626         res = self._props
627         res = res[p[1]]  # rootkey
628         if isinstance(res, six.string_types):
629             res = String(res)
630         if len(p) == 3:
631             if not isinstance(p[2][0], (tuple,)):
632                 p[2] = (p[2],)
633             for name, args in p[2]:
634                 n = name[1:]
635                 if n in res:
636                     res = res[n]
637                     if isinstance(res, six.string_types):
638                         res = String(res)
639                 elif hasattr(res, n):
640                     res = getattr(res, n)
641                     if args is not None:  # callable
642                         res = res(*args)
643         p[0] = res
644         self._result = p[0]
645 
646     def p_nsref(self, p):
647         """
648         nsref : LSQBRACKET NAME RSQBRACKET DBLCOLON
649               | LSQBRACKET NAME prop RSQBRACKET DBLCOLON
650         """
651         p[0] = "".join([s for s in p[1:] if s])
652         self._result = p[0]
653 
654     def p_prop(self, p):
655         """
656         prop : PROP
657              | prop prop
658         """
659         p[0] = "".join([s for s in p[1:] if s])
660         self._result = p[0]
661 
662     def p_propcall(self, p):
663         """
664         propcall : PROP
665                  | PROP call
666                  | propcall propcall
667         """
668         if len(p) == 2:
669             p[0] = ((p[1], None))
670         else:
671             p[0] = tuple(p[1:])
672         self._result = p[0]
673 
674     def p_funccall(self, p):
675         """
676         funccall : NAME call
677         """
678         self._result = p[0]
679 
680     def p_call(self, p):
681         """
682         call : LPAREN RPAREN
683              | LPAREN args RPAREN
684         """
685         if len(p) == 4:
686             p[0] = p[2]
687         else:
688             p[0] = []
689         self._result = p[0]
690 
691     def p_args(self, p):
692         """
693         args : value
694              | value ARGSEP value
695              | args ARGSEP value
696         """
697         p[0] = p[1::2]
698         self._result = p[0]
699 
700     def p_value(self, p):
701         """
702         value : ref
703               | funccall
704               | literal
705               | number
706         """
707         p[0] = p[1]
708         self._result = p[0]
709 
710     def p_literal(self, p):
711         """
712         literal : MSBUILD_LITERAL
713                 | SQ_LITERAL
714                 | DQ_LITERAL
715         """
716         if p[1][0] == "`" and p[1][-1] == "`":
717             p[0] = p[1][1:-1]
718         else:
719             p[0] = eval(p[1])
720         if "$(" in p[0] or "%(" in p[0]:  # contains macro reference
721             innerparser = MSBuildExpressionEvaluator(
722                 **self._kwargs)
723             p[0] = innerparser.evaluate(p[0], props=self._props)
724         self._result = p[0]
725 
726     def p_number(self, p):
727         """
728         number : NUMBER
729         """
730         p[0] = p[1]
731         self._result = p[0]
732 
733     def p_error(self, p):
734         if p:
735             raise ExpressionSyntaxError(
736                 "Syntax error at '{}'".format(p.value))
737         else:
738             raise ExpressionSyntaxError(
739                 "Syntax error at EOF")
740 
741     #
742     def evaluate(self, s, props={}):
743         self._props = props
744         self._result = None
745         self._parse(s)
746         return self._result
747 
748 #
749 if __name__ == '__main__':
750     logging.basicConfig(
751         stream=sys.stderr,
752         level=logging.DEBUG,
753         format='%(levelname)s:%(name)s:%(funcName)s:%(message)s')
754 
755     import doctest
756     doctest.testmod()

実装具合の進展はあるけど、着目して欲しいのはテストと追加した TODO。今回のケースではネストしたパーサが問題を起こしているのだけれど、より一般論で言えば、「パーサのインスタンスを都度作り直すことなくリセット出来る手段が欲しいが見つかってない」ということ。やっぱ毎度パーサを構築しなおしてるとちょっと動作がもっさりしてる気がすんのよね。

2017-11-09 10:15 追記: うむむ ply

現在絶好調ハマり中。ply、複雑で大きな文法一つを解析するにはいいんだけれど、原則として「グローバル」なパーサをいっときに一つしか持てないために困ったことになっている。

何が起こっているかというと。

前回追記時には気付いてなかったのだけれど、MSBuild の expression って、たとえばこんななのね:

1   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
2     <ConfigurationType>DynamicLibrary</ConfigurationType>
3     <PlatformToolset>v141</PlatformToolset>
4   </PropertyGroup>

上で書きかけた定義だともう「|」なんて「未知」。つまり「オレ的定義」的には「なんでもありなのかよ」にみえる自由記述。無論こんなことも出来る:

1 <MyName>c:/hoge/$([System.IO.Path]::GetFileName('%(Link.WindowsMetadataFile)')).hoge</MyName>

それと「Condition」内とそうでないもので処理を変える必要がある。「Condition」でないプロパティ内では「==」などの評価をしてはいけない。

ということで何が必要かといえば、

  1. デリファレンス(マクロ展開)と評価(「==」など)の分離
  2. そのための、デリファレンス対象部分の切り出し処理

ということになるんだけれど、まず 2. のために別途 lexer を用意せねばならんし、1. で parser を 2つ書くことになる。そしてこういう複数 lexer、parser は ply では非常にやりにくい。少なくとも「同時に複数種類の lexer/parser が動いてはならない」、つまりパーサAがパーサBを呼び出すみたいな構造が NG、グローバルなので。うーん、惜しい…。

これにマルチパーサ関連の issue はかつて上がっていたようなんだけれど、「大規模な API 変更が必要になると思われるが、もはや15年選手だから今から API を変えることは出来ない」みたいな理由で進展しなかったみたい。

ほかのものに乗り換え出来ないかちょっとずつ目配りしつつも、ひとまずは騙しながら進めてみようと思う。

2017-11-09 16:40 追記: NG と思ったけど違うかも、ply

複数 parser/lexer を共存出来ない、と思ったのは ply.yacc のコードで global、モジュール変数を使ってるのを見てしまったからだが、よくよく読んでみると、少なくとも「global」として宣言されてるヤツ、使ってなさそう。昔の名残かしら?

もちっと調べてみる。

2017-11-09 18:00 追記: NG なのは (特に) lex だ、ply

わかった…。複数インスタンスは NG、なのは、特に ply.lex モジュールの lex 関数。こいつが global に依存しまくるので、多重に構築出来ない。

一応「optimize=True」にすると「SyntaxError("Can't build lexer")」だけは回避出来るものの、そういうことじゃない。間違った振る舞いで動くのは迷惑。

あと yacc の方も、せっかく「module=self」渡してるのになにやらモジュール内をフラットにスキャンしてるような警告が出る。てことはやっぱり「たぶん yacc のほうも NG に違いない」。

つーわけで…アタシに残された選択肢は2つだけ。一つが「別のものに乗り換える」。もう一つが「一個の定義で頑張れる方法を考える」。(multiprocessing で分離する、はさすがにナシだろう、今の場合。)

2017-11-09 18:30 追記: うげぇ ply は確定で、ならば rply?

ドキュメント

をざっとみるに、やりたいことは出来そうだし、ply にあった多重インスタンス問題は API の見かけ上はなさそうにみえる。

乗り換えよう。多分丸一日程度で乗り換え出来るんじゃないかなという気がする。アクション部分の書き方は若干違うのでそこでは手間とは思うが、BNF 記述部分はまぁ同じ(OR の記述が少し違うだけ)だし。これでもダメだったらどうしようって心配も少しあるけれど、少なくとも「見かけ上は」よさげだし。

2017-11-09 19:20 追記: ひとまず rply のへろわるど

的な。

 1 # -*- coding: utf-8 -*-
 2 from rply import LexerGenerator
 3 from rply import ParserGenerator
 4 from rply.token import BaseBox
 5 
 6 #
 7 class Value(BaseBox):
 8     def __init__(self, value):
 9         self.value = value
10 
11     def eval(self):
12         return self.value
13 
14 # -------- 1st lexer/parser
15 _lg_deref = LexerGenerator()
16 _lg_deref.add('NUMBER', r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?')
17 _lexer_deref = _lg_deref.build()
18 
19 #
20 _pg_deref = ParserGenerator(
21     ['NUMBER', ],
22 
23     precedence=[
24         #('left', ['PLUS', 'MINUS']),
25         #('left', ['MUL', 'DIV'])
26     ]
27 )
28 
29 @_pg_deref.production('expression : NUMBER')
30 def expression_number(p):
31     return Value(float(p[0].getstr()))
32 
33 _parser_deref = _pg_deref.build()
34 print(_parser_deref.parse(_lexer_deref.lex('1.5e-3')).eval())
35 
36 # -------- 2nd lexer/parser
37 _lg_evaluator = LexerGenerator()
38 _lg_evaluator.add('NUMBER', r'\d+')
39 _lexer_evaluator = _lg_evaluator.build()
40 
41 #
42 _pg_evaluator = ParserGenerator(
43     ['NUMBER', ],
44 )
45 
46 @_pg_evaluator.production('expression : NUMBER')
47 def expression_number2(p):
48     return Value(int(p[0].getstr()))
49 
50 _parser_evaluator = _pg_evaluator.build()
51 print(_parser_evaluator.parse(_lexer_evaluator.lex('1')).eval())

複数 lexer/parser が問題なく動作する。

のだが…、これ、class をデコレート出来ない、と思う。今 class であることを前提とした処理を書いちゃってるんで、方針変えなければいけない。まぁ今のワタシの処理のインスタンス内メンバーは、別にモジュールグローバルで困らないヤツだからね、いいっちゃぁいい。(何かつぅと、「Exists」やら「HasTrailingSlash」などの外部関数エミュレーション定義を差し替えられるようにしてるの。)

うーん、やはりこれでいってみるか。

2017-11-10 01:20 追記: これだ、rply で決まり

まだ未完成だけれど、「未完成だった ply 版」の完全移植があっという間に出来た。既に長大になっておるが、本物の完成品はこれの3~5倍程度のボリュームになる予定なので、まぁ今時点くらいが「誰かの参考のために」載せる限界くらいだろうね。

  1 # -*- coding: utf-8 -*-
  2 #
  3 from __future__ import absolute_import
  4 from __future__ import unicode_literals
  5 from __future__ import print_function
  6 
  7 import os
  8 import sys
  9 import re
 10 import logging
 11 import warnings
 12 
 13 import six
 14 from rply import LexerGenerator
 15 from rply import ParserGenerator
 16 #from rply.token import BaseBox  # it seems basically for RPython
 17 
 18 
 19 if sys.version[0] == '2':
 20     str = unicode
 21 else:
 22     from functools import reduce
 23 
 24 
 25 _logger = logging.getLogger(__name__)
 26 
 27 
 28 # ----------------------------------------------------------------
 29 #
 30 # Internal Helpers of this module
 31 #
 32 def _simple_tokenize(s, tokdef):
 33     """
 34     Very simple lexer that is used for lexical analysis
 35     where ply usage is overkill.
 36 
 37     tokdef must be like this:
 38     >>> tokdef = {
 39     ...     "root": [  # state id ("root" is mandatory state.)
 40     ...         (
 41     ...             re.compile(r"'"),  # regexp
 42     ...             "SQ",  # token id
 43     ...             "sq"  # transition state
 44     ...             ),
 45     ...         # ...
 46     ...         ],
 47     ...     "sq" : [
 48     ...         (re.compile(r"."), "", "#pop"),
 49     ...         ]
 50     ... }
 51 
 52     if you were defined "tokdef", now you can use this:
 53     >>> for tokid, s in _simple_tokenize("'a", tokdef):
 54     ...     # do something
 55     ...     pass
 56     """
 57     states = ["root"]
 58     state = states[0]
 59     while s:
 60         for rgx, tok, trans in tokdef[state]:
 61             if hasattr(rgx, "match"):
 62                 m = rgx.match(s)
 63             else:
 64                 m = re.match(rgx, s)
 65             if m:
 66                 yield tok, m.group(0)
 67                 if trans:
 68                     if trans == "#pop":
 69                         states.pop(-1)
 70                         state = states[-1]
 71                     else:
 72                         states.append(trans)
 73                         state = trans
 74                 s = s[m.span()[1]:]
 75                 break
 76 
 77 
 78 # ----------------------------------------------------------------
 79 #
 80 # Non-Python Types and Functions Emulation
 81 #
 82 _GLOBAL_FUNCS = {
 83     "hastrailingslash": lambda *args: args[0][-1] in ("/", "\\"),
 84     "exists": lambda *args: os.path.exists(args[0]),
 85 
 86     # TODO: more?
 87     "[system.io.path]::combine": lambda *args: "/".join(args),
 88 
 89     #
 90     "[msbuild]::makerelative": lambda *args: os.path.relpath(args[1], args[0]),
 91     "[msbuild]::add": lambda *args: sum(args),
 92     "[msbuild]::multiply": lambda *args: reduce(lambda x, y: x * y, args),
 93     "[msbuild]::bitwiseor": lambda *args: reduce(lambda x, y: x | y, args),
 94     "[msbuild]::bitwiseand": lambda *args: reduce(lambda x, y: x & y, args),
 95     # TODO: https://msdn.microsoft.com/en-us/library/dd633440.aspx
 96     #     Subtract
 97     #     Divide
 98     #     Modulo
 99     #     Escape
100     #     Unescape
101     #     BitwiseXor
102     #     BitwiseNot
103     #     DoesTaskHostExist # maybe we can't implement
104     #     GetDirectoryNameOfFileAbove
105     #     GetRegistryValue
106     #     GetRegistryValueFromView
107     #     ValueOrDefault
108     }
109 
110 
111 class _DotNetLoader(object):
112     def __init__(self):
113         self._clr = None
114         self._loaded_assemblies = {}  # value: success or not
115 
116         # value: imported assembly (as python module)
117         self._imported_assemblies = {}
118 
119     def _load_assembly(self, asmn):
120         if asmn not in self._loaded_assemblies:
121             try:
122                 self._clr.AddReference(asmn)
123                 self._loaded_assemblies[asmn] = True
124             except Exception as e:
125                 # actually it should be "System.IO.FileNotFoundException",
126                 # but we can't assume it when no assemblies are loaded.
127                 if "'System.IO.FileNotFoundException'" not in str(type(e)):
128                     raise
129                 # this nsref is not assembly (maybe module fullname)
130                 self._loaded_assemblies[asmn] = False
131         return self._loaded_assemblies[asmn]
132 
133     def _import_assembly(self, asmn):
134         if asmn not in self._imported_assemblies:
135             try:
136                 exec("import " + asmn)
137             except ImportError:
138                 raise  # what should we do?
139             self._imported_assemblies[asmn] = eval(asmn)
140         return self._imported_assemblies[asmn]
141 
142     def get_function(self, name_with_ns):
143         # name_with_ns: like "[System.IO.Path]::Add"
144         if self._clr is None:
145             import clr  # Python for .NET (pythonnet)
146             self._clr = clr
147         ns, name = name_with_ns.split("]::")
148         ns_spl = ns[1:].split(".")  # like "System", "IO", "Path"
149         asmn, modn = None, None
150         for i in range(len(ns_spl), 1, -1):
151             asmn = ".".join(ns_spl[:i - 1])
152             modn = ".".join(ns_spl[i - 1:])
153             if self._load_assembly(asmn):
154                 break
155         asm = self._import_assembly(asmn)
156         return getattr(getattr(asm, modn), name)
157 
158 
159 _dotnetloader = _DotNetLoader()
160 
161 
class String(str):
    r"""
    Partial emulation of .NET ``System.String`` on top of Python's ``str``.

    Member names are looked up case-insensitively (see ``__getattr__``).
    Members that are listed but not emulated yet raise NotImplementedError.

    >>> s = String("abc")
    >>> isinstance(s, (str,))
    True
    >>> isinstance(s, six.string_types)
    True
    >>> s.length
    3
    >>> hasattr(s, "NoSuchMember")
    False
    """
    def __new__(cls, value):
        obj = str.__new__(cls, value)
        return obj

    def __getattr__(self, name):
        # Only reached when the normal lookup failed.
        # Unfortunately, Microsoft always ignores its case...
        lowered = name.lower()
        for attr in String.__dict__:
            if attr != name and attr.lower() == lowered:
                return getattr(self, attr)
        # Unknown members must raise: silently returning None made
        # hasattr() answer True for every name.
        raise AttributeError(
            "'String' object has no attribute {!r}".format(name))

    # Members of System.String that are not emulated (yet):
    #   Object Clone()
    #   Void CopyTo(int sourceIndex, char[] destination,
    #               int destinationIndex, int count)
    #   CharEnumerator GetEnumerator()
    #   int GetHashCode()
    #   type GetType()
    #   TypeCode GetTypeCode()
    #   bool IsNormalized()
    #   bool IsNormalized(Text.NormalizationForm normalizationForm)
    #   string Normalize()
    #   string Normalize(Text.NormalizationForm normalizationForm)
    #   char[] ToCharArray()
    #   char[] ToCharArray(int startIndex, int length)
    #   string ToLowerInvariant()
    #   string ToString()
    #   string ToString(IFormatProvider provider)
    #   string ToUpperInvariant()
    #   char Chars(int index) {get;}

    def CompareTo(self, value):
        r"""
        int CompareTo(Object value)
        int CompareTo(string strB)

        Returns -1, 0 or 1 (always an int).

        >>> tab = [
        ...     # (lhs, rhs, expected)
        ...     #     same length
        ...     ("A", "A", 0),
        ...     ("A", "a", 1),
        ...     ("a", "A", -1),
        ...     ("AAA", "AAA", 0),
        ...     ("AAA", "aAA", 1),
        ...     ("aAA", "AAA", -1),
        ...     #
        ...     #     len(self) > len(rhs)
        ...     ("AAAa", "AAA", 1),
        ...     ("AAAa", "aAA", 1),
        ...     ("aAAa", "AAA", 1),
        ...     ("aAAaa", "AAA", 1),
        ...     #
        ...     #     len(self) < len(rhs)
        ...     ("AAA", "AAAA", -1),
        ...     ("AAA", "aAAA", -1),
        ...     ("aAA", "AAAA", -1),
        ...     ("aAA", "AAAAA", -1),
        ... ]
        >>> for lhs, rhs, expected in tab:
        ...     result = String(lhs).CompareTo(rhs)
        ...     assert result == expected, str((lhs, rhs, expected, result))
        ...     #from ._powershell import exec_single_command
        ...     #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
        ...     #assert expected == expected2, str((lhs, rhs, expected2, expected))
        ...     #assert result == expected2, str((lhs, rhs, expected2, result))
        """
        # NOTE(review): the length is compared first, the text only for
        # equal lengths.  That satisfies the table above; whether it
        # matches .NET for inputs such as "b" vs "aa" is unverified.
        lendiff = len(self) - len(value)
        if lendiff == 0:
            # Same shape as python2's cmp, with the operands swapped.
            return (value > self) - (value < self)
        # Plain "/" would give a float (1.0 / -1.0) on Python 3.
        return 1 if lendiff > 0 else -1

    def Contains(self, value):
        r"""
        bool Contains(string value)

        >>> String("AAA").Contains("AAA")
        True
        >>> String("   AAA   ").Contains("AAA")
        True
        >>> String("BBB").Contains("AAA")
        False
        >>> String("AAA").Contains("   AAA   ")
        False
        """
        return value in self

    def EndsWith(self, *args, **kwargs):
        r"""
        bool EndsWith(string value)
        bool EndsWith(string value, StringComparison comparisonType)
        bool EndsWith(string value, bool ignoreCase, Globalization.Cul...

        """
        raise NotImplementedError()

    def Equals(self, *args, **kwargs):
        r"""
        bool Equals(Object obj)
        bool Equals(string value)
        bool Equals(string value, StringComparison comparisonType)

        """
        raise NotImplementedError()

    def IndexOf(self, *args, **kwargs):
        r"""
        int IndexOf(char value)
        int IndexOf(char value, int startIndex)
        int IndexOf(char value, int startIndex, int count)
        int IndexOf(string value)
        int IndexOf(string val...

        """
        raise NotImplementedError()

    def IndexOfAny(self, *args, **kwargs):
        r"""
        int IndexOfAny(char[] anyOf)
        int IndexOfAny(char[] anyOf, int startIndex)
        int IndexOfAny(char[] anyOf, int startIndex, int count)

        """
        raise NotImplementedError()

    def Insert(self, *args, **kwargs):
        r"""
        string Insert(int startIndex, string value)

        """
        raise NotImplementedError()

    def LastIndexOf(self, *args, **kwargs):
        r"""
        int LastIndexOf(char value)
        int LastIndexOf(char value, int startIndex)
        int LastIndexOf(char value, int startIndex, int count)
        int LastIndexOf(string value)
        int La...

        """
        raise NotImplementedError()

    def LastIndexOfAny(self, *args, **kwargs):
        r"""
        int LastIndexOfAny(char[] anyOf)
        int LastIndexOfAny(char[] anyOf, int startIndex)
        int LastIndexOfAny(char[] anyOf, int startIndex, int count)

        """
        raise NotImplementedError()

    def PadLeft(self, *args, **kwargs):
        r"""
        string PadLeft(int totalWidth)
        string PadLeft(int totalWidth, char paddingChar)

        """
        raise NotImplementedError()

    def PadRight(self, *args, **kwargs):
        r"""
        string PadRight(int totalWidth)
        string PadRight(int totalWidth, char paddingChar)

        """
        raise NotImplementedError()

    def Remove(self, *args, **kwargs):
        r"""
        string Remove(int startIndex, int count)
        string Remove(int startIndex)

        """
        raise NotImplementedError()

    def Replace(self, oldValue, newValue):
        r"""
        string Replace(char oldChar, char newChar)
        string Replace(string oldValue, string newValue)

        >>> print(String("AAA").Replace("AAA", "BBB"))
        BBB
        """
        return String(self.replace(oldValue, newValue))

    def Split(self, *args, **kwargs):
        r"""
        string[] Split(Params char[] separator)
        string[] Split(char[] separator, int count)
        string[] Split(char[] separator, StringSplitOptions options)
        string[] Spl...

        """
        raise NotImplementedError()

    def StartsWith(self, *args, **kwargs):
        r"""
        bool StartsWith(string value)
        bool StartsWith(string value, StringComparison comparisonType)
        bool StartsWith(string value, bool ignoreCase, Globalizati...

        """
        raise NotImplementedError()

    def Substring(self, startIndex, length=-1):
        r"""
        string Substring(int startIndex)
        string Substring(int startIndex, int length)

        >>> print(String("abc").Substring(0))
        abc
        >>> print(String("abc").Substring(0, 1))
        a
        >>> print(String("abc").Substring(1))
        bc
        >>> print(String("abc").Substring(1, 1))
        b
        >>> print(String("abc").Substring(1, 2))
        bc
        >>> print(String("abc").SubString(1, 2))
        bc
        """
        # TODO: "System.String.Substring" raise Exception if
        #       length is larger than actual length.
        if length >= 0:
            return String(self[startIndex:][:length])
        return String(self[startIndex:])

    def ToLower(self, *args, **kwargs):
        r"""
        string ToLower()
        string ToLower(Globalization.CultureInfo culture)

        """
        raise NotImplementedError()

    def ToUpper(self, *args, **kwargs):
        r"""
        string ToUpper()
        string ToUpper(Globalization.CultureInfo culture)

        """
        raise NotImplementedError()

    def Trim(self, *args, **kwargs):
        r"""
        string Trim(Params char[] trimChars)
        string Trim()

        """
        raise NotImplementedError()

    def TrimEnd(self, *args, **kwargs):
        r"""
        string TrimEnd(Params char[] trimChars)

        """
        raise NotImplementedError()

    def TrimStart(self, *args, **kwargs):
        r"""
        string TrimStart(Params char[] trimChars)

        """
        raise NotImplementedError()

    @property
    def Length(self):
        r"""
        Int32 Length {get;}

        >>> s = String("abc")
        >>> s.Length
        3
        """
        return len(self)
525 
526 
527 # ----------------------------------------------------------------
528 #
529 # Public module configurations
530 #
531 
532 
533 # ----------------------------------------------------------------
534 #
535 # Common definitions, etc. for our two different evaluators.
536 #
537 
538 #   ------------------
539 #   utilities
540 def _tostr(p):
541     if hasattr(p, "getstr"):
542         return p.getstr()
543     return str(p)
544 
545 
546 #   ------------------
547 #   wrappers for token
548 
549 
550 
551 #   ------------------
552 #   common regexp
553 _t_NUMBER = ("NUMBER", r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?')
554 _t_LPAREN = ("LPAREN", r'\s*\(\s*')
555 _t_RPAREN = ("RPAREN", r'\s*\)\s*')
556 _t_NAME = ("NAME", r"[a-zA-Z_][a-zA-Z0-9_]*")
557 _t_REF_START = ("REF_START", r"[$%]\(")
558 
559 # ----------------------------------------------------------------
560 #
561 # The evaluator for the purpose of dereferencing the macros.
562 # (Basically, this is not public.)
563 #
564 
565 #   ------------------
566 #   
567 class PropCallChain(object):
568 
569     def __init__(self, data, kind):
570         self.data = data
571         self._kind = kind
572 
573     def eval(self, props={}, ctx="", functions={}):
574         def _flatten_data(data, flattened):
575             for d in data:
576                 if d is not None and isinstance(d[0], (tuple,)):
577                     _flatten_data(d, flattened)
578                 else:
579                     flattened.append(d)
580 
581         if self._kind == "p":
582             flattened = []
583             _flatten_data(self.data, flattened)
584             return self._eval_propcallchain(flattened, props, ctx)
585         elif self._kind == "f":
586             return self._eval_funccall(self.data, functions)
587 
588     def _eval_propcallchain(self, clist, props, ctx):
589         res = props
590         def _icasesearch(d, sk, nf2blank=True):
591             targk = sk.lower()
592             for k in d.keys():
593                 if k.lower() == targk:
594                     return d[k]
595             if nf2blank:
596                 # MSBuild deals with N/F as blank.
597                 return ""
598 
599         # resolve root key
600         t = _icasesearch(res, clist[0], nf2blank=False)
601         if t is not None:
602             # if input="$(X.Y)" and props has "X",
603             # now res=props["X"].
604             res = t
605         elif ctx:
606             t = _icasesearch(res, ctx, nf2blank=False)
607             if t is not None:
608                 # if input="$(Y)", props has "X",
609                 # and ctx is "X",
610                 # now res=props["X"].
611                 res = t
612                 # search "Y" in props["X"]
613                 res = _icasesearch(res, clist[0])
614         if t is None:
615             # MSBuild deals with N/F as blank.
616             res = ""
617 
618         # nested props
619         if isinstance(res, six.string_types):
620             res = String(res)
621         if len(clist) > 1:
622             for name, args in clist[1:]:
623                 n = name[1:]
624                 if hasattr(res, n):
625                     res = getattr(res, n)
626                     if args is not None:  # callable
627                         res = res(*args)
628                 else:
629                     res = _icasesearch(res, n)
630                     if isinstance(res, six.string_types):
631                         res = String(res)
632         return res
633 
634     def _eval_funccall(self, clist, functions):
635         if len(clist) == 2:
636             funcname = clist[0]
637             funcargs = clist[1]
638         else:
639             funcname = "".join(clist[:2])
640             funcargs = clist[2]
641         fn = functions.get(funcname.lower())
642         if not fn:
643             fn = _dotnetloader.get_function(funcname)
644         #_logger.debug("%s, %r, %r", funcname, fn, funcargs)
645         if fn:
646             return fn(*funcargs)
647 
648 #   ------------------
649 #   lexer
650 _lg_deref = LexerGenerator()
651 _lg_deref.add(*_t_NUMBER)
652 _lg_deref.add("MSBUILD_LITERAL", r"`[^`]*`")
653 _lg_deref.add(*_t_REF_START)
654 _lg_deref.add(*_t_LPAREN)
655 _lg_deref.add(*_t_RPAREN)
656 _lg_deref.add("LSQBRACKET", r'\[')
657 _lg_deref.add("NS_END", r'\]::')
658 _lg_deref.add("ARGSEP", r'\s*,\s*')
659 _lg_deref.add(*_t_NAME)
660 _lg_deref.add("PROP", r"\.[a-zA-Z_][a-zA-Z0-9_]*")
661 _lexer_deref = _lg_deref.build()
662 
663 #   ------------------
664 #   parser
665 _pg_deref = ParserGenerator(
666     [
667         "NUMBER", "MSBUILD_LITERAL",
668         "REF_START",
669         "RPAREN", "LPAREN", "ARGSEP",
670         "LSQBRACKET", "NS_END",
671         "NAME", "PROP",
672         ],
673 )
674 
675 
676 @_pg_deref.production("body : ref")
677 @_pg_deref.production("body : value")
678 def _deref_p_body(p):
679     return p[0]
680 
681 @_pg_deref.production("ref : REF_START refcontent RPAREN")
682 def _deref_p_ref(p):
683     return p[1]
684 
685 @_pg_deref.production("refcontent : propcallchain")
686 @_pg_deref.production("refcontent : funccall")
687 def _deref_p_refcontent(p):
688     return p[0]
689 
690 @_pg_deref.production("propcallchain : NAME")
691 def _deref_p_propcallchain_1(p):
692     return PropCallChain((_tostr(p[0]),), "p")
693 
694 @_pg_deref.production("propcallchain : NAME propcall")
695 def _deref_p_propcallchain_2(p):
696     return PropCallChain((_tostr(p[0]), p[1]), "p")
697 
698 @_pg_deref.production("propcallchain : propcallchain propcall")
699 def _deref_p_propcallchain_3(p):
700     return PropCallChain(p, "p")
701 
702 @_pg_deref.production("prop : PROP")
703 @_pg_deref.production("prop : prop prop")
704 def _deref_p_prop(p):
705     return "".join([_tostr(s) for s in p])
706 
707 @_pg_deref.production("propcall : PROP")
708 def _deref_p_propcall_1(p):
709     return (_tostr(p[0]), None)
710 
711 @_pg_deref.production("propcall : PROP call")
712 def _deref_p_propcall_2(p):
713     return (_tostr(p[0]), p[1])
714 
715 @_pg_deref.production("propcall : propcall propcall")
716 def _deref_p_propcall_3(p):
717     return tuple(p)
718 
719 @_pg_deref.production("funccall : name call")
720 def _deref_p_funccall(p):
721     return PropCallChain(p, "f")
722 
723 @_pg_deref.production("name : NAME")
724 @_pg_deref.production("name : nsref NAME")
725 def _deref_p_name(p):
726     if len(p) == 2:
727         return "".join([p[0], _tostr(p[1])])
728     else:
729         return p[0]
730 
731 @_pg_deref.production("nsref : LSQBRACKET NAME NS_END")
732 @_pg_deref.production("nsref : LSQBRACKET NAME prop NS_END")
733 def _deref_p_nsref(p):
734     return "".join([_tostr(s) for s in p if s])
735 
736 @_pg_deref.production("call : LPAREN RPAREN")
737 @_pg_deref.production("call : LPAREN args RPAREN")
738 def _deref_p_call(p):
739     if len(p) == 3:
740         return p[1]
741     else:
742         return []
743 
@_pg_deref.production("args : value")
@_pg_deref.production("args : value ARGSEP value")
@_pg_deref.production("args : args ARGSEP value")
def _deref_p_args(p):
    """
    Collect call arguments into one flat list.

    p is [value], [value, ARGSEP, value] or [args, ARGSEP, value], where
    `args` is the list returned by an earlier reduction of this rule.
    """
    first = p[0]
    # Extend an already collected list instead of wrapping it: slicing
    # p[0::2] turned "a, b, c" into [[a, b], c].  None of the `value`
    # productions returns a list, so the isinstance check identifies `args`.
    flat = list(first) if isinstance(first, list) else [first]
    if len(p) == 3:
        # p[1] is the separator token, p[2] the newly parsed value.
        flat.append(p[2])
    return flat
749 
750 @_pg_deref.production("value : ref")
751 @_pg_deref.production("value : funccall")
752 @_pg_deref.production("value : literal")
753 @_pg_deref.production("value : number")
754 def _deref_p_value(p):
755     return p[0]
756 
757 @_pg_deref.production("literal : MSBUILD_LITERAL")
758 def _deref_p_literal(p):
759     return _tostr(p[0])[1:-1]
760 
@_pg_deref.production("number : NUMBER")
def _deref_p_number(p):
    """
    Convert a NUMBER token to float (if it has a fraction or an exponent)
    or to int.
    """
    s = _tostr(p[0])
    # The NUMBER pattern accepts both "e" and "E" as exponent marker, so
    # the check has to ignore case; int("1E5") would raise ValueError.
    if "." in s or "e" in s.lower():
        return float(s)
    return int(s)
768 
769 with warnings.catch_warnings():
770     # "shift/reduce conflicts" warnings are useful for me
771     # as developper, but this warnings is meaningless for
772     # all users of our library.
773     warnings.simplefilter("ignore")
774 
775     _parser_deref = _pg_deref.build()
776 
777 
778 class _DerefTest(object):
779     r"""
780     # this class is not for use.
781     # just for holding _parser_deref's doctest.
782 
783     >>> lexer = _lexer_deref
784     >>> parser = _parser_deref
785     >>> parser.parse(lexer.lex("1.2e-1"))
786     0.12
787     >>> print(parser.parse(lexer.lex("`aaa`")))
788     aaa
789     >>> print(parser.parse(lexer.lex("$(X)")).eval(props={"X": "y"}))
790     y
791     >>> parser.parse(lexer.lex("$(X.Length)")).eval(props={"X": "yz"})
792     2
793     """
794 
795 
796 # ----------------------------------------------------------------
797 #
798 # The evaluator for the purpose of evaluating the condition.
799 # (Basically, this is not public.)
800 #
801 
802 #   ------------------
803 #   lexer
804 _lg_evaluator = LexerGenerator()
805 _lg_evaluator.add(*_t_NUMBER)
806 _lexer_evaluator = _lg_evaluator.build()
807 
808 #   ------------------
809 #   parser
810 _pg_evaluator = ParserGenerator(
811     [
812         'NUMBER',
813         ],
814     precedence=[
815         #('left', ['AND', 'OR',]),
816         #('right', ['NEGATE',])
817     ]
818 )
819 
@_pg_evaluator.production("number : NUMBER")
def _evaluator_p_number(p):
    """
    Convert a NUMBER token to float (if it has a fraction or an exponent)
    or to int.
    """
    s = _tostr(p[0])
    # The NUMBER pattern accepts both "e" and "E" as exponent marker, so
    # the check has to ignore case; int("1E5") would raise ValueError.
    if "." in s or "e" in s.lower():
        return float(s)
    return int(s)
827 
828 with warnings.catch_warnings():
829     # "shift/reduce conflicts" warnings are useful for me
830     # as developper, but this warnings is meaningless for
831     # all users of our library.
832     warnings.simplefilter("ignore")
833 
834     _parser_evaluator = _pg_evaluator.build()
835 
836 
837 class _EvaluatorTest(object):
838     r"""
839     # this class is not for use.
840     # just for holding _parser_evaluator's doctest.
841 
842     >>> lexer = _lexer_evaluator
843     >>> parser = _parser_evaluator
844     """
845 
846 
847 # ----------------------------------------------------------------
848 #
849 # Public APIs
850 #
851 
852 #
853 # The evaluator for the purpose of dereferencing the macros.
854 #
class _MSBuildExpressionEvaluator(object):
    r"""
    Expands ``$(...)`` / ``%(...)`` references embedded in free text.

    The surrounding text follows no grammar, so each reference is first
    cut out of the string and only that piece is handed to the rply
    parser.  The result is pasted back and the scan is repeated until no
    reference is left.

    >>> parser = _MSBuildExpressionEvaluator()
    >>> print(parser.deref("$(X))", {"X": "xxx"}))
    xxx)
    >>> print(parser.deref("'$(X)'", {"X": "xxx"}))
    'xxx'
    >>> print(parser.deref("'$(X)|$(Y)'", {"X": "xxx", "Y": "yyy"}))
    'xxx|yyy'
    >>> print(parser.deref('"$(X)|$(Y)"', {"X": "xxx", "Y": "yyy"}))
    "xxx|yyy"
    >>> print(parser.deref("'$(X)abc | def$(Y)'", {"X": "xxx", "Y": "yyy"}))
    'xxxabc | defyyy'
    >>> print(parser.deref("$(X)", {"X": "xxx"}))
    xxx
    >>> print(parser.deref("$(X.Y)", {"X": {"Y": "xxx"}}))
    xxx
    >>> print(parser.deref("$(X.Y.Length)", {"X": {"Y": "xxx"}}))
    3
    >>> print(parser.deref("$(X.Y.Length)", {"x": {"Y": "xxx"}}))
    3
    >>> print(parser.deref("$(X.Y.Length)", {"X": {"y": "xxx"}}))
    3
    >>> print(parser.deref("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
    yz
    >>> print(parser.deref("$(X.Y.substring(1))", {"X": {"Y": "xyz"}}))
    yz
    >>> print(parser.deref("$(X.Y.substring ( 1 ) )", {"X": {"Y": "xyz"}}))
    yz
    >>> print(parser.deref("$([msbuild]::Add(1, 2))"))
    3
    >>> print(parser.deref("$([msbuild]::Add(1, $([msbuild]::Multiply(2, 3))))"))
    7
    >>> print(parser.deref("$([msbuild]::BitwiseOr(2, 4))"))
    6
    >>> print(parser.deref("$([msbuild]::BitwiseAnd(1, 3))"))
    1
    >>> print(parser.deref("$([System.IO.Path]::Combine(`a`, `b`))"))
    a/b
    >>> print(parser.deref("$([System.IO.Path]::GetFileName(`aaa/bbb.c`))"))
    bbb.c
    >>> r = parser.deref("$([MSBuild]::MakeRelative(`c:/users`, `c:/users/username`))")
    >>> print(r.replace("\\", "/"))
    username
    >>> r = parser.deref("$([MSBuild]::MakeRelative(`c:/users/username`, `c:/users`))")
    >>> print(r.replace("\\", "/"))
    ..
    """

    # Token table for _simple_tokenize, used only to find the RPAR that
    # closes a REF_START.  Parentheses inside a `...` literal must not be
    # counted, hence the separate "str" state.
    _PAREN_TOKDEF = {
        "root": [
            (r"`", "STR_S", "str"),
            (r"\(", "LPAR", ""),
            (r"\)", "RPAR", ""),
            (r"[^`()]+", "OTH", ""),
            ],
        "str": [
            (r"`", "STR_E", "#pop"),
            (r"[^`]+", "STR", ""),
            ],
        }

    def __init__(
        self,
        functions=None,
        # TODO: functions_merge: merge_pu, merge_up, dont_p (but notimpl)
        functions_merge="merge_pu",
        **kwargs):
        """
        :param functions: extra functions (name -> callable) layered over
            ``_GLOBAL_FUNCS``; ``None`` or an empty mapping adds nothing.
        :param functions_merge: accepted but currently unused (see TODO).
        :param kwargs: accepted but currently unused.
        """
        self._functions = dict(_GLOBAL_FUNCS)
        if functions:
            self._functions.update(functions)

    def deref(self, s, props=None, ctx=""):
        """
        Return *s* with every reference replaced by its evaluated value.

        :param s: text that may contain ``$(...)`` / ``%(...)`` references.
        :param props: mapping of property names to values
            (``None`` means an empty mapping).
        :param ctx: passed through to the parsed expression's ``eval``.
        :returns: the expanded string.
        """
        if props is None:
            props = {}

        lexer = _lexer_deref
        parser = _parser_deref
        rs_rgx = re.compile(_t_REF_START[1])

        result = s
        while True:
            matches = list(rs_rgx.finditer(result))
            if not matches:
                return result

            # The last REF_START is the innermost one, so inner references
            # are already plain text when an outer one gets parsed.
            start = matches[-1].start()
            end = matches[-1].end()

            # Search the RPAR corresponding to this REF_START.
            lpars = 1
            for tok, text in _simple_tokenize(
                    result[end:], self._PAREN_TOKDEF):
                end += len(text)
                if tok == "LPAR":
                    lpars += 1
                elif tok == "RPAR":
                    lpars -= 1
                    if lpars == 0:
                        break

            # Parse just the extracted reference with the real parser.
            partial_res = parser.parse(lexer.lex(result[start:end])).eval(
                props=props, ctx=ctx, functions=self._functions)
            result = "{}{}{}".format(
                result[:start], partial_res, result[end:])
954 
955 
956 
957 
958 #
959 # The evaluator for the purpose of evaluating the condition.
960 #
961 
962 
963 #
964 if __name__ == '__main__':
965     logging.basicConfig(
966         stream=sys.stderr,
967         level=logging.DEBUG,
968         format='%(levelname)s:%(name)s:%(funcName)s:%(lineno)s:%(message)s')
969 
970     import doctest
971     doctest.testmod()

参考にしたい人のためのポイントをば。まずは rply に関して:

  • アクションの還元は ply のように「p[0] = ...」とするのではなく、左辺値を return する。
  • 左辺値の返却は公式のサンプルでは約束があるかのようにみえるが、実際は何でもいい。使いやすいものを返せばいい。
  • そういうわけで、確かに前記のようにデコレータをクラスに対して適用出来なくても、還元規則が運ぶデータを工夫して最後に評価しやすい形にしておけば、あまりこのことは制限にならない。

rply に無関係のポイント:

  • $([System.IO.Path]::GetFileName(`aaa/bbb.c`)) のような .NET 関数を評価するために Python for .NET を使っている。使いやすい、つーか「救世主」
  • 自分で書いてる「String」は Python for .NET に置き換えようか迷ってる。(インストールしてない人でも有る程度動くようにするかどうかの決断の話。)
  • dereferencer は基本的に「文法なし自由記述の中に埋め込まれた参照」を引っこ抜いてからでないと、ことごとくパースエラーとなるために、「抜き出してから評価」という、まぁかったるいことが不可欠、なのよ。
  • なんで rply 使ってるのに自前 lexer も使ってるのよ、てのはコメントに書いた通りでもあるんだけれど、そもそも rply 乗り換え前はこれしか手段がなかったのだ。rply の lexer 使っても書けるかもね、と思う。

なお、あげたコードがどの程度「不完全なのか」は正直色々あって、ここには全部は書けない。世界中にある Visual Studio プロジェクトのどの程度カバーできるのか、については、これは 0%。正式なもの相手なら。ただ、システムインストールされている標準プロパティファイルを読まないなら、おそらく(明日以降くらいに deref だけでなく condition の evaluator も書いたとして)8割程度のプロジェクトではこれで足りる。そんな感じ。で、ある程度完成したら、それこそ「公開」したいんだけれど、これはほんといつになるかは全然わからない。来年以降だろうと思う。

2017-11-10 06:30 追記: rply で lexer のトークン単位にアクションを追加したいとして…

実は上にあげたコードはわかっていて「まだ」誤魔化していたところがあって、引用符の扱いね。C/C++ のコメントなんか考えればわかるけれど、引用符内に引用符が現れたりエスケープがあったりするので、こうしたものは決して「一撃必殺正規表現」なんぞでうまくいくことはない。まぁ「8割くらいはオッケー」程度ではあるんだけれど、「2割」はいわゆる「レアケース」じゃなかろ。

つまり「token 単位でアクションを実行する」ということでもあるし、「遷移状態の管理をせねばならぬ」てことね。上のコードで自前 lexer ではやってることを、ply/rply の方でやってなかったのは無論「やり方がわからなかったから」。

Unix C 伝統の GNU flex ではまさに状態遷移をそのまま「定義として」書ける。ちとどんな書き方だったか忘れたが、確かルールの先頭に「<state_a>」と書くんじゃなかったかな(開始条件、start condition というやつ)。こういうのがあったりしないかなぁ、と思ったんだけれど、rply のソースコードをざっとみた感じでは、「ないと思う」。

して、「rply のソースコードを読」んでたら気付いた。「あ、そっか、lexer が外にいるんだから…」。rply ソースコード(lexergenerator.py)にこんな docstring が書かれてる:

 1 >>> from rply import LexerGenerator
 2 >>> lg = LexerGenerator()
 3 
 4 # You can then build a lexer with which you can lex a string to produce an
 5 # iterator yielding tokens:
 6 
 7 >>> lexer = lg.build()
 8 >>> iterator = lexer.lex('1 + 1')
 9 >>> iterator.next()
10 Token('NUMBER', '1')
11 >>> iterator.next()
12 Token('ADD', '+')
13 >>> iterator.next()
14 Token('NUMBER', '1')
15 >>> iterator.next()
16 Traceback (most recent call last):
17 ...
18 StopIteration

そゆこと。この LexerGenerator で構築した lexer を抱え込んで自前 iterator を返す lex() を書けばいい。ちとすぐには出てこないんでコード貼り付けは控えるけど、難しくないはず。これが書ければ「引用符内に突入」「引用符から出たのよ」を書ける。

2017-11-10 18:00 追記: やってはみないけど少しだけ深掘り

rply/lexer.py を抜粋するとこんな具合:

 1 from rply.errors import LexingError
 2 from rply.token import SourcePosition, Token
 3 
 4 class Lexer(object):
 5     def __init__(self, rules, ignore_rules):
 6         # ... (snip) ...
 7 
 8     def lex(self, s):
 9         return LexerStream(self, s)
10 
11 
12 class LexerStream(object):
13     def __init__(self, lexer, s):
14         self.lexer = lexer
15         # ... (snip) ...
16 
17     def __iter__(self):
18         return self
19 
20     # ... (snip) ...
21 
22     def next(self):
23         # ... (snip) ...
24         for rule in self.lexer.rules:
25             match = rule.matches(self.s, self.idx)
26             if match:
27                 # ... (snip) ...
28                 return token
29         # ... (snip) ...
30 
31     # ... (snip) ...

つまり、おおむね「オレオレ Lexer, LexerStream」の構造はたとえばこんなだろう:

 1 from rply.errors import LexingError
 2 from rply.token import SourcePosition, Token
 3 from rply.lexer import Lexer, LexerStream
 4 
 5 class _OretekiLexer(object):
 6     def __init__(self, real_lexer):
 7         self._real_lexer = real_lexer
 8 
 9     def lex(self, s):
10         return _OrahonoLexerStream(
11             self._real_lexer,
12             s)
13 
14 
class _OrahonoLexerStream(LexerStream):
    """Token stream that post-processes the tokens LexerStream produces."""

    # __init__ is inherited as-is; override it only when extra state
    # (e.g. an "inside quotes" flag) has to be initialised.

    def next(self):
        # super() must name *this* class.  super(LexerStream, self) would
        # start the method lookup after LexerStream in the MRO and so
        # never reach LexerStream.next.
        tok = super(_OrahonoLexerStream, self).next()
        # Inspect the token type here: enter or leave the quoted state,
        # accumulate text, synthesise a merged token, and so on.
        return tok

「tok の種類をみて云々」は、少し手間なのが、「引用符に入ったぜ」状態に遷移してから終わるまで、当たり前だが「トークン文字列を結合して溜め込む必要がある」し、source_pos も正しいものを自力で生み出さなければならない(これは遷移開始のものを使える)。

実際どんなふうに書くのか一度日本語で書いてみたが、わかりやすい表現が見つからなかったのでやめる。コードの可読性は結構落ちる。コードの読み手にとっていやらしいポイントは:

  1. パーサから見えないトークンが登場し、オレオレ LexerStream 内で勝手に消費される
  2. 本物の lexer は決して生み出さないトークンが登場する(オレオレ LexerStream による自家発電)
  3. 2. のトークンがパーサに伝播する

という多段になるってこと。この対応関係が、初見では結構わかりにくいと思うし、保守での修正の際に誤った箇所を触ってしまう危険性も出てくる。まぁこれは正規表現単位にアクションを書けるタイプのものを使っても起こることだけれど、「オレオレ Lexer/LexerStream も書かねばならぬ(しかもその中に詰め込まれる)」ために余計ね。

で、なんで「やってみないけど」なのか。要はこれが必要になるのは、例えば Unix/C 流儀のこんな記述:

1 "I'm a \"PERFECT\" human."

のようにエスケープすることで「二重引用符内に二重引用符を含めることが出来る」場合なわけなんだけれど、ちょっと msbuild のそこらのエスケープの仕様がわかってなくてさ。なので当座後回し。昔から Microsoft 仕様の「引用符の扱い」って、いつも主流から外れた独自独自独自なのね。だから慎重になってる。

2017-11-11 11:00 追記: rply で書いた MSBuild の expression evaluator 初版完了

もう「2017-11-10 01:20 追記」でコードの紹介はやめるつもりだったんだけれど、もう一悶着だけあったんで。これが最後。

   1 # -*- coding: utf-8 -*-
   2 #
   3 from __future__ import absolute_import
   4 from __future__ import unicode_literals
   5 from __future__ import print_function
   6 
   7 import os
   8 import sys
   9 import re
  10 import logging
  11 import warnings
  12 
  13 import six
  14 from rply import LexerGenerator
  15 from rply import ParserGenerator
  16 from rply.token import BaseBox
  17 
  18 
  19 if sys.version[0] == '2':
  20     str = unicode
  21 else:
  22     from functools import reduce
  23 
  24 
  25 _logger = logging.getLogger(__name__)
  26 
  27 
  28 # ----------------------------------------------------------------
  29 #
  30 # Internal Helpers of this module
  31 #
  32 def _simple_tokenize(s, tokdef):
  33     """
  34     Very simple lexer that is used for lexical analysis
  35     where ply usage is overkill.
  36 
  37     tokdef must be like this:
  38     >>> tokdef = {
  39     ...     "root": [  # state id ("root" is mandatory state.)
  40     ...         (
  41     ...             re.compile(r"'"),  # regexp
  42     ...             "SQ",  # token id
  43     ...             "sq"  # transition state
  44     ...             ),
  45     ...         # ...
  46     ...         ],
  47     ...     "sq" : [
  48     ...         (re.compile(r"."), "", "#pop"),
  49     ...         ]
  50     ... }
  51 
  52     if you were defined "tokdef", now you can use this:
  53     >>> for tokid, s in _simple_tokenize("'a", tokdef):
  54     ...     # do something
  55     ...     pass
  56     """
  57     states = ["root"]
  58     state = states[0]
  59     while s:
  60         for rgx, tok, trans in tokdef[state]:
  61             if hasattr(rgx, "match"):
  62                 m = rgx.match(s)
  63             else:
  64                 m = re.match(rgx, s)
  65             if m:
  66                 yield tok, m.group(0)
  67                 if trans:
  68                     if trans == "#pop":
  69                         states.pop(-1)
  70                         state = states[-1]
  71                     else:
  72                         states.append(trans)
  73                         state = trans
  74                 s = s[m.span()[1]:]
  75                 break
  76 
  77 
  78 # ----------------------------------------------------------------
  79 #
  80 # Non-Python Types and Functions Emulation
  81 #
  82 _GLOBAL_FUNCS = {
  83     "hastrailingslash": lambda *args: args[0][-1] in ("/", "\\"),
  84     "exists": lambda *args: os.path.exists(args[0]),
  85 
  86     # TODO: more?
  87     "[system.io.path]::combine": lambda *args: "/".join(args),
  88 
  89     #
  90     "[msbuild]::makerelative": lambda *args: os.path.relpath(args[1], args[0]),
  91     "[msbuild]::add": lambda *args: sum(args),
  92     "[msbuild]::multiply": lambda *args: reduce(lambda x, y: x * y, args),
  93     "[msbuild]::bitwiseor": lambda *args: reduce(lambda x, y: x | y, args),
  94     "[msbuild]::bitwiseand": lambda *args: reduce(lambda x, y: x & y, args),
  95     # TODO: https://msdn.microsoft.com/en-us/library/dd633440.aspx
  96     #     Subtract
  97     #     Divide
  98     #     Modulo
  99     #     Escape
 100     #     Unescape
 101     #     BitwiseXor
 102     #     BitwiseNot
 103     #     DoesTaskHostExist # maybe we can't implement
 104     #     GetDirectoryNameOfFileAbove
 105     #     GetRegistryValue
 106     #     GetRegistryValueFromView
 107     #     ValueOrDefault
 108     }
 109 
 110 
 111 class _DotNetLoader(object):
 112     def __init__(self):
 113         self._clr = None
 114         self._loaded_assemblies = {}  # value: success or not
 115 
 116         # value: imported assembly (as python module)
 117         self._imported_assemblies = {}
 118 
 119     def _load_assembly(self, asmn):
 120         if asmn not in self._loaded_assemblies:
 121             try:
 122                 self._clr.AddReference(asmn)
 123                 self._loaded_assemblies[asmn] = True
 124             except Exception as e:
 125                 # actually it should be "System.IO.FileNotFoundException",
 126                 # but we can't assume it when no assemblies are loaded.
 127                 if "'System.IO.FileNotFoundException'" not in str(type(e)):
 128                     raise
 129                 # this nsref is not assembly (maybe module fullname)
 130                 self._loaded_assemblies[asmn] = False
 131         return self._loaded_assemblies[asmn]
 132 
 133     def _import_assembly(self, asmn):
 134         if asmn not in self._imported_assemblies:
 135             try:
 136                 exec("import " + asmn)
 137             except ImportError:
 138                 raise  # what should we do?
 139             self._imported_assemblies[asmn] = eval(asmn)
 140         return self._imported_assemblies[asmn]
 141 
 142     def get_function(self, name_with_ns):
 143         # name_with_ns: like "[System.IO.Path]::Add"
 144         if self._clr is None:
 145             import clr  # Python for .NET (pythonnet)
 146             self._clr = clr
 147         ns, name = name_with_ns.split("]::")
 148         ns_spl = ns[1:].split(".")  # like "System", "IO", "Path"
 149         asmn, modn = None, None
 150         for i in range(len(ns_spl), 1, -1):
 151             asmn = ".".join(ns_spl[:i - 1])
 152             modn = ".".join(ns_spl[i - 1:])
 153             if self._load_assembly(asmn):
 154                 break
 155         asm = self._import_assembly(asmn)
 156         return getattr(getattr(asm, modn), name)
 157 
 158 
 159 _dotnetloader = _DotNetLoader()
 160 
 161 
 162 class String(str):
 163     r"""
 164     >>> s = String("abc")
 165     >>> isinstance(s, (str,))
 166     True
 167     >>> isinstance(s, six.string_types)
 168     True
 169     """
 170     def __new__(cls, value):
 171         obj = str.__new__(cls, value)
 172         return obj
 173 
 174     def __getattr__(self, name):
 175         # Unfortunatelly, Microsoft always ignores its case...
 176         from types import FunctionType
 177         for attr in [
 178             x for x, y in String.__dict__.items()]:
 179 
 180             if attr.lower() == name.lower():
 181                 return getattr(self, attr)
 182 
 183     #def Clone(self, *args, **kwargs):
 184     #    r"""
 185     #    Object Clone()
 186     #
 187     #    """
 188     #    raise NotImplementedError()
 189 
 190     def CompareTo(self, value):
 191         r"""
 192         int CompareTo(Object value)
 193         int CompareTo(string strB)
 194 
 195         >>> tab = [
 196         ...     # (lhs, rhs, expected)
 197         ...     #     same length
 198         ...     ("A", "A", 0),
 199         ...     ("A", "a", 1),
 200         ...     ("a", "A", -1),
 201         ...     ("AAA", "AAA", 0),
 202         ...     ("AAA", "aAA", 1),
 203         ...     ("aAA", "AAA", -1),
 204         ...     #
 205         ...     #     len(self) > len(rhs)
 206         ...     ("AAAa", "AAA", 1),
 207         ...     ("AAAa", "aAA", 1),
 208         ...     ("aAAa", "AAA", 1),
 209         ...     ("aAAaa", "AAA", 1),
 210         ...     #
 211         ...     #     len(self) < len(rhs)
 212         ...     ("AAA", "AAAA", -1),
 213         ...     ("AAA", "aAAA", -1),
 214         ...     ("aAA", "AAAA", -1),
 215         ...     ("aAA", "AAAAA", -1),
 216         ... ]
 217         >>> for lhs, rhs, expected in tab:
 218         ...     result = String(lhs).CompareTo(rhs)
 219         ...     assert result == expected, str((lhs, rhs, expected, result))
 220         ...     #from ._powershell import exec_single_command
 221         ...     #expected2 = exec_single_command('"{}".CompareTo("{}")'.format(lhs, rhs))
 222         ...     #assert expected == expected2, str((lhs, rhs, expected2, expected))
 223         ...     #assert result == expected2, str((lhs, rhs, expected2, result))
 224         >>> 
 225         """
 226         lendiff = len(self) - len(value)
 227         if lendiff == 0:
 228             # this logic is the same as python2's cmp
 229             return (value > self) - (value < self)
 230         return lendiff / abs(lendiff)
 231 
 232     def Contains(self, value):
 233         r"""
 234         bool Contains(string value)
 235 
 236         >>> String("AAA").Contains("AAA")
 237         True
 238         >>> String("   AAA   ").Contains("AAA")
 239         True
 240         >>> String("BBB").Contains("AAA")
 241         False
 242         >>> String("AAA").Contains("   AAA   ")
 243         False
 244         """
 245         return value in self
 246 
 247     #def CopyTo(self, *args, **kwargs):
 248     #    r"""
 249     #    Void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
 250     #
 251     #    """
 252     #    raise NotImplementedError()
 253 
 254     def EndsWith(self, *args, **kwargs):
 255         r"""
 256         bool EndsWith(string value)
 257         bool EndsWith(string value, StringComparison comparisonType)
 258         bool EndsWith(string value, bool ignoreCase, Globalization.Cul...
 259 
 260         """
 261         raise NotImplementedError()
 262 
 263     def Equals(self, *args, **kwargs):
 264         r"""
 265         bool Equals(Object obj)
 266         bool Equals(string value)
 267         bool Equals(string value, StringComparison comparisonType)
 268 
 269         """
 270         raise NotImplementedError()
 271 
 272     #def GetEnumerator(self, *args, **kwargs):
 273     #    r"""
 274     #    CharEnumerator GetEnumerator()
 275     #
 276     #    """
 277     #    raise NotImplementedError()
 278 
 279     #def GetHashCode(self, *args, **kwargs):
 280     #    r"""
 281     #    int GetHashCode()
 282     #
 283     #    """
 284     #    raise NotImplementedError()
 285 
 286     #def GetType(self, *args, **kwargs):
 287     #    r"""
 288     #    type GetType()
 289     #
 290     #    """
 291     #    raise NotImplementedError()
 292 
 293     #def GetTypeCode(self, *args, **kwargs):
 294     #    r"""
 295     #    TypeCode GetTypeCode()
 296     #
 297     #    """
 298     #    raise NotImplementedError()
 299 
 300     def IndexOf(self, *args, **kwargs):
 301         r"""
 302         int IndexOf(char value)
 303         int IndexOf(char value, int startIndex)
 304         int IndexOf(char value, int startIndex, int count)
 305         int IndexOf(string value)
 306         int IndexOf(string val...
 307 
 308         """
 309         raise NotImplementedError()
 310 
 311     def IndexOfAny(self, *args, **kwargs):
 312         r"""
 313         int IndexOfAny(char[] anyOf)
 314         int IndexOfAny(char[] anyOf, int startIndex)
 315         int IndexOfAny(char[] anyOf, int startIndex, int count)
 316 
 317         """
 318         raise NotImplementedError()
 319 
 320     def Insert(self, *args, **kwargs):
 321         r"""
 322         string Insert(int startIndex, string value)
 323 
 324         """
 325         raise NotImplementedError()
 326 
 327     #def IsNormalized(self, *args, **kwargs):
 328     #    r"""
 329     #    bool IsNormalized()
 330     #    bool IsNormalized(Text.NormalizationForm normalizationForm)
 331     #
 332     #    """
 333     #    raise NotImplementedError()
 334 
 335     def LastIndexOf(self, *args, **kwargs):
 336         r"""
 337         int LastIndexOf(char value)
 338         int LastIndexOf(char value, int startIndex)
 339         int LastIndexOf(char value, int startIndex, int count)
 340         int LastIndexOf(string value)
 341         int La...
 342 
 343         """
 344         raise NotImplementedError()
 345 
 346     def LastIndexOfAny(self, *args, **kwargs):
 347         r"""
 348         int LastIndexOfAny(char[] anyOf)
 349         int LastIndexOfAny(char[] anyOf, int startIndex)
 350         int LastIndexOfAny(char[] anyOf, int startIndex, int count)
 351 
 352         """
 353         raise NotImplementedError()
 354 
 355     #def Normalize(self, *args, **kwargs):
 356     #    r"""
 357     #    string Normalize()
 358     #    string Normalize(Text.NormalizationForm normalizationForm)
 359     #
 360     #    """
 361     #    raise NotImplementedError()
 362 
 363     def PadLeft(self, *args, **kwargs):
 364         r"""
 365         string PadLeft(int totalWidth)
 366         string PadLeft(int totalWidth, char paddingChar)
 367 
 368         """
 369         raise NotImplementedError()
 370 
 371     def PadRight(self, *args, **kwargs):
 372         r"""
 373         string PadRight(int totalWidth)
 374         string PadRight(int totalWidth, char paddingChar)
 375 
 376         """
 377         raise NotImplementedError()
 378 
 379     def Remove(self, *args, **kwargs):
 380         r"""
 381         string Remove(int startIndex, int count)
 382         string Remove(int startIndex)
 383 
 384         """
 385         raise NotImplementedError()
 386 
 387     def Replace(self, oldValue, newValue):
 388         r"""
 389         string Replace(char oldChar, char newChar)
 390         string Replace(string oldValue, string newValue)
 391 
 392         >>> print(String("AAA").Replace("AAA", "BBB"))
 393         BBB
 394         """
 395         return String(self.replace(oldValue, newValue))
 396 
 397     def Split(self, *args, **kwargs):
 398         r"""
 399         string[] Split(Params char[] separator)
 400         string[] Split(char[] separator, int count)
 401         string[] Split(char[] separator, StringSplitOptions options)
 402         string[] Spl...
 403 
 404         """
 405         raise NotImplementedError()
 406 
 407     def StartsWith(self, *args, **kwargs):
 408         r"""
 409         bool StartsWith(string value)
 410         bool StartsWith(string value, StringComparison comparisonType)
 411         bool StartsWith(string value, bool ignoreCase, Globalizati...
 412 
 413         """
 414         raise NotImplementedError()
 415 
 416     def Substring(self, startIndex, length=-1):
 417         r"""
 418         string Substring(int startIndex)
 419         string Substring(int startIndex, int length)
 420 
 421         >>> print(String("abc").Substring(0))
 422         abc
 423         >>> print(String("abc").Substring(0, 1))
 424         a
 425         >>> print(String("abc").Substring(1))
 426         bc
 427         >>> print(String("abc").Substring(1, 1))
 428         b
 429         >>> print(String("abc").Substring(1, 2))
 430         bc
 431         >>> print(String("abc").SubString(1, 2))
 432         bc
 433         """
 434         # TODO: "System.String.Substring" raise Exception if
 435         #       length is larger than actual length.
 436         if length >= 0:
 437             return String(self[startIndex:][:length])
 438         return String(self[startIndex:])
 439 
 440     #def ToCharArray(self, *args, **kwargs):
 441     #    r"""
 442     #    char[] ToCharArray()
 443     #    char[] ToCharArray(int startIndex, int length)
 444     #
 445     #    """
 446     #    raise NotImplementedError()
 447 
 448     def ToLower(self, *args, **kwargs):
 449         r"""
 450         string ToLower()
 451         string ToLower(Globalization.CultureInfo culture)
 452 
 453         """
 454         raise NotImplementedError()
 455 
 456     #def ToLowerInvariant(self, *args, **kwargs):
 457     #    r"""
 458     #    string ToLowerInvariant()
 459     #
 460     #    """
 461     #    raise NotImplementedError()
 462 
 463     #def ToString(self, *args, **kwargs):
 464     #    r"""
 465     #    string ToString()
 466     #    string ToString(IFormatProvider provider)
 467     #
 468     #    """
 469     #    raise NotImplementedError()
 470 
 471     def ToUpper(self, *args, **kwargs):
 472         r"""
 473         string ToUpper()
 474         string ToUpper(Globalization.CultureInfo culture)
 475 
 476         """
 477         raise NotImplementedError()
 478 
 479     #def ToUpperInvariant(self, *args, **kwargs):
 480     #    r"""
 481     #    string ToUpperInvariant()
 482     #
 483     #    """
 484     #    raise NotImplementedError()
 485 
 486     def Trim(self, *args, **kwargs):
 487         r"""
 488         string Trim(Params char[] trimChars)
 489         string Trim()
 490 
 491         """
 492         raise NotImplementedError()
 493 
 494     def TrimEnd(self, *args, **kwargs):
 495         r"""
 496         string TrimEnd(Params char[] trimChars)
 497 
 498         """
 499         raise NotImplementedError()
 500 
 501     def TrimStart(self, *args, **kwargs):
 502         r"""
 503         string TrimStart(Params char[] trimChars)
 504 
 505         """
 506         raise NotImplementedError()
 507 
 508     #def Chars(self, *args, **kwargs):
 509     #    r"""
 510     #    char Chars(int index) {get;}
 511     #
 512     #    """
 513     #    raise NotImplementedError()
 514 
 515     @property
 516     def Length(self):
 517         r"""
 518         Int32 Length {get;}
 519 
 520         >>> s = String("abc")
 521         >>> s.Length
 522         3
 523         """
 524         return len(self)
 525 
 526 
 527 # ----------------------------------------------------------------
 528 #
 529 # Common definitions, etc. for our two different evaluators.
 530 #
 531 
 532 #   ------------------
 533 #   utilities
 534 def _tostr(p):
 535     if hasattr(p, "getstr"):
 536         return p.getstr()
 537     return str(p)
 538 
 539 
 540 #   ------------------
 541 #   wrappers for token
 542 
 543 
 544 
 545 #   ------------------
 546 #   common regexp
 547 _t_NUMBER = ("NUMBER", r'\s*[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?\s*')
 548 _t_LPAREN = ("LPAREN", r'\s*\(\s*')
 549 _t_RPAREN = ("RPAREN", r'\s*\)\s*')
 550 _t_NAME = ("NAME", r"[a-zA-Z_][a-zA-Z0-9_]*")
 551 _t_REF_START = ("REF_START", r"[$%]\(")
 552 _t_ARGSEP = ("ARGSEP", r'\s*,\s*')
 553 # ----------------------------------------------------------------
 554 #
 555 # The evaluator for the purpose of dereferencing the macros.
 556 # (Basically, this is not public.)
 557 #
 558 
 559 #   ------------------
 560 #   
 561 class PropCallChain(object):
 562 
 563     def __init__(self, data, kind):
 564         self.data = data
 565         self._kind = kind
 566 
 567     def eval(self, props={}, ctx=""):
 568         def _flatten_data(data, flattened):
 569             for d in data:
 570                 if d is not None and isinstance(d[0], (tuple,)):
 571                     _flatten_data(d, flattened)
 572                 else:
 573                     flattened.append(d)
 574 
 575         if self._kind == "p":
 576             flattened = []
 577             _flatten_data(self.data, flattened)
 578             return self._eval_propcallchain(flattened, props, ctx)
 579         elif self._kind == "f":
 580             return self._eval_funccall(self.data)
 581 
 582     def _eval_propcallchain(self, clist, props, ctx):
 583         res = props
 584         def _icasesearch(d, sk, nf2blank=True):
 585             targk = sk.lower()
 586             for k in d.keys():
 587                 if k.lower() == targk:
 588                     return d[k]
 589             if nf2blank:
 590                 # MSBuild deals with N/F as blank.
 591                 return ""
 592 
 593         # resolve root key
 594         t = _icasesearch(res, clist[0], nf2blank=False)
 595         if t is not None:
 596             # if input="$(X.Y)" and props has "X",
 597             # now res=props["X"].
 598             res = t
 599         elif ctx:
 600             t = _icasesearch(res, ctx, nf2blank=False)
 601             if t is not None:
 602                 # if input="$(Y)", props has "X",
 603                 # and ctx is "X",
 604                 # now res=props["X"].
 605                 res = t
 606                 # search "Y" in props["X"]
 607                 res = _icasesearch(res, clist[0])
 608         if t is None:
 609             # MSBuild deals with N/F as blank.
 610             res = ""
 611 
 612         # nested props
 613         if isinstance(res, six.string_types):
 614             res = String(res)
 615         if len(clist) > 1:
 616             for name, args in clist[1:]:
 617                 n = name[1:]
 618                 if hasattr(res, n):
 619                     res = getattr(res, n)
 620                     if args is not None:  # callable
 621                         res = res(*args)
 622                 else:
 623                     res = _icasesearch(res, n)
 624                     if isinstance(res, six.string_types):
 625                         res = String(res)
 626         return res
 627 
 628     def _eval_funccall(self, clist):
 629         if len(clist) == 2:
 630             funcname = clist[0]
 631             funcargs = clist[1]
 632         else:
 633             funcname = "".join(clist[:2])
 634             funcargs = clist[2]
 635         fn = _external_functions_table.get(funcname.lower())
 636         if not fn:
 637             fn = _dotnetloader.get_function(funcname)
 638         #_logger.debug("%s, %r, %r", funcname, fn, funcargs)
 639         if fn:
 640             return fn(*funcargs)
 641 
 642 #   ------------------
 643 #   lexer
 644 _lg_deref = LexerGenerator()
 645 _lg_deref.add(*_t_NUMBER)
 646 _lg_deref.add("MSBUILD_LITERAL", r"`[^`]*`")
 647 _lg_deref.add(*_t_REF_START)
 648 _lg_deref.add(*_t_LPAREN)
 649 _lg_deref.add(*_t_RPAREN)
 650 _lg_deref.add("LSQBRACKET", r'\[')
 651 _lg_deref.add("NS_END", r'\]::')
 652 _lg_deref.add(*_t_ARGSEP)
 653 _lg_deref.add(*_t_NAME)
 654 _lg_deref.add("PROP", r"\.[a-zA-Z_][a-zA-Z0-9_]*")
 655 _lexer_deref = _lg_deref.build()
 656 
 657 #   ------------------
 658 #   parser
 659 _pg_deref = ParserGenerator(
 660     [
 661         "NUMBER", "MSBUILD_LITERAL",
 662         "REF_START",
 663         "RPAREN", "LPAREN", "ARGSEP",
 664         "LSQBRACKET", "NS_END",
 665         "NAME", "PROP",
 666         ],
 667 )
 668 
 669 
 670 @_pg_deref.production("body : ref")
 671 @_pg_deref.production("body : value")
 672 def _deref_p_body(p):
 673     return p[0]
 674 
 675 @_pg_deref.production("ref : REF_START refcontent RPAREN")
 676 def _deref_p_ref(p):
 677     return p[1]
 678 
 679 @_pg_deref.production("refcontent : propcallchain")
 680 @_pg_deref.production("refcontent : funccall")
 681 def _deref_p_refcontent(p):
 682     return p[0]
 683 
 684 @_pg_deref.production("propcallchain : NAME")
 685 def _deref_p_propcallchain_1(p):
 686     return PropCallChain((_tostr(p[0]),), "p")
 687 
 688 @_pg_deref.production("propcallchain : NAME propcall")
 689 def _deref_p_propcallchain_2(p):
 690     return PropCallChain((_tostr(p[0]), p[1]), "p")
 691 
 692 @_pg_deref.production("propcallchain : propcallchain propcall")
 693 def _deref_p_propcallchain_3(p):
 694     return PropCallChain(p, "p")
 695 
 696 @_pg_deref.production("prop : PROP")
 697 @_pg_deref.production("prop : prop prop")
 698 def _deref_p_prop(p):
 699     return "".join([_tostr(s) for s in p])
 700 
 701 @_pg_deref.production("propcall : PROP")
 702 def _deref_p_propcall_1(p):
 703     return (_tostr(p[0]), None)
 704 
 705 @_pg_deref.production("propcall : PROP call")
 706 def _deref_p_propcall_2(p):
 707     return (_tostr(p[0]), p[1])
 708 
 709 @_pg_deref.production("propcall : propcall propcall")
 710 def _deref_p_propcall_3(p):
 711     return tuple(p)
 712 
 713 @_pg_deref.production("funccall : name call")
 714 def _deref_p_funccall(p):
 715     return PropCallChain(p, "f")
 716 
 717 @_pg_deref.production("name : NAME")
 718 @_pg_deref.production("name : nsref NAME")
 719 def _deref_p_name(p):
 720     if len(p) == 2:
 721         return "".join([p[0], _tostr(p[1])])
 722     else:
 723         return p[0]
 724 
 725 @_pg_deref.production("nsref : LSQBRACKET NAME NS_END")
 726 @_pg_deref.production("nsref : LSQBRACKET NAME prop NS_END")
 727 def _deref_p_nsref(p):
 728     return "".join([_tostr(s) for s in p if s])
 729 
 730 @_pg_deref.production("call : LPAREN RPAREN")
 731 @_pg_deref.production("call : LPAREN args RPAREN")
 732 def _deref_p_call(p):
 733     if len(p) == 3:
 734         return p[1]
 735     else:
 736         return []
 737 
 738 @_pg_deref.production("args : value")
 739 @_pg_deref.production("args : value ARGSEP value")
 740 @_pg_deref.production("args : args ARGSEP value")
 741 def _deref_p_args(p):
 742     return p[0::2]  # ('x', ',', 'y', ',', 'z')
 743 
 744 @_pg_deref.production("value : ref")
 745 @_pg_deref.production("value : funccall")
 746 @_pg_deref.production("value : literal")
 747 @_pg_deref.production("value : number")
 748 def _deref_p_value(p):
 749     return p[0]
 750 
 751 @_pg_deref.production("literal : MSBUILD_LITERAL")
 752 def _deref_p_literal(p):
 753     return _tostr(p[0])[1:-1]
 754 
 755 @_pg_deref.production("number : NUMBER")
 756 def _deref_p_number(p):
 757     s = _tostr(p[0]).strip()
 758     if "." in s or "e" in s:
 759         return float(s)
 760     else:
 761         return int(s)
 762 
 763 with warnings.catch_warnings():
 764     # "shift/reduce conflicts" warnings are useful for me
 765     # as developper, but this warnings is meaningless for
 766     # all users of our library.
 767     warnings.simplefilter("ignore")
 768 
 769     _parser_deref = _pg_deref.build()
 770 
 771 
 772 class _DerefTest(object):
 773     r"""
 774     # this class is not for use.
 775     # just for holding _parser_deref's doctest.
 776 
 777     >>> lexer = _lexer_deref
 778     >>> parser = _parser_deref
 779     >>> parser.parse(lexer.lex("1.2e-1"))
 780     0.12
 781     >>> print(parser.parse(lexer.lex("`aaa`")))
 782     aaa
 783     >>> print(parser.parse(lexer.lex("$(X)")).eval(props={"X": "y"}))
 784     y
 785     >>> parser.parse(lexer.lex("$(X.Length)")).eval(props={"X": "yz"})
 786     2
 787     """
 788 
 789 
 790 # ----------------------------------------------------------------
 791 #
 792 # The evaluator for the purpose of evaluating the condition.
 793 # (Basically, this is not public.)
 794 #
 795 
 796 #   ------------------
 797 #   lexer
 798 _lg_evaluator = LexerGenerator()
 799 _lg_evaluator.add(*_t_NUMBER)
 800 _lg_evaluator.add("SQ_LITERAL", r"\s*'[^']*'\s*")
 801 _lg_evaluator.add("DQ_LITERAL", r'\s*"[^"]*"\s*')
 802 _lg_evaluator.add(*_t_LPAREN)
 803 _lg_evaluator.add(*_t_RPAREN)
 804 _lg_evaluator.add("CMP_EQ", "\s*==\s*")
 805 _lg_evaluator.add("CMP_NE", "\s*!=\s*")
 806 _lg_evaluator.add("CMP_LT", "\s*<\s*")
 807 _lg_evaluator.add("CMP_LE", "\s*<=\s*")
 808 _lg_evaluator.add("CMP_GE", "\s*>=\s*")
 809 _lg_evaluator.add("CMP_GT", "\s*>\s*")
 810 _lg_evaluator.add("NEGATE", "\s*!\s*")
 811 _lg_evaluator.add("LOGICAL_AND", "(?<!\w)\s*and\s*(?!\w)", flags=re.I)
 812 _lg_evaluator.add("LOGICAL_OR", "(?<!\w)\s*or\s*(?!\w)", flags=re.I)
 813 _lg_evaluator.add(*_t_ARGSEP)
 814 _lg_evaluator.add(*_t_NAME)
 815 
 816 _lexer_evaluator = _lg_evaluator.build()
 817 
 818 #   ------------------
 819 #   parser
 820 _pg_evaluator = ParserGenerator(
 821     [
 822         'NUMBER',
 823         "SQ_LITERAL", "DQ_LITERAL",
 824         "LPAREN", "RPAREN",
 825         "ARGSEP",
 826         "CMP_EQ", "CMP_NE", "CMP_LT",
 827         "CMP_LE", "CMP_GE", "CMP_GT",
 828         "NEGATE",
 829         "LOGICAL_AND", "LOGICAL_OR",
 830         "NAME",
 831         ],
 832     precedence=[
 833         ('left', ['LOGICAL_AND', 'LOGICAL_OR',]),
 834         ('right', ['NEGATE',])
 835     ]
 836 )
 837 
 838 @_pg_evaluator.production("expression : LPAREN expression RPAREN")
 839 def _evaluator_p_expression_group(p):
 840     return p[1]
 841 
 842 @_pg_evaluator.production("expression : expression CMP_EQ expression")
 843 @_pg_evaluator.production("expression : expression CMP_NE expression")
 844 @_pg_evaluator.production("expression : expression CMP_LT expression")
 845 @_pg_evaluator.production("expression : expression CMP_LE expression")
 846 @_pg_evaluator.production("expression : expression CMP_GE expression")
 847 @_pg_evaluator.production("expression : expression CMP_GT expression")
 848 def _evaluator_p_cmp_expression(p):
 849     op = _tostr(p[1]).strip()
 850 
 851     # Microsoft always ignore case...
 852     if isinstance(p[0], six.string_types):
 853         p[0] = p[0].lower()
 854     if isinstance(p[0], six.string_types):
 855         p[2] = p[2].lower()
 856 
 857     #
 858     if op == "==":
 859         return (p[0] == p[2])
 860     elif op == "!=":
 861         return (p[0] != p[2])
 862     elif op == ">=":
 863         return (p[0] >= p[2])
 864     elif op == "<=":
 865         return (p[0] <= p[2])
 866     elif op == ">":
 867         return (p[0] > p[2])
 868     elif op == "<":
 869         return (p[0] < p[2])
 870 
 871 @_pg_evaluator.production("expression : expression LOGICAL_AND expression")
 872 @_pg_evaluator.production("expression : expression LOGICAL_OR expression")
 873 def _evaluator_p_landor_expression(p):
 874     op = _tostr(p[1]).strip()
 875     if op == "and":
 876         return p[0] and p[2]
 877     else:
 878         return p[0] or p[2]
 879 
 880 @_pg_evaluator.production("expression : NEGATE expression")
 881 def _evaluator_p_negate_expression(p):
 882     return not p[1]
 883 
 884 @_pg_evaluator.production("expression : value")
 885 def _evaluator_p_value_expression(p):
 886     return p[0]
 887 
 888 @_pg_evaluator.production("funccall : name call")
 889 def _evaluator_p_funccall(p):
 890     return PropCallChain(p, "f").eval()
 891 
 892 @_pg_evaluator.production("name : NAME")
 893 def _evaluator_p_name(p):
 894     return _tostr(p[0])
 895 
 896 @_pg_evaluator.production("call : LPAREN RPAREN")
 897 @_pg_evaluator.production("call : LPAREN args RPAREN")
 898 def _evaluator_p_call(p):
 899     if len(p) == 3:
 900         return p[1]
 901     else:
 902         return []
 903 
 904 @_pg_evaluator.production("args : value")
 905 @_pg_evaluator.production("args : value ARGSEP value")
 906 @_pg_evaluator.production("args : args ARGSEP value")
 907 def _evaluator_p_args(p):
 908     return p[0::2]  # ('x', ',', 'y', ',', 'z')
 909 
 910 @_pg_evaluator.production("value : funccall")
 911 @_pg_evaluator.production("value : literal")
 912 @_pg_evaluator.production("value : number")
 913 def _evaluator_p_value(p):
 914     return p[0]
 915 
 916 @_pg_evaluator.production("literal : SQ_LITERAL")
 917 @_pg_evaluator.production("literal : DQ_LITERAL")
 918 def _evaluator_p_literal(p):
 919     return _tostr(p[0]).strip()[1:-1]
 920 
 921 @_pg_evaluator.production("number : NUMBER")
 922 def _evaluator_p_number(p):
 923     s = _tostr(p[0]).strip()
 924     if "." in s or "e" in s:
 925         return float(s)
 926     else:
 927         return int(s)
 928 
 929 with warnings.catch_warnings():
 930     # "shift/reduce conflicts" warnings are useful for me
 931     # as developper, but this warnings is meaningless for
 932     # all users of our library.
 933     warnings.simplefilter("ignore")
 934 
 935     _parser_evaluator = _pg_evaluator.build()
 936 
 937 
 938 class _EvaluatorTest(object):
 939     r"""
 940     # this class is not for use.
 941     # just for holding _parser_evaluator's doctest.
 942 
 943     >>> lexer = _lexer_evaluator
 944     >>> parser = _parser_evaluator
 945     >>> parser.parse(lexer.lex("1"))
 946     1
 947     >>> parser.parse(lexer.lex("1 == 1"))
 948     True
 949     >>> parser.parse(lexer.lex("1 != 1"))
 950     False
 951     >>> parser.parse(lexer.lex("'1' == '1'"))
 952     True
 953     >>> parser.parse(lexer.lex("!('1' == '1')"))
 954     False
 955     >>> parser.parse(lexer.lex("! !('1' == '1')"))
 956     True
 957     >>> parser.parse(lexer.lex("(1 != 1) and (2 == 2) or (1 == 1)"))
 958     True
 959     >>> parser.parse(lexer.lex("((1 != 1) and (2 == 2)) or (1 == 1)"))
 960     True
 961     >>> parser.parse(lexer.lex("(1 != 1) and ((2 == 2) or (1 == 1))"))
 962     False
 963     >>> parser.parse(lexer.lex("1 != 1 and ((2 == 2) or (1 == 1))"))
 964     False
 965     >>> parser.parse(lexer.lex(" 1!= 1 and((2 ==2) or(1 ==1))"))
 966     False
 967     >>> parser.parse(lexer.lex(" 1!= 1and((2 ==2) or(1 ==1))"))
 968     Traceback (most recent call last):
 969         ...
 970     rply.errors.ParsingError: (None, SourcePosition(idx=6, lineno=1, colno=7))
 971     >>> parser.parse(lexer.lex("HasTrailingSlash('abc/')"))
 972     True
 973     >>> parser.parse(lexer.lex("!HasTrailingSlash('abc/')"))
 974     False
 975     >>> parser.parse(lexer.lex("!HasTrailingSlash('abc/') or !exists('c:/(^_^)')"))
 976     True
 977     """
 978 
 979 
 980 # ----------------------------------------------------------------
 981 #
 982 # Public APIs
 983 #
 984 
 985 # ------------------------------
 986 #
 987 #   Public module configurations
 988 #
 989 
 990 #    _external_functions_table = _GLOBAL_FUNCS + user_defind
 991 _external_functions_table = {}
 992 def overide_external_functions_table(**overides):
 993     """
 994     """
 995     global _external_functions_table
 996     _external_functions_table.update(dict(**overides))
 997 
 998 def set_default_external_functions_table():
 999     """Rebind the module-level function table to a fresh copy of
1000     ``_GLOBAL_FUNCS``, dropping any earlier overrides."""
1001     global _external_functions_table
1002     _external_functions_table = dict(_GLOBAL_FUNCS)
1003 
1004 set_default_external_functions_table()
1005 
1006 
1007 
1008 #
1009 # The evaluator for the purpose of dereferencing the macros.
1010 #
1011 def dereference(s, props={}, ctx=""):
1012     r"""
1013     >>> print(dereference("$(X))", {"X": "xxx"}))
1014     xxx)
1015     >>> print(dereference("'$(X)'", {"X": "xxx"}))
1016     'xxx'
1017     >>> print(dereference("'$(X)|$(Y)'", {"X": "xxx", "Y": "yyy"}))
1018     'xxx|yyy'
1019     >>> print(dereference('"$(X)|$(Y)"', {"X": "xxx", "Y": "yyy"}))
1020     "xxx|yyy"
1021     >>> print(dereference("'$(X)abc | def$(Y)'", {"X": "xxx", "Y": "yyy"}))
1022     'xxxabc | defyyy'
1023     >>> print(dereference("$(X)", {"X": "xxx"}))
1024     xxx
1025     >>> print(dereference("$(X.Y)", {"X": {"Y": "xxx"}}))
1026     xxx
1027     >>> print(dereference("$(X.Y.Length)", {"X": {"Y": "xxx"}}))
1028     3
1029     >>> print(dereference("$(X.Y.Length)", {"x": {"Y": "xxx"}}))
1030     3
1031     >>> print(dereference("$(X.Y.Length)", {"X": {"y": "xxx"}}))
1032     3
1033     >>> print(dereference("$(X.Y.Substring(1))", {"X": {"Y": "xyz"}}))
1034     yz
1035     >>> print(dereference("$(X.Y.substring(1))", {"X": {"Y": "xyz"}}))
1036     yz
1037     >>> print(dereference("$(X.Y.substring ( 1 ) )", {"X": {"Y": "xyz"}}))
1038     yz
1039     >>> print(dereference("$([msbuild]::Add(1, 2))"))
1040     3
1041     >>> print(dereference("$([msbuild]::Add(1, $([msbuild]::Multiply(2, 3))))"))
1042     7
1043     >>> print(dereference("$([msbuild]::BitwiseOr(2, 4))"))
1044     6
1045     >>> print(dereference("$([msbuild]::BitwiseAnd(1, 3))"))
1046     1
1047     >>> print(dereference("$([System.IO.Path]::Combine(`a`, `b`))"))
1048     a/b
1049     >>> print(dereference("$([System.IO.Path]::GetFileName(`aaa/bbb.c`))"))
1050     bbb.c
1051     >>> r = dereference("$([MSBuild]::MakeRelative(`c:/users`, `c:/users/username`))")
1052     >>> print(r.replace("\\", "/"))
1053     username
1054     >>> r = dereference("$([MSBuild]::MakeRelative(`c:/users/username`, `c:/users`))")
1055     >>> print(r.replace("\\", "/"))
1056     ..
1057     """
1058 
1059     # for searching RPAR coressponding to REF_START
1060     tokdef = {
1061         "root": [
1062             (r"`", "STR_S", "str"),
1063             (r"\(", "LPAR", ""),
1064             (r"\)", "RPAR", ""),
1065             (r"[^`()]+", "OTH", ""),
1066             ],
1067         "str": [
1068             (r"`", "STR_E", "#pop"),
1069             (r"[^`]+", "STR", ""),
1070             ],
1071         }
1072 
1073     lexer = _lexer_deref
1074     parser = _parser_deref
1075 
1076     result = s
1077     rs_rgx = re.compile(_t_REF_START[1])
1078     while rs_rgx.search(result):
1079         # search last REF_START (i.e. deepest)
1080         m = list(reversed(list(rs_rgx.finditer(result))))[0]
1081         start, end = m.span()
1082 
1083         # search RPAR coressponding to this REF_START
1084         lpars, end = 1, start + 2
1085         for tok, s in _simple_tokenize(result[end:], tokdef):
1086             end += len(s)
1087             if tok == "LPAR":
1088                 lpars += 1
1089             elif tok == "RPAR":
1090                 lpars -= 1
1091                 if lpars == 0:
1092                     break
1093 
1094         # parse using my parser
1095         partial_res = parser.parse(lexer.lex(result[start:end])).eval(
1096             props=props, ctx=ctx)
1097         result = "{}{}{}".format(
1098             result[:start], partial_res, result[end:])
1099     return result
1100 
1101 #
1102 # The evaluator for the purpose of evaluating the condition.
1103 #
1104 def evaluate(s, props={}, ctx=""):
1105     r"""Dereference ``s``, then evaluate the result as a condition expression.
1106     >>> print(evaluate("$(X) == 2", {"X": "2"}))
1107     True
1108     >>> evaluate("'xyz' != 'zzz'", {})
1109     True
1110     >>>
1111     >>> # Microsoft always ignore case...
1112     >>> evaluate("!!('aBc' == 'aBc')")
1113     True
1114     >>> evaluate("'%(Aaa.Bbb)' == 'xxx'", {'Aaa': {'Bbb': 'Xxx'}})
1115     True
1116     >>> evaluate("!('$(Y)' == 'xyz')", {"X": {"y": "xyz"}}, ctx="x")
1117     False
1118     >>> evaluate("HasTrailingSlash('abc/')")
1119     True
1120     >>> evaluate("!Exists('$(D)')", {"D": "a a a.txt"})
1121     True
1122     >>> overide_external_functions_table(exists=lambda *args: not not args[0])
1123     >>> evaluate("exists('abc/')")
1124     True
1125     >>> overide_external_functions_table(exists=lambda *args: not args[0])
1126     >>> evaluate("exists('abc/')")
1127     False
1128     >>> set_default_external_functions_table()
1129     """
1130     lexer = _lexer_evaluator
1131     parser = _parser_evaluator
1132     return parser.parse(lexer.lex(dereference(s, props=props, ctx=ctx)))
1133 
1134 
1135 #
1136 if __name__ == '__main__':
1137     logging.basicConfig(
1138         stream=sys.stderr,
1139         level=logging.DEBUG,
1140         format='%(levelname)s:%(name)s:%(funcName)s:%(lineno)s:%(message)s')
1141 
1142     import doctest
1143     doctest.testmod()

上のほうで「これ、class をデコレート出来ない、ので class を活用出来ない」問題、そんなにないかなぁと思ったけど、やっぱ現実には出てきた。

問題になったのは「外部関数のユーザによる差し替え」を提供する _external_functions_table (「2017-11-10 01:20 追記」時点のではメソッドへの引数 functions だったもの)。

結局「パーサは結果の解析木を作るだけで意味解釈は行わない」(評価を解析終了まで遅延する)という本来のノリに徹するか、解析中の評価を諦めないかの二択で、前者はちょっと気が滅入りそうだったので後者を採った、ということなんだけれど、伝わってる?

プログラムでやってる言葉で言えば、「PropCallChain」でやっているのと同じように、「expression == expression」などの評価も「PropCallChain」みたいな遅延評価用構造に載せて、解析終了後に「eval(...)」する、というのが「前者」。挙げたコード(つまり「後者」)は「expression == expression」の評価をその場で行っている。これをすると、PropCallChain型 == 'aaa' を行わなければならなくなり、なので PropCallChain型の eval(...) もその場で行わざるを得なくなる、と。となれば、「class のメンバ変数 self._functions」的思想がアウト。

そんなわけで「外部関数のユーザによる差し替え」は class 単位でなくモジュールグローバルとなり、そうなったら書きかけてた _MSBuildExpressionEvaluator という class そのものが無用の長物となり、結果 dereference と evaluate という 2つのモジュールレベル関数が public API となった、てこと。(まぁ _MSBuildExpressionEvaluator だった頃からゴールのイメージは結局はこれだったわけなので結果オーライてとこもないではないんだけれども。)

2017-11-11 19:45 追記: 一応…「初版完了」、言い方間違えた

「初版完了」と言ったが、これは「TDD 開発サイクルにちゃんと乗っかって、管理可能になった」という程度の意味で言っちゃったが、普通はそうは言わないわな。あげたコードは色々 NG で、色々直したけれど、もうコード貼り付けはキリがないのでやめる。いつかは正式公開すると思うけど。

今回のネタの「lex/yacc ライブラリ」の話に関してはもう何もない。NG だった箇所はもう純然たる「アプリケーション的な間違い」の範疇。ただいくつか「この手のプログラムには一般的なこと」もあったりもしたので、何がダメだったかのポイントだけ列挙しとく:

  1. MSBuild ではこんな式も許されている:
    1 $(A) == ""
    

    evaluate が dereference を呼び出すという構造にした関係で、(A が空の場合) dereference がこれを

    1 == ""
    

    と展開してしまい(左辺が消えてしまう)、これは evaluate 側のパーサが知る文法としてアウト。これを措置するには、evaluate 自身もデリファレンスに関与するか、もしくは dereference が必要に応じて引用符を付与するかどちらかしかない。ワタシは後者を採用した。(ただし評価式内でない場合は引用符を勝手に追加するのはダメなので、dereference が実行モードを持つようにした。)

  2. 二項演算子の結合を間違えてた。以下が期待通りになってなかった:

    1 '' == '' and '' != 'true' and 
    2 ('false' == 'false' or 'false' == '') and 
    3 'DynamicLibrary' == 'DynamicLibrary'
    

    これは真として評価されなければならない。precedence の間違いね。

  3. プロパティにキーがない場合、MSBuild ではブランクとして扱われる。これの措置の抜けで "None" を返すなんてコトをやらかしてた。
  4. dereference は、プロパティ自身に参照を含んでいる場合に再帰的に展開しなければならないが、まだやってない。

これくらいだったかな。確か。なんにせよ、上で挙げたコードは「まだ一合目」くらいのものなので、使いたいと思わないで。rply の例として誰かの参考になれば、としてのっけたに過ぎないので。

2017-11-12 19:40 追記: rply 制限に関して言い方間違えた

言い方間違えたつぅか、シンプルに誤解してたと言ったほうがいい。最初の最もシンプルな例を凝視しててようやく悟った。

今からしようとする説明が、rply を気に入るかどうかのキモになると思う。

一言で言えば「パーサ生成器とパーサが別物」(lexerについても同)てだけの話だった。つまり…:

 1 from rply import ParserGenerator
 2 
 3 _pg_deref = ParserGenerator(  # パーサ生成器
 4     ['NUMBER', ],
 5 
 6     precedence=[
 7         #('left', ['PLUS', 'MINUS']),
 8         #('left', ['MUL', 'DIV'])
 9     ]
10 )
11 
12 @_pg_deref.production('expression : NUMBER')  # これはパーサ生成器が「パーサを生成するのに
13 def expression_number(p):                     # 使う定義」でしかなく、パーサの定義を
14     return Value(float(p[0].getstr()))        # しているわけではない。紛らわしいが通じてる?
15 
16 _parser_deref = \
17     _pg_deref.build()  # これを好きには出来ない、パーサは rply が決めた出来合いのもの。

ply のように lexer/parser のインスタンス数に制約があることはないけれど、オレオレパーサが自由に自分の状態を管理して好き勝手にやる、てことは絶対に出来ない。上で文句を言っていた「クラスをデコレート出来ない」は実は全然制約ではなくて、仮にデコレート出来たとしてもそれが出来ることに何一つ意味はない、つーこと。

ちなみに、「モジュールのトップレベルだと読みにくいし気分悪い」てだけが理由ならこんなでいい:

 1 _pg_deref = ParserGenerator(
 2     ['NUMBER', ],
 3 
 4     precedence=[
 5         #('left', ['PLUS', 'MINUS']),
 6         #('left', ['MUL', 'DIV'])
 7     ]
 8 )
 9 
10 class _MyHogeGrammar(object):
11     @classmethod  # staticmethod でもどちらでも
12     @_pg_deref.production('expression : NUMBER')
13     def expression_number(p):
14         return Value(float(p[0].getstr()))
15 
16     # class であることに意味は C++ の namespace のソレとほぼ同じ。
17     # ただの(文法定義と生成規則を収める)箱。
18 
19 _parser_deref = _pg_deref.build()

なんにせよ「パーサは結果の構文木を作れりゃそれでいい」というノリで作ることを受け容れられる限りは、rply には何の問題もない。かえってその方が「文法を同じくしてやることが全く違う」みたいなケースに対しては柔軟かもしらんね。ヘタにパーサ内でなんでもやってしまうと使いまわしにくいものになりがちだしさ。

2017-11-15 21:00 追記: 救世主、state が使える

初見でも目に入ったがすぐにはその価値に気付かなかったある機能、state。

 1 # -*- coding: utf-8 -*-
 2 from rply import LexerGenerator
 3 from rply import ParserGenerator
 4 
 5 # -------- lexer/parser
 6 _lg_evaluator = LexerGenerator()
 7 _lg_evaluator.add('NUMBER', r'\d+')
 8 _lexer_evaluator = _lg_evaluator.build()
 9 
10 #
11 _pg_evaluator = ParserGenerator(
12     ['NUMBER', ],
13 )
14 
15 @_pg_evaluator.production('expression : NUMBER')
16 def expression_number(state, p):
17     # (2) 「ParserGenerator で作った parser の parse に state (任意のオブジェクト) を
18     #      渡」すとここに垂れ流されてくる
19     import sys
20     print(state, file=sys.stderr)
21     return int(p[0].getstr())
22 
23 _parser_evaluator = _pg_evaluator.build()
24 # (1) ParserGenerator で作った parser の parse に state (任意のオブジェクト) を渡せる
25 print(
26     _parser_evaluator.parse(
27         _lexer_evaluator.lex('1'),
28         {"key": "value"}  # 任意なのだからなんでもいいわけで
29         ))

なんでもいいということはなんでもいいということで、これを使ってパーサにモードを持たせるなり、production で運ぶデータをオブジェクトに処理させるなり、それこそなんでも。

アタシが上でちょっと苦労してる「PropCallChain」は場所によって eval に渡す引数を決められなくて弱っていたのだが、この state オブジェクトに乗っければなんてことはないのであった。