parser.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
  5. import os.path
  6. PYBISON_BUILD = os.path.realpath('build/external/pybison')
  7. EXTERNAL_MODS = os.path.realpath('external')
  8. import sys
  9. sys.path.insert(0, PYBISON_BUILD)
  10. sys.path.insert(1, EXTERNAL_MODS)
  11. from pybison import BisonParser, BisonSyntaxError
  12. from graph_drawing.graph import generate_graph
  13. from node import ExpressionNode as Node, ExpressionLeaf as Leaf, OP_MAP, \
  14. TOKEN_MAP, TYPE_OPERATOR, OP_COMMA, OP_NEG, OP_MUL, Scope
  15. from rules import RULES
  16. from possibilities import filter_duplicates, pick_suggestion, apply_suggestion
  17. import Queue
  18. # Check for n-ary operator in child nodes
  19. def combine(op, op_type, *nodes):
  20. # At least return the operator.
  21. res = [op]
  22. for n in nodes:
  23. # Merge the children for all nodes which have the same operator.
  24. if n.type == TYPE_OPERATOR and n.op == op_type:
  25. res += n.nodes
  26. else:
  27. res.append(n)
  28. return res
  29. class Parser(BisonParser):
  30. """
  31. Implements the calculator parser. Grammar rules are defined in the method
  32. docstrings. Scanner rules are in the 'lexscript' attribute.
  33. """
  34. # Output directory of generated pybison files, including a trailing slash.
  35. buildDirectory = PYBISON_BUILD + '/'
  36. # ----------------------------------------------------------------
  37. # lexer tokens - these must match those in your lex script (below)
  38. # ----------------------------------------------------------------
  39. # TODO: add a runtime check to verify that this token list match the list
  40. # of tokens of the lex script.
  41. tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH', \
  42. 'LPAREN', 'RPAREN'] + TOKEN_MAP.values()
  43. # ------------------------------
  44. # precedences
  45. # ------------------------------
  46. precedences = (
  47. ('left', ('COMMA', )),
  48. ('left', ('MINUS', 'PLUS')),
  49. ('left', ('TIMES', 'DIVIDE')),
  50. ('left', ('EQ', )),
  51. ('left', ('NEG', )),
  52. ('right', ('POW', )),
  53. ('right', ('SIN', 'COS', 'TAN', 'SOLVE', 'INT', 'SQRT')),
  54. )
  55. interactive = 0
  56. def __init__(self, **kwargs):
  57. BisonParser.__init__(self, **kwargs)
  58. self.interactive = kwargs.get('interactive', 0)
  59. self.timeout = kwargs.get('timeout', 0)
  60. self.reset()
  61. def reset(self):
  62. self.read_buffer = ''
  63. self.read_queue = Queue.Queue()
  64. #self.subtree_map = {}
  65. self.root_node = None
  66. self.possibilities = self.last_possibilities = []
  67. def run(self, *args, **kwargs):
  68. self.reset()
  69. return super(Parser, self).run(*args, **kwargs)
  70. # Override default read method with a version that prompts for input.
  71. def read(self, nbytes):
  72. if self.file == sys.stdin and self.file.closed:
  73. return ''
  74. if not self.read_buffer and not self.read_queue.empty():
  75. self.read_buffer = self.read_queue.get_nowait() + '\n'
  76. if self.read_buffer:
  77. read_buffer = self.read_buffer[:nbytes]
  78. self.read_buffer = self.read_buffer[nbytes:]
  79. return read_buffer
  80. try:
  81. read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
  82. except EOFError:
  83. return ''
  84. self.read_buffer = read_buffer[nbytes:]
  85. return read_buffer[:nbytes]
  86. def hook_read_before(self):
  87. if self.possibilities:
  88. if self.interactive: # pragma: nocover
  89. print 'possibilities:'
  90. items = filter_duplicates(self.possibilities)
  91. self.last_possibilities = self.possibilities
  92. if self.interactive: # pragma: nocover
  93. print ' ' + '\n '.join(map(str, items))
  94. def hook_read_after(self, data):
  95. """
  96. This hook will be called when the read() method returned. The data
  97. argument points to the data read by the read() method. This hook
  98. function should return the data to be used by the parser.
  99. """
  100. if not data.strip():
  101. return data
  102. self.possibilities = []
  103. import re
  104. # TODO: remove this quick preprocessing hack. This hack enables
  105. # concatenated expressions, since the grammar currently does not
  106. # support those. This workaround will replace:
  107. # - ")(" with ")*(".
  108. # - "a(" with "a*(".
  109. # - ")a" with ")*a".
  110. # - "ab" with "a*b".
  111. # - "4a" with "4*a".
  112. # - "a4" with "a^4".
  113. pattern = ('(?:(\))\s*(\()' # match: )( result: ) * (
  114. + '|([a-z0-9])\s*(\()' # match: a( result: a * (
  115. + '|(\))\s*([a-z0-9])' # match: )a result: ) * a
  116. + '|([a-z])\s*([a-z]+)' # match: ab result: a * b
  117. + '|([0-9])\s*([a-z])' # match: 4a result: 4 * a
  118. + '|([a-z])\s*([0-9])' # match: a4 result: a ^ 4
  119. + '|([0-9])\s+([0-9]))') # match: 4 4 result: 4 * 4
  120. def preprocess_data(match):
  121. left, right = filter(None, match.groups())
  122. # Filter words (otherwise they will be preprocessed as well)
  123. if (left + right).upper() in self.tokens:
  124. return left + right
  125. # If all characters on the right are numbers. e.g. "a4", the
  126. # expression implies exponentiation. Make sure ")4" is not
  127. # converted into an exponentiation, because that's multiplication.
  128. if left != ')' and not 48 <= ord(left) < 58 \
  129. and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
  130. return '%s^%s' % (left, right)
  131. # match: ab | abc | abcd (where left = "a")
  132. return '*'.join([left] + list(right))
  133. # Iteratively replace all matches.
  134. while True:
  135. data_after = re.sub(pattern, preprocess_data, data)
  136. if data == data_after:
  137. break
  138. if self.verbose: # pragma: nocover
  139. print 'hook_read_after() modified the input data:'
  140. print 'before:', data.replace('\n', '\\n')
  141. print 'after :', data_after.replace('\n', '\\n')
  142. data = data_after
  143. return data
  144. def hook_handler(self, target, option, names, values, retval):
  145. if target in ['exp', 'line', 'input'] or not retval:
  146. return retval
  147. if not retval.negated and retval.type != TYPE_OPERATOR:
  148. return retval
  149. if retval.type == TYPE_OPERATOR and retval.op in RULES:
  150. handlers = RULES[retval.op]
  151. else:
  152. handlers = []
  153. if retval.negated:
  154. handlers = RULES[OP_NEG]
  155. for handler in handlers:
  156. possibilities = handler(retval)
  157. self.possibilities.extend(possibilities)
  158. return retval
  159. def display_hint(self):
  160. print pick_suggestion(self.last_possibilities)
  161. def display_possibilities(self):
  162. print '\n'.join(map(str, self.last_possibilities))
  163. def rewrite(self):
  164. suggestion = pick_suggestion(self.last_possibilities)
  165. if self.verbose:
  166. print 'applying suggestion:', suggestion
  167. if not suggestion:
  168. return self.root_node
  169. expression = apply_suggestion(self.root_node, suggestion)
  170. if self.verbose:
  171. print 'After application, expression=', expression
  172. self.read_queue.put_nowait(str(expression))
  173. return expression
  174. #def hook_run(self, filename, retval):
  175. # return retval
  176. # ---------------------------------------------------------------
  177. # These methods are the python handlers for the bison targets.
  178. # (which get called by the bison code each time the corresponding
  179. # parse target is unambiguously reached)
  180. #
  181. # WARNING - don't touch the method docstrings unless you know what
  182. # you are doing - they are in bison rule syntax, and are passed
  183. # verbatim to bison to build the parser engine library.
  184. # ---------------------------------------------------------------
  185. # Declare the start target here (by name)
  186. start = 'input'
  187. def on_input(self, target, option, names, values):
  188. """
  189. input :
  190. | input line
  191. """
  192. if option == 1:
  193. # Interactive mode is enabled if the term rewriting system is used
  194. # as a shell. In that case, it is useful that the shell prints the
  195. # output of the evaluation.
  196. if self.interactive and values[1]: # pragma: nocover
  197. print values[1]
  198. return values[1]
  199. def on_line(self, target, option, names, values):
  200. """
  201. line : NEWLINE
  202. | exp NEWLINE
  203. | debug NEWLINE
  204. | HINT NEWLINE
  205. | POSSIBILITIES NEWLINE
  206. | REWRITE NEWLINE
  207. | RAISE NEWLINE
  208. """
  209. if option == 1: # rule: EXP NEWLINE
  210. self.root_node = values[0]
  211. return values[0]
  212. if option == 2: # rule: DEBUG NEWLINE
  213. self.root_node = values[0]
  214. return values[0]
  215. if option == 3: # rule: HINT NEWLINE
  216. self.display_hint()
  217. return
  218. if option == 4: # rule: POSSIBILITIES NEWLINE
  219. self.display_possibilities()
  220. return
  221. if option == 5: # rule: REWRITE NEWLINE
  222. self.root_node = self.rewrite()
  223. return self.root_node
  224. if option == 6:
  225. raise RuntimeError('on_line: exception raised')
  226. def on_debug(self, target, option, names, values):
  227. """
  228. debug : GRAPH exp
  229. """
  230. if option == 0:
  231. print generate_graph(values[1])
  232. return values[1]
  233. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  234. % (option, target)) # pragma: nocover
  235. def on_exp(self, target, option, names, values):
  236. """
  237. exp : NUMBER
  238. | IDENTIFIER
  239. | LPAREN exp RPAREN
  240. | unary
  241. | binary
  242. | nary
  243. """
  244. # | concat
  245. if option == 0: # rule: NUMBER
  246. # TODO: A bit hacky, this achieves long integers and floats.
  247. value = float(values[0]) if '.' in values[0] else int(values[0])
  248. return Leaf(value)
  249. if option == 1: # rule: IDENTIFIER
  250. return Leaf(values[0])
  251. if option == 2: # rule: LPAREN exp RPAREN
  252. return values[1]
  253. if option in [3, 4, 5]: # rule: unary | binary | nary
  254. return values[0]
  255. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  256. % (option, target)) # pragma: nocover
  257. def on_unary(self, target, option, names, values):
  258. """
  259. unary : MINUS exp %prec NEG
  260. | SIN exp
  261. | COS exp
  262. | TAN exp
  263. | INT exp
  264. | SOLVE exp
  265. | SQRT exp
  266. """
  267. if option == 0: # rule: NEG exp
  268. # Add negation to the left-most child
  269. if values[1].is_leaf or values[1].op != OP_MUL:
  270. values[1].negated += 1
  271. else:
  272. child = Scope(values[1])[0]
  273. child.negated += 1
  274. return values[1]
  275. if option < 7: # rule: SIN exp | COS exp | TAN exp | INT exp
  276. if values[1].type == TYPE_OPERATOR and values[1].op == OP_COMMA:
  277. return Node(values[0], *values[1])
  278. return Node(*values)
  279. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  280. % (option, target)) # pragma: nocover
  281. def on_binary(self, target, option, names, values):
  282. """
  283. binary : exp PLUS exp
  284. | exp TIMES exp
  285. | exp DIVIDE exp
  286. | exp POW exp
  287. | exp EQ exp
  288. | exp MINUS exp
  289. """
  290. if 0 <= option < 5: # rule: exp {PLUS,TIMES,DIVIDES,POW,EQ} exp
  291. return Node(values[1], values[0], values[2])
  292. if option == 5: # rule: exp MINUS exp
  293. node = values[2]
  294. # Add negation to the left-most child
  295. if node.is_leaf or node.op != OP_MUL:
  296. node.negated += 1
  297. else:
  298. node = Scope(node)[0]
  299. node.negated += 1
  300. # Explicit call the hook handler on the created unary negation.
  301. node = self.hook_handler('binary', 4, names, values, node)
  302. return Node('+', values[0], values[2])
  303. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  304. % (option, target)) # pragma: nocover
  305. def on_nary(self, target, option, names, values):
  306. """
  307. nary : exp COMMA exp
  308. """
  309. if option == 0: # rule: exp COMMA exp
  310. return Node(*combine(',', OP_COMMA, values[0], values[2]))
  311. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  312. % (option, target)) # pragma: nocover
  313. # -----------------------------------------
  314. # operator tokens
  315. # -----------------------------------------
  316. operators = ''
  317. for op_str, op in OP_MAP.iteritems():
  318. operators += '"%s"%s{ returntoken(%s); }\n' \
  319. % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])
  320. # -----------------------------------------
  321. # raw lex script, verbatim here
  322. # -----------------------------------------
  323. lexscript = r"""
  324. %top{
  325. #include "Python.h"
  326. }
  327. %{
  328. #define YYSTYPE void *
  329. #include "tokens.h"
  330. extern void *py_parser;
  331. extern void (*py_input)(PyObject *parser, char *buf, int *result,
  332. int max_size);
  333. #define returntoken(tok) \
  334. yylval = PyString_FromString(strdup(yytext)); return (tok);
  335. #define YY_INPUT(buf,result,max_size) { \
  336. (*py_input)(py_parser, buf, &result, max_size); \
  337. }
  338. int yycolumn = 0;
  339. #define YY_USER_ACTION \
  340. yylloc.first_line = yylloc.last_line = yylineno; \
  341. yylloc.first_column = yycolumn; \
  342. yylloc.last_column = yycolumn + yyleng; \
  343. yycolumn += yyleng;
  344. %}
  345. %option yylineno
  346. %%
  347. [0-9]+"."?[0-9]* { returntoken(NUMBER); }
  348. [a-zA-Z] { returntoken(IDENTIFIER); }
  349. "(" { returntoken(LPAREN); }
  350. ")" { returntoken(RPAREN); }
  351. """ + operators + r"""
  352. "raise" { returntoken(RAISE); }
  353. "graph" { returntoken(GRAPH); }
  354. "quit" { yyterminate(); returntoken(QUIT); }
  355. [ \t\v\f] { }
  356. [\n] { yycolumn = 0; returntoken(NEWLINE); }
  357. . { printf("unknown char %c ignored.\n", yytext[0]); }
  358. %%
  359. yywrap() { return(1); }
  360. """