parser.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
  5. from node import ExpressionNode as Node, ExpressionLeaf as Leaf
  6. import os.path
  7. PYBISON_BUILD = os.path.realpath('build/external/pybison')
  8. EXTERNAL_MODS = os.path.realpath('external')
  9. import sys
  10. sys.path.insert(0, PYBISON_BUILD)
  11. sys.path.insert(1, EXTERNAL_MODS)
  12. from pybison import BisonParser, BisonSyntaxError
  13. from graph_drawing.graph import generate_graph
  14. from node import TYPE_OPERATOR, OP_COMMA, OP_NEG
  15. from rules import RULES
  16. from possibilities import filter_duplicates, pick_suggestion, apply_suggestion
  17. import Queue
  18. # Check for n-ary operator in child nodes
  19. def combine(op, op_type, *nodes):
  20. # At least return the operator.
  21. res = [op]
  22. for n in nodes:
  23. # Merge the children for all nodes which have the same operator.
  24. if n.type == TYPE_OPERATOR and n.op == op_type:
  25. res += n.nodes
  26. else:
  27. res.append(n)
  28. return res
  29. class Parser(BisonParser):
  30. """
  31. Implements the calculator parser. Grammar rules are defined in the method
  32. docstrings. Scanner rules are in the 'lexscript' attribute.
  33. """
  34. # Output directory of generated pybison files, including a trailing slash.
  35. buildDirectory = PYBISON_BUILD + '/'
  36. # ----------------------------------------------------------------
  37. # lexer tokens - these must match those in your lex script (below)
  38. # ----------------------------------------------------------------
  39. # TODO: add a runtime check to verify that this token list match the list
  40. # of tokens of the lex script.
  41. tokens = ['NUMBER', 'IDENTIFIER', 'POSSIBILITIES',
  42. 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POW',
  43. 'LPAREN', 'RPAREN', 'COMMA', 'HINT', 'REWRITE',
  44. 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH', 'SQRT']
  45. # ------------------------------
  46. # precedences
  47. # ------------------------------
  48. precedences = (
  49. ('left', ('COMMA', )),
  50. ('left', ('MINUS', 'PLUS')),
  51. ('left', ('TIMES', 'DIVIDE')),
  52. ('left', ('NEG', )),
  53. ('right', ('POW', )),
  54. )
  55. interactive = 0
  56. def __init__(self, **kwargs):
  57. BisonParser.__init__(self, **kwargs)
  58. self.interactive = kwargs.get('interactive', 0)
  59. self.timeout = kwargs.get('timeout', 0)
  60. self.reset()
  61. def reset(self):
  62. self.read_buffer = ''
  63. self.read_queue = Queue.Queue()
  64. self.subtree_map = {}
  65. self.root_node = None
  66. self.possibilities = self.last_possibilities = []
  67. def run(self, *args, **kwargs):
  68. self.reset()
  69. return super(Parser, self).run(*args, **kwargs)
  70. # Override default read method with a version that prompts for input.
  71. def read(self, nbytes):
  72. if self.file == sys.stdin and self.file.closed:
  73. return ''
  74. if not self.read_buffer and not self.read_queue.empty():
  75. self.read_buffer = self.read_queue.get_nowait() + '\n'
  76. if self.read_buffer:
  77. read_buffer = self.read_buffer[:nbytes]
  78. self.read_buffer = self.read_buffer[nbytes:]
  79. return read_buffer
  80. try:
  81. read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
  82. except EOFError:
  83. return ''
  84. self.read_buffer = read_buffer[nbytes:]
  85. return read_buffer[:nbytes]
  86. def hook_read_before(self):
  87. if self.possibilities:
  88. if self.interactive: # pragma: nocover
  89. print 'possibilities:'
  90. items = filter_duplicates(self.possibilities)
  91. self.last_possibilities = self.possibilities
  92. if self.interactive: # pragma: nocover
  93. print ' ' + '\n '.join(map(str, items))
  94. def hook_read_after(self, data):
  95. """
  96. This hook will be called when the read() method returned. The data
  97. argument points to the data read by the read() method. This hook
  98. function should return the data to be used by the parser.
  99. """
  100. if not data.strip():
  101. return data
  102. self.possibilities = []
  103. import re
  104. # TODO: remove this quick preprocessing hack. This hack enables
  105. # concatenated expressions, since the grammar currently does not
  106. # support those. This workaround will replace:
  107. # - ")(" with ")*(".
  108. # - "a(" with "a*(".
  109. # - ")a" with ")*a".
  110. # - "ab" with "a*b".
  111. # - "4a" with "4*a".
  112. # - "a4" with "a^4".
  113. pattern = ('(?:(\))\s*(\()' # match: )( result: ) * (
  114. + '|([a-z0-9])\s*(\()' # match: a( result: a * (
  115. + '|(\))\s*([a-z0-9])' # match: )a result: ) * a
  116. + '|([a-z])\s*([a-z]+)' # match: ab result: a * b
  117. + '|([0-9])\s*([a-z])' # match: 4a result: 4 * a
  118. + '|([a-z])\s*([0-9])' # match: a4 result: a ^ 4
  119. + '|([0-9])\s+([0-9]))') # match: 4 4 result: 4 * 4
  120. def preprocess_data(match):
  121. left, right = filter(None, match.groups())
  122. # Filter words (otherwise they will be preprocessed as well)
  123. if left + right in ['graph', 'raise']:
  124. return left + right
  125. # If all characters on the right are numbers. e.g. "a4", the
  126. # expression implies exponentiation. Make sure ")4" is not
  127. # converted into an exponentiation, because that's multiplication.
  128. if left != ')' and not 48 <= ord(left) < 58 \
  129. and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
  130. return '%s^%s' % (left, right)
  131. # match: ab | abc | abcd (where left = "a")
  132. return '*'.join([left] + list(right))
  133. # Iteratively replace all matches.
  134. while True:
  135. data_after = re.sub(pattern, preprocess_data, data)
  136. if data == data_after:
  137. break
  138. if self.verbose: # pragma: nocover
  139. print 'hook_read_after() modified the input data:'
  140. print 'before:', data.replace('\n', '\\n')
  141. print 'after :', data_after.replace('\n', '\\n')
  142. data = data_after
  143. return data
  144. def hook_handler(self, target, option, names, values, retval):
  145. if target in ['exp', 'line', 'input'] or not retval:
  146. return retval
  147. if not retval.negated and retval.type != TYPE_OPERATOR:
  148. return retval
  149. if self.subtree_map and retval.type == TYPE_OPERATOR:
  150. # Update the subtree map to let the subtree point to its parent
  151. # node.
  152. parent_nodes = self.subtree_map.keys()
  153. for child in retval:
  154. if child in parent_nodes:
  155. self.subtree_map[child] = retval
  156. if retval.type == TYPE_OPERATOR and retval.op in RULES:
  157. handlers = RULES[retval.op]
  158. else:
  159. handlers = []
  160. if retval.negated:
  161. print retval, 'OP_NEG', retval.negated
  162. handlers += RULES[OP_NEG]
  163. for handler in handlers:
  164. possibilities = handler(retval)
  165. # Record the subtree root node in order to avoid tree traversal.
  166. # At this moment, the node is the root node since the expression is
  167. # parser using the left-innermost parsing strategy.
  168. for p in possibilities:
  169. self.subtree_map[p.root] = None
  170. self.possibilities.extend(possibilities)
  171. return retval
  172. def display_hint(self):
  173. print pick_suggestion(self.last_possibilities)
  174. def display_possibilities(self):
  175. print '\n'.join(map(str, self.last_possibilities))
  176. def rewrite(self):
  177. suggestion = pick_suggestion(self.last_possibilities)
  178. if self.verbose:
  179. print 'applying suggestion:', suggestion
  180. if not suggestion:
  181. return self.root_node
  182. expression = apply_suggestion(self.root_node, self.subtree_map,
  183. suggestion)
  184. if self.verbose:
  185. print 'After application, expression=', expression
  186. self.read_queue.put_nowait(str(expression))
  187. return expression
  188. #def hook_run(self, filename, retval):
  189. # return retval
  190. # ---------------------------------------------------------------
  191. # These methods are the python handlers for the bison targets.
  192. # (which get called by the bison code each time the corresponding
  193. # parse target is unambiguously reached)
  194. #
  195. # WARNING - don't touch the method docstrings unless you know what
  196. # you are doing - they are in bison rule syntax, and are passed
  197. # verbatim to bison to build the parser engine library.
  198. # ---------------------------------------------------------------
  199. # Declare the start target here (by name)
  200. start = 'input'
  201. def on_input(self, target, option, names, values):
  202. """
  203. input :
  204. | input line
  205. """
  206. if option == 1:
  207. # Interactive mode is enabled if the term rewriting system is used
  208. # as a shell. In that case, it is useful that the shell prints the
  209. # output of the evaluation.
  210. if self.interactive and values[1]: # pragma: nocover
  211. print values[1]
  212. return values[1]
  213. def on_line(self, target, option, names, values):
  214. """
  215. line : NEWLINE
  216. | exp NEWLINE
  217. | debug NEWLINE
  218. | HINT NEWLINE
  219. | POSSIBILITIES NEWLINE
  220. | REWRITE NEWLINE
  221. | RAISE NEWLINE
  222. """
  223. if option == 1: # rule: EXP NEWLINE
  224. self.root_node = values[0]
  225. return values[0]
  226. if option == 2: # rule: DEBUG NEWLINE
  227. self.root_node = values[0]
  228. return values[0]
  229. if option == 3: # rule: HINT NEWLINE
  230. self.display_hint()
  231. return
  232. if option == 4: # rule: POSSIBILITIES NEWLINE
  233. self.display_possibilities()
  234. return
  235. if option == 5: # rule: REWRITE NEWLINE
  236. self.root_node = self.rewrite()
  237. return self.root_node
  238. if option == 6:
  239. raise RuntimeError('on_line: exception raised')
  240. def on_debug(self, target, option, names, values):
  241. """
  242. debug : GRAPH exp
  243. """
  244. if option == 0:
  245. print generate_graph(values[1])
  246. return values[1]
  247. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  248. % (option, target)) # pragma: nocover
  249. def on_exp(self, target, option, names, values):
  250. """
  251. exp : NUMBER
  252. | IDENTIFIER
  253. | LPAREN exp RPAREN
  254. | unary
  255. | binary
  256. | nary
  257. """
  258. # | concat
  259. if option == 0: # rule: NUMBER
  260. # TODO: A bit hacky, this achieves long integers and floats.
  261. value = float(values[0]) if '.' in values[0] else int(values[0])
  262. return Leaf(value)
  263. if option == 1: # rule: IDENTIFIER
  264. return Leaf(values[0])
  265. if option == 2: # rule: LPAREN exp RPAREN
  266. return values[1]
  267. if option in [3, 4, 5]: # rule: unary | binary | nary
  268. return values[0]
  269. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  270. % (option, target)) # pragma: nocover
  271. def on_unary(self, target, option, names, values):
  272. """
  273. unary : MINUS exp %prec NEG
  274. """
  275. if option == 0: # rule: NEG exp
  276. node = values[1]
  277. node.negated += 1
  278. return node
  279. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  280. % (option, target)) # pragma: nocover
  281. def on_binary(self, target, option, names, values):
  282. """
  283. binary : exp PLUS exp
  284. | exp TIMES exp
  285. | exp DIVIDE exp
  286. | exp POW exp
  287. | exp MINUS exp
  288. """
  289. if 0 <= option < 4: # rule: exp {PLUS,TIMES,DIVIDES,POW} exp
  290. return Node(values[1], values[0], values[2])
  291. if option == 4: # rule: exp MINUS exp
  292. node = values[2]
  293. node.negated += 1
  294. # Explicit call the hook handler on the created unary negation.
  295. node = self.hook_handler('binary', 4, names, values, node)
  296. return Node('+', values[0], node)
  297. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  298. % (option, target)) # pragma: nocover
  299. def on_nary(self, target, option, names, values):
  300. """
  301. nary : exp COMMA exp
  302. """
  303. if option == 0: # rule: exp COMMA exp
  304. return Node(*combine(',', OP_COMMA, values[0], values[2]))
  305. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  306. % (option, target)) # pragma: nocover
  307. # -----------------------------------------
  308. # raw lex script, verbatim here
  309. # -----------------------------------------
  310. lexscript = r"""
  311. %top{
  312. #include "Python.h"
  313. }
  314. %{
  315. #define YYSTYPE void *
  316. #include "tokens.h"
  317. extern void *py_parser;
  318. extern void (*py_input)(PyObject *parser, char *buf, int *result,
  319. int max_size);
  320. #define returntoken(tok) \
  321. yylval = PyString_FromString(strdup(yytext)); return (tok);
  322. #define YY_INPUT(buf,result,max_size) { \
  323. (*py_input)(py_parser, buf, &result, max_size); \
  324. }
  325. int yycolumn = 0;
  326. #define YY_USER_ACTION \
  327. yylloc.first_line = yylloc.last_line = yylineno; \
  328. yylloc.first_column = yycolumn; \
  329. yylloc.last_column = yycolumn + yyleng; \
  330. yycolumn += yyleng;
  331. %}
  332. %option yylineno
  333. %%
  334. [0-9]+"."?[0-9]* { returntoken(NUMBER); }
  335. [a-zA-Z] { returntoken(IDENTIFIER); }
  336. "(" { returntoken(LPAREN); }
  337. ")" { returntoken(RPAREN); }
  338. "+" { returntoken(PLUS); }
  339. "-" { returntoken(MINUS); }
  340. "*" { returntoken(TIMES); }
  341. "^" { returntoken(POW); }
  342. "/" { returntoken(DIVIDE); }
  343. "," { returntoken(COMMA); }
  344. "??" { returntoken(POSSIBILITIES); }
  345. "?" { returntoken(HINT); }
  346. "@" { returntoken(REWRITE); }
  347. "quit" { yyterminate(); returntoken(QUIT); }
  348. "raise" { returntoken(RAISE); }
  349. "graph" { returntoken(GRAPH); }
  350. "sqrt" { returntoken(SQRT); }
  351. [ \t\v\f] { }
  352. [\n] { yycolumn = 0; returntoken(NEWLINE); }
  353. . { printf("unknown char %c ignored.\n", yytext[0]); }
  354. %%
  355. yywrap() { return(1); }
  356. """