parser.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
  5. import os.path
  6. PYBISON_BUILD = os.path.realpath('build/external/pybison')
  7. EXTERNAL_MODS = os.path.realpath('external')
  8. import sys
  9. sys.path.insert(0, PYBISON_BUILD)
  10. sys.path.insert(1, EXTERNAL_MODS)
  11. from pybison import BisonParser, BisonSyntaxError
  12. from graph_drawing.graph import generate_graph
  13. from node import ExpressionNode as Node, ExpressionLeaf as Leaf, OP_MAP, \
  14. OP_DERIV, TOKEN_MAP, TYPE_OPERATOR, OP_COMMA, OP_NEG, OP_MUL, OP_DIV, \
  15. Scope, PI
  16. from rules import RULES
  17. from strategy import pick_suggestion
  18. from possibilities import filter_duplicates, apply_suggestion
  19. import Queue
  20. # Check for n-ary operator in child nodes
  21. def combine(op, op_type, *nodes):
  22. # At least return the operator.
  23. res = [op]
  24. for n in nodes:
  25. # Merge the children for all nodes which have the same operator.
  26. if n.type == TYPE_OPERATOR and n.op == op_type:
  27. res += n.nodes
  28. else:
  29. res.append(n)
  30. return res
  31. class Parser(BisonParser):
  32. """
  33. Implements the calculator parser. Grammar rules are defined in the method
  34. docstrings. Scanner rules are in the 'lexscript' attribute.
  35. """
  36. # Words to be ignored by preprocessor
  37. words = zip(*filter(lambda (s, op): TOKEN_MAP[op] == 'FUNCTION', \
  38. OP_MAP.iteritems()))[0] + ('raise', 'graph', PI)
  39. # Output directory of generated pybison files, including a trailing slash.
  40. buildDirectory = PYBISON_BUILD + '/'
  41. # ----------------------------------------------------------------
  42. # lexer tokens - these must match those in your lex script (below)
  43. # ----------------------------------------------------------------
  44. # TODO: add a runtime check to verify that this token list match the list
  45. # of tokens of the lex script.
  46. tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
  47. 'LPAREN', 'RPAREN', 'FUNCTION', 'LBRACKET', 'RBRACKET', \
  48. 'APOSTROPH'] \
  49. + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())
  50. # ------------------------------
  51. # precedences
  52. # ------------------------------
  53. precedences = (
  54. ('left', ('COMMA', )),
  55. ('left', ('MINUS', 'PLUS')),
  56. ('left', ('TIMES', 'DIVIDE')),
  57. ('left', ('EQ', )),
  58. ('left', ('NEG', )),
  59. ('right', ('POW', )),
  60. ('right', ('FUNCTION', )),
  61. #('right', ('SIN', 'COS', 'TAN', 'SOLVE', 'INT', 'SQRT')),
  62. )
  63. interactive = 0
  64. def __init__(self, **kwargs):
  65. BisonParser.__init__(self, **kwargs)
  66. self.interactive = kwargs.get('interactive', 0)
  67. self.timeout = kwargs.get('timeout', 0)
  68. self.reset()
  69. def reset(self):
  70. self.read_buffer = ''
  71. self.read_queue = Queue.Queue()
  72. #self.subtree_map = {}
  73. self.root_node = None
  74. self.possibilities = self.last_possibilities = []
  75. def run(self, *args, **kwargs):
  76. self.reset()
  77. return super(Parser, self).run(*args, **kwargs)
  78. # Override default read method with a version that prompts for input.
  79. def read(self, nbytes):
  80. if self.file == sys.stdin and self.file.closed:
  81. return ''
  82. if not self.read_buffer and not self.read_queue.empty():
  83. self.read_buffer = self.read_queue.get_nowait() + '\n'
  84. if self.read_buffer:
  85. read_buffer = self.read_buffer[:nbytes]
  86. self.read_buffer = self.read_buffer[nbytes:]
  87. return read_buffer
  88. try:
  89. read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
  90. except EOFError:
  91. return ''
  92. self.read_buffer = read_buffer[nbytes:]
  93. return read_buffer[:nbytes]
  94. def hook_read_before(self):
  95. if self.possibilities:
  96. if self.verbose: # pragma: nocover
  97. print 'possibilities:'
  98. items = filter_duplicates(self.possibilities)
  99. self.last_possibilities = self.possibilities
  100. if self.verbose: # pragma: nocover
  101. print ' ' + '\n '.join(map(str, items))
  102. def hook_read_after(self, data):
  103. """
  104. This hook will be called when the read() method returned. The data
  105. argument points to the data read by the read() method. This hook
  106. function should return the data to be used by the parser.
  107. """
  108. if not data.strip():
  109. return data
  110. self.possibilities = []
  111. import re
  112. # TODO: remove this quick preprocessing hack. This hack enables
  113. # concatenated expressions, since the grammar currently does not
  114. # support those. This workaround will replace:
  115. # - ")(" with ")*(".
  116. # - "a(" with "a*(".
  117. # - ")a" with ")*a".
  118. # - "ab" with "a*b".
  119. # - "4a" with "4*a".
  120. # - "a4" with "a^4".
  121. pattern = ('(?:(\))\s*(\()' # match: )( result: ) * (
  122. + '|([a-z0-9])\s*(\()' # match: a( result: a * (
  123. + '|(\))\s*([a-z0-9])' # match: )a result: ) * a
  124. + '|([a-z])\s*([a-z]+)' # match: ab result: a * b
  125. + '|([0-9])\s*([a-z])' # match: 4a result: 4 * a
  126. + '|([a-z])\s*([0-9])' # match: a4 result: a ^ 4
  127. + '|([0-9])\s+([0-9]))') # match: 4 4 result: 4 * 4
  128. def preprocess_data(match):
  129. left, right = filter(None, match.groups())
  130. # Filter words (otherwise they will be preprocessed as well)
  131. if left + right in Parser.words:
  132. return left + right
  133. # If all characters on the right are numbers. e.g. "a4", the
  134. # expression implies exponentiation. Make sure ")4" is not
  135. # converted into an exponentiation, because that's multiplication.
  136. if left != ')' and not 48 <= ord(left) < 58 \
  137. and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
  138. return '%s^%s' % (left, right)
  139. # match: ab | abc | abcd (where left = "a")
  140. return '*'.join([left] + list(right))
  141. if self.verbose: # pragma: nocover
  142. data_before = data
  143. # Iteratively replace all matches.
  144. while True:
  145. data_after = re.sub(pattern, preprocess_data, data)
  146. if data == data_after:
  147. break
  148. data = data_after
  149. if self.verbose and data_before != data_after: # pragma: nocover
  150. print 'hook_read_after() modified the input data:'
  151. print 'before:', repr(data_before)
  152. print 'after :', repr(data_after)
  153. return data
  154. def hook_handler(self, target, option, names, values, retval):
  155. if target in ['exp', 'line', 'input'] or not retval:
  156. return retval
  157. if not retval.negated and retval.type != TYPE_OPERATOR:
  158. return retval
  159. if retval.type == TYPE_OPERATOR and retval.op in RULES:
  160. handlers = RULES[retval.op]
  161. else:
  162. handlers = []
  163. if retval.negated:
  164. handlers = RULES[OP_NEG]
  165. for handler in handlers:
  166. possibilities = handler(retval)
  167. self.possibilities.extend(possibilities)
  168. return retval
  169. def display_hint(self):
  170. print pick_suggestion(self.last_possibilities)
  171. def display_possibilities(self):
  172. if self.last_possibilities:
  173. print '\n'.join(map(str, self.last_possibilities))
  174. def rewrite(self):
  175. suggestion = pick_suggestion(self.last_possibilities)
  176. if self.verbose:
  177. print 'applying suggestion:', suggestion
  178. if not suggestion:
  179. return self.root_node
  180. expression = apply_suggestion(self.root_node, suggestion)
  181. if self.verbose:
  182. print 'After application, expression=', expression
  183. self.read_queue.put_nowait(str(expression))
  184. return expression
  185. #def hook_run(self, filename, retval):
  186. # return retval
  187. # ---------------------------------------------------------------
  188. # These methods are the python handlers for the bison targets.
  189. # (which get called by the bison code each time the corresponding
  190. # parse target is unambiguously reached)
  191. #
  192. # WARNING - don't touch the method docstrings unless you know what
  193. # you are doing - they are in bison rule syntax, and are passed
  194. # verbatim to bison to build the parser engine library.
  195. # ---------------------------------------------------------------
  196. # Declare the start target here (by name)
  197. start = 'input'
  198. def on_input(self, target, option, names, values):
  199. """
  200. input :
  201. | input line
  202. | input REWRITE NEWLINE
  203. """
  204. if option == 1:
  205. # Interactive mode is enabled if the term rewriting system is used
  206. # as a shell. In that case, it is useful that the shell prints the
  207. # output of the evaluation.
  208. if self.interactive and values[1]: # pragma: nocover
  209. print values[1]
  210. return values[1]
  211. if option == 2: # rule: input REWRITE NEWLINE
  212. self.root_node = self.rewrite()
  213. return self.root_node
  214. def on_line(self, target, option, names, values):
  215. """
  216. line : NEWLINE
  217. | exp NEWLINE
  218. | debug NEWLINE
  219. | HINT NEWLINE
  220. | POSSIBILITIES NEWLINE
  221. | RAISE NEWLINE
  222. """
  223. if option == 1: # rule: EXP NEWLINE
  224. self.root_node = values[0]
  225. # Clear list of last possibilities when current expression has no
  226. # possibilities. Otherwise, an invalid expression gets the last
  227. # possibilities of a valid expression.
  228. if not self.possibilities:
  229. self.last_possibilities = []
  230. return values[0]
  231. if option == 2: # rule: DEBUG NEWLINE
  232. self.root_node = values[0]
  233. return values[0]
  234. if option == 3: # rule: HINT NEWLINE
  235. self.display_hint()
  236. return
  237. if option == 4: # rule: POSSIBILITIES NEWLINE
  238. self.display_possibilities()
  239. return
  240. if option == 5:
  241. raise RuntimeError('on_line: exception raised')
  242. def on_debug(self, target, option, names, values):
  243. """
  244. debug : GRAPH exp
  245. """
  246. if option == 0:
  247. print generate_graph(values[1])
  248. return values[1]
  249. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  250. % (option, target)) # pragma: nocover
  251. def on_exp(self, target, option, names, values):
  252. """
  253. exp : NUMBER
  254. | IDENTIFIER
  255. | LPAREN exp RPAREN
  256. | unary
  257. | binary
  258. | nary
  259. | derivative
  260. """
  261. # | concat
  262. if option == 0: # rule: NUMBER
  263. # TODO: A bit hacky, this achieves long integers and floats.
  264. value = float(values[0]) if '.' in values[0] else int(values[0])
  265. return Leaf(value)
  266. if option == 1: # rule: IDENTIFIER
  267. return Leaf(values[0])
  268. if option == 2: # rule: LPAREN exp RPAREN
  269. return values[1]
  270. if option in [3, 4, 5, 6]: # rule: unary | binary | nary | derivative
  271. return values[0]
  272. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  273. % (option, target)) # pragma: nocover
  274. def on_derivative(self, target, option, names, values):
  275. """
  276. derivative : LBRACKET exp RBRACKET APOSTROPH
  277. | derivative APOSTROPH
  278. """
  279. op = [k for k, v in OP_MAP.iteritems() if v == OP_DERIV][0]
  280. if option == 0: # rule: LBRACKET exp RBRACKET APOSTROPH
  281. return Node(op, values[1])
  282. if option == 1: # rule: derivative APOSTROPH
  283. return Node(op, values[0])
  284. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  285. % (option, target)) # pragma: nocover
  286. def on_unary(self, target, option, names, values):
  287. """
  288. unary : MINUS exp %prec NEG
  289. | FUNCTION exp
  290. """
  291. if option == 0: # rule: NEG exp
  292. node = values[1]
  293. # Add negation to the left-most child
  294. if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
  295. node.negated += 1
  296. else:
  297. child = Scope(node)[0]
  298. child.negated += 1
  299. return node
  300. if option == 1: # rule: FUNCTION exp
  301. if values[1].is_op(OP_COMMA):
  302. return Node(values[0], *values[1])
  303. return Node(*values)
  304. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  305. % (option, target)) # pragma: nocover
  306. def on_binary(self, target, option, names, values):
  307. """
  308. binary : exp PLUS exp
  309. | exp TIMES exp
  310. | exp DIVIDE exp
  311. | exp POW exp
  312. | exp EQ exp
  313. | exp MINUS exp
  314. """
  315. if 0 <= option < 5: # rule: exp {PLUS,TIMES,DIVIDES,POW,EQ} exp
  316. return Node(values[1], values[0], values[2])
  317. if option == 5: # rule: exp MINUS exp
  318. node = values[2]
  319. # Add negation to the left-most child
  320. if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
  321. node.negated += 1
  322. else:
  323. node = Scope(node)[0]
  324. node.negated += 1
  325. # Explicit call the hook handler on the created unary negation.
  326. node = self.hook_handler('binary', 4, names, values, node)
  327. return Node('+', values[0], values[2])
  328. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  329. % (option, target)) # pragma: nocover
  330. def on_nary(self, target, option, names, values):
  331. """
  332. nary : exp COMMA exp
  333. """
  334. if option == 0: # rule: exp COMMA exp
  335. return Node(*combine(',', OP_COMMA, values[0], values[2]))
  336. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  337. % (option, target)) # pragma: nocover
  338. # -----------------------------------------
  339. # PI and operator tokens
  340. # -----------------------------------------
  341. operators = '"%s"%s{ returntoken(IDENTIFIER); }\n' \
  342. % (PI, ' ' * (8 - len(PI)))
  343. functions = []
  344. for op_str, op in OP_MAP.iteritems():
  345. if TOKEN_MAP[op] == 'FUNCTION':
  346. functions.append(op_str)
  347. else:
  348. operators += '"%s"%s{ returntoken(%s); }\n' \
  349. % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])
  350. # Put all functions in a single regex
  351. if functions:
  352. operators += '("%s") { returntoken(FUNCTION); }\n' \
  353. % '"|"'.join(functions)
  354. # -----------------------------------------
  355. # raw lex script, verbatim here
  356. # -----------------------------------------
  357. lexscript = r"""
  358. %top{
  359. #include "Python.h"
  360. }
  361. %{
  362. #define YYSTYPE void *
  363. #include "tokens.h"
  364. extern void *py_parser;
  365. extern void (*py_input)(PyObject *parser, char *buf, int *result,
  366. int max_size);
  367. #define returntoken(tok) \
  368. yylval = PyString_FromString(strdup(yytext)); return (tok);
  369. #define YY_INPUT(buf,result,max_size) { \
  370. (*py_input)(py_parser, buf, &result, max_size); \
  371. }
  372. int yycolumn = 0;
  373. #define YY_USER_ACTION \
  374. yylloc.first_line = yylloc.last_line = yylineno; \
  375. yylloc.first_column = yycolumn; \
  376. yylloc.last_column = yycolumn + yyleng; \
  377. yycolumn += yyleng;
  378. %}
  379. %option yylineno
  380. %%
  381. [0-9]+"."?[0-9]* { returntoken(NUMBER); }
  382. [a-zA-Z] { returntoken(IDENTIFIER); }
  383. "(" { returntoken(LPAREN); }
  384. ")" { returntoken(RPAREN); }
  385. """ + operators + r"""
  386. "raise" { returntoken(RAISE); }
  387. "graph" { returntoken(GRAPH); }
  388. "quit" { yyterminate(); returntoken(QUIT); }
  389. [ \t\v\f] { }
  390. [\n] { yycolumn = 0; returntoken(NEWLINE); }
  391. "[" { returntoken(LBRACKET); }
  392. "]" { returntoken(RBRACKET); }
  393. "'" { returntoken(APOSTROPH); }
  394. . { printf("unknown char %c ignored.\n", yytext[0]); }
  395. %%
  396. yywrap() { return(1); }
  397. """