  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
import os.path

PYBISON_BUILD = os.path.realpath('build/external/pybison')
EXTERNAL_MODS = os.path.realpath('external')

import sys
sys.path.insert(0, PYBISON_BUILD)
sys.path.insert(1, EXTERNAL_MODS)

from pybison import BisonParser, BisonSyntaxError
from graph_drawing.graph import generate_graph

from node import ExpressionNode as Node, ExpressionLeaf as Leaf, OP_MAP, \
        OP_DERIV, TOKEN_MAP, TYPE_OPERATOR, OP_COMMA, OP_NEG, OP_MUL, OP_DIV, \
        Scope, PI
from rules import RULES
from strategy import pick_suggestion
from possibilities import filter_duplicates, apply_suggestion

import Queue


# Combine the operands of an n-ary operator: child nodes that use the same
# operator are flattened into a single argument list.
def combine(op, op_type, *nodes):
    # At least return the operator.
    res = [op]

    for n in nodes:
        # Merge the children of all nodes that have the same operator.
        if n.type == TYPE_OPERATOR and n.op == op_type:
            res += n.nodes
        else:
            res.append(n)

    return res
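

# Illustrative sketch of combine() (hypothetical values; ExpressionNode and
# ExpressionLeaf come from node.py). Combining an existing comma node with a
# new operand yields one flat argument list instead of a nested tree:
#
#   a, b, c = Leaf('a'), Leaf('b'), Leaf('c')
#   combine(',', OP_COMMA, Node(',', a, b), c)  ->  [',', a, b, c]
#   Node(*combine(',', OP_COMMA, Node(',', a, b), c))  ->  ',' node with 3 args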


class Parser(BisonParser):
    """
    Implements the calculator parser. Grammar rules are defined in the method
    docstrings. Scanner rules are in the 'lexscript' attribute.
    """

    # Words to be ignored by the preprocessor
    words = zip(*filter(lambda (s, op): TOKEN_MAP[op] == 'FUNCTION', \
                        OP_MAP.iteritems()))[0] + ('raise', 'graph', PI)

    # Output directory of generated pybison files, including a trailing slash.
    buildDirectory = PYBISON_BUILD + '/'

    # ----------------------------------------------------------------
    # lexer tokens - these must match those in your lex script (below)
    # ----------------------------------------------------------------
    # TODO: add a runtime check to verify that this token list matches the
    # list of tokens in the lex script.
    tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
              'LPAREN', 'RPAREN', 'FUNCTION', 'FUNCTION_LPAREN', 'LBRACKET',
              'RBRACKET', 'APOSTROPH', 'DERIVATIVE'] \
             + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())

    # ------------------------------
    # precedences
    # ------------------------------
    precedences = (
        ('left', ('COMMA', )),
        ('left', ('MINUS', 'PLUS')),
        ('left', ('TIMES', 'DIVIDE')),
        ('right', ('FUNCTION', 'DERIVATIVE')),
        ('left', ('EQ', )),
        ('left', ('NEG', )),
        ('right', ('POW', )),
        ('right', ('FUNCTION_LPAREN', )),
        )
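
    # Precedence example (illustrative): because TIMES binds tighter than
    # PLUS and POW is right-associative, an input such as
    #   2 + 3 * 4 ^ 2 ^ 3
    # is parsed as 2 + (3 * (4 ^ (2 ^ 3))).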

    interactive = 0

    def __init__(self, **kwargs):
        BisonParser.__init__(self, **kwargs)
        self.interactive = kwargs.get('interactive', 0)
        self.timeout = kwargs.get('timeout', 0)

        self.reset()

    def reset(self):
        self.read_buffer = ''
        self.read_queue = Queue.Queue()

        #self.subtree_map = {}
        self.root_node = None
        self.possibilities = self.last_possibilities = []

    def run(self, *args, **kwargs):
        self.reset()
        return super(Parser, self).run(*args, **kwargs)

    # Override default read method with a version that prompts for input.
    def read(self, nbytes):
        if self.file == sys.stdin and self.file.closed:
            return ''

        if not self.read_buffer and not self.read_queue.empty():
            self.read_buffer = self.read_queue.get_nowait() + '\n'

        if self.read_buffer:
            read_buffer = self.read_buffer[:nbytes]
            self.read_buffer = self.read_buffer[nbytes:]
            return read_buffer

        try:
            read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
        except EOFError:
            return ''

        self.read_buffer = read_buffer[nbytes:]
        return read_buffer[:nbytes]

    def hook_read_before(self):
        if self.possibilities:
            if self.verbose:  # pragma: nocover
                print 'possibilities:'

            items = filter_duplicates(self.possibilities)
            self.last_possibilities = self.possibilities

            if self.verbose:  # pragma: nocover
                print ' ' + '\n '.join(map(str, items))

    def hook_read_after(self, data):
        """
        This hook is called when the read() method has returned. The data
        argument points to the data read by the read() method. This hook
        function should return the data to be used by the parser.
        """
        if not data.strip():
            return data

        self.possibilities = []

        import re

        # Replace known keywords with escape sequences.
        words = list(Parser.words)
        words.insert(10, '\n')

        for i, keyword in enumerate(words):
            data = re.sub(keyword, chr(i), data, flags=re.I)

        # TODO: remove this quick preprocessing hack. This hack enables
        # concatenated expressions, since the grammar currently does not
        # support those. This workaround will replace:
        #   - ")(" with ")*(".
        #   - "a(" with "a*(".
        #   - ")a" with ")*a".
        #   - "ab" with "a*b".
        #   - "4a" with "4*a".
        #   - "a4" with "a^4".
        pattern = ('(?:(\))\s*(\()'                            # )(  -> ) * (
                   + '|([\x00-\x09\x0b-\x19a-z0-9])\s*(\()'    # a(  -> a * (
                   + '|(\))\s*([\x00-\x09\x0b-\x19a-z0-9])'    # )a  -> ) * a
                   + '|([\x00-\x09\x0b-\x19a-z])\s*'
                   + '([\x00-\x09\x0b-\x19a-z]+)'              # ab  -> a * b
                   + '|([0-9])\s*([\x00-\x09\x0b-\x19a-z])'    # 4a  -> 4 * a
                   + '|([\x00-\x09\x0b-\x19a-z])\s*([0-9])'    # a4  -> a ^ 4
                   + '|([0-9])\s+([0-9]))'                     # 4 4 -> 4 * 4
                   )

        def preprocess_data(match):
            left, right = filter(None, match.groups())

            # Make sure no multiplication or exponentiation sign is inserted
            # between a function and its argument(s): "sin x" should not be
            # rewritten as "sin*x", because that is bogus.
            if ord(left) <= 0x9 or 0x0b <= ord(left) <= 0x19:
                return left + ' ' + right

            # If all characters on the right are numbers, e.g. "a4", the
            # expression implies exponentiation. Make sure ")4" is not
            # converted into an exponentiation, because that's multiplication.
            if left != ')' and not 48 <= ord(left) < 58 \
                    and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
                return '%s^%s' % (left, right)

            # match: ab | abc | abcd (where left = "a")
            return '*'.join([left] + list(right))

        if self.verbose:  # pragma: nocover
            data_before = data

        # Iteratively replace all matches.
        i = 0

        while i < len(data):
            data = data[:i] + re.sub(pattern, preprocess_data, data[i:])
            i += 1

        # Replace escape sequences with the original keywords.
        for i, keyword in enumerate(words):
            data = data.replace(chr(i), keyword)

        if self.verbose and data_before != data:  # pragma: nocover
            print 'hook_read_after() modified the input data:'
            print 'before:', repr(data_before)
            print 'after :', repr(data)

        return data
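
    # Illustrative preprocessing results of hook_read_after() (assuming 'sin'
    # is one of the FUNCTION keywords in OP_MAP):
    #   '2x(x+1)'  ->  '2*x*(x+1)'   (implicit multiplication made explicit)
    #   'a4'       ->  'a^4'         (trailing digits imply exponentiation)
    #   'sin x'    ->  'sin x'       (function arguments are left untouched)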

    def hook_handler(self, target, option, names, values, retval):
        if target in ['exp', 'line', 'input'] or not retval:
            return retval

        if not retval.negated and retval.type != TYPE_OPERATOR:
            return retval

        if retval.type == TYPE_OPERATOR and retval.op in RULES:
            handlers = RULES[retval.op]
        else:
            handlers = []

        if retval.negated:
            handlers = RULES[OP_NEG]

        for handler in handlers:
            possibilities = handler(retval)
            self.possibilities.extend(possibilities)

        return retval
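
    # Sketch of the handler flow above (hypothetical rule set): when a reduced
    # node's operator appears in RULES, each registered handler is called with
    # that node and returns possibilities describing applicable rewrites
    # (e.g. 'a + a' -> '2*a'), which are collected in self.possibilities.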

    def display_hint(self):
        print pick_suggestion(self.last_possibilities)

    def display_possibilities(self):
        if self.last_possibilities:
            print '\n'.join(map(str, self.last_possibilities))

    def rewrite(self):
        suggestion = pick_suggestion(self.last_possibilities)

        if self.verbose:
            print 'applying suggestion:', suggestion

        if not suggestion:
            return self.root_node

        expression = apply_suggestion(self.root_node, suggestion)

        if self.verbose:
            print 'After application, expression=', expression

        self.read_queue.put_nowait(str(expression))

        return expression
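
    # Note on the feedback loop above: rewrite() pushes the rewritten
    # expression onto read_queue as plain text, so the next read() call parses
    # it again and fresh possibilities are generated for the rewritten tree.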

    #def hook_run(self, filename, retval):
    #    return retval

    # ---------------------------------------------------------------
    # These methods are the python handlers for the bison targets.
    # (which get called by the bison code each time the corresponding
    # parse target is unambiguously reached)
    #
    # WARNING - don't touch the method docstrings unless you know what
    # you are doing - they are in bison rule syntax, and are passed
    # verbatim to bison to build the parser engine library.
    # ---------------------------------------------------------------

    # Declare the start target here (by name)
    start = 'input'

    def on_input(self, target, option, names, values):
        """
        input :
              | input line
              | input REWRITE NEWLINE
        """
        if option == 1:
            # Interactive mode is enabled if the term rewriting system is used
            # as a shell. In that case, it is useful that the shell prints the
            # output of the evaluation.
            if self.interactive and values[1]:  # pragma: nocover
                print values[1]

            return values[1]

        if option == 2:  # rule: input REWRITE NEWLINE
            self.root_node = self.rewrite()
            return self.root_node

    def on_line(self, target, option, names, values):
        """
        line : NEWLINE
             | exp NEWLINE
             | debug NEWLINE
             | HINT NEWLINE
             | POSSIBILITIES NEWLINE
             | RAISE NEWLINE
        """
        if option == 1:  # rule: exp NEWLINE
            self.root_node = values[0]

            # Clear the list of last possibilities when the current expression
            # has no possibilities. Otherwise, an invalid expression gets the
            # last possibilities of a valid expression.
            if not self.possibilities:
                self.last_possibilities = []

            return values[0]

        if option == 2:  # rule: debug NEWLINE
            self.root_node = values[0]
            return values[0]

        if option == 3:  # rule: HINT NEWLINE
            self.display_hint()
            return

        if option == 4:  # rule: POSSIBILITIES NEWLINE
            self.display_possibilities()
            return

        if option == 5:  # rule: RAISE NEWLINE
            raise RuntimeError('on_line: exception raised')

    def on_debug(self, target, option, names, values):
        """
        debug : GRAPH exp
        """
        if option == 0:
            print generate_graph(values[1])
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_exp(self, target, option, names, values):
        """
        exp : NUMBER
            | IDENTIFIER
            | LPAREN exp RPAREN
            | unary
            | binary
            | nary
        """
        #   | concat

        if option == 0:  # rule: NUMBER
            # TODO: A bit hacky, this achieves long integers and floats.
            value = float(values[0]) if '.' in values[0] else int(values[0])
            return Leaf(value)

        if option == 1:  # rule: IDENTIFIER
            return Leaf(values[0])

        if option == 2:  # rule: LPAREN exp RPAREN
            return values[1]

        if 3 <= option <= 5:  # rule: unary | binary | nary
            return values[0]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_unary(self, target, option, names, values):
        """
        unary : MINUS exp %prec NEG
              | FUNCTION_LPAREN exp RPAREN
              | FUNCTION exp
              | DERIVATIVE exp
              | bracket_derivative
        """
        if option == 0:  # rule: MINUS exp
            node = values[1]

            # Add negation to the left-most child
            if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
                node.negated += 1
            else:
                child = Scope(node)[0]
                child.negated += 1

            return node

        if option in (1, 2):  # rule: FUNCTION_LPAREN exp RPAREN | FUNCTION exp
            op = values[0].split(' ', 1)[0]

            if values[1].is_op(OP_COMMA):
                return Node(op, *values[1])

            return Node(op, values[1])

        if option == 3:  # rule: DERIVATIVE exp
            op = [k for k, v in OP_MAP.iteritems() if v == OP_DERIV][0]

            # The DERIVATIVE token looks like 'd/d*x*' -> extract the 'x'.
            return Node(op, values[1], Leaf(values[0][-2]))

        if option == 4:  # rule: bracket_derivative
            return values[0]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_bracket_derivative(self, target, option, names, values):
        """
        bracket_derivative : LBRACKET exp RBRACKET APOSTROPH
                           | bracket_derivative APOSTROPH
        """
        op = [k for k, v in OP_MAP.iteritems() if v == OP_DERIV][0]

        if option == 0:  # rule: LBRACKET exp RBRACKET APOSTROPH
            return Node(op, values[1])

        if option == 1:  # rule: bracket_derivative APOSTROPH
            return Node(op, values[0])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_binary(self, target, option, names, values):
        """
        binary : exp PLUS exp
               | exp TIMES exp
               | exp DIVIDE exp
               | exp POW exp
               | exp EQ exp
               | exp MINUS exp
        """
        if 0 <= option < 5:  # rule: exp {PLUS,TIMES,DIVIDE,POW,EQ} exp
            return Node(values[1], values[0], values[2])

        if option == 5:  # rule: exp MINUS exp
            node = values[2]

            # Add negation to the left-most child
            if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
                node.negated += 1
            else:
                node = Scope(node)[0]
                node.negated += 1

            # Explicitly call the hook handler on the created unary negation.
            node = self.hook_handler('binary', 4, names, values, node)

            return Node('+', values[0], values[2])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover
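
    # Illustrative sketch of the MINUS handling above (hypothetical inputs):
    # subtraction is stored as addition of a negated right operand, and for a
    # multiplication or division the negation is attached to its left-most
    # factor, so
    #   '1 - 2'    is built as  1 + (-2)
    #   '1 - 2*x'  is built as  1 + (-2) * x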

    def on_nary(self, target, option, names, values):
        """
        nary : exp COMMA exp
        """
        if option == 0:  # rule: exp COMMA exp
            return Node(*combine(',', OP_COMMA, values[0], values[2]))

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    # -----------------------------------------
    # Special characters and operator tokens
    # -----------------------------------------
    operators = '"%s"%s{ returntoken(IDENTIFIER); }\n' \
                % (PI, ' ' * (8 - len(PI)))
    functions = []

    for op_str, op in OP_MAP.iteritems():
        if TOKEN_MAP[op] == 'FUNCTION':
            functions.append(op_str)
        else:
            operators += '"%s"%s{ returntoken(%s); }\n' \
                         % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])

    # Put all functions in a single regex.
    if functions:
        operators += '("%s")[ ]*"(" { returntoken(FUNCTION_LPAREN); }\n' \
                     % '"|"'.join(functions)
        operators += '("%s") { returntoken(FUNCTION); }\n' \
                     % '"|"'.join(functions)
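
    # For illustration, if OP_MAP contained the functions 'sin' and 'cos'
    # (hypothetical contents), the generated scanner snippet would read:
    #   ("sin"|"cos")[ ]*"(" { returntoken(FUNCTION_LPAREN); }
    #   ("sin"|"cos") { returntoken(FUNCTION); }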

    # -----------------------------------------
    # raw lex script, verbatim here
    # -----------------------------------------
    lexscript = r"""
%top{
#include "Python.h"
}
%{
#define YYSTYPE void *
#include "tokens.h"
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result,
                        int max_size);
#define returntoken(tok) \
    yylval = PyString_FromString(strdup(yytext)); return (tok);
#define YY_INPUT(buf,result,max_size) { \
    (*py_input)(py_parser, buf, &result, max_size); \
}

int yycolumn = 0;
#define YY_USER_ACTION \
    yylloc.first_line = yylloc.last_line = yylineno; \
    yylloc.first_column = yycolumn; \
    yylloc.last_column = yycolumn + yyleng; \
    yycolumn += yyleng;
%}

%option yylineno

%%

d[ ]*"/"[ ]*"d*"[a-z]"*" { returntoken(DERIVATIVE); }
[0-9]+"."?[0-9]* { returntoken(NUMBER); }
[a-zA-Z] { returntoken(IDENTIFIER); }
"(" { returntoken(LPAREN); }
")" { returntoken(RPAREN); }
"[" { returntoken(LBRACKET); }
"]" { returntoken(RBRACKET); }
"'" { returntoken(APOSTROPH); }
""" + operators + r"""
"raise" { returntoken(RAISE); }
"graph" { returntoken(GRAPH); }
"quit" { yyterminate(); returntoken(QUIT); }
[ \t\v\f] { }
[\n] { yycolumn = 0; returntoken(NEWLINE); }
. { printf("unknown char %c ignored.\n", yytext[0]); }

%%

yywrap() { return(1); }
"""