parser.py

"""
This parser parses the given input and builds an expression tree. The grammar
for the supported mathematical expressions is defined in the method docstrings
of the Parser class below.
"""
import os.path

PYBISON_BUILD = os.path.realpath('build/external/pybison')
EXTERNAL_MODS = os.path.realpath('external')

import sys
sys.path.insert(0, PYBISON_BUILD)
sys.path.insert(1, EXTERNAL_MODS)

from pybison import BisonParser, BisonSyntaxError
from graph_drawing.graph import generate_graph

from node import ExpressionBase, ExpressionNode as Node, \
        ExpressionLeaf as Leaf, OP_MAP, OP_DER, TOKEN_MAP, TYPE_OPERATOR, \
        OP_COMMA, OP_NEG, OP_MUL, OP_DIV, OP_POW, OP_LOG, OP_ADD, Scope, E, \
        DEFAULT_LOGARITHM_BASE, OP_VALUE_MAP, SPECIAL_TOKENS, OP_INT, \
        OP_INT_INDEF
from rules import RULES
from rules.utils import find_variable
from strategy import pick_suggestion
from possibilities import filter_duplicates, apply_suggestion

import Queue
import re

# Check for n-ary operator in child nodes
def combine(op, op_type, *nodes):
    # At least return the operator.
    res = [op]

    for n in nodes:
        # Merge the children for all nodes which have the same operator.
        if n.type == TYPE_OPERATOR and n.op == op_type:
            res += n.nodes
        else:
            res.append(n)

    return res
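
# Illustration (hypothetical nodes, not part of the original code): combine()
# flattens children that already use the same operator, so combining '+' with
# an existing a + b node and a leaf c yields ['+', a, b, c] instead of the
# nested ['+', (a + b), c]. A rough sketch of the intended behaviour:
#
#   a, b, c = Leaf('a'), Leaf('b'), Leaf('c')
#   combine('+', OP_ADD, Node('+', a, b), c)   # -> ['+', a, b, c]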

def find_integration_variable(exp):
    if not exp.is_op(OP_MUL):
        return exp, find_variable(exp)

    scope = Scope(exp)

    if len(scope) > 2 and scope[-2] == 'd' and scope[-1].is_identifier():
        x = scope[-1]
        scope.nodes = scope[:-2]
        return scope.as_nary_node(), x

    return exp, find_variable(exp)
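
# Sketch of the intent (assumed example, not from the original source): for an
# integrand that was tokenized as the product f(x) * d * x, e.g. "int 2x dx",
# the trailing 'd' and identifier are stripped off and returned separately, so
# the caller gets (2 * x, x). Any other expression falls back to
# find_variable().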

class Parser(BisonParser):
    """
    Implements the calculator parser. Grammar rules are defined in the method
    docstrings. Scanner rules are in the 'lexscript' attribute.
    """
    # Words to be ignored by preprocessor
    words = tuple(filter(lambda w: w.isalpha(), OP_MAP.iterkeys())) \
            + ('raise', 'graph') + tuple(SPECIAL_TOKENS)

    # Output directory of generated pybison files, including a trailing slash.
    buildDirectory = PYBISON_BUILD + '/'
    # ----------------------------------------------------------------
    # lexer tokens - these must match those in your lex script (below)
    # ----------------------------------------------------------------
    # TODO: add a runtime check to verify that this token list matches the
    # list of tokens in the lex script.
    tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
              'LPAREN', 'RPAREN', 'FUNCTION', 'FUNCTION_LPAREN', 'LBRACKET',
              'RBRACKET', 'PRIME', 'DERIVATIVE', 'SUB'] \
             + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())
    # ------------------------------
    # precedences
    # ------------------------------
    precedences = (
        ('left', ('COMMA', )),
        ('right', ('INTEGRAL', 'DERIVATIVE')),
        ('left', ('MINUS', 'PLUS')),
        ('left', ('TIMES', 'DIVIDE')),
        ('right', ('FUNCTION', )),
        ('left', ('EQ', )),
        ('left', ('NEG', )),
        ('right', ('POW', )),
        ('right', ('SUB', )),
        ('right', ('FUNCTION_LPAREN', )),
        )

    interactive = 0

    def __init__(self, **kwargs):
        BisonParser.__init__(self, **kwargs)
        self.interactive = kwargs.get('interactive', 0)
        self.timeout = kwargs.get('timeout', 0)

        self.reset()

    def reset(self):
        self.read_buffer = ''
        self.read_queue = Queue.Queue()

        #self.subtree_map = {}
        self.root_node = None
        self.possibilities = self.last_possibilities = []

    def run(self, *args, **kwargs):
        self.reset()
        return super(Parser, self).run(*args, **kwargs)

    # Override default read method with a version that prompts for input.
    def read(self, nbytes):
        if self.file == sys.stdin and self.file.closed:
            return ''

        if not self.read_buffer and not self.read_queue.empty():
            self.read_buffer = self.read_queue.get_nowait() + '\n'

        if self.read_buffer:
            read_buffer = self.read_buffer[:nbytes]
            self.read_buffer = self.read_buffer[nbytes:]

            return read_buffer

        try:
            read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
        except EOFError:
            return ''

        self.read_buffer = read_buffer[nbytes:]

        return read_buffer[:nbytes]

    def hook_read_before(self):
        if self.possibilities:
            if self.verbose:  # pragma: nocover
                print 'possibilities:'

            items = filter_duplicates(self.possibilities)
            self.last_possibilities = self.possibilities

            if self.verbose:  # pragma: nocover
                print ' ' + '\n '.join(map(str, items))

    def hook_read_after(self, data):
        """
        This hook is called after the read() method has returned. The data
        argument points to the data read by the read() method. This hook
        function should return the data to be used by the parser.
        """
        if not data.strip():
            return data

        self.possibilities = []

        # Replace known keywords with escape sequences.
        words = list(self.__class__.words)
        words.insert(10, '\n')

        for i, keyword in enumerate(words):
            # FIXME: Why case-insensitivity?
            data = re.sub(keyword, chr(i), data, flags=re.I)
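
        # Note (added explanation, not in the original source): each keyword,
        # e.g. 'sin' or 'graph', is collapsed into a single low ASCII
        # character here so that the implicit-multiplication pass below cannot
        # tear it apart into 's*i*n'. The escape characters are expanded back
        # to the original keywords at the end of this method.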

        # TODO: remove this quick preprocessing hack. This hack enables
        # concatenated expressions, since the grammar currently does not
        # support those. This workaround will replace:
        #  - ")(" with ")*(".
        #  - "a(" with "a*(".
        #  - ")a" with ")*a".
        #  - "ab" with "a*b".
        #  - "4a" with "4*a".
        #  - "a4" with "a^4".
        pattern = ('(?:(\))\s*(\(|\[)'                          # )( -> ) * (
                                                                # )[ -> ) * [
                   + '|([\x00-\x09\x0b-\x19a-z0-9])\s*(\(|\[)'  # a( -> a * (
                                                                # a[ -> a * [
                   + '|(\))\s*([\x00-\x09\x0b-\x19a-z0-9])'     # )a -> ) * a
                   + '|([\x00-\x09\x0b-\x19a-z])\s*'
                   + '([\x00-\x09\x0b-\x19a-z])'                # ab -> a * b
                   + '|([0-9])\s*([\x00-\x09\x0b-\x19a-z])'     # 4a -> 4 * a
                   + '|([\x00-\x09\x0b-\x19a-z])\s*([0-9])'     # a4 -> a ^ 4
                   + '|([0-9])\s+([0-9]))'                      # 4 4 -> 4 * 4
                   )

        def preprocess_data(match):
            left, right = filter(None, match.groups())

            # Make sure there are no multiplication and exponentiation signs
            # inserted between a function and its argument(s): "sin x" should
            # not be written as "sin*x", because that is bogus.
            if ord(left) <= 0x9 or 0x0b <= ord(left) <= 0x19:
                return left + ' ' + right

            # If all characters on the right are numbers, e.g. "a4", the
            # expression implies exponentiation. Make sure ")4" is not
            # converted into an exponentiation, because that's multiplication.
            if left != ')' and not 48 <= ord(left) < 58 \
                    and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
                return '%s^%s' % (left, right)

            # match: ab | abc | abcd (where left = "a")
            return '*'.join([left] + list(right))
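
        # Worked example (assumed input, not from the original source): after
        # this preprocessing pass, "2x(x+1)" becomes "2*x*(x+1)" and "a4"
        # becomes "a^4", matching the replacement rules listed above.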

        if self.verbose:  # pragma: nocover
            data_before = data

        # Iteratively replace all matches.
        i = 0

        while i < len(data):
            data = data[:i] + re.sub(pattern, preprocess_data, data[i:])
            i += 1

        # Replace escape sequences with original keywords.
        for i, keyword in enumerate(words):
            data = data.replace(chr(i), keyword)

        if self.verbose and data_before != data:  # pragma: nocover
            print 'hook_read_after() modified the input data:'
            print 'before:', repr(data_before)
            print 'after :', repr(data)

        return data
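
    # Added note (descriptive comment, not in the original source): pybison
    # invokes hook_handler() after each grammar target is reduced. For
    # operator nodes and negated nodes it looks up the matching handlers in
    # RULES and collects the rewrite possibilities they produce, which the
    # hint and rewrite commands later pick from.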

    def hook_handler(self, target, option, names, values, retval):
        if target in ['exp', 'line', 'input'] \
                or not isinstance(retval, ExpressionBase):
            return retval

        if not retval.negated and retval.type != TYPE_OPERATOR:
            return retval

        if retval.type == TYPE_OPERATOR and retval.op in RULES:
            handlers = RULES[retval.op]
        else:
            handlers = []

        if retval.negated:
            handlers = RULES[OP_NEG]

        for handler in handlers:
            possibilities = handler(retval)
            self.possibilities.extend(possibilities)

        return retval

    def display_hint(self):
        print pick_suggestion(self.last_possibilities)

    def display_possibilities(self):
        if self.last_possibilities:
            print '\n'.join(map(str, self.last_possibilities))

    def rewrite(self):
        suggestion = pick_suggestion(self.last_possibilities)

        if self.verbose:
            print 'applying suggestion:', suggestion

        if not suggestion:
            return self.root_node

        expression = apply_suggestion(self.root_node, suggestion)

        if self.verbose:
            print 'After application, expression=', expression

        self.read_queue.put_nowait(str(expression))

        return expression

    #def hook_run(self, filename, retval):
    #    return retval

    # ---------------------------------------------------------------
    # These methods are the python handlers for the bison targets.
    # (which get called by the bison code each time the corresponding
    # parse target is unambiguously reached)
    #
    # WARNING - don't touch the method docstrings unless you know what
    # you are doing - they are in bison rule syntax, and are passed
    # verbatim to bison to build the parser engine library.
    # ---------------------------------------------------------------

    # Declare the start target here (by name)
    start = 'input'

    def on_input(self, target, option, names, values):
        """
        input :
              | input line
              | input REWRITE NEWLINE
        """
        if option == 1:
            # Interactive mode is enabled if the term rewriting system is used
            # as a shell. In that case, it is useful that the shell prints the
            # output of the evaluation.
            if self.interactive and values[1]:  # pragma: nocover
                print values[1]

            return values[1]

        if option == 2:  # rule: input REWRITE NEWLINE
            self.root_node = self.rewrite()
            return self.root_node

    def on_line(self, target, option, names, values):
        """
        line : NEWLINE
             | exp NEWLINE
             | debug NEWLINE
             | HINT NEWLINE
             | POSSIBILITIES NEWLINE
             | RAISE NEWLINE
        """
        if option == 1:  # rule: exp NEWLINE
            self.root_node = values[0]

            # Clear list of last possibilities when current expression has no
            # possibilities. Otherwise, an invalid expression gets the last
            # possibilities of a valid expression.
            if not self.possibilities:
                self.last_possibilities = []

            return values[0]

        if option == 2:  # rule: debug NEWLINE
            self.root_node = values[0]
            return values[0]

        if option == 3:  # rule: HINT NEWLINE
            self.display_hint()
            return

        if option == 4:  # rule: POSSIBILITIES NEWLINE
            self.display_possibilities()
            return

        if option == 5:
            raise RuntimeError('on_line: exception raised')

    def on_debug(self, target, option, names, values):
        """
        debug : GRAPH exp
        """
        if option == 0:
            print generate_graph(values[1])
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_exp(self, target, option, names, values):
        """
        exp : NUMBER
            | IDENTIFIER
            | LPAREN exp RPAREN
            | unary
            | binary
            | nary
        """
        #   | concat
        if option == 0:  # rule: NUMBER
            # TODO: A bit hacky, this achieves long integers and floats.
            value = float(values[0]) if '.' in values[0] else int(values[0])
            return Leaf(value)

        if option == 1:  # rule: IDENTIFIER
            return Leaf(values[0])

        if option == 2:  # rule: LPAREN exp RPAREN
            return values[1]

        if 3 <= option <= 5:  # rule: unary | binary | nary
            return values[0]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover
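
    # Added example for on_exp() above (assumed inputs, not in the original
    # source): the NUMBER branch turns '4' into Leaf(4) (an int) and '4.5'
    # into Leaf(4.5) (a float); the IDENTIFIER branch turns 'x' into Leaf('x').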

    def on_unary(self, target, option, names, values):
        """
        unary : MINUS exp %prec NEG
              | FUNCTION_LPAREN exp RPAREN
              | FUNCTION exp
              | DERIVATIVE exp
              | bracket_derivative
              | INTEGRAL exp
              | INTEGRAL bounds exp %prec INTEGRAL
        """
        if option == 0:  # rule: MINUS exp
            node = values[1]

            # Add negation to the left-most child
            if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
                node.negated += 1
            else:
                child = Scope(node)[0]
                child.negated += 1

            return node

        if option in (1, 2):  # rule: FUNCTION_LPAREN exp RPAREN | FUNCTION exp
            op = values[0].split(' ', 1)[0]

            if op == 'int':
                fx, x = find_integration_variable(values[1])
                return Node(OP_INT, fx, x)

            if op == 'ln':
                return Node(OP_LOG, values[1], Leaf(E))

            if values[1].is_op(OP_COMMA):
                return Node(op, *values[1])

            if op == OP_VALUE_MAP[OP_LOG]:
                return Node(OP_LOG, values[1], Leaf(DEFAULT_LOGARITHM_BASE))

            m = re.match(r'^log_([0-9]+|[a-zA-Z])', op)

            if m:
                value = m.group(1)

                if value.isdigit():
                    value = int(value)

                return Node(OP_LOG, values[1], Leaf(value))

            return Node(op, values[1])

        if option == 3:  # rule: DERIVATIVE exp
            # DERIVATIVE looks like 'd/d*x*' -> extract the 'x'
            return Node(OP_DER, values[1], Leaf(values[0][-2]))

        if option == 4:  # rule: bracket_derivative
            return values[0]

        if option == 5:  # rule: INTEGRAL exp
            fx, x = find_integration_variable(values[1])
            return Node(OP_INT, fx, x)

        if option == 6:  # rule: INTEGRAL bounds exp
            lbnd, ubnd = values[1]
            fx, x = find_integration_variable(values[2])
            return Node(OP_INT, fx, x, lbnd, ubnd)

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover
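
    # Added note for on_unary() above (explanation, not in the original
    # source): a function whose name is 'ln' is turned into a logarithm with
    # base E, a name matching log_<n> (e.g. 'log_2') into a logarithm with
    # base n, and the plain logarithm name falls back to
    # DEFAULT_LOGARITHM_BASE; any other function name becomes Node(op, exp).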

    def on_bounds(self, target, option, names, values):
        """
        bounds : SUB power TIMES
        """
        if option == 0:  # rule: SUB power TIMES
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_power(self, target, option, names, values):
        """
        power : exp POW exp
        """
        if option == 0:  # rule: exp POW exp
            return values[0], values[2]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_bracket_derivative(self, target, option, names, values):
        """
        bracket_derivative : LBRACKET exp RBRACKET PRIME
                           | bracket_derivative PRIME
        """
        if option == 0:  # rule: LBRACKET exp RBRACKET PRIME
            return Node(OP_DER, values[1])

        if option == 1:  # rule: bracket_derivative PRIME
            return Node(OP_DER, values[0])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_binary(self, target, option, names, values):
        """
        binary : exp PLUS exp
               | exp TIMES exp
               | exp DIVIDE exp
               | exp EQ exp
               | exp MINUS exp
               | power
        """
        if 0 <= option < 4:  # rule: exp {PLUS,TIMES,DIVIDE,EQ} exp
            return Node(values[1], values[0], values[2])

        if option == 4:  # rule: exp MINUS exp
            node = values[2]

            # Add negation to the left-most child
            if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
                node.negated += 1
            else:
                node = Scope(node)[0]
                node.negated += 1

            # Explicitly call the hook handler on the created unary negation.
            self.hook_handler('binary', 3, names, values, node)

            return Node(OP_ADD, values[0], values[2])

        if option == 5:  # rule: power
            return Node(OP_POW, *values[0])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover
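
    # Added note for on_binary() above (explanation, not in the original
    # source): subtraction has no operator node of its own here; "a - b" is
    # parsed as the addition a + (-b), where the negation is pushed onto b
    # (or onto the left-most factor when b is a product or division).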

    def on_nary(self, target, option, names, values):
        """
        nary : exp COMMA exp
        """
        if option == 0:  # rule: exp COMMA exp
            return Node(*combine(',', OP_COMMA, values[0], values[2]))

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    # -----------------------------------------
    # Special tokens and operator tokens
    # -----------------------------------------
    operators = ''
    functions = []

    for token in SPECIAL_TOKENS:
        operators += '"%s"%s{ returntoken(IDENTIFIER); }\n' \
                     % (token, ' ' * (8 - len(token)))

    for op_str, op in OP_MAP.iteritems():
        if TOKEN_MAP[op] == 'FUNCTION':
            functions.append(op_str)
        else:
            operators += '"%s"%s{ returntoken(%s); }\n' \
                         % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])

    # Put all functions in a single regex
    if functions:
        operators += '("%s")[ ]*"(" { returntoken(FUNCTION_LPAREN); }\n' \
                     % '"|"'.join(functions)
        operators += '("%s") { returntoken(FUNCTION); }\n' \
                     % '"|"'.join(functions)
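
    # Added example (illustrative only; the exact contents of OP_MAP and
    # TOKEN_MAP are defined in the node module): assuming '+' maps to a token
    # named PLUS and 'sin'/'cos' map to FUNCTION, the generated scanner rules
    # would look like:
    #
    #   "+"       { returntoken(PLUS); }
    #   ("sin"|"cos")[ ]*"(" { returntoken(FUNCTION_LPAREN); }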

    # -----------------------------------------
    # raw lex script, verbatim here
    # -----------------------------------------
    lexscript = r"""
%top{
#include "Python.h"
}

%{
#define YYSTYPE void *
#include "tokens.h"
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result,
                        int max_size);
#define returntoken(tok) \
        yylval = PyString_FromString(strdup(yytext)); return (tok);
#define YY_INPUT(buf,result,max_size) { \
        (*py_input)(py_parser, buf, &result, max_size); \
}

int yycolumn = 0;

#define YY_USER_ACTION \
        yylloc.first_line = yylloc.last_line = yylineno; \
        yylloc.first_column = yycolumn; \
        yylloc.last_column = yycolumn + yyleng; \
        yycolumn += yyleng;
%}

%option yylineno

%%

d[ ]*"/"[ ]*"d*"[a-z]"*"    { returntoken(DERIVATIVE); }
[0-9]+"."?[0-9]*            { returntoken(NUMBER); }
[a-zA-Z]                    { returntoken(IDENTIFIER); }
"_"                         { returntoken(SUB); }
"("                         { returntoken(LPAREN); }
")"                         { returntoken(RPAREN); }
"["                         { returntoken(LBRACKET); }
"]"                         { returntoken(RBRACKET); }
"'"                         { returntoken(PRIME); }
log_([0-9]+|[a-zA-Z])"*("   { returntoken(FUNCTION_LPAREN); }
log_([0-9]+|[a-zA-Z])"*"    { returntoken(FUNCTION); }
""" + operators + r"""
"raise"                     { returntoken(RAISE); }
"graph"                     { returntoken(GRAPH); }
"quit"                      { yyterminate(); returntoken(QUIT); }
[ \t\v\f]                   { }
[\n]                        { yycolumn = 0; returntoken(NEWLINE); }
.                           { printf("unknown char %c ignored.\n", yytext[0]); }

%%

yywrap() { return(1); }
"""

#int[ ]*"(" { returntoken(FUNCTION_LPAREN); }