# parser.py

  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
  5. import os.path
  6. PYBISON_BUILD = os.path.realpath('build/external/pybison')
  7. EXTERNAL_MODS = os.path.realpath('external')
  8. import sys
  9. sys.path.insert(0, PYBISON_BUILD)
  10. sys.path.insert(1, EXTERNAL_MODS)
  11. from pybison import BisonParser, BisonSyntaxError
  12. from graph_drawing.graph import generate_graph
  13. from node import ExpressionNode as Node, \
  14. ExpressionLeaf as Leaf, OP_MAP, OP_DER, TOKEN_MAP, TYPE_OPERATOR, \
  15. OP_COMMA, OP_MUL, OP_POW, OP_LOG, OP_ADD, Scope, E, OP_ABS, \
  16. DEFAULT_LOGARITHM_BASE, OP_VALUE_MAP, SPECIAL_TOKENS, OP_INT, \
  17. OP_INT_INDEF, negation_to_node
  18. from rules.utils import find_variable
  19. from rules.precedences import IMPLICIT_RULES
  20. from strategy import find_possibilities
  21. from possibilities import apply_suggestion
  22. import Queue
  23. import re


# Check for n-ary operator in child nodes
def combine(op, op_type, *nodes):
    # At least return the operator.
    res = [op]

    for n in nodes:
        # Merge the children for all nodes which have the same operator.
        if n.type == TYPE_OPERATOR and n.op == op_type:
            res += n.nodes
        else:
            res.append(n)

    return res
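
# A rough illustration of combine(), as used by on_nary() below to flatten
# comma-separated argument lists (hypothetical leaf values):
#
#   combine(',', OP_COMMA, Node(',', Leaf(1), Leaf(2)), Leaf(3))
#   # -> [',', Leaf(1), Leaf(2), Leaf(3)]
#
# i.e. children of a node with the same n-ary operator are merged into a
# single argument list instead of being nested.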


def find_integration_variable(exp):
    if not exp.is_op(OP_MUL):
        return exp, find_variable(exp)

    scope = Scope(exp)

    if len(scope) > 2 and scope[-2] == 'd' and scope[-1].is_identifier():
        x = scope[-1]
        scope.nodes = scope[:-2]

        return scope.as_nary_node(), x

    return exp, find_variable(exp)
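
# Rough sketch of the intended behaviour (hypothetical nodes): an integrand
# parsed from "x ^ 2 * d * x" has a multiplication scope ending in the leaves
# 'd' and 'x', so the trailing "d * x" is stripped and the variable returned
# separately:
#
#   find_integration_variable(<x ^ 2 * d * x>)  ->  (<x ^ 2>, <x>)
#
# For any other expression, the variable is taken from find_variable().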


class Parser(BisonParser):
    """
    Implements the calculator parser. Grammar rules are defined in the method
    docstrings. Scanner rules are in the 'lexscript' attribute.
    """
    # Words to be ignored by the preprocessor
    words = tuple(filter(lambda w: w.isalpha(), OP_MAP.iterkeys())) \
            + ('raise', 'graph') + tuple(SPECIAL_TOKENS)

    # Output directory of generated pybison files, including a trailing slash.
    buildDirectory = PYBISON_BUILD + '/'

    # ----------------------------------------------------------------
    # lexer tokens - these must match those in your lex script (below)
    # ----------------------------------------------------------------
    # TODO: add a runtime check to verify that this token list matches the
    # list of tokens of the lex script.
    tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
              'LPAREN', 'RPAREN', 'FUNCTION', 'FUNCTION_LPAREN', 'LBRACKET',
              'RBRACKET', 'PIPE', 'PRIME', 'DERIVATIVE'] \
             + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())

    # ------------------------------
    # precedences
    # ------------------------------
    precedences = (
        ('left', ('COMMA', )),
        ('left', ('INTEGRAL', 'DERIVATIVE')),
        ('left', ('OR', )),
        ('left', ('AND', )),
        ('left', ('EQ', )),
        ('left', ('MINUS', 'PLUS', 'NEG')),
        ('left', ('TIMES', 'DIVIDE')),
        ('right', ('FUNCTION', )),
        ('right', ('POW', )),
        ('left', ('SUB', )),
        ('right', ('FUNCTION_LPAREN', )),
        )

    interactive = 0

    def __init__(self, **kwargs):
        BisonParser.__init__(self, **kwargs)
        self.interactive = kwargs.get('interactive', 0)
        self.timeout = kwargs.get('timeout', 0)

        self.root_node = None
        self.possibilities = None

        self.reset()

    def reset(self):
        self.read_buffer = ''
        self.read_queue = Queue.Queue()

        #self.subtree_map = {}
        self.set_root_node(None)
        self.possibilities = None

    def run(self, *args, **kwargs):
        self.reset()
        return super(Parser, self).run(*args, **kwargs)

    # Override default read method with a version that prompts for input.
    def read(self, nbytes):
        if self.file == sys.stdin and self.file.closed:
            return ''

        if not self.read_buffer and not self.read_queue.empty():
            self.read_buffer = self.read_queue.get_nowait() + '\n'

        if self.read_buffer:
            read_buffer = self.read_buffer[:nbytes]
            self.read_buffer = self.read_buffer[nbytes:]

            return read_buffer

        try:
            read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
        except EOFError:
            return ''

        self.read_buffer = read_buffer[nbytes:]

        return read_buffer[:nbytes]
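
    # Note: read_queue is presumably filled by an external caller (e.g. a
    # non-interactive front-end) before run() is invoked; queued strings are
    # handed to the scanner with a trailing newline. In interactive mode,
    # input is read from stdin with a '>>> ' prompt instead.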

    def hook_read_before(self):
        pass

    def hook_read_after(self, data):
        """
        This hook is called after the read() method has returned. The data
        argument contains the data read by the read() method. This hook
        function should return the data to be used by the parser.
        """
        if not data.strip():
            return data

        # Replace known keywords with escape sequences.
        words = list(self.__class__.words)
        words.insert(10, '\n')

        for i, keyword in enumerate(words):
            # FIXME: Why case-insensitivity?
            data = re.sub(keyword, chr(i), data, flags=re.I)

        # TODO: remove this quick preprocessing hack. This hack enables
        # concatenated expressions, since the grammar currently does not
        # support those. This workaround will replace:
        #  - ")(" with ")*(".
        #  - "a(" with "a*(".
        #  - ")a" with ")*a".
        #  - "ab" with "a*b".
        #  - "4a" with "4*a".
        #  - "a4" with "a^4".
        pattern = ('(?:(\))\s*([([])'                    # )( -> ) * (
                                                         # )[ -> ) * [
                + '|([\x00-\x09\x0b-\x19a-z0-9])\s*([([])'    # a( -> a * (
                                                              # a[ -> a * [
                + '|(\))\s*([\x00-\x09\x0b-\x19a-z0-9])'      # )a -> ) * a
                + '|([\x00-\x09\x0b-\x19a-z])\s*'
                + '([\x00-\x09\x0b-\x19a-z])'                 # ab -> a * b
                + '|(\|)(\|)'                                 # || -> | * |
                + '|([0-9])\s*([\x00-\x09\x0b-\x19a-z])'      # 4a -> 4 * a
                + '|([\x00-\x09\x0b-\x19a-z])([0-9])'         # a4 -> a ^ 4
                + '|([\x00-\x09\x0b-\x19a-z0-9])(\s+[0-9]))'  # a 4 -> a * 4
                                                              # 4 4 -> 4 * 4
                )

        def preprocess_data(match):
            left, right = filter(None, match.groups())

            # Make sure there are no multiplication and exponentiation signs
            # inserted between a function and its argument(s): "sin x" should
            # not be written as "sin*x", because that is bogus.
            if ord(left) <= 0x9 or 0x0b <= ord(left) <= 0x19:
                return left + ' ' + right

            # If all characters on the right are numbers, e.g. "a4", the
            # expression implies exponentiation. Make sure ")4" is not
            # converted into an exponentiation, because that's multiplication.
            if left != ')' and not left.isdigit() and right.isdigit():
                return '%s^%s' % (left, right)

            # match: ab | abc | abcd (where left = "a")
            return '*'.join([left] + list(re.sub(r'^ +', '', right)))

        if self.verbose:  # pragma: nocover
            data_before = data

        # Iteratively replace all matches.
        i = 0

        while i < len(data):
            data = data[:i] + re.sub(pattern, preprocess_data, data[i:])
            i += 1

        # Replace escape sequences with original keywords.
        for i, keyword in enumerate(words):
            data = data.replace(chr(i), keyword)

        # Fix the TIMES operator when it appears next to the OR operator.
        data = re.sub(r'\*?vv\*?', 'vv', data)

        if self.verbose and data_before != data:  # pragma: nocover
            print 'hook_read_after() modified the input data:'
            print 'before:', repr(data_before)
            print 'after :', repr(data)

        return data
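
    # Rough illustration of the preprocessing above (hypothetical input):
    # for the raw string "2x(x+1) + sin x", the keyword "sin" is first
    # replaced by an escape character, then the implicit multiplications are
    # inserted ("2x" -> "2*x", "x(" -> "x*("), and finally the keyword is
    # restored, yielding roughly "2*x*(x+1) + sin x" for the grammar to parse.
    # "sin x" is left untouched because its left-hand side is an escape
    # character at that point.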

    def hook_handler(self, target, option, names, values, retval):
        return retval

    def set_root_node(self, node):
        self.root_node = node
        self.possibilities = None

    def find_possibilities(self):
        if not self.root_node:
            raise RuntimeError('No expression')

        if self.possibilities is not None:
            if self.verbose:
                print 'Expression has not changed, not updating possibilities'

            return

        self.possibilities = find_possibilities(self.root_node)

    def display_hint(self):
        self.find_possibilities()

        if self.interactive:
            if self.possibilities:
                print self.possibilities[0]
            else:
                print 'No further reduction is possible.'

    def display_possibilities(self):
        self.find_possibilities()

        for i, p in enumerate(self.possibilities):
            print '%d %s' % (i, p)

    def rewrite(self, index=0, verbose=False, check_implicit=True):
        self.find_possibilities()

        if not self.possibilities:
            return

        suggestion = self.possibilities[index]

        if self.verbose:
            print 'EXPLICIT:', suggestion
        elif verbose:
            print suggestion

        self.set_root_node(apply_suggestion(self.root_node, suggestion))

        if self.verbose:
            print ' ', self.root_node

        # Only apply any remaining implicit hints if the suggestion itself is
        # not implicit.
        if check_implicit and suggestion.handler not in IMPLICIT_RULES:
            self.find_possibilities()

            while self.possibilities:
                sugg = self.possibilities[0]

                if sugg.handler not in IMPLICIT_RULES:
                    break

                if self.verbose:
                    print 'IMPLICIT:', sugg

                self.set_root_node(apply_suggestion(self.root_node, sugg))

                if self.verbose:
                    print ' ', self.root_node

                self.find_possibilities()

        if verbose and not self.verbose:
            print self.root_node

        return self.root_node

    def rewrite_all(self, verbose=False):
        i = 0

        while self.rewrite(verbose=verbose):
            i += 1

            if i > 100:
                # Guard against runaway rewrite loops.
                print 'Too many rewrite steps, aborting...'
                break

        if not verbose or not i:
            return self.root_node
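
    # Sketch of a rewrite session (hypothetical output): with root_node set to
    # the tree for "2 + 3", find_possibilities() might suggest a numeric
    # addition. rewrite() applies the first suggestion plus any implicit
    # follow-up rules, and rewrite_all() repeats this until no suggestions
    # remain, leaving root_node at "5".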

    #def hook_run(self, filename, retval):
    #    return retval

    # ---------------------------------------------------------------
    # These methods are the python handlers for the bison targets.
    # (which get called by the bison code each time the corresponding
    # parse target is unambiguously reached)
    #
    # WARNING - don't touch the method docstrings unless you know what
    # you are doing - they are in bison rule syntax, and are passed
    # verbatim to bison to build the parser engine library.
    # ---------------------------------------------------------------

    # Declare the start target here (by name)
    start = 'input'

    def on_input(self, target, option, names, values):
        """
        input :
              | input line
        """
        if option == 1:
            # Interactive mode is enabled if the term rewriting system is used
            # as a shell. In that case, it is useful that the shell prints the
            # output of the evaluation.
            if self.interactive and values[1]:  # pragma: nocover
                print values[1]

            return values[1]

    def on_line(self, target, option, names, values):
        """
        line : NEWLINE
             | exp NEWLINE
             | debug NEWLINE
             | HINT NEWLINE
             | POSSIBILITIES NEWLINE
             | REWRITE NEWLINE
             | REWRITE NUMBER NEWLINE
             | REWRITE_ALL NEWLINE
             | REWRITE_ALL_VERBOSE NEWLINE
             | RAISE NEWLINE
        """
        if option in (1, 2):  # rule: {exp,debug} NEWLINE
            self.set_root_node(values[0])
            return values[0]

        if option == 3:  # rule: HINT NEWLINE
            self.display_hint()
            return

        if option == 4:  # rule: POSSIBILITIES NEWLINE
            self.display_possibilities()
            return

        if option == 5:  # rule: REWRITE NEWLINE
            return self.rewrite()

        if option == 6:  # rule: REWRITE NUMBER NEWLINE
            self.rewrite(int(values[1]))
            return self.root_node

        if option in (7, 8):  # rule: REWRITE_ALL NEWLINE
            return self.rewrite_all(verbose=(option == 8))

        if option == 9:
            raise RuntimeError('on_line: exception raised')

    def on_debug(self, target, option, names, values):
        """
        debug : GRAPH exp
        """
        if option == 0:
            print generate_graph(negation_to_node(values[1]))
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_exp(self, target, option, names, values):
        """
        exp : NUMBER
            | IDENTIFIER
            | LPAREN exp RPAREN
            | unary
            | binary
            | nary
        """
        #   | concat
        if option == 0:  # rule: NUMBER
            # TODO: A bit hacky, this achieves long integers and floats.
            value = float(values[0]) if '.' in values[0] else int(values[0])
            return Leaf(value)

        if option == 1:  # rule: IDENTIFIER
            return Leaf(values[0])

        if option == 2:  # rule: LPAREN exp RPAREN
            return values[1]

        if 3 <= option <= 5:  # rule: unary | binary | nary
            return values[0]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_unary(self, target, option, names, values):
        """
        unary : MINUS exp
              | FUNCTION_LPAREN exp RPAREN
              | FUNCTION exp
              | DERIVATIVE exp
              | bracket_derivative
              | INTEGRAL exp
              | integral_bounds TIMES exp %prec INTEGRAL
              | LBRACKET exp RBRACKET lbnd ubnd
              | PIPE exp PIPE
        """
        if option == 0:  # rule: MINUS exp
            values[1].negated += 1
            return values[1]

        if option in (1, 2):  # rule: FUNCTION_LPAREN exp RPAREN | FUNCTION exp
            op = values[0].split(' ', 1)[0]

            if op == 'ln':
                return Node(OP_LOG, values[1], Leaf(E))

            if values[1].is_op(OP_COMMA):
                return Node(op, *values[1])

            if op == OP_VALUE_MAP[OP_LOG]:
                return Node(OP_LOG, values[1], Leaf(DEFAULT_LOGARITHM_BASE))

            m = re.match(r'^log_([0-9]+|[a-zA-Z])', op)

            if m:
                value = m.group(1)

                if value.isdigit():
                    value = int(value)

                return Node(OP_LOG, values[1], Leaf(value))

            return Node(op, values[1])

        if option == 3:  # rule: DERIVATIVE exp
            # DERIVATIVE looks like 'd/d*x*' -> extract the 'x'.
            return Node(OP_DER, values[1], Leaf(values[0][-2]))

        if option == 4:  # rule: bracket_derivative
            return values[0]

        if option == 5:  # rule: INTEGRAL exp
            fx, x = find_integration_variable(values[1])
            return Node(OP_INT, fx, x)

        if option == 6:  # rule: integral_bounds TIMES exp
            lbnd, ubnd = values[0]
            fx, x = find_integration_variable(values[2])
            return Node(OP_INT, fx, x, lbnd, ubnd)

        if option == 7:  # rule: LBRACKET exp RBRACKET lbnd ubnd
            return Node(OP_INT_INDEF, values[1], values[3], values[4])

        if option == 8:  # rule: PIPE exp PIPE
            return Node(OP_ABS, values[1])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover
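
    # Rough examples of the FUNCTION handling above (hypothetical trees):
    # "ln x" becomes a logarithm node with base e, "log x" a logarithm node
    # with the default base, "log_2 x" a logarithm node with base 2, and any
    # other function token such as "sin x" becomes a unary node labelled with
    # that function name.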

    def on_integral_bounds(self, target, option, names, values):
        """
        integral_bounds : INTEGRAL lbnd ubnd
        """
        if option == 0:  # rule: INTEGRAL lbnd ubnd
            return values[1], values[2]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_lbnd(self, target, option, names, values):
        """
        lbnd : SUB exp
        """
        if option == 0:  # rule: SUB exp
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_ubnd(self, target, option, names, values):
        """
        ubnd : POW exp
        """
        if option == 0:  # rule: POW exp
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_power(self, target, option, names, values):
        """
        power : exp POW exp
        """
        if option == 0:  # rule: exp POW exp
            return values[0], values[2]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_bracket_derivative(self, target, option, names, values):
        """
        bracket_derivative : LBRACKET exp RBRACKET PRIME
                           | bracket_derivative PRIME
        """
        if option == 0:  # rule: LBRACKET exp RBRACKET PRIME
            return Node(OP_DER, values[1])

        if option == 1:  # rule: bracket_derivative PRIME
            return Node(OP_DER, values[0])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_binary(self, target, option, names, values):
        """
        binary : exp PLUS exp
               | exp TIMES exp
               | exp DIVIDE exp
               | exp EQ exp
               | exp AND exp
               | exp OR exp
               | exp MINUS exp
               | power
        """
        if 0 <= option <= 5:  # rule: exp {PLUS,TIMES,DIVIDE,EQ,AND,OR} exp
            return Node(values[1], values[0], values[2])

        if option == 6:  # rule: exp MINUS exp
            right = values[2]
            right.negated += 1

            # Explicitly call the hook handler on the created unary negation.
            self.hook_handler('unary', 0, names, values, right)

            return Node(OP_ADD, values[0], right)

        if option == 7:  # rule: power
            return Node(OP_POW, *values[0])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_nary(self, target, option, names, values):
        """
        nary : exp COMMA exp
        """
        if option == 0:  # rule: exp COMMA exp
            return Node(*combine(',', OP_COMMA, values[0], values[2]))

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover
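
    # Note on the MINUS rule above: "a - b" is stored as an addition of "a"
    # and a negated "b", so later rewrite rules only have to reason about
    # n-ary addition. For example (hypothetical tree), "1 - 2" parses to
    # roughly Node('+', Leaf(1), Leaf(2)) with the second operand's 'negated'
    # counter incremented.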

    # -----------------------------------------
    # Special tokens and operator tokens
    # -----------------------------------------
    operators = ''
    functions = []

    for token in SPECIAL_TOKENS:
        if len(token) > 1:
            operators += '"%s"%s{ returntoken(IDENTIFIER); }\n' \
                         % (token, ' ' * (8 - len(token)))

    for op_str, op in OP_MAP.iteritems():
        if TOKEN_MAP[op] == 'FUNCTION':
            functions.append(op_str)
        else:
            operators += '"%s"%s{ returntoken(%s); }\n' \
                         % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])

    # Put all functions in a single regex.
    if functions:
        operators += '("%s")[ ]*"(" { returntoken(FUNCTION_LPAREN); }\n' \
                     % '"|"'.join(functions)
        operators += '("%s") { returntoken(FUNCTION); }\n' \
                     % '"|"'.join(functions)
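
    # The loops above generate flex rules as plain text. For a hypothetical
    # OP_MAP in which 'sin' and 'cos' are FUNCTION tokens, the generated
    # lines would look roughly like:
    #
    #   ("sin"|"cos")[ ]*"(" { returntoken(FUNCTION_LPAREN); }
    #   ("sin"|"cos") { returntoken(FUNCTION); }
    #
    # They are spliced into the lex script below via the 'operators' variable.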

    # -----------------------------------------
    # raw lex script, verbatim here
    # -----------------------------------------
    lexscript = r"""
%top{
#include "Python.h"
}
%{
#define YYSTYPE void *
#include "tokens.h"
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result,
                        int max_size);
#define returntoken(tok) \
        yylval = PyString_FromString(strdup(yytext)); return (tok);
#define YY_INPUT(buf,result,max_size) { \
        (*py_input)(py_parser, buf, &result, max_size); \
}

int yycolumn = 0;

#define YY_USER_ACTION \
        yylloc.first_line = yylloc.last_line = yylineno; \
        yylloc.first_column = yycolumn; \
        yylloc.last_column = yycolumn + yyleng; \
        yycolumn += yyleng;
%}

%option yylineno

%%

d[ ]*"/"[ ]*"d*"[a-z]"*" { returntoken(DERIVATIVE); }
[0-9]+"."?[0-9]* { returntoken(NUMBER); }
[a-zA-Z]         { returntoken(IDENTIFIER); }
"("              { returntoken(LPAREN); }
")"              { returntoken(RPAREN); }
"["              { returntoken(LBRACKET); }
"]"              { returntoken(RBRACKET); }
"'"              { returntoken(PRIME); }
"|"              { returntoken(PIPE); }
log_([0-9]+|[a-zA-Z])"*(" { returntoken(FUNCTION_LPAREN); }
log_([0-9]+|[a-zA-Z])"*"  { returntoken(FUNCTION); }
""" + operators + r"""
"raise"          { returntoken(RAISE); }
"graph"          { returntoken(GRAPH); }
"quit"           { yyterminate(); returntoken(QUIT); }
[ \t\v\f]        { }
[\n]             { yycolumn = 0; returntoken(NEWLINE); }
.                { printf("unknown char %c ignored.\n", yytext[0]); }

%%

yywrap() { return(1); }
"""

#int[ ]*"(" { returntoken(FUNCTION_LPAREN); }