  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
import os.path

PYBISON_BUILD = os.path.realpath('build/external/pybison')
EXTERNAL_MODS = os.path.realpath('external')

import sys
sys.path.insert(0, PYBISON_BUILD)
sys.path.insert(1, EXTERNAL_MODS)

from pybison import BisonParser, BisonSyntaxError
from graph_drawing.graph import generate_graph

from node import ExpressionNode as Node, ExpressionLeaf as Leaf, OP_MAP, \
        OP_DER, TOKEN_MAP, TYPE_OPERATOR, OP_COMMA, OP_NEG, OP_MUL, OP_DIV, \
        OP_LOG, OP_ADD, Scope, PI, E, DEFAULT_LOGARITHM_BASE, OP_VALUE_MAP
from rules import RULES
from strategy import pick_suggestion
from possibilities import filter_duplicates, apply_suggestion

import Queue
import re
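
# Illustrative usage sketch (not part of the original module). The exact
# run() keyword arguments depend on the bundled pybison version, so treat
# the 'file' argument below as an assumption:
#
#     parser = Parser(verbose=0, interactive=1)
#     parser.run(file=sys.stdin)   # read, parse and rewrite expressions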


# Combine the children of nodes that have the same n-ary operator into a
# single argument list for that operator.
def combine(op, op_type, *nodes):
    # At least return the operator.
    res = [op]

    for n in nodes:
        # Merge the children for all nodes which have the same operator.
        if n.type == TYPE_OPERATOR and n.op == op_type:
            res += n.nodes
        else:
            res.append(n)

    return res
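
# For example (illustrative, using the Node/Leaf constructors from above):
# combining an existing addition node with an extra term,
#
#     combine('+', OP_ADD, Node('+', a, b), c)  ->  ['+', a, b, c]
#
# so that Node(*combine(...)) yields one flat n-ary '+' node instead of a
# nested one.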


class Parser(BisonParser):
    """
    Implements the calculator parser. Grammar rules are defined in the method
    docstrings. Scanner rules are in the 'lexscript' attribute.
    """

    # Words to be ignored by preprocessor
    words = zip(*filter(lambda (s, op): TOKEN_MAP[op] == 'FUNCTION', \
                OP_MAP.iteritems()))[0] + ('raise', 'graph', PI)

    # Output directory of generated pybison files, including a trailing slash.
    buildDirectory = PYBISON_BUILD + '/'

    # ----------------------------------------------------------------
    # lexer tokens - these must match those in your lex script (below)
    # ----------------------------------------------------------------
    # TODO: add a runtime check to verify that this token list matches the
    # list of tokens of the lex script.
    tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
              'LPAREN', 'RPAREN', 'FUNCTION', 'FUNCTION_LPAREN', 'LBRACKET',
              'RBRACKET', 'APOSTROPH', 'DERIVATIVE'] \
             + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())
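
    # Note: TOKEN_MAP.values() supplies the operator tokens (for example PLUS,
    # MINUS, TIMES, DIVIDE, POW, EQ and COMMA) that are referenced in the
    # precedence table below.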

    # ------------------------------
    # precedences
    # ------------------------------
    precedences = (
        ('left', ('COMMA', )),
        ('left', ('MINUS', 'PLUS')),
        ('left', ('TIMES', 'DIVIDE')),
        ('right', ('FUNCTION', 'DERIVATIVE')),
        ('left', ('EQ', )),
        ('left', ('NEG', )),
        ('right', ('POW', )),
        ('right', ('FUNCTION_LPAREN', )),
        )

    interactive = 0

    def __init__(self, **kwargs):
        BisonParser.__init__(self, **kwargs)
        self.interactive = kwargs.get('interactive', 0)
        self.timeout = kwargs.get('timeout', 0)

        self.reset()

    def reset(self):
        self.read_buffer = ''
        self.read_queue = Queue.Queue()

        #self.subtree_map = {}
        self.root_node = None
        self.possibilities = self.last_possibilities = []
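
    # 'possibilities' is filled by hook_handler() while an expression is being
    # parsed; 'last_possibilities' keeps the possibilities reported for the
    # previously completed expression (see hook_read_before and on_line).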

    def run(self, *args, **kwargs):
        self.reset()
        return super(Parser, self).run(*args, **kwargs)

    # Override default read method with a version that prompts for input.
    def read(self, nbytes):
        if self.file == sys.stdin and self.file.closed:
            return ''

        if not self.read_buffer and not self.read_queue.empty():
            self.read_buffer = self.read_queue.get_nowait() + '\n'

        if self.read_buffer:
            read_buffer = self.read_buffer[:nbytes]
            self.read_buffer = self.read_buffer[nbytes:]
            return read_buffer

        try:
            read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
        except EOFError:
            return ''

        self.read_buffer = read_buffer[nbytes:]
        return read_buffer[:nbytes]

    def hook_read_before(self):
        if self.possibilities:
            if self.verbose:  # pragma: nocover
                print 'possibilities:'

            items = filter_duplicates(self.possibilities)
            self.last_possibilities = self.possibilities

            if self.verbose:  # pragma: nocover
                print ' ' + '\n '.join(map(str, items))

    def hook_read_after(self, data):
        """
        This hook is called after the read() method has returned. The data
        argument contains the data read by the read() method. This hook
        function should return the data to be used by the parser.
        """
        if not data.strip():
            return data

        self.possibilities = []

        # Replace known keywords with escape sequences.
        words = list(Parser.words)
        words.insert(10, '\n')

        for i, keyword in enumerate(words):
            data = re.sub(keyword, chr(i), data, flags=re.I)

        # TODO: remove this quick preprocessing hack. This hack enables
        # concatenated expressions, since the grammar currently does not
        # support those. This workaround will replace:
        # - ")(" with ")*(".
        # - ")[" with ")*[".
        # - "a(" with "a*(".
        # - ")a" with ")*a".
        # - "ab" with "a*b".
        # - "4a" with "4*a".
        # - "a4" with "a^4".
        # - "4 4" with "4*4".
        pattern = ('(?:(\))\s*(\()'                           # )( -> ) * (
                   + '|(\))\s*(\[)'                           # )[ -> ) * [
                   + '|([\x00-\x09\x0b-\x19a-z0-9])\s*(\()'   # a( -> a * (
                   + '|(\))\s*([\x00-\x09\x0b-\x19a-z0-9])'   # )a -> ) * a
                   + '|([\x00-\x09\x0b-\x19a-z])\s*'
                   + '([\x00-\x09\x0b-\x19a-z]+)'             # ab -> a * b
                   + '|([0-9])\s*([\x00-\x09\x0b-\x19a-z])'   # 4a -> 4 * a
                   + '|([\x00-\x09\x0b-\x19a-z])\s*([0-9])'   # a4 -> a ^ 4
                   + '|([0-9])\s+([0-9]))'                    # 4 4 -> 4 * 4
                   )

        def preprocess_data(match):
            left, right = filter(None, match.groups())

            # Make sure there are no multiplication and exponentiation signs
            # inserted between a function and its argument(s): "sin x" should
            # not be written as "sin*x", because that is bogus.
            if ord(left) <= 0x9 or 0x0b <= ord(left) <= 0x19:
                return left + ' ' + right

            # If all characters on the right are digits, e.g. "a4", the
            # expression implies exponentiation. Make sure ")4" is not
            # converted into an exponentiation, because that's multiplication.
            if left != ')' and not 48 <= ord(left) < 58 \
                    and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
                return '%s^%s' % (left, right)

            # match: ab | abc | abcd (where left = "a")
            return '*'.join([left] + list(right))

        if self.verbose:  # pragma: nocover
            data_before = data

        # Iteratively replace all matches.
        i = 0

        while i < len(data):
            data = data[:i] + re.sub(pattern, preprocess_data, data[i:])
            i += 1

        # Replace escape sequences with original keywords.
        for i, keyword in enumerate(words):
            data = data.replace(chr(i), keyword)

        if self.verbose and data_before != data:  # pragma: nocover
            print 'hook_read_after() modified the input data:'
            print 'before:', repr(data_before)
            print 'after :', repr(data)

        return data

    def hook_handler(self, target, option, names, values, retval):
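        # Collect applicable rewrite possibilities for the node that was just
        # reduced by the parser; pass-through targets and empty results are
        # skipped.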
        if target in ['exp', 'line', 'input'] or not retval:
            return retval

        if not retval.negated and retval.type != TYPE_OPERATOR:
            return retval

        if retval.type == TYPE_OPERATOR and retval.op in RULES:
            handlers = RULES[retval.op]
        else:
            handlers = []

        if retval.negated:
            handlers = RULES[OP_NEG]

        for handler in handlers:
            possibilities = handler(retval)
            self.possibilities.extend(possibilities)

        return retval

    def display_hint(self):
        print pick_suggestion(self.last_possibilities)

    def display_possibilities(self):
        if self.last_possibilities:
            print '\n'.join(map(str, self.last_possibilities))

    def rewrite(self):
        suggestion = pick_suggestion(self.last_possibilities)

        if self.verbose:
            print 'applying suggestion:', suggestion

        if not suggestion:
            return self.root_node

        expression = apply_suggestion(self.root_node, suggestion)

        if self.verbose:
            print 'After application, expression=', expression
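
        # The rewritten expression is queued so that read() feeds it back to
        # the parser on the next pass, re-parsing the updated expression.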
        self.read_queue.put_nowait(str(expression))

        return expression

    #def hook_run(self, filename, retval):
    #    return retval

    # ---------------------------------------------------------------
    # These methods are the python handlers for the bison targets.
    # (which get called by the bison code each time the corresponding
    # parse target is unambiguously reached)
    #
    # WARNING - don't touch the method docstrings unless you know what
    # you are doing - they are in bison rule syntax, and are passed
    # verbatim to bison to build the parser engine library.
    # ---------------------------------------------------------------

    # Declare the start target here (by name)
    start = 'input'

    def on_input(self, target, option, names, values):
        """
        input :
              | input line
              | input REWRITE NEWLINE
        """
        if option == 1:
            # Interactive mode is enabled if the term rewriting system is used
            # as a shell. In that case, it is useful that the shell prints the
            # output of the evaluation.
            if self.interactive and values[1]:  # pragma: nocover
                print values[1]

            return values[1]

        if option == 2:  # rule: input REWRITE NEWLINE
            self.root_node = self.rewrite()
            return self.root_node

    def on_line(self, target, option, names, values):
        """
        line : NEWLINE
             | exp NEWLINE
             | debug NEWLINE
             | HINT NEWLINE
             | POSSIBILITIES NEWLINE
             | RAISE NEWLINE
        """
        if option == 1:  # rule: exp NEWLINE
            self.root_node = values[0]

            # Clear the list of last possibilities when the current expression
            # has no possibilities. Otherwise, an invalid expression gets the
            # last possibilities of a valid expression.
            if not self.possibilities:
                self.last_possibilities = []

            return values[0]

        if option == 2:  # rule: debug NEWLINE
            self.root_node = values[0]
            return values[0]

        if option == 3:  # rule: HINT NEWLINE
            self.display_hint()
            return

        if option == 4:  # rule: POSSIBILITIES NEWLINE
            self.display_possibilities()
            return

        if option == 5:  # rule: RAISE NEWLINE
            raise RuntimeError('on_line: exception raised')

    def on_debug(self, target, option, names, values):
        """
        debug : GRAPH exp
        """
        if option == 0:
            print generate_graph(values[1])
            return values[1]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_exp(self, target, option, names, values):
        """
        exp : NUMBER
            | IDENTIFIER
            | LPAREN exp RPAREN
            | unary
            | binary
            | nary
        """
        #   | concat
        if option == 0:  # rule: NUMBER
            # TODO: A bit hacky, this achieves long integers and floats.
            value = float(values[0]) if '.' in values[0] else int(values[0])
            return Leaf(value)

        if option == 1:  # rule: IDENTIFIER
            return Leaf(values[0])

        if option == 2:  # rule: LPAREN exp RPAREN
            return values[1]

        if 3 <= option <= 5:  # rule: unary | binary | nary
            return values[0]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_unary(self, target, option, names, values):
        """
        unary : MINUS exp %prec NEG
              | FUNCTION_LPAREN exp RPAREN
              | FUNCTION exp
              | DERIVATIVE exp
              | bracket_derivative
        """
        if option == 0:  # rule: MINUS exp
            node = values[1]

            # Add negation to the left-most child
            if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
                node.negated += 1
            else:
                child = Scope(node)[0]
                child.negated += 1

            return node

        if option in (1, 2):  # rule: FUNCTION_LPAREN exp RPAREN | FUNCTION exp
            op = values[0].split(' ', 1)[0]

            if op == 'ln':
                return Node(OP_LOG, values[1], Leaf(E))

            if values[1].is_op(OP_COMMA):
                return Node(op, *values[1])

            if op == OP_VALUE_MAP[OP_LOG]:
                return Node(OP_LOG, values[1], Leaf(DEFAULT_LOGARITHM_BASE))

            m = re.match(r'^log_([0-9]+|[a-zA-Z])', op)

            if m:
                value = m.group(1)

                if value.isdigit():
                    value = int(value)

                return Node(OP_LOG, values[1], Leaf(value))

            return Node(op, values[1])
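
        # (Note on the branch above: a function token such as "log_2" yields a
        # logarithm node with base 2, the plain logarithm token falls back to
        # DEFAULT_LOGARITHM_BASE, and "ln" becomes a logarithm with base E.)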

        if option == 3:  # rule: DERIVATIVE exp
            # DERIVATIVE looks like 'd/d*x*' -> extract the 'x'
            return Node(OP_DER, values[1], Leaf(values[0][-2]))

        if option == 4:  # rule: bracket_derivative
            return values[0]

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_bracket_derivative(self, target, option, names, values):
        """
        bracket_derivative : LBRACKET exp RBRACKET APOSTROPH
                           | bracket_derivative APOSTROPH
        """
        if option == 0:  # rule: LBRACKET exp RBRACKET APOSTROPH
            return Node(OP_DER, values[1])

        if option == 1:  # rule: bracket_derivative APOSTROPH
            return Node(OP_DER, values[0])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_binary(self, target, option, names, values):
        """
        binary : exp PLUS exp
               | exp TIMES exp
               | exp DIVIDE exp
               | exp POW exp
               | exp EQ exp
               | exp MINUS exp
        """
        if 0 <= option < 5:  # rule: exp {PLUS,TIMES,DIVIDE,POW,EQ} exp
            return Node(values[1], values[0], values[2])

        if option == 5:  # rule: exp MINUS exp
            node = values[2]

            # Add negation to the left-most child
            if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
                node.negated += 1
            else:
                node = Scope(node)[0]
                node.negated += 1

            # Explicitly call the hook handler on the created unary negation.
            node = self.hook_handler('binary', 4, names, values, node)

            return Node(OP_ADD, values[0], values[2])

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    def on_nary(self, target, option, names, values):
        """
        nary : exp COMMA exp
        """
        if option == 0:  # rule: exp COMMA exp
            return Node(*combine(',', OP_COMMA, values[0], values[2]))

        raise BisonSyntaxError('Unsupported option %d in target "%s".'
                               % (option, target))  # pragma: nocover

    # -----------------------------------------
    # Special characters and operator tokens
    # -----------------------------------------
    operators = '"%s"%s{ returntoken(IDENTIFIER); }\n' \
                % (PI, ' ' * (8 - len(PI)))
    functions = []

    for op_str, op in OP_MAP.iteritems():
        if TOKEN_MAP[op] == 'FUNCTION':
            functions.append(op_str)
        else:
            operators += '"%s"%s{ returntoken(%s); }\n' \
                         % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])

    # Put all functions in a single regex
    if functions:
        operators += '("%s")[ ]*"(" { returntoken(FUNCTION_LPAREN); }\n' \
                     % '"|"'.join(functions)
        operators += '("%s") { returntoken(FUNCTION); }\n' \
                     % '"|"'.join(functions)

    # -----------------------------------------
    # raw lex script, verbatim here
    # -----------------------------------------
    lexscript = r"""
%top{
#include "Python.h"
}
%{
#define YYSTYPE void *
#include "tokens.h"
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result,
                        int max_size);
#define returntoken(tok) \
        yylval = PyString_FromString(strdup(yytext)); return (tok);
#define YY_INPUT(buf,result,max_size) { \
        (*py_input)(py_parser, buf, &result, max_size); \
}

int yycolumn = 0;

#define YY_USER_ACTION \
        yylloc.first_line = yylloc.last_line = yylineno; \
        yylloc.first_column = yycolumn; \
        yylloc.last_column = yycolumn + yyleng; \
        yycolumn += yyleng;
%}

%option yylineno

%%

d[ ]*"/"[ ]*"d*"[a-z]"*"  { returntoken(DERIVATIVE); }
[0-9]+"."?[0-9]*          { returntoken(NUMBER); }
[a-zA-Z]                  { returntoken(IDENTIFIER); }
"("                       { returntoken(LPAREN); }
")"                       { returntoken(RPAREN); }
"["                       { returntoken(LBRACKET); }
"]"                       { returntoken(RBRACKET); }
"'"                       { returntoken(APOSTROPH); }
log_([0-9]+|[a-zA-Z])"*(" { returntoken(FUNCTION_LPAREN); }
log_([0-9]+|[a-zA-Z])"*"  { returntoken(FUNCTION); }
""" + operators + r"""
"raise"                   { returntoken(RAISE); }
"graph"                   { returntoken(GRAPH); }
"quit"                    { yyterminate(); returntoken(QUIT); }
[ \t\v\f]                 { }
[\n]                      { yycolumn = 0; returntoken(NEWLINE); }
.                         { printf("unknown char %c ignored.\n", yytext[0]); }

%%

yywrap() { return(1); }
"""