parser.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. """
  2. This parser will parse the given input and build an expression tree. Grammar
  3. file for the supported mathematical expressions.
  4. """
  5. import os.path
  6. PYBISON_BUILD = os.path.realpath('build/external/pybison')
  7. EXTERNAL_MODS = os.path.realpath('external')
  8. import sys
  9. sys.path.insert(0, PYBISON_BUILD)
  10. sys.path.insert(1, EXTERNAL_MODS)
  11. from pybison import BisonParser, BisonSyntaxError
  12. from graph_drawing.graph import generate_graph
  13. from node import ExpressionNode as Node, ExpressionLeaf as Leaf, OP_MAP, \
  14. TOKEN_MAP, TYPE_OPERATOR, OP_COMMA, OP_NEG, OP_MUL, OP_DIV, Scope, PI
  15. from rules import RULES
  16. from strategy import pick_suggestion
  17. from possibilities import filter_duplicates, apply_suggestion
  18. import Queue
  19. # Check for n-ary operator in child nodes
  20. def combine(op, op_type, *nodes):
  21. # At least return the operator.
  22. res = [op]
  23. for n in nodes:
  24. # Merge the children for all nodes which have the same operator.
  25. if n.type == TYPE_OPERATOR and n.op == op_type:
  26. res += n.nodes
  27. else:
  28. res.append(n)
  29. return res
  30. class Parser(BisonParser):
  31. """
  32. Implements the calculator parser. Grammar rules are defined in the method
  33. docstrings. Scanner rules are in the 'lexscript' attribute.
  34. """
  35. # Words to be ignored by preprocessor
  36. words = zip(*filter(lambda (s, op): TOKEN_MAP[op] == 'FUNCTION', \
  37. OP_MAP.iteritems()))[0] + ('raise', 'graph', PI)
  38. # Output directory of generated pybison files, including a trailing slash.
  39. buildDirectory = PYBISON_BUILD + '/'
  40. # ----------------------------------------------------------------
  41. # lexer tokens - these must match those in your lex script (below)
  42. # ----------------------------------------------------------------
  43. # TODO: add a runtime check to verify that this token list match the list
  44. # of tokens of the lex script.
  45. tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
  46. 'LPAREN', 'RPAREN', 'FUNCTION', 'FUNCTION_LPAREN'] \
  47. + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())
  48. # ------------------------------
  49. # precedences
  50. # ------------------------------
  51. precedences = (
  52. ('left', ('COMMA', )),
  53. ('left', ('MINUS', 'PLUS')),
  54. ('left', ('TIMES', 'DIVIDE')),
  55. ('right', ('FUNCTION', )),
  56. ('left', ('EQ', )),
  57. ('left', ('NEG', )),
  58. ('right', ('POW', )),
  59. ('right', ('FUNCTION_LPAREN', )),
  60. )
  61. interactive = 0
  62. def __init__(self, **kwargs):
  63. BisonParser.__init__(self, **kwargs)
  64. self.interactive = kwargs.get('interactive', 0)
  65. self.timeout = kwargs.get('timeout', 0)
  66. self.reset()
  67. def reset(self):
  68. self.read_buffer = ''
  69. self.read_queue = Queue.Queue()
  70. #self.subtree_map = {}
  71. self.root_node = None
  72. self.possibilities = self.last_possibilities = []
  73. def run(self, *args, **kwargs):
  74. self.reset()
  75. return super(Parser, self).run(*args, **kwargs)
  76. # Override default read method with a version that prompts for input.
  77. def read(self, nbytes):
  78. if self.file == sys.stdin and self.file.closed:
  79. return ''
  80. if not self.read_buffer and not self.read_queue.empty():
  81. self.read_buffer = self.read_queue.get_nowait() + '\n'
  82. if self.read_buffer:
  83. read_buffer = self.read_buffer[:nbytes]
  84. self.read_buffer = self.read_buffer[nbytes:]
  85. return read_buffer
  86. try:
  87. read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
  88. except EOFError:
  89. return ''
  90. self.read_buffer = read_buffer[nbytes:]
  91. return read_buffer[:nbytes]
  92. def hook_read_before(self):
  93. if self.possibilities:
  94. if self.verbose: # pragma: nocover
  95. print 'possibilities:'
  96. items = filter_duplicates(self.possibilities)
  97. self.last_possibilities = self.possibilities
  98. if self.verbose: # pragma: nocover
  99. print ' ' + '\n '.join(map(str, items))
  100. def hook_read_after(self, data):
  101. """
  102. This hook will be called when the read() method returned. The data
  103. argument points to the data read by the read() method. This hook
  104. function should return the data to be used by the parser.
  105. """
  106. if not data.strip():
  107. return data
  108. self.possibilities = []
  109. import re
  110. # Replace known keywords with escape sequences.
  111. for i, keyword in enumerate(Parser.words):
  112. data = re.sub(keyword, chr(i), data, flags=re.I)
  113. # TODO: remove this quick preprocessing hack. This hack enables
  114. # concatenated expressions, since the grammar currently does not
  115. # support those. This workaround will replace:
  116. # - ")(" with ")*(".
  117. # - "a(" with "a*(".
  118. # - ")a" with ")*a".
  119. # - "ab" with "a*b".
  120. # - "4a" with "4*a".
  121. # - "a4" with "a^4".
  122. pattern = ('(?:(\))\s*(\()' # )( -> ) * (
  123. + '|([\x00-\x09\x0b-\x19a-z0-9])\s*(\()' # a( -> a * (
  124. + '|(\))\s*([\x00-\x09\x0b-\x19a-z0-9])' # )a -> ) * a
  125. + '|([\x00-\x09\x0b-\x19a-z])\s*'
  126. +'([\x00-\x09\x0b-\x19a-z]+)' # ab -> a * b
  127. + '|([0-9])\s*([\x00-\x09\x0b-\x19a-z])' # 4a -> 4 * a
  128. + '|([\x00-\x09\x0b-\x19a-z])\s*([0-9])' # a4 -> a ^ 4
  129. + '|([0-9])\s+([0-9]))' # 4 4 -> 4 * 4
  130. )
  131. def preprocess_data(match):
  132. left, right = filter(None, match.groups())
  133. # Make sure there are no multiplication and exponentiation signs
  134. # inserted between a function and its argument(s): "sin x" should
  135. # not be written as "sin*x", because that is bogus.
  136. if ord(left) <= 0x9 or 0x0b <= ord(left) <= 0x19:
  137. return left + ' ' + right
  138. # If all characters on the right are numbers. e.g. "a4", the
  139. # expression implies exponentiation. Make sure ")4" is not
  140. # converted into an exponentiation, because that's multiplication.
  141. if left != ')' and not 48 <= ord(left) < 58 \
  142. and all(map(lambda x: 48 <= ord(x) < 58, list(right))):
  143. return '%s^%s' % (left, right)
  144. # match: ab | abc | abcd (where left = "a")
  145. return '*'.join([left] + list(right))
  146. if self.verbose: # pragma: nocover
  147. data_before = data
  148. # Iteratively replace all matches.
  149. i = 0
  150. while i < len(data):
  151. data = data[:i] + re.sub(pattern, preprocess_data, data[i:])
  152. i += 1
  153. # Replace escape sequences with original keywords.
  154. for i, keyword in enumerate(Parser.words):
  155. data = data.replace(chr(i), keyword)
  156. if self.verbose and data_before != data: # pragma: nocover
  157. print 'hook_read_after() modified the input data:'
  158. print 'before:', repr(data_before)
  159. print 'after :', repr(data)
  160. return data
  161. def hook_handler(self, target, option, names, values, retval):
  162. if target in ['exp', 'line', 'input'] or not retval:
  163. return retval
  164. if not retval.negated and retval.type != TYPE_OPERATOR:
  165. return retval
  166. if retval.type == TYPE_OPERATOR and retval.op in RULES:
  167. handlers = RULES[retval.op]
  168. else:
  169. handlers = []
  170. if retval.negated:
  171. handlers = RULES[OP_NEG]
  172. for handler in handlers:
  173. possibilities = handler(retval)
  174. self.possibilities.extend(possibilities)
  175. return retval
  176. def display_hint(self):
  177. print pick_suggestion(self.last_possibilities)
  178. def display_possibilities(self):
  179. if self.last_possibilities:
  180. print '\n'.join(map(str, self.last_possibilities))
  181. def rewrite(self):
  182. suggestion = pick_suggestion(self.last_possibilities)
  183. if self.verbose:
  184. print 'applying suggestion:', suggestion
  185. if not suggestion:
  186. return self.root_node
  187. expression = apply_suggestion(self.root_node, suggestion)
  188. if self.verbose:
  189. print 'After application, expression=', expression
  190. self.read_queue.put_nowait(str(expression))
  191. return expression
  192. #def hook_run(self, filename, retval):
  193. # return retval
  194. # ---------------------------------------------------------------
  195. # These methods are the python handlers for the bison targets.
  196. # (which get called by the bison code each time the corresponding
  197. # parse target is unambiguously reached)
  198. #
  199. # WARNING - don't touch the method docstrings unless you know what
  200. # you are doing - they are in bison rule syntax, and are passed
  201. # verbatim to bison to build the parser engine library.
  202. # ---------------------------------------------------------------
  203. # Declare the start target here (by name)
  204. start = 'input'
  205. def on_input(self, target, option, names, values):
  206. """
  207. input :
  208. | input line
  209. | input REWRITE NEWLINE
  210. """
  211. if option == 1:
  212. # Interactive mode is enabled if the term rewriting system is used
  213. # as a shell. In that case, it is useful that the shell prints the
  214. # output of the evaluation.
  215. if self.interactive and values[1]: # pragma: nocover
  216. print values[1]
  217. return values[1]
  218. if option == 2: # rule: input REWRITE NEWLINE
  219. self.root_node = self.rewrite()
  220. return self.root_node
  221. def on_line(self, target, option, names, values):
  222. """
  223. line : NEWLINE
  224. | exp NEWLINE
  225. | debug NEWLINE
  226. | HINT NEWLINE
  227. | POSSIBILITIES NEWLINE
  228. | RAISE NEWLINE
  229. """
  230. if option == 1: # rule: EXP NEWLINE
  231. self.root_node = values[0]
  232. # Clear list of last possibilities when current expression has no
  233. # possibilities. Otherwise, an invalid expression gets the last
  234. # possibilities of a valid expression.
  235. if not self.possibilities:
  236. self.last_possibilities = []
  237. return values[0]
  238. if option == 2: # rule: DEBUG NEWLINE
  239. self.root_node = values[0]
  240. return values[0]
  241. if option == 3: # rule: HINT NEWLINE
  242. self.display_hint()
  243. return
  244. if option == 4: # rule: POSSIBILITIES NEWLINE
  245. self.display_possibilities()
  246. return
  247. if option == 5:
  248. raise RuntimeError('on_line: exception raised')
  249. def on_debug(self, target, option, names, values):
  250. """
  251. debug : GRAPH exp
  252. """
  253. if option == 0:
  254. print generate_graph(values[1])
  255. return values[1]
  256. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  257. % (option, target)) # pragma: nocover
  258. def on_exp(self, target, option, names, values):
  259. """
  260. exp : NUMBER
  261. | IDENTIFIER
  262. | LPAREN exp RPAREN
  263. | unary
  264. | binary
  265. | nary
  266. """
  267. # | concat
  268. if option == 0: # rule: NUMBER
  269. # TODO: A bit hacky, this achieves long integers and floats.
  270. value = float(values[0]) if '.' in values[0] else int(values[0])
  271. return Leaf(value)
  272. if option == 1: # rule: IDENTIFIER
  273. return Leaf(values[0])
  274. if option == 2: # rule: LPAREN exp RPAREN
  275. return values[1]
  276. if option in [3, 4, 5]: # rule: unary | binary | nary
  277. return values[0]
  278. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  279. % (option, target)) # pragma: nocover
  280. def on_unary(self, target, option, names, values):
  281. """
  282. unary : MINUS exp %prec NEG
  283. | FUNCTION_LPAREN exp RPAREN
  284. | FUNCTION exp
  285. """
  286. if option == 0: # rule: NEG exp
  287. node = values[1]
  288. # Add negation to the left-most child
  289. if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
  290. node.negated += 1
  291. else:
  292. child = Scope(node)[0]
  293. child.negated += 1
  294. return node
  295. if option in (1, 2): # rule: FUNCTION_LPAREN exp RPAREN | FUNCTION exp
  296. op = values[0].split(' ', 1)[0]
  297. if values[1].is_op(OP_COMMA):
  298. return Node(op, *values[1])
  299. return Node(op, values[1])
  300. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  301. % (option, target)) # pragma: nocover
  302. def on_binary(self, target, option, names, values):
  303. """
  304. binary : exp PLUS exp
  305. | exp TIMES exp
  306. | exp DIVIDE exp
  307. | exp POW exp
  308. | exp EQ exp
  309. | exp MINUS exp
  310. """
  311. if 0 <= option < 5: # rule: exp {PLUS,TIMES,DIVIDES,POW,EQ} exp
  312. return Node(values[1], values[0], values[2])
  313. if option == 5: # rule: exp MINUS exp
  314. node = values[2]
  315. # Add negation to the left-most child
  316. if node.is_leaf or (node.op != OP_MUL and node.op != OP_DIV):
  317. node.negated += 1
  318. else:
  319. node = Scope(node)[0]
  320. node.negated += 1
  321. # Explicit call the hook handler on the created unary negation.
  322. node = self.hook_handler('binary', 4, names, values, node)
  323. return Node('+', values[0], values[2])
  324. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  325. % (option, target)) # pragma: nocover
  326. def on_nary(self, target, option, names, values):
  327. """
  328. nary : exp COMMA exp
  329. """
  330. if option == 0: # rule: exp COMMA exp
  331. return Node(*combine(',', OP_COMMA, values[0], values[2]))
  332. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  333. % (option, target)) # pragma: nocover
  334. # -----------------------------------------
  335. # PI and operator tokens
  336. # -----------------------------------------
  337. operators = '"%s"%s{ returntoken(IDENTIFIER); }\n' \
  338. % (PI, ' ' * (8 - len(PI)))
  339. functions = []
  340. for op_str, op in OP_MAP.iteritems():
  341. if TOKEN_MAP[op] == 'FUNCTION':
  342. functions.append(op_str)
  343. else:
  344. operators += '"%s"%s{ returntoken(%s); }\n' \
  345. % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])
  346. # Put all functions in a single regex
  347. if functions:
  348. operators += '("%s")[ ]*"(" { returntoken(FUNCTION_LPAREN); }\n' \
  349. % '"|"'.join(functions)
  350. operators += '("%s") { returntoken(FUNCTION); }\n' \
  351. % '"|"'.join(functions)
  352. # -----------------------------------------
  353. # raw lex script, verbatim here
  354. # -----------------------------------------
  355. lexscript = r"""
  356. %top{
  357. #include "Python.h"
  358. }
  359. %{
  360. #define YYSTYPE void *
  361. #include "tokens.h"
  362. extern void *py_parser;
  363. extern void (*py_input)(PyObject *parser, char *buf, int *result,
  364. int max_size);
  365. #define returntoken(tok) \
  366. yylval = PyString_FromString(strdup(yytext)); return (tok);
  367. #define YY_INPUT(buf,result,max_size) { \
  368. (*py_input)(py_parser, buf, &result, max_size); \
  369. }
  370. int yycolumn = 0;
  371. #define YY_USER_ACTION \
  372. yylloc.first_line = yylloc.last_line = yylineno; \
  373. yylloc.first_column = yycolumn; \
  374. yylloc.last_column = yycolumn + yyleng; \
  375. yycolumn += yyleng;
  376. %}
  377. %option yylineno
  378. %%
  379. [0-9]+"."?[0-9]* { returntoken(NUMBER); }
  380. [a-zA-Z] { returntoken(IDENTIFIER); }
  381. "(" { returntoken(LPAREN); }
  382. ")" { returntoken(RPAREN); }
  383. """ + operators + r"""
  384. "raise" { returntoken(RAISE); }
  385. "graph" { returntoken(GRAPH); }
  386. "quit" { yyterminate(); returntoken(QUIT); }
  387. [ \t\v\f] { }
  388. [\n] { yycolumn = 0; returntoken(NEWLINE); }
  389. . { printf("unknown char %c ignored.\n", yytext[0]); }
  390. %%
  391. yywrap() { return(1); }
  392. """