parser.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. #!/usr/bin/env python
  2. """
  3. This parser will parse the given input and build an expression tree. Grammar
  4. file for the supported mathematical expressions.
  5. """
  6. from node import ExpressionNode as Node, ExpressionLeaf as Leaf
  7. import argparse
  8. import os.path
  9. PYBISON_BUILD = os.path.realpath('build/external/pybison')
  10. EXTERNAL_MODS = os.path.realpath('external')
  11. import sys
  12. sys.path.insert(0, PYBISON_BUILD)
  13. sys.path.insert(1, EXTERNAL_MODS)
  14. from pybison import BisonParser, BisonSyntaxError
  15. # Check for n-ary operator in child nodes
  16. def combine(op, n):
  17. return n.nodes if n.title() == op else [n]
  18. class Parser(BisonParser):
  19. """
  20. Implements the calculator parser. Grammar rules are defined in the method
  21. docstrings. Scanner rules are in the 'lexscript' attribute.
  22. """
  23. # Output directory of generated pybison files, including a trailing slash.
  24. buildDirectory = PYBISON_BUILD + '/'
  25. # ----------------------------------------------------------------
  26. # lexer tokens - these must match those in your lex script (below)
  27. # ----------------------------------------------------------------
  28. # TODO: add a runtime check to verify that this token list match the list
  29. # of tokens of the lex script.
  30. tokens = ['NUMBER', 'IDENTIFIER',
  31. 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POW',
  32. 'LPAREN', 'RPAREN', 'COMMA', 'CONCAT_POW',
  33. 'NEWLINE', 'QUIT', 'RAISE']
  34. # ------------------------------
  35. # precedences
  36. # ------------------------------
  37. precedences = (
  38. ('left', ('MINUS', 'PLUS')),
  39. ('left', ('TIMES', 'DIVIDE')),
  40. ('left', ('NEG', )),
  41. ('right', ('POW', )),
  42. )
  43. interactive = 0
  44. def __init__(self, **kwargs):
  45. BisonParser.__init__(self, **kwargs)
  46. self.interactive = kwargs.get('interactive', 0)
  47. self.timeout = kwargs.get('timeout', 0)
  48. # ------------------------------------------------------------------
  49. # override default read method with a version that prompts for input
  50. # ------------------------------------------------------------------
  51. def read(self, nbytes):
  52. if self.file == sys.stdin and self.file.closed:
  53. return ''
  54. try:
  55. return raw_input('>>> ') + '\n'
  56. except EOFError:
  57. return ''
  58. # ---------------------------------------------------------------
  59. # These methods are the python handlers for the bison targets.
  60. # (which get called by the bison code each time the corresponding
  61. # parse target is unambiguously reached)
  62. #
  63. # WARNING - don't touch the method docstrings unless you know what
  64. # you are doing - they are in bison rule syntax, and are passed
  65. # verbatim to bison to build the parser engine library.
  66. # ---------------------------------------------------------------
  67. # Declare the start target here (by name)
  68. start = 'input'
  69. def on_input(self, target, option, names, values):
  70. """
  71. input :
  72. | input line
  73. """
  74. if option == 1:
  75. # Interactive mode is enabled if the term rewriting system is used
  76. # as a shell. In that case, it is useful that the shell prints the
  77. # output of the evaluation.
  78. if self.interactive and values[1]:
  79. print values[1]
  80. return values[1]
  81. def on_line(self, target, option, names, values):
  82. """
  83. line : NEWLINE
  84. | exp NEWLINE
  85. | RAISE NEWLINE
  86. """
  87. if option == 1:
  88. return values[0]
  89. if option == 2:
  90. raise RuntimeError('on_line: exception raised')
  91. def on_exp(self, target, option, names, values):
  92. """
  93. exp : NUMBER
  94. | IDENTIFIER
  95. | LPAREN exp RPAREN
  96. | unary
  97. | binary
  98. | concat
  99. """
  100. if option == 0: # rule: NUMBER
  101. # TODO: A bit hacky, this achieves long integers and floats.
  102. value = float(values[0]) if '.' in values[0] else int(values[0])
  103. return Leaf(value)
  104. if option == 1: # rule: IDENTIFIER
  105. return Leaf(values[0])
  106. if option == 2: # rule: LPAREN exp RPAREN
  107. return values[1]
  108. if option in [3, 4, 5]: # rule: unary | binary | concat
  109. return values[0]
  110. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  111. % (option, target))
  112. def on_unary(self, target, option, names, values):
  113. """
  114. unary : MINUS exp %prec NEG
  115. """
  116. if option == 0: # rule: NEG exp
  117. return Node('-', values[1])
  118. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  119. % (option, target))
  120. def on_binary(self, target, option, names, values):
  121. """
  122. binary : exp PLUS exp
  123. | exp MINUS exp
  124. | exp TIMES exp
  125. | exp DIVIDE exp
  126. | exp POW exp
  127. """
  128. if option == 0: # rule: exp PLUS exp
  129. return Node('+', *(combine('+', values[0])
  130. + combine('+', values[2])))
  131. if option == 1: # rule: exp MINUS exp
  132. return Node('-', *(combine('-', values[0])
  133. + combine('-', values[2])))
  134. if option == 2: # rule: exp TIMES exp
  135. return Node('*', *(combine('*', values[0])
  136. + combine('*', values[2])))
  137. if option == 3: # rule: exp DIVIDE exp
  138. return Node('/', values[0], values[2])
  139. if option == 4: # rule: exp POW exp
  140. return Node('^', values[0], values[2])
  141. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  142. % (option, target))
  143. def on_concat(self, option, target, names, values):
  144. """
  145. concat : exp IDENTIFIER
  146. | exp NUMBER
  147. | exp LPAREN exp RPAREN
  148. | exp CONCAT_POW
  149. | CONCAT_POW
  150. """
  151. if option in [0, 1]: # rule: exp IDENTIFIER | exp NUMBER
  152. # example: xy -> x*y
  153. # example: (x)4 -> x*4
  154. val = int(values[1]) if option == 1 else values[1]
  155. return Node('*', *(combine('*', values[0]) + [Leaf(val)]))
  156. if option == 2: # rule: exp LPAREN exp RPAREN
  157. # example: x(y) -> x*(y)
  158. return Node('*', *(combine('*', values[0])
  159. + combine('*', values[2])))
  160. if option == 3:
  161. # example: xy4 -> x*y^4
  162. identifier, exponent = list(values[1])
  163. node = Node('^', Leaf(identifier), Leaf(int(exponent)))
  164. return Node('*', values[0], node)
  165. if option == 4:
  166. # example: x4 -> x^4
  167. identifier, exponent = list(values[0])
  168. return Node('^', Leaf(identifier), Leaf(int(exponent)))
  169. raise BisonSyntaxError('Unsupported option %d in target "%s".'
  170. % (option, target))
  171. # -----------------------------------------
  172. # raw lex script, verbatim here
  173. # -----------------------------------------
  174. lexscript = r"""
  175. %{
  176. //int yylineno = 0;
  177. #include "Python.h"
  178. #define YYSTYPE void *
  179. #include "tokens.h"
  180. extern void *py_parser;
  181. extern void (*py_input)(PyObject *parser, char *buf, int *result,
  182. int max_size);
  183. #define returntoken(tok) \
  184. yylval = PyString_FromString(strdup(yytext)); return (tok);
  185. #define YY_INPUT(buf,result,max_size) { \
  186. (*py_input)(py_parser, buf, &result, max_size); \
  187. }
  188. %}
  189. %%
  190. [a-zA-Z][0-9]+ { returntoken(CONCAT_POW); }
  191. [0-9]+ { returntoken(NUMBER); }
  192. [a-zA-Z] { returntoken(IDENTIFIER); }
  193. "(" { returntoken(LPAREN); }
  194. ")" { returntoken(RPAREN); }
  195. "+" { returntoken(PLUS); }
  196. "-" { returntoken(MINUS); }
  197. "*" { returntoken(TIMES); }
  198. "^" { returntoken(POW); }
  199. "/" { returntoken(DIVIDE); }
  200. "," { returntoken(COMMA); }
  201. "quit" { printf("lex: got QUIT\n"); yyterminate(); returntoken(QUIT); }
  202. "raise" { returntoken(RAISE); }
  203. [ \t\v\f] {}
  204. [\n] {yylineno++; returntoken(NEWLINE); }
  205. . { printf("unknown char %c ignored, yytext=%p\n",
  206. yytext[0], yytext); /* ignore bad chars */}
  207. %%
  208. yywrap() { return(1); }
  209. """
  210. def get_args():
  211. parser = argparse.ArgumentParser(prog='parser', description=__doc__)
  212. parser.add_argument('--debug', '-d', action='store_true', default=False,
  213. help='Enable debug mode in bison and flex.')
  214. parser.add_argument('--verbose', '-v', action='store_true', default=False,
  215. help='Enable verbose output messages (printed to stdout).')
  216. parser.add_argument('--keepfiles', '-k', action='store_true', default=False,
  217. help='Keep temporary generated bison and lex files.')
  218. parser.add_argument('--batch', '-b', action='store_true', default=False,
  219. help='Disable interactive mode and execute expressions in batch mode.')
  220. return parser.parse_args()
  221. def main():
  222. args = get_args()
  223. p = Parser(verbose=args.verbose,
  224. keepfiles=args.keepfiles,
  225. interactive=not args.batch)
  226. node = p.run(debug=args.debug)
  227. # Clear the line, when the shell exits.
  228. if not args.batch:
  229. print
  230. return node
  231. if __name__ == '__main__':
  232. main()