- """
- This parser parses the given input and builds an expression tree for the
- supported mathematical expressions; the grammar is defined in the method
- docstrings of the Parser class below.
- """
- import os.path
- PYBISON_BUILD = os.path.realpath('build/external/pybison')
- EXTERNAL_MODS = os.path.realpath('external')
- import sys
- sys.path.insert(0, PYBISON_BUILD)
- sys.path.insert(1, EXTERNAL_MODS)
- from pybison import BisonParser, BisonSyntaxError
- from graph_drawing.graph import generate_graph
- from node import ExpressionNode as Node, \
- ExpressionLeaf as Leaf, OP_MAP, OP_DER, TOKEN_MAP, TYPE_OPERATOR, \
- OP_COMMA, OP_MUL, OP_POW, OP_LOG, OP_ADD, Scope, E, OP_ABS, \
- DEFAULT_LOGARITHM_BASE, OP_VALUE_MAP, SPECIAL_TOKENS, OP_INT, \
- OP_INT_INDEF, negation_to_node
- from rules.utils import find_variable
- from rules.precedences import IMPLICIT_RULES
- from strategy import find_possibilities
- from possibilities import apply_suggestion
- import Queue
- import re
- # Check for n-ary operator in child nodes
- def combine(op, op_type, *nodes):
- # At least return the operator.
- res = [op]
- for n in nodes:
- # Merge the children for all nodes which have the same operator.
- if n.type == TYPE_OPERATOR and n.op == op_type:
- res += n.nodes
- else:
- res.append(n)
- return res
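- # Illustrative example (with hypothetical nodes a, b, c): combining an
- # existing addition with one extra term flattens the operand list instead
- # of nesting it:
- #   combine('+', OP_ADD, Node('+', a, b), c)  ->  ['+', a, b, c]
- # A caller can then build a single n-ary node with Node(*result), as
- # on_nary() does below.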
- def find_integration_variable(exp):
- if not exp.is_op(OP_MUL):
- return exp, find_variable(exp)
- scope = Scope(exp)
- if len(scope) > 2 and scope[-2] == 'd' and scope[-1].is_identifier():
- x = scope[-1]
- scope.nodes = scope[:-2]
- return scope.as_nary_node(), x
- return exp, find_variable(exp)
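- # Illustrative sketch: for input such as "int x^2 dx" the preprocessor
- # turns "dx" into the factors "d*x", so the multiplication scope ends in
- # ['d', x]; those two factors are stripped and x is returned as the
- # integration variable. In all other cases the variable is guessed with
- # find_variable().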
- class Parser(BisonParser):
- """
- Implements the calculator parser. Grammar rules are defined in the method
- docstrings. Scanner rules are in the 'lexscript' attribute.
- """
- # Words to be ignored by the preprocessor
- words = tuple(filter(lambda w: w.isalpha(), OP_MAP.iterkeys())) \
- + ('raise', 'graph') + tuple(SPECIAL_TOKENS)
- # Output directory of generated pybison files, including a trailing slash.
- buildDirectory = PYBISON_BUILD + '/'
- # ----------------------------------------------------------------
- # lexer tokens - these must match those in your lex script (below)
- # ----------------------------------------------------------------
- # TODO: add a runtime check to verify that this token list matches the
- # list of tokens in the lex script.
- tokens = ['NUMBER', 'IDENTIFIER', 'NEWLINE', 'QUIT', 'RAISE', 'GRAPH',
- 'LPAREN', 'RPAREN', 'FUNCTION', 'FUNCTION_LPAREN', 'LBRACKET',
- 'RBRACKET', 'PIPE', 'PRIME', 'DERIVATIVE'] \
- + filter(lambda t: t != 'FUNCTION', TOKEN_MAP.values())
- # ------------------------------
- # precedences
- # ------------------------------
- precedences = (
- ('left', ('COMMA', )),
- ('left', ('INTEGRAL', 'DERIVATIVE')),
- ('left', ('OR', )),
- ('left', ('AND', )),
- ('left', ('EQ', )),
- ('left', ('MINUS', 'PLUS', 'NEG')),
- ('left', ('TIMES', 'DIVIDE')),
- ('right', ('FUNCTION', )),
- ('right', ('POW', )),
- ('left', ('SUB', )),
- ('right', ('FUNCTION_LPAREN', )),
- )
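- # Note: as with bison's %left/%right declarations, precedence increases
- # towards the end of this tuple, so e.g. POW binds more tightly than
- # TIMES/DIVIDE, which in turn bind more tightly than PLUS/MINUS.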
- interactive = 0
- def __init__(self, **kwargs):
- BisonParser.__init__(self, **kwargs)
- self.interactive = kwargs.get('interactive', 0)
- self.timeout = kwargs.get('timeout', 0)
- self.root_node = None
- self.possibilities = None
- self.reset()
- def reset(self):
- self.read_buffer = ''
- self.read_queue = Queue.Queue()
- #self.subtree_map = {}
- self.set_root_node(None)
- self.possibilities = None
- def run(self, *args, **kwargs):
- self.reset()
- return super(Parser, self).run(*args, **kwargs)
- # Override default read method with a version that prompts for input.
- def read(self, nbytes):
- if self.file == sys.stdin and self.file.closed:
- return ''
- if not self.read_buffer and not self.read_queue.empty():
- self.read_buffer = self.read_queue.get_nowait() + '\n'
- if self.read_buffer:
- read_buffer = self.read_buffer[:nbytes]
- self.read_buffer = self.read_buffer[nbytes:]
- return read_buffer
- try:
- read_buffer = raw_input('>>> ' if self.interactive else '') + '\n'
- except EOFError:
- return ''
- self.read_buffer = read_buffer[nbytes:]
- return read_buffer[:nbytes]
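- # Note: read() serves at most nbytes characters per call; any remainder
- # of the current input line is kept in self.read_buffer for the next call.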
- def hook_read_before(self):
- pass
- def hook_read_after(self, data):
- """
- This hook will be called when the read() method returned. The data
- argument points to the data read by the read() method. This hook
- function should return the data to be used by the parser.
- """
- if not data.strip():
- return data
- # Replace known keywords with escape sequences.
- words = list(self.__class__.words)
- words.insert(10, '\n')
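- # Inserting '\n' at index 10 makes it map to chr(10), i.e. to itself, so
- # that genuine newlines in the input are not mistaken for the escape
- # sequence of another keyword.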
- for i, keyword in enumerate(words):
- # FIXME: Why case-insensitivity?
- data = re.sub(keyword, chr(i), data, flags=re.I)
- # TODO: remove this quick preprocessing hack. This hack enables
- # concatenated expressions, since the grammar currently does not
- # support those. This workaround will replace:
- # - ")(" with ")*(".
- # - "a(" with "a*(".
- # - ")a" with ")*a".
- # - "ab" with "a*b".
- # - "4a" with "4*a".
- # - "a4" with "a^4".
- pattern = ('(?:(\))\s*([([])' # )( -> ) * (
- # )[ -> ) * [
- + '|([\x00-\x09\x0b-\x19a-z0-9])\s*([([])' # a( -> a * (
- # a[ -> a * [
- + '|(\))\s*([\x00-\x09\x0b-\x19a-z0-9])' # )a -> ) * a
- + '|([\x00-\x09\x0b-\x19a-z])\s*'
- + '([\x00-\x09\x0b-\x19a-z])' # ab -> a * b
- + '|(\|)(\|)' # || -> | * |
- + '|([0-9])\s*([\x00-\x09\x0b-\x19a-z])' # 4a -> 4 * a
- + '|([\x00-\x09\x0b-\x19a-z])([0-9])' # a4 -> a ^ 4
- + '|([\x00-\x09\x0b-\x19a-z0-9])(\s+[0-9]))' # a 4 -> a * 4
- # 4 4 -> 4 * 4
- )
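- # Illustrative examples of the combined effect (applied iteratively by
- # preprocess_data() and the loop below):
- #   "2x(x+1)"  ->  "2*x*(x+1)"
- #   "a4"       ->  "a^4"
- #   ")(a+1)"   ->  ")*(a+1)"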
- def preprocess_data(match):
- left, right = filter(None, match.groups())
- # Make sure no multiplication or exponentiation sign is inserted
- # between a function and its argument(s): "sin x" should not be
- # rewritten as "sin*x", because that would be incorrect.
- if ord(left) <= 0x9 or 0x0b <= ord(left) <= 0x19:
- return left + ' ' + right
- # If all characters on the right are digits, e.g. in "a4", the
- # expression implies exponentiation. Make sure ")4" is not
- # converted into exponentiation, because that is a multiplication.
- if left != ')' and not left.isdigit() and right.isdigit():
- return '%s^%s' % (left, right)
- # match: ab | abc | abcd (where left = "a")
- return '*'.join([left] + list(re.sub(r'^ +', '', right)))
- if self.verbose: # pragma: nocover
- data_before = data
- # Iteratively replace all matches.
- i = 0
- while i < len(data):
- data = data[:i] + re.sub(pattern, preprocess_data, data[i:])
- i += 1
- # Replace escape sequences with original keywords.
- for i, keyword in enumerate(words):
- data = data.replace(chr(i), keyword)
- # Fix TIMES operator next to OR
- data = re.sub(r'\*?vv\*?', 'vv', data)
- if self.verbose and data_before != data: # pragma: nocover
- print 'hook_read_after() modified the input data:'
- print 'before:', repr(data_before)
- print 'after :', repr(data)
- return data
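- # End-to-end sketch of this hook (assuming "sin" is one of the escaped
- # keywords): an input line like "sin 2x" gets its implicit multiplication
- # inserted ("2*x") while the function application is left alone, i.e. no
- # "sin*..." is produced.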
- def hook_handler(self, target, option, names, values, retval):
- return retval
- def set_root_node(self, node):
- self.root_node = node
- self.possibilities = None
- def find_possibilities(self):
- if not self.root_node:
- raise RuntimeError('No expression')
- if self.possibilities is not None:
- if self.verbose:
- print 'Expression has not changed, not updating possibilities'
- return
- self.possibilities = find_possibilities(self.root_node)
- def display_hint(self):
- self.find_possibilities()
- if self.interactive:
- if self.possibilities:
- print self.possibilities[0]
- else:
- print 'No further reduction is possible.'
- def display_possibilities(self):
- self.find_possibilities()
- for i, p in enumerate(self.possibilities):
- print '%d %s' % (i, p)
- def rewrite(self, index=0, verbose=False, check_implicit=True):
- self.find_possibilities()
- if not self.possibilities:
- return
- suggestion = self.possibilities[index]
- if self.verbose:
- print 'EXPLICIT:', suggestion
- elif verbose:
- print suggestion
- self.set_root_node(apply_suggestion(self.root_node, suggestion))
- if self.verbose:
- print ' ', self.root_node
- # Only apply any remaining implicit hints if the suggestion itself is
- # not implicit
- if check_implicit and suggestion.handler not in IMPLICIT_RULES:
- self.find_possibilities()
- while self.possibilities:
- sugg = self.possibilities[0]
- if sugg.handler not in IMPLICIT_RULES:
- break
- if self.verbose:
- print 'IMPLICIT:', sugg
- self.set_root_node(apply_suggestion(self.root_node, sugg))
- if self.verbose:
- print ' ', self.root_node
- self.find_possibilities()
- if verbose and not self.verbose:
- print self.root_node
- return self.root_node
- def rewrite_all(self, verbose=False):
- i = 0
- while self.rewrite(verbose=verbose):
- i += 1
- if i > 100:
- print 'Too many rewrite steps, aborting...'
- break
- if not verbose or not i:
- return self.root_node
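- # rewrite_all() keeps applying the best suggestion until none is left;
- # the limit of 100 steps above is a safeguard against rewrite loops.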
- #def hook_run(self, filename, retval):
- # return retval
- # ---------------------------------------------------------------
- # These methods are the python handlers for the bison targets.
- # (which get called by the bison code each time the corresponding
- # parse target is unambiguously reached)
- #
- # WARNING - don't touch the method docstrings unless you know what
- # you are doing - they are in bison rule syntax, and are passed
- # verbatim to bison to build the parser engine library.
- # ---------------------------------------------------------------
- # Declare the start target here (by name)
- start = 'input'
- def on_input(self, target, option, names, values):
- """
- input :
- | input line
- """
- if option == 1:
- # Interactive mode is enabled if the term rewriting system is used
- # as a shell. In that case, it is useful for the shell to print the
- # output of the evaluation.
- if self.interactive and values[1]: # pragma: nocover
- print values[1]
- return values[1]
- def on_line(self, target, option, names, values):
- """
- line : NEWLINE
- | exp NEWLINE
- | debug NEWLINE
- | HINT NEWLINE
- | POSSIBILITIES NEWLINE
- | REWRITE NEWLINE
- | REWRITE NUMBER NEWLINE
- | REWRITE_ALL NEWLINE
- | REWRITE_ALL_VERBOSE NEWLINE
- | RAISE NEWLINE
- """
- if option in (1, 2): # rule: {exp,debug} NEWLINE
- self.set_root_node(values[0])
- return values[0]
- if option == 3: # rule: HINT NEWLINE
- self.display_hint()
- return
- if option == 4: # rule: POSSIBILITIES NEWLINE
- self.display_possibilities()
- return
- if option == 5: # rule: REWRITE NEWLINE
- return self.rewrite()
- if option == 6: # rule: REWRITE NUMBER NEWLINE
- self.rewrite(int(values[1]))
- return self.root_node
- if option in (7, 8): # rule: REWRITE_ALL NEWLINE | REWRITE_ALL_VERBOSE NEWLINE
- return self.rewrite_all(verbose=(option == 8))
- if option == 9:
- raise RuntimeError('on_line: exception raised')
- def on_debug(self, target, option, names, values):
- """
- debug : GRAPH exp
- """
- if option == 0:
- print generate_graph(negation_to_node(values[1]))
- return values[1]
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_exp(self, target, option, names, values):
- """
- exp : NUMBER
- | IDENTIFIER
- | LPAREN exp RPAREN
- | unary
- | binary
- | nary
- """
- # | concat
- if option == 0: # rule: NUMBER
- # TODO: a bit hacky, but this supports both (long) integers and floats.
- value = float(values[0]) if '.' in values[0] else int(values[0])
- return Leaf(value)
- if option == 1: # rule: IDENTIFIER
- return Leaf(values[0])
- if option == 2: # rule: LPAREN exp RPAREN
- return values[1]
- if 3 <= option <= 5: # rule: unary | binary | nary
- return values[0]
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_unary(self, target, option, names, values):
- """
- unary : MINUS exp
- | FUNCTION_LPAREN exp RPAREN
- | FUNCTION exp
- | DERIVATIVE exp
- | bracket_derivative
- | INTEGRAL exp
- | integral_bounds TIMES exp %prec INTEGRAL
- | LBRACKET exp RBRACKET lbnd ubnd
- | PIPE exp PIPE
- """
- if option == 0: # rule: MINUS exp
- values[1].negated += 1
- return values[1]
- if option in (1, 2): # rule: FUNCTION_LPAREN exp RPAREN | FUNCTION exp
- op = values[0].split(' ', 1)[0]
- if op == 'ln':
- return Node(OP_LOG, values[1], Leaf(E))
- if values[1].is_op(OP_COMMA):
- return Node(op, *values[1])
- if op == OP_VALUE_MAP[OP_LOG]:
- return Node(OP_LOG, values[1], Leaf(DEFAULT_LOGARITHM_BASE))
- m = re.match(r'^log_([0-9]+|[a-zA-Z])', op)
- if m:
- value = m.group(1)
- if value.isdigit():
- value = int(value)
- return Node(OP_LOG, values[1], Leaf(value))
- return Node(op, values[1])
- if option == 3: # rule: DERIVATIVE exp
- # DERIVATIVE looks like 'd/d*x*' -> extract the 'x'
- return Node(OP_DER, values[1], Leaf(values[0][-2]))
- if option == 4: # rule: bracket_derivative
- return values[0]
- if option == 5: # rule: INTEGRAL exp
- fx, x = find_integration_variable(values[1])
- return Node(OP_INT, fx, x)
- if option == 6: # rule: integral_bounds TIMES exp
- lbnd, ubnd = values[0]
- fx, x = find_integration_variable(values[2])
- return Node(OP_INT, fx, x, lbnd, ubnd)
- if option == 7: # rule: LBRACKET exp RBRACKET lbnd ubnd
- return Node(OP_INT_INDEF, values[1], values[3], values[4])
- if option == 8: # rule: PIPE exp PIPE
- return Node(OP_ABS, values[1])
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_integral_bounds(self, target, option, names, values):
- """
- integral_bounds : INTEGRAL lbnd ubnd
- """
- if option == 0: # rule: INTEGRAL lbnd ubnd
- return values[1], values[2]
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_lbnd(self, target, option, names, values):
- """
- lbnd : SUB exp
- """
- if option == 0: # rule: SUB exp
- return values[1]
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_ubnd(self, target, option, names, values):
- """
- ubnd : POW exp
- """
- if option == 0: # rule: POW exp
- return values[1]
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_power(self, target, option, names, values):
- """
- power : exp POW exp
- """
- if option == 0: # rule: exp POW exp
- return values[0], values[2]
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_bracket_derivative(self, target, option, names, values):
- """
- bracket_derivative : LBRACKET exp RBRACKET PRIME
- | bracket_derivative PRIME
- """
- if option == 0: # rule: LBRACKET exp RBRACKET PRIME
- return Node(OP_DER, values[1])
- if option == 1: # rule: bracket_derivative PRIME
- return Node(OP_DER, values[0])
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_binary(self, target, option, names, values):
- """
- binary : exp PLUS exp
- | exp TIMES exp
- | exp DIVIDE exp
- | exp EQ exp
- | exp AND exp
- | exp OR exp
- | exp MINUS exp
- | power
- """
- if 0 <= option <= 5: # rule: exp {PLUS,TIMES,DIVIDE,EQ,AND,OR} exp
- return Node(values[1], values[0], values[2])
- if option == 6: # rule: exp MINUS exp
- right = values[2]
- right.negated += 1
- # Explicitly call the hook handler on the created unary negation.
- self.hook_handler('unary', 0, names, values, right)
- return Node(OP_ADD, values[0], right)
- if option == 7: # rule: power
- return Node(OP_POW, *values[0])
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- def on_nary(self, target, option, names, values):
- """
- nary : exp COMMA exp
- """
- if option == 0: # rule: exp COMMA exp
- return Node(*combine(',', OP_COMMA, values[0], values[2]))
- raise BisonSyntaxError('Unsupported option %d in target "%s".'
- % (option, target)) # pragma: nocover
- # -----------------------------------------
- # Special tokens and operator tokens
- # -----------------------------------------
- operators = ''
- functions = []
- for token in SPECIAL_TOKENS:
- if len(token) > 1:
- operators += '"%s"%s{ returntoken(IDENTIFIER); }\n' \
- % (token, ' ' * (8 - len(token)))
- for op_str, op in OP_MAP.iteritems():
- if TOKEN_MAP[op] == 'FUNCTION':
- functions.append(op_str)
- else:
- operators += '"%s"%s{ returntoken(%s); }\n' \
- % (op_str, ' ' * (8 - len(op_str)), TOKEN_MAP[op])
- # Put all functions in a single regex
- if functions:
- operators += '("%s")[ ]*"(" { returntoken(FUNCTION_LPAREN); }\n' \
- % '"|"'.join(functions)
- operators += '("%s") { returntoken(FUNCTION); }\n' \
- % '"|"'.join(functions)
- # -----------------------------------------
- # raw lex script, verbatim here
- # -----------------------------------------
- lexscript = r"""
- %top{
- #include "Python.h"
- }
- %{
- #define YYSTYPE void *
- #include "tokens.h"
- extern void *py_parser;
- extern void (*py_input)(PyObject *parser, char *buf, int *result,
- int max_size);
- #define returntoken(tok) \
- yylval = PyString_FromString(strdup(yytext)); return (tok);
- #define YY_INPUT(buf,result,max_size) { \
- (*py_input)(py_parser, buf, &result, max_size); \
- }
- int yycolumn = 0;
- #define YY_USER_ACTION \
- yylloc.first_line = yylloc.last_line = yylineno; \
- yylloc.first_column = yycolumn; \
- yylloc.last_column = yycolumn + yyleng; \
- yycolumn += yyleng;
- %}
- %option yylineno
- %%
- d[ ]*"/"[ ]*"d*"[a-z]"*" { returntoken(DERIVATIVE); }
- [0-9]+"."?[0-9]* { returntoken(NUMBER); }
- [a-zA-Z] { returntoken(IDENTIFIER); }
- "(" { returntoken(LPAREN); }
- ")" { returntoken(RPAREN); }
- "[" { returntoken(LBRACKET); }
- "]" { returntoken(RBRACKET); }
- "'" { returntoken(PRIME); }
- "|" { returntoken(PIPE); }
- log_([0-9]+|[a-zA-Z])"*(" { returntoken(FUNCTION_LPAREN); }
- log_([0-9]+|[a-zA-Z])"*" { returntoken(FUNCTION); }
- """ + operators + r"""
- "raise" { returntoken(RAISE); }
- "graph" { returntoken(GRAPH); }
- "quit" { yyterminate(); returntoken(QUIT); }
- [ \t\v\f] { }
- [\n] { yycolumn = 0; returntoken(NEWLINE); }
- . { printf("unknown char %c ignored.\n", yytext[0]); }
- %%
- yywrap() { return(1); }
- """
- #int[ ]*"(" { returntoken(FUNCTION_LPAREN); }