Forráskód Böngészése

Started with constructing an expression tree in the parser.

Sander Mathijs van Veen 14 éve
szülő
commit
bc910e29ef

+ 0 - 0
external/__init__.py


+ 1 - 1
external/graph_drawing

@@ -1 +1 @@
-Subproject commit 1d05417ac83c39ec3a75486ee842e8195565d75a
+Subproject commit 45bee0e4fe53619176d85fc0bc5ae3b40d99716d

+ 1 - 1
external/pybison

@@ -1 +1 @@
-Subproject commit 3d541647e811564761853d7b7694fa6236ef132f
+Subproject commit 0bdb26228364e4f7dda5e3537785727c02040b37

+ 9 - 1
external/rules.mk

@@ -16,6 +16,14 @@ $(b)pybison/bisondynlib-linux.o $(b)pybison/bison_.o: | $(b)pybison
 $(b)pybison/bisondynlib-linux.o: $(d)pybison/src/c/bisondynlib-linux.c
 $(b)pybison/bison_.o: $(b)pybison/bison_.c
 
+PYREX := 1
+
+ifdef PYREX
+py2c := pyrexc
+else
+py2c := cython -Wextra -Werror --fast-fail --line-directives
+endif
+
 $(b)pybison/%.c: $(d)pybison/src/pyrex/%.pyx
-	pyrexc -o $@ $<
+	$(py2c) -o $@ $<
 

+ 1 - 2
src/calc.py

@@ -5,7 +5,6 @@ A simple pybison parser program implementing a calculator
 
 from __future__ import division
 from sympy import Symbol
-from logger import filter_non_ascii
 
 import os.path
 PYBISON_BUILD = os.path.realpath('build/external/pybison')
@@ -15,7 +14,7 @@ import sys
 sys.path.insert(0, PYBISON_BUILD)
 sys.path.insert(1, PYBISON_PYREX)
 
-from bison import BisonParser, BisonNode
+from bison import BisonParser
 
 class Parser(BisonParser):
     """

+ 18 - 0
src/expression.py

@@ -0,0 +1,18 @@
+#class Expression(object):
+#    """Class used to hold a mathematical expression."""
+#
+#    magic_operator_map = {
+#            int.__add__: '%s + %s',
+#            int.__sub__: '%s - %s',
+#            int.__mul__: '%s * %s',
+#            int.__div__: '%s / %s',
+#            int.__neg__: '-%s',
+#            int.__pow__: '%s**%s',
+#            }
+#
+#    def __init__(self, operator, *args):
+#        super(Expression, self).__init__()
+#        self.operator, self.args = args[0], args[1:]
+#
+#    def __str__(self):
+#        return self.magic_operator_map[self.operator] % self.args

+ 59 - 83
src/node.py

@@ -4,124 +4,100 @@ import sys
 sys.path.insert(0, os.path.realpath('external'))
 
 from graph_drawing.graph import generate_graph
+from graph_drawing.line import generate_line
+from graph_drawing.node import Node, Leaf
 
 
-class ExpressionNode(object):
-    def __init__(self, operator, *args):
-        super(ExpressionNode, self).__init__()
-        self.operator, self.args = operator, list(args)
+#NODE_TYPE = 0
+#NODE_
 
-        for a in self.args:
-            a.parent = self
+class ExpressionNode(Node):
+    def __init__(self, *args, **kwargs):
+        super(ExpressionNode, self).__init__(*args, **kwargs)
+        #self.type = NODE_TYPE
 
-    def title(self):
-        return self.operator
+    def __str__(self):
+        return generate_line(self)
 
     def replace(self, node):
-        pos = self.parent.args.index(self)
-        self.parent.args[pos] = node
+        pos = self.parent.nodes.index(self)
+        self.parent.nodes[pos] = node
         node.parent = self.parent
         self.parent = None
 
-    def __iter__(self):
-        return iter(self.args)
-
-    def __len__(self):
-        return len(self.args)
-
-    def __getitem__(self, n):
-        return self.args[n]
-
-    def __setitem__(self, n, arg):
-        self.args[n] = arg
-
-    def __str__(self):
-        return generate_graph(self, ExpressionNode)
-
-class ExpressionLeaf(object):
-    def __init__(self, value):
-        super(ExpressionLeaf, self).__init__()
-        self.value = value
+    def graph(self):
+        return generate_graph(self)
 
+class ExpressionLeaf(Leaf):
     def replace(self, node):
         if not hasattr(self, 'parent'):
             return
 
-        pos = self.parent.args.index(self)
-        self.parent.args[pos] = node
+        pos = self.parent.nodes.index(self)
+        self.parent.nodes[pos] = node
         node.parent = self.parent
         self.parent = None
 
-    def title(self):
-        return str(self.value)
-
-    def __add__(self, b):
-        return self.value + b.value
-
-    def __repr__(self):
-        return repr(self.value)
-
-    def __str__(self):
-        return str(self.value)
 
-l0 = ExpressionLeaf(3)
-l1 = ExpressionLeaf(4)
-l2 = ExpressionLeaf(5)
-l3 = ExpressionLeaf(7)
+if __name__ == '__main__':
+    l0 = ExpressionLeaf(3)
+    l1 = ExpressionLeaf(4)
+    l2 = ExpressionLeaf(5)
+    l3 = ExpressionLeaf(7)
 
-n0 = ExpressionNode('+', l0, l1)
-n1 = ExpressionNode('+', l2, l3)
-n2 = ExpressionNode('*', n0, n1)
+    n0 = ExpressionNode('+', l0, l1)
+    n1 = ExpressionNode('+', l2, l3)
+    n2 = ExpressionNode('*', n0, n1)
 
-print n2
+    print n2
 
-N = ExpressionNode
+    N = ExpressionNode
 
-def rewrite_multiply(node):
-    a, b = node[0]
-    c, d = node[1]
+    def rewrite_multiply(node):
+        a, b = node[0]
+        c, d = node[1]
 
-    ac = N('*', a, c)
-    ad = N('*', a, d)
-    bc = N('*', b, c)
-    bd = N('*', b, d)
+        ac = N('*', a, c)
+        ad = N('*', a, d)
+        bc = N('*', b, c)
+        bd = N('*', b, d)
 
-    res = N('+', N('+', N('+', ac, ad), bc), bd)
+        res = N('+', N('+', N('+', ac, ad), bc), bd)
 
-    return res
+        return res
 
-possibilities = [
-        (n0, lambda (x,y): ExpressionLeaf(x + y)),
-        (n1, lambda (x,y): ExpressionLeaf(x + y)),
-        (n2, rewrite_multiply),
-        ]
+    possibilities = [
+            (n0, lambda (x,y): ExpressionLeaf(x.value + y.value)),
+            (n1, lambda (x,y): ExpressionLeaf(x.value + y.value)),
+            (n2, rewrite_multiply),
+            ]
 
-print '\n--- after rule 2 ---\n'
+    print '\n--- after rule 2 ---\n'
 
-n_, method = possibilities[2]
-new = method(n_)
+    n_, method = possibilities[2]
+    new = method(n_)
 
-print new
+    print new
 
-print '\n--- original graph ---\n'
+    print '\n--- original graph ---\n'
 
-print n2
+    print n2
 
-print '\n--- apply rule 0 ---\n'
+    print '\n--- apply rule 0 ---\n'
 
-n_, method = possibilities[0]
-new = method(n_)
-n_.replace(new)
+    n_, method = possibilities[0]
+    new = method(n_)
+    n_.replace(new)
 
-print n2
+    print n2
 
-# Revert rule 0
-new.replace(n_)
+    # Revert rule 0
+    new.replace(n_)
 
-print '\n--- apply rule 1 ---\n'
+    print '\n--- apply rule 1 ---\n'
 
-n_, method = possibilities[1]
-new = method(n_)
-n_.replace(new)
+    n_, method = possibilities[1]
+    new = method(n_)
+    n_.replace(new)
 
-print n2
+    print n2

+ 265 - 0
src/parser.py

@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+"""
+This parser will parse the given input and build an expression tree. Grammar
+file for the supported mathematical expressions.
+"""
+
+from node import ExpressionNode as Node, ExpressionLeaf as Leaf
+
+import argparse
+
+import os.path
+PYBISON_BUILD = os.path.realpath('build/external/pybison')
+PYBISON_PYREX = os.path.realpath('external/pybison/src/pyrex')
+
+import sys
+sys.path.insert(0, PYBISON_BUILD)
+sys.path.insert(1, PYBISON_PYREX)
+
+from bison import BisonParser, ParserSyntaxError
+
+class Parser(BisonParser):
+    """
+    Implements the calculator parser. Grammar rules are defined in the method
+    docstrings. Scanner rules are in the 'lexscript' attribute.
+    """
+
+    # Output directory of generated pybison files, including a trailing slash.
+    buildDirectory = PYBISON_BUILD + '/'
+
+    # ----------------------------------------------------------------
+    # lexer tokens - these must match those in your lex script (below)
+    # ----------------------------------------------------------------
+    # TODO: add a runtime check to verify that this token list matches the
+    # list of tokens of the lex script.
+    tokens = ['NUMBER', 'IDENTIFIER',
+              'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POW',
+              'LPAREN', 'RPAREN', 'COMMA',
+              'NEWLINE', 'QUIT', 'RAISE']
+
+    # ------------------------------
+    # precedences
+    # ------------------------------
+    precedences = (
+        ('left', ('MINUS', 'PLUS')),
+        ('left', ('TIMES', 'DIVIDE')),
+        ('left', ('NEG', )),
+        ('right', ('POW', )),
+        )
+
+    interactive = 0
+
+    def __init__(self, **kwargs):
+        BisonParser.__init__(self, **kwargs)
+        self.interactive = kwargs.get('interactive', 0)
+        self.timeout = kwargs.get('timeout', 0)
+
+    # ------------------------------------------------------------------
+    # override default read method with a version that prompts for input
+    # ------------------------------------------------------------------
+    def read(self, nbytes):
+        try:
+            return raw_input('>>> ') + '\n'
+        except EOFError:
+            return ''
+
+    # ---------------------------------------------------------------
+    # These methods are the python handlers for the bison targets.
+    # (which get called by the bison code each time the corresponding
+    # parse target is unambiguously reached)
+    #
+    # WARNING - don't touch the method docstrings unless you know what
+    # you are doing - they are in bison rule syntax, and are passed
+    # verbatim to bison to build the parser engine library.
+    # ---------------------------------------------------------------
+
+    # Declare the start target here (by name)
+    start = 'input'
+
+    def on_input(self, target, option, names, values):
+        """
+        input :
+              | input line
+        """
+
+        if option == 1:
+            # Interactive mode is enabled if the term rewriting system is used
+            # as a shell. In that case, it is useful that the shell prints the
+            # output of the evaluation.
+            if self.interactive and values[1]:
+                print 'result:', values[1]
+
+            return values[1]
+
+    def on_line(self, target, option, names, values):
+        """
+        line : NEWLINE
+             | exp NEWLINE
+             | RAISE NEWLINE
+        """
+        if option == 1:
+            return values[0]
+
+        if option == 2:
+            raise RuntimeError('on_line: exception raised')
+
+    def on_exp(self, target, option, names, values):
+        """
+        exp : NUMBER
+            | IDENTIFIER
+            | exp PLUS exp
+            | exp MINUS exp
+            | exp TIMES exp
+            | exp DIVIDE exp
+            | MINUS exp %prec NEG
+            | exp POW exp
+            | LPAREN exp RPAREN
+            | symbolic
+        """
+
+        # rule: NUMBER
+        if option == 0:
+            # TODO: A bit hacky, this achieves long integers and floats.
+            value = float(values[0]) if '.' in values[0] else int(values[0])
+            return Leaf(value)
+
+        # rule: IDENTIFIER
+        if option == 1:
+            return Leaf(values[0])
+
+        # rule: LPAREN exp RPAREN
+        if option == 8:
+            return values[1]
+
+        # rule: symbolic
+        if option == 9:
+            return values[0]
+
+        # Flatten associative operators: reuse a same-operator child's operands
+        combine = lambda op, n: n.nodes if n.title() == op else [n]
+
+        # rule: exp PLUS exp
+        if option == 2:
+            return Node('+', *(combine('+', values[0]) + combine('+', values[2])))
+
+        # rule: exp MINUS exp (not associative and '-' is also unary: no flatten)
+        if option == 3:
+            return Node('-', values[0], values[2])
+
+        # rule: exp TIMES exp
+        if option == 4:
+            return Node('*', *(combine('*', values[0]) + combine('*', values[2])))
+
+        # rule: exp DIVIDE exp
+        if option == 5:
+            return Node('/', values[0], values[2])
+
+        # rule: MINUS exp (unary negation, precedence NEG)
+        if option == 6:
+            return Node('-', values[1])
+
+        # rule: exp POW exp
+        if option == 7:
+            return Node('^', values[0], values[2])
+
+        raise ParserSyntaxError('Unsupported option %d in target "%s".'
+                                % (option, target))
+
+    def on_symbolic(self, target, option, names, values):
+        """
+        symbolic : NUMBER IDENTIFIER
+                 | IDENTIFIER IDENTIFIER
+                 | symbolic IDENTIFIER
+                 | IDENTIFIER NUMBER
+        """
+        # rule: NUMBER IDENTIFIER
+        # rule: IDENTIFIER IDENTIFIER
+        # rule: symbolic IDENTIFIER
+        if option in [0, 1, 2]:
+            # 4x -> 4*x
+            # a b -> a * b
+            # a b c -> (a * b) * c; here values[0] is already a node, not a token
+            left = values[0] if option == 2 else Leaf(values[0])
+            return Node('*', left, Leaf(values[1]))
+
+        # rule: IDENTIFIER NUMBER
+        if option == 3:
+            # x4 -> x^4
+            return Node('^', Leaf(values[0]), Leaf(values[1]))
+
+        raise ParserSyntaxError('Unsupported option %d in target "%s".'
+                                % (option, target))
+
+    # -----------------------------------------
+    # raw lex script, verbatim here
+    # -----------------------------------------
+    lexscript = r"""
+    %{
+    //int yylineno = 0;
+    #include <stdio.h>
+    #include <string.h>
+    #include "Python.h"
+    #define YYSTYPE void *
+    #include "tokens.h"
+    extern void *py_parser;
+    extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size);
+    #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok);
+    #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); }
+    %}
+
+    %%
+
+    [0-9]+    { returntoken(NUMBER); }
+    [a-zA-Z]  { returntoken(IDENTIFIER); }
+    "("       { returntoken(LPAREN); }
+    ")"       { returntoken(RPAREN); }
+    "+"       { returntoken(PLUS); }
+    "-"       { returntoken(MINUS); }
+    "*"       { returntoken(TIMES); }
+    "^"       { returntoken(POW); }
+    "/"       { returntoken(DIVIDE); }
+    ","       { returntoken(COMMA); }
+    "quit"    { printf("lex: got QUIT\n"); yyterminate(); returntoken(QUIT); }
+    "raise"   { returntoken(RAISE); }
+
+    [ \t\v\f] {}
+    [\n]      {yylineno++; returntoken(NEWLINE); }
+    .         { printf("unknown char %c ignored, yytext=0x%lx\n", yytext[0], yytext); /* ignore bad chars */}
+
+    %%
+
+    yywrap() { return(1); }
+    """
+
+
+def get_args():
+    parser = argparse.ArgumentParser(prog='parser', description=__doc__)
+    parser.add_argument('--debug', '-d', action='store_true', default=False,
+            help='Enable debug mode in bison and flex.')
+    parser.add_argument('--verbose', '-v', action='store_true', default=False,
+            help='Enable verbose output messages (printed to stdout).')
+    parser.add_argument('--keepfiles', '-k', action='store_true', default=False,
+            help='Keep temporary generated bison and lex files.')
+    parser.add_argument('--batch', '-b', action='store_true', default=False,
+            help='Disable interactive mode and execute expressions in batch mode.')
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+
+    p = Parser(verbose=args.verbose,
+               keepfiles=args.keepfiles,
+               interactive=not args.batch)
+
+    node = p.run(debug=args.debug)
+
+    # Clear the line, when the shell exits.
+    if not args.batch:
+        print
+
+    return node
+
+
+if __name__ == '__main__':
+    main()

+ 55 - 0
src/rules.py

@@ -0,0 +1,55 @@
+from node import ExpressionLeaf as Leaf
+
+def get_factor_constants(operand):
+    op = operand.title()
+    res = []
+
+    if operand.type == OP_MUL:
+        if operand[0].type == LEAF_NUM:
+            fn()
+
+        if operand[1].type == LEAF_NUM:
+            res += operand[1]
+
+    return res
+
+def combine_plus_factors(node):
+    p = []
+
+    # Check if any numeric factors can be combined
+    def apply_numeric_factors(node, leaves):
+        return Leaf(reduce(lambda a, b: a.value + b.value, leaves))
+
+    num_nodes = []
+
+    for n in node:
+        # NUM + NUM -> NUM
+        if n.type == VAL_NUM:
+            num_nodes.append(n)
+
+    if len(num_nodes) > 1:
+        p.append((node, apply_plus_factors, num_nodes))
+
+    # Check if any variable multiplications/divisions can be combined
+    def apply_identifiers(node, operands):
+        apply_constant(lambda x: )
+        return Leaf(leaves[0].value + leaves[1].value)
+
+    id_nodes = []
+
+    for n in node:
+        # NUM *  + NUM -> NUM
+        if n.type == OP_MUL:
+            consts = get_factor_constants(n)
+
+            if len(consts) > 1:
+                id_nodes += 
+
+    if len(num_nodes) > 1:
+        p.append((node, apply_plus_factors, num_nodes))
+
+    return p
+
+rules = {
+        '+': [combine_plus_factors],
+        }

+ 39 - 0
src/suggestions.py

@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+from parser import main
+from node import ExpressionLeaf as Leaf
+from rules import rules
+
+# (node, funcptr, (args...))
+
+def get_node_possibilities(node):
+    """
+    Get all possible rewrite steps for this node.
+    """
+    op = node.title()
+    possibilities = []
+
+    # The rules table maps an operator to a LIST of rule functions.
+    for fn in rules.get(op, []):
+        possibilities += fn(node)
+
+    return possibilities
+
+def get_possibilities(node):
+    """
+    Get all possible rewrite steps for this node and its children.
+    """
+    possibilities = get_node_possibilities(node)
+
+    if not isinstance(node, Leaf):
+        possibilities += [get_possibilities(n) for n in node]
+
+    return possibilities
+
+if __name__ == '__main__':
+    node = main()
+    print 'node:', node
+
+    p = get_possibilities(node)
+    print ' p: -------------'
+    print '\n'.join(p),
+    print '----------------'

+ 27 - 24
tests/parser.py

@@ -1,41 +1,44 @@
 import sys
 
-from src.calc import Parser
 
+class ParserWrapper(object):
 
-class TestParser(Parser):
-
-    def __init__(self, **kwargs):
-        Parser.__init__(self, **kwargs)
-
+    def __init__(self, base_class, **kwargs):
         self.input_buffer = []
         self.input_position = 0
 
-    def run(self, input_buffer, *args, **kwargs):
-        map(self.append, input_buffer)
-        return Parser.run(self, *args, **kwargs)
+        self.verbose = kwargs.get('verbose', False)
 
-    def append(self, input):
-        self.input_buffer.append(input + '\n')
+        # Overwrite parser read() method
+        def read(nbytes):
+            buf = ''
 
-    def read(self, nbytes):
-        buffer = ''
+            try:
+                buf = self.input_buffer[self.input_position]
 
-        try:
-            buffer = self.input_buffer[self.input_position]
+                if self.verbose:
+                    print 'read:', buf
+            except IndexError:
+                return ''
 
-            if self.verbose:
-                print 'read:', buffer
-        except IndexError:
-            return ''
+            self.input_position += 1
 
-        self.input_position += 1
+            return buf
 
-        return buffer
+        self.parser = base_class(**kwargs)
+        self.parser.read = read
+
+
+    def run(self, input_buffer, *args, **kwargs):
+        map(self.append, input_buffer)
+        return self.parser.run(*args, **kwargs)
+
+    def append(self, input):
+        self.input_buffer.append(input + '\n')
 
 
-def run_expressions(expressions, keepfiles=1, fail=True, silent=False,
-        verbose=0):
+def run_expressions(base_class, expressions, keepfiles=1, fail=True,
+        silent=False, verbose=0):
     """
     Run a list of mathematical expression through the term rewriting system and
     check if the output matches the expected output. The list of EXPRESSIONS
@@ -55,7 +58,7 @@ def run_expressions(expressions, keepfiles=1, fail=True, silent=False,
     higher value will print more types of debug messages.
     """
 
-    parser = TestParser(keepfiles=keepfiles, verbose=verbose)
+    parser = ParserWrapper(base_class, keepfiles=keepfiles, verbose=verbose)
 
     for exp, out in expressions:
         res = None

+ 5 - 4
tests/test_calc.py

@@ -1,6 +1,7 @@
 import unittest
 
-from tests.parser import TestParser, run_expressions
+from src.calc import Parser
+from tests.parser import ParserWrapper, run_expressions
 
 
 class TestCalc(unittest.TestCase):
@@ -12,7 +13,7 @@ class TestCalc(unittest.TestCase):
         pass
 
     def test_constructor(self):
-        assert TestParser(keepfiles=1).run(['1+4']) == 5.0
+        assert ParserWrapper(Parser, keepfiles=1).run(['1+4']) == 5.0
 
     def test_basic_on_exp(self):
         expressions = [('4', 4.0),
@@ -23,7 +24,7 @@ class TestCalc(unittest.TestCase):
                        ('3^4', 81.0),
                        ('(4)', 4.0)]
 
-        run_expressions(expressions)
+        run_expressions(Parser, expressions)
 
     def test_infinity(self):
         expressions = [('2^3000', 2**3000),
@@ -31,4 +32,4 @@ class TestCalc(unittest.TestCase):
         #               ('2^99999999999', None),
         #               ('2^-99999999999', 0.0)]
 
-        run_expressions(expressions)
+        run_expressions(Parser, expressions)

+ 36 - 0
tests/test_parser.py

@@ -0,0 +1,36 @@
+# vim: set fileencoding=utf-8 :
+import unittest
+
+from external.graph_drawing.graph import generate_graph
+from external.graph_drawing.line import generate_line
+
+from src.parser import Parser
+from src.node import ExpressionNode as Node, ExpressionLeaf as Leaf
+from tests.parser import ParserWrapper, run_expressions
+
+
+def graph(*exp, **kwargs):
+    return generate_graph(ParserWrapper(Parser, **kwargs).run(exp))
+
+
+def line(*exp, **kwargs):
+    return generate_line(ParserWrapper(Parser, **kwargs).run(exp))
+
+
+class TestParser(unittest.TestCase):
+    def test_constructor(self):
+        node = Node('+', Leaf(1), Leaf(4))
+        self.assertEqual(ParserWrapper(Parser).run(['1 + 4']), node)
+
+    def test_identifiers(self):
+        run_expressions(Parser, [('a', Leaf('a'))])
+
+    def test_graph(self):
+        assert graph('4a') == ("""
+         *
+        ╭┴╮
+        4 a
+        """).replace('\n        ', '\n')[1:-1]
+
+    def test_line(self):
+        self.assertEqual(line('4a'), '4 * a')

+ 5 - 4
tests/test_variables.py

@@ -1,6 +1,7 @@
 import unittest
 
-from tests.parser import TestParser, run_expressions
+from src.calc import Parser
+from tests.parser import run_expressions
 from sympy import Symbol, symbols
 
 
@@ -14,17 +15,17 @@ class TestVariables(unittest.TestCase):
 
     def test_addition(self):
         expressions = [('5 + 5', 5 + 5)]
-        run_expressions(expressions)
+        run_expressions(Parser, expressions)
 
     def test_addition_of_one_term(self):
         a = Symbol('a')
         expressions = [('a + 5', 5 + a)]
-        run_expressions(expressions)
+        run_expressions(Parser, expressions)
 
     def test_addition_of_two_terms(self):
         a, b = symbols('a,b')
         expressions = [('4*a + 5*b', 4*a + 5*b)]
-        run_expressions(expressions)
+        run_expressions(Parser, expressions)
 
     #def test_short_addition_of_two_terms(self):
     #    a, b = symbols('a,b')