Started with constructing an expression tree in the parser.

parent 7a5f11a6
graph_drawing @ 45bee0e4
Subproject commit 1d05417ac83c39ec3a75486ee842e8195565d75a
Subproject commit 45bee0e4fe53619176d85fc0bc5ae3b40d99716d
pybison @ 0bdb2622
Subproject commit 3d541647e811564761853d7b7694fa6236ef132f
Subproject commit 0bdb26228364e4f7dda5e3537785727c02040b37
......@@ -16,6 +16,14 @@ $(b)pybison/bisondynlib-linux.o $(b)pybison/bison_.o: | $(b)pybison
$(b)pybison/bisondynlib-linux.o: $(d)pybison/src/c/bisondynlib-linux.c
$(b)pybison/bison_.o: $(b)pybison/bison_.c
PYREX := 1
ifdef PYREX
py2c := pyrexc
else
py2c := cython -Wextra -Werror --fast-fail --line-directives
endif
$(b)pybison/%.c: $(d)pybison/src/pyrex/%.pyx
pyrexc -o $@ $<
$(py2c) -o $@ $<
......@@ -5,7 +5,6 @@ A simple pybison parser program implementing a calculator
from __future__ import division
from sympy import Symbol
from logger import filter_non_ascii
import os.path
PYBISON_BUILD = os.path.realpath('build/external/pybison')
......@@ -15,7 +14,7 @@ import sys
sys.path.insert(0, PYBISON_BUILD)
sys.path.insert(1, PYBISON_PYREX)
from bison import BisonParser, BisonNode
from bison import BisonParser
class Parser(BisonParser):
"""
......
#class Expression(object):
# """Class used to hold a mathematical expression."""
#
# magic_operator_map = {
# int.__add__: '%s + %s',
# int.__sub__: '%s - %s',
# int.__mul__: '%s * %s',
# int.__div__: '%s / %s',
# int.__neg__: '-%s',
# int.__pow__: '%s**%s',
# }
#
# def __init__(self, operator, *args):
# super(Expression, self).__init__()
# self.operator, self.args = args[0], args[1:]
#
# def __str__(self):
# return self.magic_operator_map[self.operator] % self.args
......@@ -4,80 +4,56 @@ import sys
sys.path.insert(0, os.path.realpath('external'))
from graph_drawing.graph import generate_graph
from graph_drawing.line import generate_line
from graph_drawing.node import Node, Leaf
class ExpressionNode(object):
def __init__(self, operator, *args):
super(ExpressionNode, self).__init__()
self.operator, self.args = operator, list(args)
#NODE_TYPE = 0
#NODE_
for a in self.args:
a.parent = self
class ExpressionNode(Node):
def __init__(self, *args, **kwargs):
super(ExpressionNode, self).__init__(*args, **kwargs)
#self.type = NODE_TYPE
def title(self):
return self.operator
def __str__(self):
return generate_line(self)
def replace(self, node):
pos = self.parent.args.index(self)
self.parent.args[pos] = node
pos = self.parent.nodes.index(self)
self.parent.nodes[pos] = node
node.parent = self.parent
self.parent = None
def __iter__(self):
return iter(self.args)
def __len__(self):
return len(self.args)
def __getitem__(self, n):
return self.args[n]
def __setitem__(self, n, arg):
self.args[n] = arg
def __str__(self):
return generate_graph(self, ExpressionNode)
class ExpressionLeaf(object):
def __init__(self, value):
super(ExpressionLeaf, self).__init__()
self.value = value
def graph(self):
return generate_graph(self)
class ExpressionLeaf(Leaf):
def replace(self, node):
if not hasattr(self, 'parent'):
return
pos = self.parent.args.index(self)
self.parent.args[pos] = node
pos = self.parent.nodes.index(self)
self.parent.nodes[pos] = node
node.parent = self.parent
self.parent = None
def title(self):
return str(self.value)
def __add__(self, b):
return self.value + b.value
def __repr__(self):
return repr(self.value)
def __str__(self):
return str(self.value)
l0 = ExpressionLeaf(3)
l1 = ExpressionLeaf(4)
l2 = ExpressionLeaf(5)
l3 = ExpressionLeaf(7)
if __name__ == '__main__':
l0 = ExpressionLeaf(3)
l1 = ExpressionLeaf(4)
l2 = ExpressionLeaf(5)
l3 = ExpressionLeaf(7)
n0 = ExpressionNode('+', l0, l1)
n1 = ExpressionNode('+', l2, l3)
n2 = ExpressionNode('*', n0, n1)
n0 = ExpressionNode('+', l0, l1)
n1 = ExpressionNode('+', l2, l3)
n2 = ExpressionNode('*', n0, n1)
print n2
print n2
N = ExpressionNode
N = ExpressionNode
def rewrite_multiply(node):
def rewrite_multiply(node):
a, b = node[0]
c, d = node[1]
......@@ -90,38 +66,38 @@ def rewrite_multiply(node):
return res
possibilities = [
(n0, lambda (x,y): ExpressionLeaf(x + y)),
(n1, lambda (x,y): ExpressionLeaf(x + y)),
possibilities = [
(n0, lambda (x,y): ExpressionLeaf(x.value + y.value)),
(n1, lambda (x,y): ExpressionLeaf(x.value + y.value)),
(n2, rewrite_multiply),
]
print '\n--- after rule 2 ---\n'
print '\n--- after rule 2 ---\n'
n_, method = possibilities[2]
new = method(n_)
n_, method = possibilities[2]
new = method(n_)
print new
print new
print '\n--- original graph ---\n'
print '\n--- original graph ---\n'
print n2
print n2
print '\n--- apply rule 0 ---\n'
print '\n--- apply rule 0 ---\n'
n_, method = possibilities[0]
new = method(n_)
n_.replace(new)
n_, method = possibilities[0]
new = method(n_)
n_.replace(new)
print n2
print n2
# Revert rule 0
new.replace(n_)
# Revert rule 0
new.replace(n_)
print '\n--- apply rule 1 ---\n'
print '\n--- apply rule 1 ---\n'
n_, method = possibilities[1]
new = method(n_)
n_.replace(new)
n_, method = possibilities[1]
new = method(n_)
n_.replace(new)
print n2
print n2
#!/usr/bin/env python
"""
This parser will parse the given input and build an expression tree. Grammar
file for the supported mathematical expressions.
"""
from node import ExpressionNode as Node, ExpressionLeaf as Leaf
import argparse
import os.path
PYBISON_BUILD = os.path.realpath('build/external/pybison')
PYBISON_PYREX = os.path.realpath('external/pybison/src/pyrex')
import sys
sys.path.insert(0, PYBISON_BUILD)
sys.path.insert(1, PYBISON_PYREX)
from bison import BisonParser, ParserSyntaxError
class Parser(BisonParser):
"""
Implements the calculator parser. Grammar rules are defined in the method
docstrings. Scanner rules are in the 'lexscript' attribute.
"""
# Output directory of generated pybison files, including a trailing slash.
buildDirectory = PYBISON_BUILD + '/'
# ----------------------------------------------------------------
# lexer tokens - these must match those in your lex script (below)
# ----------------------------------------------------------------
# TODO: add a runtime check to verify that this token list matches the list
# of tokens in the lex script.
tokens = ['NUMBER', 'IDENTIFIER',
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POW',
'LPAREN', 'RPAREN', 'COMMA',
'NEWLINE', 'QUIT', 'RAISE']
# ------------------------------
# precedences
# ------------------------------
precedences = (
('left', ('MINUS', 'PLUS')),
('left', ('TIMES', 'DIVIDE')),
('left', ('NEG', )),
('right', ('POW', )),
)
interactive = 0
def __init__(self, **kwargs):
    """
    Initialise the underlying BisonParser with all keyword arguments and
    remember the 'interactive' and 'timeout' settings (both default to 0).
    """
    BisonParser.__init__(self, **kwargs)

    # Copy the settings this class cares about onto the instance.
    for setting in ('interactive', 'timeout'):
        setattr(self, setting, kwargs.get(setting, 0))
# ------------------------------------------------------------------
# override default read method with a version that prompts for input
# ------------------------------------------------------------------
def read(self, nbytes):
    """
    Prompt for one line of input and return it with a trailing newline.
    An empty string is returned when the input stream is exhausted (EOF).
    The 'nbytes' argument is accepted but not used.
    """
    try:
        typed = raw_input('>>> ')
    except EOFError:
        # End of input: hand back an empty string instead of a line.
        return ''

    return typed + '\n'
# ---------------------------------------------------------------
# These methods are the python handlers for the bison targets.
# (which get called by the bison code each time the corresponding
# parse target is unambiguously reached)
#
# WARNING - don't touch the method docstrings unless you know what
# you are doing - they are in bison rule syntax, and are passed
# verbatim to bison to build the parser engine library.
# ---------------------------------------------------------------
# Declare the start target here (by name)
start = 'input'
def on_input(self, target, option, names, values):
"""
input :
| input line
"""
if option == 1:
# Interactive mode is enabled if the term rewriting system is used
# as a shell. In that case, it is useful that the shell prints the
# output of the evaluation.
if self.interactive and values[1]:
print 'result:', values[1]
return values[1]
def on_line(self, target, option, names, values):
    """
    line : NEWLINE
         | exp NEWLINE
         | RAISE NEWLINE
    """
    # The docstring above is the bison rule and must stay as it is.

    # rule: RAISE NEWLINE -- deliberately raise an exception.
    if option == 2:
        raise RuntimeError('on_line: exception raised')

    # rule: exp NEWLINE -- pass the expression value on; an empty line
    # (option 0) yields None.
    return values[0] if option == 1 else None
def on_exp(self, target, option, names, values):
    """
    exp : NUMBER
        | IDENTIFIER
        | exp PLUS exp
        | exp MINUS exp
        | exp TIMES exp
        | exp DIVIDE exp
        | MINUS exp %prec NEG
        | exp POW exp
        | LPAREN exp RPAREN
        | symbolic
    """
    # The docstring above is the bison rule for this target and must stay
    # as it is. 'option' is the index of the matched alternative and
    # 'values' holds the values of the symbols on its right-hand side.

    # rule: NUMBER
    if option == 0:
        # TODO: A bit hacky, this achieves long integers and floats.
        value = float(values[0]) if '.' in values[0] else int(values[0])
        return Leaf(value)

    # rule: IDENTIFIER
    if option == 1:
        return Leaf(values[0])

    # rule: LPAREN exp RPAREN
    if option == 8:
        return values[1]

    # rule: symbolic
    if option == 9:
        return values[0]

    # Merge the operands of a child that carries the same operator, so that
    # associative operators become a single n-ary node.
    def combine(op, n):
        return n.nodes if n.title() == op else [n]

    # rule: exp PLUS exp
    if option == 2:
        return Node('+', *(combine('+', values[0]) + combine('+', values[2])))

    # rule: exp MINUS exp
    if option == 3:
        left, right = values[0], values[2]

        # Subtraction is not associative: "a - (b - c)" must not become
        # "a - b - c", so the right operand is never flattened. The left
        # operand is only flattened when it is itself a subtraction with
        # more than one operand; a unary minus node (also titled '-') has a
        # single operand and must stay intact.
        if left.title() == '-' and len(left.nodes) > 1:
            operands = list(left.nodes)
        else:
            operands = [left]

        return Node('-', *(operands + [right]))

    # rule: exp TIMES exp
    if option == 4:
        return Node('*', *(combine('*', values[0]) + combine('*', values[2])))

    # rule: exp DIVIDE exp
    if option == 5:
        return Node('/', values[0], values[2])

    # rule: MINUS exp (unary minus, precedence NEG)
    if option == 6:
        return Node('-', values[1])

    # rule: exp POW exp
    if option == 7:
        return Node('^', values[0], values[2])

    raise ParserSyntaxError('Unsupported option %d in target "%s".'
                            % (option, target))
def on_symbolic(self, target, option, names, values):
    """
    symbolic : NUMBER IDENTIFIER
             | IDENTIFIER IDENTIFIER
             | symbolic IDENTIFIER
             | IDENTIFIER NUMBER
    """
    # The docstring above is the bison rule for this target and must stay
    # as it is. 'option' is the index of the matched alternative and
    # 'values' holds the values of the symbols on its right-hand side.

    # Convert a NUMBER token to a numeric value, in the same way as the
    # NUMBER rule of on_exp does.
    def number(text):
        return float(text) if '.' in text else int(text)

    # rule: NUMBER IDENTIFIER
    if option == 0:
        # 4x -> 4*x
        return Node('*', Leaf(number(values[0])), Leaf(values[1]))

    # rule: IDENTIFIER IDENTIFIER
    if option == 1:
        # a b -> a * b
        return Node('*', Leaf(values[0]), Leaf(values[1]))

    # rule: symbolic IDENTIFIER
    if option == 2:
        # a b c -> (a * b) * c
        # values[0] is the node already built for the leading 'symbolic',
        # so it must not be wrapped in a Leaf again.
        return Node('*', values[0], Leaf(values[1]))

    # rule: IDENTIFIER NUMBER
    if option == 3:
        # x4 -> x^4
        return Node('^', Leaf(values[0]), Leaf(number(values[1])))

    raise ParserSyntaxError('Unsupported option %d in target "%s".'
                            % (option, target))
# -----------------------------------------
# raw lex script, verbatim here
# -----------------------------------------
lexscript = r"""
%{
//int yylineno = 0;
#include <stdio.h>
#include <string.h>
#include "Python.h"
#define YYSTYPE void *
#include "tokens.h"
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size);
#define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok);
#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); }
%}
%%
[0-9]+ { returntoken(NUMBER); }
[a-zA-Z] { returntoken(IDENTIFIER); }
"(" { returntoken(LPAREN); }
")" { returntoken(RPAREN); }
"+" { returntoken(PLUS); }
"-" { returntoken(MINUS); }
"*" { returntoken(TIMES); }
"^" { returntoken(POW); }
"/" { returntoken(DIVIDE); }
"," { returntoken(COMMA); }
"quit" { printf("lex: got QUIT\n"); yyterminate(); returntoken(QUIT); }
"raise" { returntoken(RAISE); }
[ \t\v\f] {}
[\n] {yylineno++; returntoken(NEWLINE); }
. { printf("unknown char %c ignored, yytext=0x%lx\n", yytext[0], yytext); /* ignore bad chars */}
%%
yywrap() { return(1); }
"""
def get_args(argv=None):
    """
    Parse the command line options of the parser program.

    argv -- optional list of argument strings to parse. When omitted (None),
            argparse falls back to the arguments of the running process.

    Returns the argparse namespace with the boolean attributes 'debug',
    'verbose', 'keepfiles' and 'batch' (all False unless the flag is given).
    """
    parser = argparse.ArgumentParser(prog='parser', description=__doc__)
    parser.add_argument('--debug', '-d', action='store_true', default=False,
            help='Enable debug mode in bison and flex.')
    parser.add_argument('--verbose', '-v', action='store_true', default=False,
            help='Enable verbose output messages (printed to stdout).')
    parser.add_argument('--keepfiles', '-k', action='store_true', default=False,
            help='Keep temporary generated bison and lex files.')
    parser.add_argument('--batch', '-b', action='store_true', default=False,
            help='Disable interactive mode and execute expressions in batch mode.')

    return parser.parse_args(argv)
def main():
    """
    Build a Parser from the command line options, run it and return the
    value produced by the run. In interactive (non-batch) mode an empty line
    is printed afterwards.
    """
    options = get_args()
    interactive = not options.batch

    parser = Parser(verbose=options.verbose,
                    keepfiles=options.keepfiles,
                    interactive=interactive)
    tree = parser.run(debug=options.debug)

    if interactive:
        # Clear the line, when the shell exits.
        print

    return tree
if __name__ == '__main__':
main()
from node import ExpressionLeaf as Leaf
def get_factor_constants(operand):
    """
    Return the numeric leaves among the first two operands of a
    multiplication node, in operand order. Any other kind of operand yields
    an empty list.
    """
    # NOTE(review): OP_MUL and LEAF_NUM are not defined in the visible
    # source, and nodes do not visibly carry a 'type' attribute yet --
    # confirm where these are meant to come from.
    res = []

    if operand.type == OP_MUL:
        for factor in (operand[0], operand[1]):
            if factor.type == LEAF_NUM:
                # Collect the leaf itself; '+=' would try to iterate it.
                res.append(factor)

    return res
# NOTE(review): this function is unfinished. It contains incomplete
# expressions (the empty lambda body and the dangling 'id_nodes +=' below)
# and refers to names that are not defined in the visible source
# (apply_plus_factors, apply_constant, VAL_NUM, OP_MUL, leaves inside
# apply_identifiers). The comments below describe only what is written.
def combine_plus_factors(node):
# List of rewrite possibilities that is returned at the end; each entry is a
# (node, function, arguments) tuple.
p = []
# Check if any numeric factors can be combined
# NOTE(review): this helper is defined but the tuples appended below name
# 'apply_plus_factors' instead -- confirm which name is intended. The lambda
# returns a plain sum, so with more than two leaves the accumulator would no
# longer have a '.value' attribute.
def apply_numeric_factors(node, leaves):
return Leaf(reduce(lambda a, b: a.value + b.value, leaves))
# Collect the numeric operands of the node.
num_nodes = []
for n in node:
# NUM + NUM -> NUM
if n.type == VAL_NUM:
num_nodes.append(n)
# At least two numeric operands are needed before they can be combined.
if len(num_nodes) > 1:
p.append((node, apply_plus_factors, num_nodes))
# Check if any variable multiplications/divisions can be combined
# NOTE(review): unfinished helper -- the lambda has no body and 'leaves' is
# not a parameter of this function.
def apply_identifiers(node, operands):
apply_constant(lambda x: )
return Leaf(leaves[0].value + leaves[1].value)
id_nodes = []
for n in node:
# NUM * + NUM -> NUM
if n.type == OP_MUL:
consts = get_factor_constants(n)
if len(consts) > 1:
# NOTE(review): the right-hand side of this statement is missing.
id_nodes +=
# NOTE(review): this repeats the num_nodes check from above; presumably
# id_nodes was meant here -- confirm.
if len(num_nodes) > 1:
p.append((node, apply_plus_factors, num_nodes))
return p
rules = {
'+': [combine_plus_factors],
}
#!/usr/bin/env python
from parser import main
from node import ExpressionLeaf as Leaf
from rules import rules
# (node, funcptr, (args...))
def get_node_possibilities(node):
    """
    Get all possible rewrite steps for this node.

    The node's title selects an entry of the 'rules' table. Each entry is a
    list of functions; every function is called with the node and returns a
    list of possibilities, which are concatenated into the result. A node
    whose title has no entry yields an empty list.
    """
    possibilities = []

    # 'rules' maps an operator to a LIST of handlers, so call each handler
    # rather than the list itself.
    for handler in rules.get(node.title(), []):
        possibilities += handler(node)

    return possibilities
def get_possibilities(node):
    """
    Get all possible rewrite steps for this node and its children.

    The result is one flat list: the possibilities of the node itself,
    followed by those of each child (recursively), in child order.
    """
    possibilities = get_node_possibilities(node)

    if not isinstance(node, Leaf):
        for child in node:
            # Extend instead of appending the child's list as one element,
            # so that the result stays a flat list of possibilities.
            possibilities += get_possibilities(child)

    return possibilities
if __name__ == '__main__':
node = main()
print 'node:', node
p = get_possibilities(node)
print ' p: -------------'
print '\n'.join(p),
print '----------------'
import sys
from src.calc import Parser
class ParserWrapper(object):
class TestParser(Parser):
def __init__(self, **kwargs):
Parser.__init__(self, **kwargs)
def __init__(self, base_class, **kwargs):
self.input_buffer = []
self.input_position = 0
def run(self, input_buffer, *args, **kwargs):
map(self.append, input_buffer)
return Parser.run(self, *args, **kwargs)
def append(self, input):
self.input_buffer.append(input + '\n')
self.verbose = kwargs.get('verbose', False)
def read(self, nbytes):
buffer = ''
# Overwrite parser read() method
def read(nbytes):
buf = ''
try:
buffer = self.input_buffer[self.input_position]
buf = self.input_buffer[self.input_position]
if self.verbose:
print 'read:', buffer
print 'read:', buf
except IndexError:
return ''
self.input_position += 1
return buffer
return buf
self.parser = base_class(**kwargs)
self.parser.read = read
def run(self, input_buffer, *args, **kwargs):
map(self.append, input_buffer)
return self.parser.run(*args, **kwargs)
def append(self, input):
self.input_buffer.append(input + '\n')
def run_expressions(expressions, keepfiles=1, fail=True, silent=False,
verbose=0):
def run_expressions(base_class, expressions, keepfiles=1, fail=True,
silent=False, verbose=0):
"""
Run a list of mathematical expression through the term rewriting system and
check if the output matches the expected output. The list of EXPRESSIONS
......@@ -55,7 +58,7 @@ def run_expressions(expressions, keepfiles=1, fail=True, silent=False,
higher value will print more types of debug messages.
"""
parser = TestParser(keepfiles=keepfiles, verbose=verbose)
parser = ParserWrapper(base_class, keepfiles=keepfiles, verbose=verbose)
for exp, out in expressions:
res = None
......
import unittest
from tests.parser import TestParser, run_expressions
from src.calc import Parser
from tests.parser import ParserWrapper, run_expressions
class TestCalc(unittest.TestCase):
......@@ -12,7 +13,7 @@ class TestCalc(unittest.TestCase):
pass
def test_constructor(self):
assert TestParser(keepfiles=1).run(['1+4']) == 5.0
assert ParserWrapper(Parser, keepfiles=1).run(['1+4']) == 5.0
def test_basic_on_exp(self):
expressions = [('4', 4.0),
......@@ -23,7 +24,7 @@ class TestCalc(unittest.TestCase):
('3^4', 81.0),
('(4)', 4.0)]
run_expressions(expressions)
run_expressions(Parser, expressions)
def test_infinity(self):
expressions = [('2^3000', 2**3000),
......@@ -31,4 +32,4 @@ class TestCalc(unittest.TestCase):
# ('2^99999999999', None),
# ('2^-99999999999', 0.0)]
run_expressions(expressions)
run_expressions(Parser, expressions)
# vim: set fileencoding=utf-8 :
import unittest
from external.graph_drawing.graph import generate_graph
from external.graph_drawing.line import generate_line
from src.parser import Parser
from src.node import ExpressionNode as Node, ExpressionLeaf as Leaf
from tests.parser import ParserWrapper, run_expressions
def graph(*exp, **kwargs):
    """
    Run the given expression strings through a wrapped Parser (constructed
    with the keyword arguments) and return generate_graph() of the result.
    """
    wrapper = ParserWrapper(Parser, **kwargs)
    tree = wrapper.run(exp)

    return generate_graph(tree)
def line(*exp, **kwargs):
    """
    Run the given expression strings through a wrapped Parser (constructed
    with the keyword arguments) and return generate_line() of the result.
    """
    wrapper = ParserWrapper(Parser, **kwargs)
    tree = wrapper.run(exp)

    return generate_line(tree)
# Unit tests for the expression tree parser.
class TestParser(unittest.TestCase):
# Parsing '1 + 4' is expected to give a '+' node with the leaves 1 and 4.
def test_constructor(self):
node = Node('+', Leaf(1), Leaf(4))
self.assertEqual(ParserWrapper(Parser).run(['1 + 4']), node)
# A single identifier is expected to parse to a leaf holding its name.
def test_identifiers(self):
run_expressions(Parser, [('a', Leaf('a'))])
# The graph of '4a' is compared to a drawing of '*' above '4' and 'a'.
# NOTE(review): the whitespace inside the expected literal (and in the
# replace() pattern) appears to have been collapsed in this copy of the
# file -- verify against the repository before relying on it.
def test_graph(self):
assert graph('4a') == ("""
*
╭┴╮
4 a
""").replace('\n ', '\n')[1:-1]
# The one-line rendering of '4a' is expected to be '4 * a'.
def test_line(self):
self.assertEqual(line('4a'), '4 * a')
import unittest
from tests.parser import TestParser, run_expressions
from src.calc import Parser
from tests.parser import run_expressions
from sympy import Symbol, symbols
......@@ -14,17 +15,17 @@ class TestVariables(unittest.TestCase):
def test_addition(self):
expressions = [('5 + 5', 5 + 5)]
run_expressions(expressions)
run_expressions(Parser, expressions)
def test_addition_of_one_term(self):
a = Symbol('a')
expressions = [('a + 5', 5 + a)]
run_expressions(expressions)
run_expressions(Parser, expressions)
def test_addition_of_two_terms(self):
a, b = symbols('a,b')
expressions = [('4*a + 5*b', 4*a + 5*b)]
run_expressions(expressions)
run_expressions(Parser, expressions)
#def test_short_addition_of_two_terms(self):
# a, b = symbols('a,b')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment