Просмотр исходного кода

Merge branch 'master' of github.com:taddeus/peephole

Conflicts:
	report/report.tex
Richard Torenvliet 14 лет назад
Родитель
Commit
42da38d6b9

+ 7 - 6
Makefile

@@ -1,22 +1,23 @@
 BUILD=build/
 BUILD=build/
-CLEAN=src/*.pyc src/optimize/*.pyc
+CLEAN=*.pyc src/*.pyc src/optimize/*.pyc parser.out parsetab.py
 
 
 # Fix pdflatex search path
 # Fix pdflatex search path
-TGT_DIR :=
-TGT_DOC :=
+TGT_DIR := report
 
 
 # Default target is 'all'. The 'build' target is defined here so that all
 # Default target is 'all'. The 'build' target is defined here so that all
 # sub rules.mk can add prerequisites to the 'build' target.
 # sub rules.mk can add prerequisites to the 'build' target.
 all:
 all:
 build:
 build:
 
 
-d := tests/
+d := report/
 include base.mk
 include base.mk
 include $(d)/rules.mk
 include $(d)/rules.mk
 
 
-.PHONY: doc
+d := tests/
+include base.mk
+include $(d)/rules.mk
 
 
-all: doc build
+all: report
 
 
 clean:
 clean:
 	rm -rf $(CLEAN)
 	rm -rf $(CLEAN)

+ 13 - 9
main.py

@@ -1,22 +1,26 @@
 #!/usr/bin/python
 #!/usr/bin/python
 from src.parser import parse_file
 from src.parser import parse_file
 from src.optimize import optimize
 from src.optimize import optimize
-from src.writer import write_statements
+
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     from sys import argv, exit
     from sys import argv, exit
 
 
     if len(argv) < 2:
     if len(argv) < 2:
-        print 'Usage: python %s FILE' % argv[0]
+        print 'Usage: python %s SOURCE_FILE [ OUT_FILE [ SOURCE_OUT_FILE ] ]' \
+                % argv[0]
         exit(1)
         exit(1)
 
 
-    # Parse File
-    original = parse_file(argv[1])
-    optimized = optimize(original, verbose=1)
+    # Parse file
+    program = parse_file(argv[1])
+    program.debug = True
+
+    if len(argv) > 3:
+        # Save input assembly in new file for easy comparison
+        program.save(argv[3])
+
+    optimize(program, verbose=1)
 
 
     if len(argv) > 2:
     if len(argv) > 2:
         # Save output assembly
         # Save output assembly
-        out = write_statements(optimized)
-        f = open(argv[2], 'w+')
-        f.write(out)
-        f.close()
+        program.save(argv[2])

+ 1 - 1
report/Makefile

@@ -1,6 +1,6 @@
 RM=rm -rf
 RM=rm -rf
 
 
-all: report.pdf
+report: report.pdf
 
 
 %.pdf: %.tex
 %.pdf: %.tex
 	pdflatex $^
 	pdflatex $^

+ 20 - 14
report/report.tex

@@ -1,22 +1,17 @@
 \documentclass[10pt,a4paper]{article}
 \documentclass[10pt,a4paper]{article}
 \usepackage[latin1]{inputenc}
 \usepackage[latin1]{inputenc}
-\usepackage{amsmath}
-\usepackage{amsfonts}
-\usepackage{amssymb}
-\usepackage{booktabs}
-\usepackage{graphicx}
-\usepackage{listings}
-\usepackage{subfigure}
-\usepackage{float}
-\usepackage{hyperref}
+\usepackage{amsmath,amsfonts,amssymb,booktabs,graphicx,listings,subfigure}
+\usepackage{float,hyperref}
 
 
 \title{Peephole Optimizer}
 \title{Peephole Optimizer}
 \author{Jayke Meijer (6049885), Richard Torenvliet (6138861), Tadde\"us Kroes
 \author{Jayke Meijer (6049885), Richard Torenvliet (6138861), Tadde\"us Kroes
     (6054129)}
     (6054129)}
 
 
 \begin{document}
 \begin{document}
+
 \maketitle
 \maketitle
 \tableofcontents
 \tableofcontents
+
 \pagebreak
 \pagebreak
 
 
 \section{Introduction}
 \section{Introduction}
@@ -81,7 +76,7 @@ These are optimizations that simply look for a certain statement or pattern of
 statements, and optimize these. For example,
 statements, and optimize these. For example,
 \begin{verbatim}
 \begin{verbatim}
 mov $regA,$regB
 mov $regA,$regB
-instr $regA, $regA,... 
+instr $regA, $regA,...
 \end{verbatim}
 \end{verbatim}
 can be optimized into
 can be optimized into
 \begin{verbatim}
 \begin{verbatim}
@@ -115,7 +110,7 @@ implementation is a slightly less fancy, but easier to implement.
 We search from the end of the block up for instructions that are eligible for
 We search from the end of the block up for instructions that are eligible for
 CSE. If we find one, we check further up in the code for the same instruction,
 CSE. If we find one, we check further up in the code for the same instruction,
 and add that to a temporary storage list. This is done until the beginning of
 and add that to a temporary storage list. This is done until the beginning of
-the block or until one of the arguments of this expression is assigned. The temporty storage is 
+the block or until one of the arguments of this expression is assigned.
 
 
 We now add the instruction above the first use, and write the result in a new
 We now add the instruction above the first use, and write the result in a new
 variable. Then all occurrences of this expression can be replaced by a move of
 variable. Then all occurrences of this expression can be replaced by a move of
@@ -126,10 +121,15 @@ in general not very large and the execution time of the optimizer is not a
 primary concern, this is not a big problem.
 primary concern, this is not a big problem.
 
 
 \subsubsection*{Fold constants}
 \subsubsection*{Fold constants}
-Constant folding is an optimization where the outcome of arithmetics are calculated at compile time. If a value x is assigned to a certain value, let's say 10, than all next occurences of \texttt{x} are replaced by 10 until a redefinition of x. Arithmetics in Assembly are always preformed between two constants, if this is not the case the calculation is not possible. See the example for a more clear explanation of constant folding(will come). In other words until the current definition of \texttt{x} becomes dead. Therefore reaching definitions analysis is needed.
+Constant folding is an optimization where the outcome of arithmetic is
+calculated at compile time. If a variable \texttt{x} is assigned a value, let's
+say 10, then all later occurrences of \texttt{x} are replaced by 10 until
+\texttt{x} is redefined. Arithmetic in Assembly is always performed between two
+variables or a variable and a constant. If this is not the case the calculation
+is not possible. See the example for a clearer explanation of constant folding
+(will come). In other words, the replacement holds until the current definition
+of \texttt{x} becomes dead. Therefore reaching definitions analysis is needed.
 
 
-
-git 
 \subsubsection*{Copy propagation}
 \subsubsection*{Copy propagation}
 
 
 Copy propagation `unpacks' a move instruction, by replacing its destination
 Copy propagation `unpacks' a move instruction, by replacing its destination
@@ -169,6 +169,11 @@ removed by the dead code elimination.
 \subsubsection*{Algebraic transformations}
 \subsubsection*{Algebraic transformations}
 
 
 
 
+<<<<<<< HEAD
+=======
+There are a number of such cases, all of which are once again stated in
+appendix \ref{opt}.
+>>>>>>> fd46a3140a9b8bd0e5e542e57b57bfe87ff7556d
 
 
 \section{Implementation}
 \section{Implementation}
 
 
@@ -316,4 +321,5 @@ mult $regA, $regB, 0    ->  li   $regA, 0
 
 
 mult $regA, $regB, 2    ->  sll  $regA, $regB, 1
 mult $regA, $regB, 2    ->  sll  $regA, $regB, 1
 \end{verbatim}
 \end{verbatim}
+
 \end{document}
 \end{document}

+ 9 - 0
report/rules.mk

@@ -0,0 +1,9 @@
+CLEAN := $(CLEAN) report/*.pdf report/*.aux report/*.log \
+	report/*.out report/*.toc report/*.snm report/*.nav
+
+report: report/report.pdf
+
+report/%.pdf: report/%.tex
+	cd report; \
+	pdflatex report.tex; \
+	pdflatex report.tex

+ 50 - 0
src/dag.py

@@ -0,0 +1,50 @@
+class Dag:
+    def __init__(self, block):
+        """Create the Directed Acyclic Graph of all binary operations in a
+        basic block."""
+        self.nodes = []
+
+        for s in block:
+            if s.is_command('move') or s.is_monop():
+                rd, rs = s
+                y = self.find_reg_node(rs)
+                self.find_op_node(s.name, rd, y)
+            elif s.is_binop():
+                rd, rs, rt = s
+                y = self.find_reg_node(rs)
+                z = self.find_reg_node(rt)
+                self.find_op_node(s.name, rd, y, z)
+
+    def find_reg_node(self, reg):
+        for n in self.nodes:
+            if reg in n.reg:
+                return n
+
+        node = DagLeaf(reg)
+        self.nodes.append(node)
+
+        return node
+
+    def find_op_node(self, op, rd, *args):
+        for n in self.nodes:
+            if not isinstance(n, DagLeaf) and n.op == op and n.nodes == args:
+                n.labels.append(rd)
+
+                return n
+
+        node = DagNode(op, rd, *args)
+        self.nodes.append(node)
+
+        return node
+
+
+class DagNode:
+    def __init__(self, op, label, *args):
+        self.op = op
+        self.labels = [label]
+        self.nodes = args
+
+
+class DagLeaf:
+    def __init__(self, reg):
+        self.reg = reg

+ 0 - 189
src/dataflow.py

@@ -9,10 +9,6 @@ class BasicBlock(Block):
 
 
         self.dominates = []
         self.dominates = []
         self.dominated_by = []
         self.dominated_by = []
-        self.in_set = set([])
-        self.out_set = set([])
-        self.gen_set = set([])
-        self.kill_set = set([])
 
 
     def add_edge_to(self, block):
     def add_edge_to(self, block):
         if block not in self.edges_to:
         if block not in self.edges_to:
@@ -24,90 +20,6 @@ class BasicBlock(Block):
             self.dominates.append(block)
             self.dominates.append(block)
             block.dominated_by.append(self)
             block.dominated_by.append(self)
 
 
-    def create_gen_kill(self, defs):
-        used = set()
-        self_defs = {}
-
-        # Get the last of each definition series and put in in the `def' set
-        self.gen_set = set()
-
-        for s in reversed(self):
-            for reg in s.get_def():
-                if reg not in self_defs:
-                    self_defs[reg] = s.sid
-                    self.gen_set.add(s.sid)
-
-        # Generate kill set
-        self.kill_set = set()
-
-        for reg, statement_ids in defs.iteritems():
-            if reg in self_defs:
-                self.kill_set |= statement_ids - set([self_defs[reg]])
-
-
-def get_defs(blocks):
-    # Collect definitions of all registers
-    defs = {}
-
-    for b in blocks:
-        for s in b:
-            for reg in s.get_def():
-                if reg not in defs:
-                    defs[reg] = set([s.sid])
-                else:
-                    defs[reg].add(s.sid)
-
-    return defs
-
-
-def reaching_definitions(blocks):
-    """Generate the `in' and `out' sets of the given blocks using the iterative
-    algorithm from the lecture slides."""
-    # Generate flow graph
-    generate_flow_graph(blocks)
-
-    # Create gen/kill sets
-    defs = get_defs(blocks)
-    print 'defs:', defs
-
-    for b in blocks:
-        b.create_gen_kill(defs)
-        b.out_set = b.gen_set
-
-    change = True
-
-    while change:
-        change = False
-
-        for b in blocks:
-            print 'block:', b
-            b.in_set = set()
-
-            for pred in b.edges_from:
-                print 'pred:      ', pred
-                b.in_set |= pred.out_set
-
-            print 'b.in_set:  ', b.in_set
-            print 'b.out_set: ', b.out_set
-            new_out = b.gen_set | (b.in_set - b.kill_set)
-            print 'new_out:   ', new_out
-
-            if new_out != b.out_set:
-                print 'changed'
-                b.out_set = new_out
-                change = True
-
-
-def pred(n, known=[]):
-    """Recursively find all predecessors of a node."""
-    direct = filter(lambda b: b not in known, n.edges_from)
-    p = copy(direct)
-
-    for ancestor in direct:
-        p += pred(ancestor, direct)
-
-    return p
-
 
 
 def find_leaders(statements):
 def find_leaders(statements):
     """Determine the leaders, which are:
     """Determine the leaders, which are:
@@ -169,104 +81,3 @@ def generate_flow_graph(blocks):
                 b.add_edge_to(blocks[i + 1])
                 b.add_edge_to(blocks[i + 1])
         elif i < len(blocks) - 1:
         elif i < len(blocks) - 1:
             b.add_edge_to(blocks[i + 1])
             b.add_edge_to(blocks[i + 1])
-
-
-#def generate_dominator_tree(nodes):
-#    """Add dominator administration to the given flow graph nodes."""
-#    # Dominator of the start node is the start itself
-#    nodes[0].dom = set([nodes[0]])
-#
-#    # For all other nodes, set all nodes as the dominators
-#    for n in nodes[1:]:
-#        n.dom = set(copy(nodes))
-#
-#    def pred(n, known=[]):
-#        """Recursively find all predecessors of a node."""
-#        direct = filter(lambda x: x not in known, n.edges_from)
-#        p = copy(direct)
-#
-#        for ancestor in direct:
-#            p += pred(ancestor, direct)
-#
-#        return p
-#
-#    # Iteratively eliminate nodes that are not dominators
-#    changed = True
-#
-#    while changed:
-#        changed = False
-#
-#        for n in nodes[1:]:
-#            old_dom = n.dom
-#            intersection = lambda p1, p2: p1.dom & p2.dom
-#            n.dom = set([n]) | reduce(intersection, pred(n), set([]))
-#
-#            if n.dom != old_dom:
-#                changed = True
-#
-#    def idom(d, n):
-#        """Check if d immediately dominates n."""
-#        for b in n.dom:
-#            if b != d and b != n and b in n.dom:
-#                return False
-#
-#        return True
-#
-#    # Build tree using immediate dominators
-#    for n in nodes:
-#        for d in n.dom:
-#            if idom(d, n):
-#                d.set_dominates(n)
-#                break
-
-
-class Dag:
-    def __init__(self, block):
-        """Create the Directed Acyclic Graph of all binary operations in a
-        basic block."""
-        self.nodes = []
-
-        for s in block:
-            if s.is_command('move') or s.is_monop():
-                rd, rs = s
-                y = self.find_reg_node(rs)
-                self.find_op_node(s.name, rd, y)
-            elif s.is_binop():
-                rd, rs, rt = s
-                y = self.find_reg_node(rs)
-                z = self.find_reg_node(rt)
-                self.find_op_node(s.name, rd, y, z)
-
-    def find_reg_node(self, reg):
-        for n in self.nodes:
-            if reg in n.reg:
-                return n
-
-        node = DagLeaf(reg)
-        self.nodes.append(node)
-
-        return node
-
-    def find_op_node(self, op, rd, *args):
-        for n in self.nodes:
-            if not isinstance(n, DagLeaf) and n.op == op and n.nodes == args:
-                n.labels.append(rd)
-
-                return n
-
-        node = DagNode(op, rd, *args)
-        self.nodes.append(node)
-
-        return node
-
-
-class DagNode:
-    def __init__(self, op, label, *args):
-        self.op = op
-        self.labels = [label]
-        self.nodes = args
-
-
-class DagLeaf:
-    def __init__(self, reg):
-        self.reg = reg

+ 50 - 0
src/dominator.py

@@ -0,0 +1,50 @@
+from copy import copy
+
+
+def generate_dominator_tree(nodes):
+    """Add dominator administration to the given flow graph nodes."""
+    # Dominator of the start node is the start itself
+    nodes[0].dom = set([nodes[0]])
+
+    # For all other nodes, set all nodes as the dominators
+    for n in nodes[1:]:
+        n.dom = set(copy(nodes))
+
+    def pred(n, known=[]):
+        """Recursively find all predecessors of a node."""
+        direct = filter(lambda x: x not in known, n.edges_from)
+        p = copy(direct)
+
+        for ancestor in direct:
+            p += pred(ancestor, direct)
+
+        return p
+
+    # Iteratively eliminate nodes that are not dominators
+    changed = True
+
+    while changed:
+        changed = False
+
+        for n in nodes[1:]:
+            old_dom = n.dom
+            intersection = lambda p1, p2: p1.dom & p2.dom
+            n.dom = set([n]) | reduce(intersection, pred(n), set([]))
+
+            if n.dom != old_dom:
+                changed = True
+
+    def idom(d, n):
+        """Check if d immediately dominates n."""
+        for b in n.dom:
+            if b != d and b != n and b in n.dom:
+                return False
+
+        return True
+
+    # Build tree using immediate dominators
+    for n in nodes:
+        for d in n.dom:
+            if idom(d, n):
+                d.set_dominates(n)
+                break

+ 71 - 0
src/liveness.py

@@ -0,0 +1,71 @@
+from copy import copy
+
+
+def create_use_def(block):
+    used = set()
+    defined = set()
+
+    # Get the last of each definition series and put in in the `def' set
+    block.use_set = set()
+    block.def_set = set()
+
+    for s in block:
+        # use[B] is the set of variables whose values may be used in B prior to
+        # any definition of the variable
+        for reg in s.get_use():
+            used.add(reg)
+
+            if reg not in defined:
+                block.use_set.add(reg)
+
+        # def[B] is the set of variables assigned values in B prior to any use
+        # of that variable in B
+        for reg in s.get_def():
+            defined.add(reg)
+
+            if reg not in used:
+                block.def_set.add(reg)
+
+
+def succ(block, known=[]):
+    """Recursively find all successors of a node."""
+    direct = filter(lambda b: b != block and b not in known, block.edges_to)
+    s = copy(direct)
+
+    for successor in direct:
+        s += succ(successor, known + direct)
+        return s
+
+    return s
+
+
+def create_in_out(blocks):
+    for b in blocks:
+        create_use_def(b)
+
+        b.live_in = b.use_set
+        b.live_out = set()
+
+    change = True
+
+    while change:
+        change = False
+
+        for b in blocks:
+            # in[B] = use[B] | (out[B] - def[B])
+            new_in = b.use_set | (b.live_out - b.def_set)
+
+            # out[B] = union of in[S] for S in succ(B)
+            new_out = set()
+
+            for s in succ(b):
+                new_out |= s.live_in
+
+            # Check if either `in' or `out' changed
+            if new_in != b.live_in:
+                b.live_in = new_in
+                change = True
+
+            if new_out != b.live_out:
+                b.live_out = new_out
+                change = True

+ 20 - 63
src/optimize/__init__.py

@@ -1,76 +1,33 @@
-from src.dataflow import find_basic_blocks, reaching_definitions
-
-from redundancies import remove_redundant_jumps, move_1, move_2, move_3, \
-        move_4, load, shift, add
+from src.dataflow import find_basic_blocks, generate_flow_graph
+from redundancies import remove_redundancies
 from advanced import eliminate_common_subexpressions, fold_constants, \
 from advanced import eliminate_common_subexpressions, fold_constants, \
-        copy_propagation, algebraic_transformations, eliminate_dead_code
-
-
-def remove_redundancies(block):
-    """Execute all functions that remove redundant statements."""
-    callbacks = [move_1, move_2, move_3, move_4, load, shift, add]
-    old_len = -1
-    changed = False
-
-    while old_len != len(block):
-        old_len = len(block)
-
-        while not block.end():
-            s = block.read()
-
-            for callback in callbacks:
-                if callback(s, block):
-                    changed = True
-                    break
-
-    return changed
+        copy_propagation, eliminate_dead_code
+import src.liveness as liveness
+import src.reaching_definitions as reaching_definitions
 
 
-
-def optimize_block(block):
-    """Optimize a basic block."""
-    while remove_redundancies(block) \
-            | eliminate_common_subexpressions(block) \
-            | fold_constants(block) \
-            | copy_propagation(block) \
-            | eliminate_dead_code(block):
-            #| algebraic_transformations(block) \
-        pass
-
-
-def optimize(statements, verbose=0):
+def optimize(program, verbose=0):
     """Optimization wrapper function, calls global and basic-block level
     """Optimization wrapper function, calls global and basic-block level
     optimization functions."""
     optimization functions."""
-    # Optimize on a global level
-    o = len(statements)
-    remove_redundant_jumps(statements)
-    g = len(statements)
+    # Remember original number of statements
+    o = program.count_instructions()
 
 
-    # Divide into basic blocks
-    blocks = find_basic_blocks(statements)
+    # Optimize on a global level
+    program.optimize_global()
+    g = program.count_instructions()
 
 
-    # Find reaching definitions
-    reaching_definitions(blocks)
+    # Perform dataflow analysis
+    program.perform_dataflow_analysis()
 
 
     # Optimize basic blocks
     # Optimize basic blocks
-    map(optimize_block, blocks)
+    program.optimize_blocks()
 
 
     # Concatenate optimized blocks to obtain
     # Concatenate optimized blocks to obtain
-    block_statements = map(lambda b: b.statements, blocks)
-    opt_blocks = reduce(lambda a, b: a + b, block_statements)
-    b = len(opt_blocks)
-
-    # - Common subexpression elimination
-    # - Constant folding
-    # - Copy propagation
-    # - Dead-code elimination
-    # - Temporary variable renaming
-    # - Interchange of independent statements
+    b = program.count_instructions()
 
 
+    # Print results
     if verbose:
     if verbose:
-        print 'Original statements:             %d' % o
-        print 'After global optimization:       %d' % g
-        print 'After basic blocks optimization: %d' % b
-        print 'Optimization:                    %d (%d%%)' \
+        print 'Original statements:            %d' % o
+        print 'After global optimization:      %d (%d removed)' % (g, o - g)
+        print 'After basic block optimization: %d (%d removed)' % (b, g - b)
+        print 'Statements removed:             %d (%d%%)' \
                 % (o - b, int((o - b) / float(b) * 100))
                 % (o - b, int((o - b) / float(b) * 100))
-
-    return opt_blocks

+ 68 - 20
src/optimize/advanced.py

@@ -18,7 +18,7 @@ def reg_can_be_used_in(reg, block, start, end):
         elif s.defines(reg):
         elif s.defines(reg):
             return True
             return True
 
 
-    return reg not in block.out_set
+    return reg not in block.live_out
 
 
 
 
 def find_free_reg(block, start, end):
 def find_free_reg(block, start, end):
@@ -50,6 +50,8 @@ def eliminate_common_subexpressions(block):
     """
     """
     changed = False
     changed = False
 
 
+    block.reset()
+
     while not block.end():
     while not block.end():
         s = block.read()
         s = block.read()
 
 
@@ -77,15 +79,20 @@ def eliminate_common_subexpressions(block):
                 new_reg = find_free_reg(block, occurrences[0], occurrences[-1])
                 new_reg = find_free_reg(block, occurrences[0], occurrences[-1])
 
 
                 # Replace all occurrences with a move statement
                 # Replace all occurrences with a move statement
+                message = 'Common subexpression reference: %s %s' \
+                        % (s.name, ', '.join(map(str, [new_reg] + s[1:])))
+
                 for occurrence in occurrences:
                 for occurrence in occurrences:
                     rd = block[occurrence][0]
                     rd = block[occurrence][0]
                     block.replace(1, [S('command', 'move', rd, new_reg)], \
                     block.replace(1, [S('command', 'move', rd, new_reg)], \
-                            start=occurrence)
+                            start=occurrence, message=message)
 
 
                 # Insert the calculation before the original with the new
                 # Insert the calculation before the original with the new
                 # destination address
                 # destination address
+                message = 'Common subexpression: %s %s' \
+                        % (s.name, ', '.join(map(str, s)))
                 block.insert(S('command', s.name, *([new_reg] + args)), \
                 block.insert(S('command', s.name, *([new_reg] + args)), \
-                             index=occurrences[0])
+                             index=occurrences[0], message=message)
 
 
                 changed = True
                 changed = True
 
 
@@ -123,8 +130,11 @@ def fold_constants(block):
     # Current known values in register
     # Current known values in register
     register = {}
     register = {}
 
 
+    block.reset()
+
     while not block.end():
     while not block.end():
         s = block.read()
         s = block.read()
+        known = []
 
 
         if not s.is_command():
         if not s.is_command():
             continue
             continue
@@ -132,27 +142,34 @@ def fold_constants(block):
         if s.name == 'li':
         if s.name == 'li':
             # Save value in register
             # Save value in register
             register[s[0]] = int(s[1], 16)
             register[s[0]] = int(s[1], 16)
+            known.append((s[0], register[s[0]]))
         elif s.name == 'move' and s[0] in register:
         elif s.name == 'move' and s[0] in register:
             reg_to, reg_from = s
             reg_to, reg_from = s
 
 
             if reg_from in register:
             if reg_from in register:
                 # Other value is also known, copy its value
                 # Other value is also known, copy its value
                 register[reg_to] = register[reg_from]
                 register[reg_to] = register[reg_from]
+                known.append((reg_to, register[reg_to]))
             else:
             else:
                 # Other value is unknown, delete the value
                 # Other value is unknown, delete the value
                 del register[reg_to]
                 del register[reg_to]
+                known.append((reg_to, 'unknown'))
         elif s.name == 'sw' and s[0] in register:
         elif s.name == 'sw' and s[0] in register:
             # Constant variable definition, e.g. 'int a = 1;'
             # Constant variable definition, e.g. 'int a = 1;'
             constants[s[1]] = register[s[0]]
             constants[s[1]] = register[s[0]]
+            known.append((s[1], register[s[0]]))
         elif s.name == 'lw' and s[1] in constants:
         elif s.name == 'lw' and s[1] in constants:
             # Usage of variable with constant value
             # Usage of variable with constant value
             register[s[0]] = constants[s[1]]
             register[s[0]] = constants[s[1]]
-        elif s.name == 'mflo':
+            known.append((s[0], register[s[0]]))
+        elif s.name == 'mflo' and '$lo' in register:
             # Move of `Lo' register to another register
             # Move of `Lo' register to another register
             register[s[0]] = register['$lo']
             register[s[0]] = register['$lo']
-        elif s.name == 'mfhi':
+            known.append((s[0], register[s[0]]))
+        elif s.name == 'mfhi' and '$hi' in register:
             # Move of `Hi' register to another register
             # Move of `Hi' register to another register
             register[s[0]] = register['$hi']
             register[s[0]] = register['$hi']
+            known.append((s[0], register[s[0]]))
         elif s.name in ['mult', 'div'] \
         elif s.name in ['mult', 'div'] \
                 and s[0]in register and s[1] in register:
                 and s[0]in register and s[1] in register:
             # Multiplication/division with constants
             # Multiplication/division with constants
@@ -163,29 +180,37 @@ def fold_constants(block):
                 if not a or not b:
                 if not a or not b:
                     # Multiplication by 0
                     # Multiplication by 0
                     hi = lo = to_hex(0)
                     hi = lo = to_hex(0)
+                    message = 'Multiplication by 0: %d * 0' % (b if a else a)
                 elif a == 1:
                 elif a == 1:
                     # Multiplication by 1
                     # Multiplication by 1
                     hi = to_hex(0)
                     hi = to_hex(0)
                     lo = to_hex(b)
                     lo = to_hex(b)
+                    message = 'Multiplication by 1: %d * 1' % b
                 elif b == 1:
                 elif b == 1:
                     # Multiplication by 1
                     # Multiplication by 1
                     hi = to_hex(0)
                     hi = to_hex(0)
                     lo = to_hex(a)
                     lo = to_hex(a)
+                    message = 'Multiplication by 1: %d * 1' % a
                 else:
                 else:
                     # Calculate result and fill Hi/Lo registers
                     # Calculate result and fill Hi/Lo registers
-                    binary = bin(a * b)[2:]
+                    result = a * b
+                    binary = bin(result)[2:]
                     binary = '0' * (64 - len(binary)) + binary
                     binary = '0' * (64 - len(binary)) + binary
                     hi = int(binary[:32], base=2)
                     hi = int(binary[:32], base=2)
                     lo = int(binary[32:], base=2)
                     lo = int(binary[32:], base=2)
+                    message = 'Constant multiplication: %d * %d = %d' \
+                              % (a, b, result)
 
 
                 # Replace the multiplication with two immidiate loads to the
                 # Replace the multiplication with two immidiate loads to the
                 # Hi/Lo registers
                 # Hi/Lo registers
                 block.replace(1, [S('command', 'li', '$hi', hi),
                 block.replace(1, [S('command', 'li', '$hi', hi),
-                                S('command', 'li', '$lo', li)])
+                                  S('command', 'li', '$lo', li)],
+                              message=message)
             elif s.name == 'div':
             elif s.name == 'div':
                 lo, hi = divmod(rs, rt)
                 lo, hi = divmod(rs, rt)
 
 
             register['$lo'], register['$hi'] = lo, hi
             register['$lo'], register['$hi'] = lo, hi
+            known += [('$lo', lo), ('$hi', hi)]
             changed = True
             changed = True
         elif s.name in ['addu', 'subu']:
         elif s.name in ['addu', 'subu']:
             # Addition/subtraction with constants
             # Addition/subtraction with constants
@@ -193,21 +218,28 @@ def fold_constants(block):
             rs_known = rs in register
             rs_known = rs in register
             rt_known = rt in register
             rt_known = rt in register
 
 
-            if rs_known and rt_known:
+            if (rs_known or isinstance(rs, int)) and \
+                    (rt_known or isinstance(rt, int)):
                 # a = 5         ->  b = 15
                 # a = 5         ->  b = 15
                 # c = 10
                 # c = 10
                 # b = a + c
                 # b = a + c
-                rs_val = register[rs]
-                rt_val = register[rt]
+                rs_val = register[rs] if rs_known else rs
+                rt_val = register[rt] if rt_known else rt
 
 
                 if s.name == 'addu':
                 if s.name == 'addu':
-                    result = to_hex(rs_val + rt_val)
+                    result = rs_val + rt_val
+                    message = 'Constant addition: %d + %d = %d' \
+                            % (rs_val, rt_val, result)
 
 
                 if s.name == 'subu':
                 if s.name == 'subu':
-                    result = to_hex(rs_val - rt_val)
+                    result = rs_val - rt_val
+                    message = 'Constant subtraction: %d - %d = %d' \
+                            % (rs_val, rt_val, result)
 
 
-                block.replace(1, [S('command', 'li', rd, result)])
+                block.replace(1, [S('command', 'li', rd, to_hex(result))],
+                              message=message)
                 register[rd] = result
                 register[rd] = result
+                known.append((rd, result))
                 changed = True
                 changed = True
                 continue
                 continue
 
 
@@ -224,13 +256,20 @@ def fold_constants(block):
                 changed = True
                 changed = True
 
 
             if s[2] == 0:
             if s[2] == 0:
-                # Addition/subtraction with 0
-                block.replace(1, [S('command', 'move', rd, s[1])])
+                # Addition/subtraction by 0
+                message = '%s by 0: %s * 1' % ('Addition' if s.name == 'addu' \
+                                               else 'Substraction', s[1])
+                block.replace(1, [S('command', 'move', rd, s[1])], \
+                              message=message)
         else:
         else:
             for reg in s.get_def():
             for reg in s.get_def():
                 if reg in register:
                 if reg in register:
                     # Known register is overwritten, remove its value
                     # Known register is overwritten, remove its value
                     del register[reg]
                     del register[reg]
+                    known.append((reg, 'unknown'))
+
+        if block.debug and len(known):
+            s.set_inline_comment(','.join([' %s = %s' % k for k in known]))
 
 
     return changed
     return changed
 
 
@@ -252,6 +291,8 @@ def copy_propagation(block):
     moves_to = []
     moves_to = []
     changed = False
     changed = False
 
 
+    block.reset()
+
     while not block.end():
     while not block.end():
         s = block.read()
         s = block.read()
 
 
@@ -271,7 +312,7 @@ def copy_propagation(block):
             # the list.
             # the list.
             i = 0
             i = 0
 
 
-            while i  < len(moves_to):
+            while i < len(moves_to):
                 if moves_to[i] == s[0] or moves_to[i] == s[1]:
                 if moves_to[i] == s[0] or moves_to[i] == s[1]:
                     del moves_to[i]
                     del moves_to[i]
                     del moves_from[i]
                     del moves_from[i]
@@ -305,6 +346,8 @@ def algebraic_transformations(block):
     """
     """
     changed = False
     changed = False
 
 
+    block.reset()
+
     while not block.end():
     while not block.end():
         s = block.read()
         s = block.read()
 
 
@@ -354,14 +397,19 @@ def eliminate_dead_code(block):
         for reg in s.get_def():
         for reg in s.get_def():
             if reg in unused:
             if reg in unused:
                 # Statement is redefined later, so this statement is useless
                 # Statement is redefined later, so this statement is useless
-                s.remove = True
-                #print 'reg %s is in %s, remove:' % (reg, unused), \
-                #        block.pointer - 1, s
+                if block.debug:
+                    s.stype = 'comment'
+                    s.options['block'] = False
+                    s.name = ' Dead code: %s %s' \
+                            % (s.name, ', '.join(map(str, s)))
+                else:
+                    s.remove = True
             else:
             else:
                 unused.add(reg)
                 unused.add(reg)
 
 
         unused -= set(s.get_use())
         unused -= set(s.get_use())
 
 
-    block.apply_filter(lambda s: not hasattr(s, 'remove'))
+    if not block.debug:
+        block.apply_filter(lambda s: not hasattr(s, 'remove'))
 
 
     return changed
     return changed

+ 24 - 0
src/optimize/redundancies.py

@@ -1,6 +1,28 @@
 import re
 import re
 
 
 
 
+def remove_redundancies(block):
+    """Execute all functions that remove redundant statements."""
+    callbacks = [move_1, move_2, move_3, move_4, load, shift, add]
+    old_len = -1
+    changed = False
+
+    while old_len != len(block):
+        old_len = len(block)
+
+        block.reset()
+
+        while not block.end():
+            s = block.read()
+
+            for callback in callbacks:
+                if callback(s, block):
+                    changed = True
+                    break
+
+    return changed
+
+
 def move_1(mov, statements):
 def move_1(mov, statements):
     """
     """
     mov $regA, $regA          ->  --- remove it
     mov $regA, $regA          ->  --- remove it
@@ -124,3 +146,5 @@ def remove_redundant_jumps(statements):
                         s.name = 'bne' if s.is_command('beq') else 'beq'
                         s.name = 'bne' if s.is_command('beq') else 'beq'
                         s[2] = j[0]
                         s[2] = j[0]
                         statements.replace(3, [s, label])
                         statements.replace(3, [s, label])
+
+    statements.reset()

+ 7 - 4
src/parser.py

@@ -1,7 +1,8 @@
 import ply.lex as lex
 import ply.lex as lex
 import ply.yacc as yacc
 import ply.yacc as yacc
 
 
-from statement import Statement as S, Block
+from statement import Statement as S
+from program import Program
 
 
 
 
 # Global statements administration
 # Global statements administration
@@ -46,6 +47,7 @@ def t_offset_address(t):
 def t_int(t):
 def t_int(t):
     r'-?[0-9]+'
     r'-?[0-9]+'
     t.type = 'WORD'
     t.type = 'WORD'
+    t.value = int(t.value)
     return t
     return t
 
 
 def t_WORD(t):
 def t_WORD(t):
@@ -79,11 +81,12 @@ def p_line_instruction(p):
 
 
 def p_line_comment(p):
 def p_line_comment(p):
     'line : COMMENT NEWLINE'
     'line : COMMENT NEWLINE'
-    statements.append(S('comment', p[1], inline=False))
+    statements.append(S('comment', p[1]))
 
 
 def p_line_inline_comment(p):
 def p_line_inline_comment(p):
     'line : instruction COMMENT NEWLINE'
     'line : instruction COMMENT NEWLINE'
-    statements.append(S('comment', p[2], inline=True))
+    # Add the inline comment to the last parsed statement
+    statements[-1].options['comment'] = p[2]
 
 
 def p_instruction_command(p):
 def p_instruction_command(p):
     'instruction : command'
     'instruction : command'
@@ -125,4 +128,4 @@ def parse_file(filename):
     except IOError:
     except IOError:
         raise Exception('File "%s" could not be opened' % filename)
         raise Exception('File "%s" could not be opened' % filename)
 
 
-    return Block(statements)
+    return Program(statements)

+ 95 - 0
src/program.py

@@ -0,0 +1,95 @@
+from statement import Block
+from dataflow import find_basic_blocks, generate_flow_graph
+from optimize.redundancies import remove_redundant_jumps, remove_redundancies
+from optimize.advanced import eliminate_common_subexpressions, \
+        fold_constants, copy_propagation, eliminate_dead_code
+from writer import write_statements
+import liveness
+import reaching_definitions
+
+
+class Program(Block):
+    def __len__(self):
+        """Get the number of statements in the program."""
+        return len(self.statements) if hasattr(self, 'statements') \
+               else reduce(lambda a, b: len(a) + len(b), self.blocks, 0)
+
+    def get_statements(self):
+        """Concatenate the statements of all blocks and return the resulting
+        list."""
+        if hasattr(self, 'statements'):
+            return self.statements
+        else:
+            return reduce(lambda a, b: a + b,
+                          [b.statements for b in self.blocks])
+
+    def count_instructions(self):
+        """Count the number of statements that are commands or labels."""
+        return len(filter(lambda s: s.is_command() or s.is_label(),
+                          self.get_statements()))
+
+    def optimize_global(self):
+        """Optimize on a global level."""
+        remove_redundant_jumps(self)
+
+    def optimize_blocks(self):
+        """Optimize on block level. Keep executing all optimizations until no
+        more changes occur."""
+        self.program_iterations = self.block_iterations = 0
+        program_changed = True
+
+        while program_changed:
+            self.program_iterations += 1
+            program_changed = False
+
+            for block in self.blocks:
+                self.block_iterations += 1
+                block_changed = True
+
+                while block_changed:
+                    block_changed = False
+
+                    if remove_redundancies(block):
+                        block_changed = True
+
+                    if eliminate_common_subexpressions(block):
+                        block_changed = True
+
+                    if fold_constants(block):
+                        block_changed = True
+
+                    if copy_propagation(block):
+                        block_changed = True
+
+                    if eliminate_dead_code(block):
+                        block_changed = True
+
+                    if block_changed:
+                        program_changed = True
+
+    def find_basic_blocks(self):
+        """Divide the statement list into basic blocks."""
+        self.blocks = find_basic_blocks(self.statements)
+
+        for b in self.blocks:
+            b.debug = self.debug
+
+        # Remove the old statement list, since it will probably change
+        del self.statements
+
+    def perform_dataflow_analysis(self):
+        """Perform dataflow analysis:
+           - Divide the statement list into basic blocks
+           - Generate flow graph
+           - Create liveness sets: def, use, in, out
+           - Create reaching definitions sets: gen, kill, in, out"""
+        self.find_basic_blocks()
+        generate_flow_graph(self.blocks)
+        liveness.create_in_out(self.blocks)
+        reaching_definitions.create_in_out(self.blocks)
+
+    def save(self, filename):
+        """Save the program in the specified file."""
+        f = open(filename, 'w+')
+        f.write(write_statements(self.get_statements()))
+        f.close()

+ 64 - 0
src/reaching_definitions.py

@@ -0,0 +1,64 @@
+from dataflow import BasicBlock as B
+
+
+def get_defs(blocks):
+    """Collect definitions of all registers."""
+    defs = {}
+
+    for b in blocks:
+        for s in b:
+            for reg in s.get_def():
+                if reg not in defs:
+                    defs[reg] = set([s.sid])
+                else:
+                    defs[reg].add(s.sid)
+
+    return defs
+
+
+def create_gen_kill(block, global_defs):
+    block_defs = {}
+
+    # Get the last of each definition series and put it in the `gen' set
+    block.gen_set = set()
+
+    for s in reversed(block):
+        for reg in s.get_def():
+            if reg not in block_defs:
+                block_defs[reg] = s.sid
+                block.gen_set.add(s.sid)
+
+    # Generate kill set
+    block.kill_set = set()
+
+    for reg, statement_ids in global_defs.iteritems():
+        if reg in block_defs:
+            block.kill_set |= statement_ids - set([block_defs[reg]])
+
+
+def create_in_out(blocks):
+    """Generate the `in' and `out' sets of the given blocks using the iterative
+    algorithm from the lecture slides."""
+    # Create gen/kill sets
+    defs = get_defs(blocks)
+
+    for b in blocks:
+        create_gen_kill(b, defs)
+        b.reach_out = b.gen_set
+
+    change = True
+
+    while change:
+        change = False
+
+        for b in blocks:
+            b.reach_in = set()
+
+            for pred in b.edges_from:
+                b.reach_in |= pred.reach_out
+
+            new_out = b.gen_set | (b.reach_in - b.kill_set)
+
+            if new_out != b.reach_out:
+                b.reach_out = new_out
+                change = True

+ 63 - 24
src/statement.py

@@ -38,12 +38,15 @@ class Statement:
     def __repr__(self):  # pragma: nocover
     def __repr__(self):  # pragma: nocover
         return str(self)
         return str(self)
 
 
+    def set_inline_comment(self, comment):
+        self.options['comment'] = comment
+
+    def has_inline_comment(self):
+        return 'comment' in self.options and len(self.options['comment'])
+
     def is_comment(self):
     def is_comment(self):
         return self.stype == 'comment'
         return self.stype == 'comment'
 
 
-    def is_inline_comment(self):
-        return self.is_comment() and self.options['inline']
-
     def is_directive(self):
     def is_directive(self):
         return self.stype == 'directive'
         return self.stype == 'directive'
 
 
@@ -57,13 +60,13 @@ class Statement:
     def is_jump(self):
     def is_jump(self):
         """Check if the statement is a jump."""
         """Check if the statement is a jump."""
         return self.is_command() \
         return self.is_command() \
-               and re.match('^j|jal|beq|bne|blez|bgtz|bltz|bgez|bct|bcf$', \
+               and re.match('^j|jal|beq|bne|blez|bgtz|bltz|bgez|bc1t|bc1f$', \
                             self.name)
                             self.name)
 
 
     def is_branch(self):
     def is_branch(self):
         """Check if the statement is a branch."""
         """Check if the statement is a branch."""
         return self.is_command() \
         return self.is_command() \
-               and re.match('^beq|bne|blez|bgtz|bltz|bgez|bct|bcf$', \
+               and re.match('^beq|bne|blez|bgtz|bltz|bgez|bct|bcf|bc1f|bc1t$',\
                             self.name)
                             self.name)
 
 
     def is_branch_zero(self):
     def is_branch_zero(self):
@@ -82,8 +85,8 @@ class Statement:
 
 
     def is_store(self):
     def is_store(self):
         """Check if the statement is a store instruction."""
         """Check if the statement is a store instruction."""
-        return self.is_command() and self.name in ['sw', 's.d', 'dsw', 's.s', \
-                                                   's.b']
+        return self.is_command() and self.name in ['sw', 'sb', 's.d', 'dsw', \
+                                                   's.s', 's.b']
 
 
     def is_arith(self):
     def is_arith(self):
         """Check if the statement is an arithmetic operation."""
         """Check if the statement is an arithmetic operation."""
@@ -107,6 +110,7 @@ class Statement:
         return self.is_command() \
         return self.is_command() \
                and re.match('^l(w|a|b|bu|\.d|\.s)|dlw$', \
                and re.match('^l(w|a|b|bu|\.d|\.s)|dlw$', \
                             self.name)
                             self.name)
+
     def is_logical(self):
     def is_logical(self):
         """Check if the statement is a logical operator."""
         """Check if the statement is a logical operator."""
         return self.is_command() and re.match('^(xor|or|and)i?$', self.name)
         return self.is_command() and re.match('^(xor|or|and)i?$', self.name)
@@ -150,51 +154,60 @@ class Statement:
 
 
     def get_def(self):
     def get_def(self):
         """Get the variable that this statement defines, if any."""
         """Get the variable that this statement defines, if any."""
-        instr = ['move', 'addu', 'subu', 'li', 'mtc1', 'dmfc1', 'mov.d']
+        instr = ['move', 'addu', 'subu', 'li', 'dmfc1', 'mov.d']
 
 
+        if self.is_command('mtc1'):
+            return [self[1]]
         if self.is_load_non_immediate() or self.is_arith() \
         if self.is_load_non_immediate() or self.is_arith() \
                 or self.is_logical() or self.is_double_arithmetic() \
                 or self.is_logical() or self.is_double_arithmetic() \
                 or self.is_move_from_spec() or self.is_double_unary() \
                 or self.is_move_from_spec() or self.is_double_unary() \
                 or self.is_set_if_less() or self.is_convert() \
                 or self.is_set_if_less() or self.is_convert() \
                 or self.is_truncate() or self.is_load() \
                 or self.is_truncate() or self.is_load() \
                 or self.is_command(*instr):
                 or self.is_command(*instr):
-            return [self[0]]
+            return self[:1]
 
 
         return []
         return []
 
 
     def get_use(self):
     def get_use(self):
         """Get the variables that this statement uses, if any."""
         """Get the variables that this statement uses, if any."""
-        instr = ['addu', 'subu', 'mult', 'div', 'move', 'mtc1', 'mov.d', \
+        instr = ['addu', 'subu', 'mult', 'div', 'move', 'mov.d', \
             'dmfc1']
             'dmfc1']
         use = []
         use = []
 
 
         # Case arg0
         # Case arg0
-        if self.is_branch() or self.is_store() or self.is_compare()\
-                or self.is_command(*['mult', 'div', 'dsz']):
+        if (self.is_branch() \
+                and not self.is_command(*['bc1f', 'bc1t', 'bct', 'bcf'])) \
+                or self.is_store() or self.is_compare() \
+                or self.is_command(*['mult', 'div', 'dsz', 'mtc1']):
             if self.name == 'dsz':
             if self.name == 'dsz':
-                m = re.match('^\d+\(([^)]+)\)$', self[0])
-                use.append(m)
+                m = re.match('^[^(]+\(([^)]+)\)$', self[0])
+
+                if m:
+                    use.append(m.group(1))
             else:
             else:
                 use.append(self[0])
                 use.append(self[0])
         # Case arg1 direct addressing
         # Case arg1 direct addressing
-        if (self.is_branch() and not self.is_branch_zero()) or self.is_shift()\
+        if (self.is_branch() and not self.is_branch_zero() \
+                and not self.is_command(*['bc1f', 'bc1t', 'bct', 'bcf'])) \
+                or self.is_shift() \
                 or self.is_double_arithmetic() or self.is_double_unary() \
                 or self.is_double_arithmetic() or self.is_double_unary() \
                 or self.is_logical() or self.is_convert() \
                 or self.is_logical() or self.is_convert() \
                 or self.is_truncate() or self.is_set_if_less() \
                 or self.is_truncate() or self.is_set_if_less() \
-                or self.is_command(*instr):
+                or self.is_compare() or self.is_command(*instr):
             use.append(self[1])
             use.append(self[1])
         # Case arg1 relative addressing
         # Case arg1 relative addressing
         if self.is_load_non_immediate() or self.is_store():
         if self.is_load_non_immediate() or self.is_store():
-            m = re.match('^\d+\(([^)]+)\)$', self[1])
+            m = re.match('^[^(]+\(([^)]+)\)$', self[1])
+
             if m:
             if m:
-                use.append(m)
+                use.append(m.group(1))
             else:
             else:
                 use.append(self[1])
                 use.append(self[1])
         # Case arg2
         # Case arg2
         if self.is_double_arithmetic() or self.is_set_if_less() \
         if self.is_double_arithmetic() or self.is_set_if_less() \
-                or self.is_logical() \
+                or self.is_logical() or self.is_truncate() \
                 or self.is_command(*['addu', 'subu']):
                 or self.is_command(*['addu', 'subu']):
-            if not isinstance(self[2] , int):
+            if not isinstance(self[2], int):
                     use.append(self[2])
                     use.append(self[2])
 
 
         return use
         return use
@@ -211,7 +224,7 @@ class Statement:
 class Block:
 class Block:
     bid = 1
     bid = 1
 
 
-    def __init__(self, statements=[]):
+    def __init__(self, statements=[], debug=False):
         self.statements = statements
         self.statements = statements
         self.pointer = 0
         self.pointer = 0
 
 
@@ -219,6 +232,8 @@ class Block:
         self.bid = Block.bid
         self.bid = Block.bid
         Block.bid += 1
         Block.bid += 1
 
 
+        self.debug = debug
+
     def __str__(self):
     def __str__(self):
         return '<Block bid=%d statements=%d>' % (self.bid, len(self))
         return '<Block bid=%d statements=%d>' % (self.bid, len(self))
 
 
@@ -244,7 +259,7 @@ class Block:
 
 
     def end(self):
     def end(self):
         """Check if the pointer is at the end of the statement list."""
         """Check if the pointer is at the end of the statement list."""
-        return self.pointer == len(self)
+        return self.pointer >= len(self)
 
 
     def peek(self, count=1):
     def peek(self, count=1):
         """Read the statements until an offset from the current pointer
         """Read the statements until an offset from the current pointer
@@ -255,7 +270,7 @@ class Block:
         return self.statements[self.pointer] if count == 1 \
         return self.statements[self.pointer] if count == 1 \
                else self.statements[self.pointer:self.pointer + count]
                else self.statements[self.pointer:self.pointer + count]
 
 
-    def replace(self, count, replacement, start=None):
+    def replace(self, count, replacement, start=None, message=''):
         """Replace the given range start-(start + count) with the given
         """Replace the given range start-(start + count) with the given
         statement list, and move the pointer to the first statement after the
         statement list, and move the pointer to the first statement after the
         replacement."""
         replacement."""
@@ -265,15 +280,35 @@ class Block:
         if start == None:
         if start == None:
             start = self.pointer - 1
             start = self.pointer - 1
 
 
+        # Add a message in inline comments
+        if self.debug:
+            if len(message):
+                message = ' ' + message
+
+                if len(replacement):
+                    replacement[0].set_inline_comment(message)
+
+                    for s in replacement[1:]:
+                        s.set_inline_comment('|')
+                else:
+                    replacement = [Statement('comment', message)]
+            elif not len(replacement):
+                # Statement is removed, comment it
+                replacement = [Statement('comment', str(b)) \
+                               for b in self.statements[start:start + count]]
+
         before = self.statements[:start]
         before = self.statements[:start]
         after = self.statements[start + count:]
         after = self.statements[start + count:]
         self.statements = before + replacement + after
         self.statements = before + replacement + after
         self.pointer = start + len(replacement)
         self.pointer = start + len(replacement)
 
 
-    def insert(self, statement, index=None):
+    def insert(self, statement, index=None, message=''):
         if index == None:
         if index == None:
             index = self.pointer
             index = self.pointer
 
 
+        if self.debug and len(message):
+            statement.set_inline_comment(' ' + message)
+
         self.statements.insert(index, statement)
         self.statements.insert(index, statement)
 
 
     def apply_filter(self, callback):
     def apply_filter(self, callback):
@@ -284,4 +319,8 @@ class Block:
     def reverse_statements(self):
     def reverse_statements(self):
         """Reverse the statement list and reset the pointer."""
         """Reverse the statement list and reset the pointer."""
         self.statements = self.statements[::-1]
         self.statements = self.statements[::-1]
+        self.reset()
+
+    def reset(self):
+        """Reset the internal pointer."""
         self.pointer = 0
         self.pointer = 0

+ 7 - 0
src/todo.txt

@@ -1 +1,8 @@
 * Apart python script maken die de generator genereert dmv config file
 * Apart python script maken die de generator genereert dmv config file
+* 'verbose' argument in main.py
+
+* Gaat fout:
+
+li  $5,0x00008000  ->  l.d  $f0,32768($4)
+addu    $4,$4,$5
+l.d $f0,0($4)

+ 43 - 17
src/writer.py

@@ -1,50 +1,76 @@
 from math import ceil
 from math import ceil
 
 
 
 
+TABSIZE = 4                 # Size in spaces of a single tab
+INLINE_COMMENT_LEVEL = 6    # Number of tabs to inline comment level
+COMMAND_SIZE = 8            # Default length of a command name, used for
+                            # indenting
+ADD_COMMENT_BLOCKS = True   # Whether to add newlines before and after
+                            # non-inline comment
+ADD_ARGUMENT_SPACE = False  # Whether to add a space between command arguments
+                            # and the previous comma
+
+
 def write_statements(statements):
 def write_statements(statements):
     """Write a list of statements to valid assembly code."""
     """Write a list of statements to valid assembly code."""
     out = ''
     out = ''
     indent_level = 0
     indent_level = 0
-    prevline = ''
+    prev_comment = False
 
 
     for i, s in enumerate(statements):
     for i, s in enumerate(statements):
-        newline = '\n' if i else ''
+        current_comment = False
 
 
         if s.is_label():
         if s.is_label():
             line = s.name + ':'
             line = s.name + ':'
             indent_level = 1
             indent_level = 1
         elif s.is_comment():
         elif s.is_comment():
-            line = '#' + s.name
-
-            if s.is_inline_comment():
-                l = len(prevline.expandtabs(4))
-                tabs = int(ceil((24 - l) / 4.)) + 1
-                newline = '\t' * tabs
-            else:
-                line = '\t' * indent_level + line
+            line = '\t' * indent_level + '#' + s.name
+            current_comment = s.options.get('block', True)
         elif s.is_directive():
         elif s.is_directive():
             line = '\t' + s.name
             line = '\t' + s.name
         elif s.is_command():
         elif s.is_command():
             line = '\t' + s.name
             line = '\t' + s.name
 
 
+            # If there are arguments, add tabs until the 8 character limit has
+            # been reached. If the command name is 8 or more characters long,
+            # add a single space
             if len(s):
             if len(s):
-                if len(s.name) < 8:
-                    line += '\t'
+                l = len(s.name)
+
+                if l < COMMAND_SIZE:
+                    line += '\t' * int(ceil((COMMAND_SIZE - l)
+                                       / float(TABSIZE)))
                 else:
                 else:
                     line += ' '
                     line += ' '
 
 
-                line += ','.join(s.args)
+                delim = ', ' if ADD_ARGUMENT_SPACE else ','
+                line += delim.join(map(str, s))
         else:
         else:
             raise Exception('Unsupported statement type "%s"' % s.stype)
             raise Exception('Unsupported statement type "%s"' % s.stype)
 
 
-        out += newline + line
-        prevline = line
+        # Add the inline comment, if there is any
+        if s.has_inline_comment():
+            start = INLINE_COMMENT_LEVEL * TABSIZE
+            diff = start - len(line.expandtabs(TABSIZE))
+
+            # The comment must not be directly adjacent to the command itself
+            tabs = int(ceil(diff / float(TABSIZE))) + 1 if diff > 0 else  1
 
 
-    # Add newline at end of file
-    out += '\n'
+            line += '\t' * tabs + '#' + s.options['comment']
+
+        # Add newline at end of command
+        line += '\n'
+
+        if ADD_COMMENT_BLOCKS:
+            if prev_comment ^ current_comment:
+                out += '\n'
+
+        out += line
+        prev_comment = current_comment
 
 
     return out
     return out
 
 
+
 def write_to_file(filename, statements):
 def write_to_file(filename, statements):
     """Convert a list of statements to valid assembly code and write it to a
     """Convert a list of statements to valid assembly code and write it to a
     file."""
     file."""

+ 2 - 0
test.py

@@ -1,3 +1,5 @@
+#!/usr/bin/python
 from testrunner import main
 from testrunner import main
 import sys
 import sys
+
 main(sys.argv[1:])
 main(sys.argv[1:])

+ 1 - 0
tests/rules.mk

@@ -1,6 +1,7 @@
 TESTS=$(wildcard tests/test_*.py)
 TESTS=$(wildcard tests/test_*.py)
 COVERAGE_OUTPUT_DIR := coverage
 COVERAGE_OUTPUT_DIR := coverage
 OMIT := /usr/share/pyshared/*,test*,*__init__.py
 OMIT := /usr/share/pyshared/*,test*,*__init__.py
+CLEAN := $(CLEAN) tests/*.pyc
 
 
 ifeq ($(findstring python-coverage,$(wildcard /usr/bin/*)), python-coverage)
 ifeq ($(findstring python-coverage,$(wildcard /usr/bin/*)), python-coverage)
 COVERAGE=/usr/bin/python-coverage
 COVERAGE=/usr/bin/python-coverage

+ 65 - 0
tests/test_dag.py

@@ -0,0 +1,65 @@
+import unittest
+
+from src.statement import Statement as S
+from src.dataflow import BasicBlock as B
+from src.dag import Dag, DagNode, DagLeaf
+
+
+class TestDag(unittest.TestCase):
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_dag_unary(self):
+        dag = Dag(B([S('command', 'neg.d', '$rd', '$rs')]))
+        expect = Dag([])
+        expect.nodes = [DagLeaf('$rs'), DagNode('neg.d', '$rd', \
+                        DagLeaf('$rs'))]
+
+        self.assertEqualDag(dag, expect)
+
+    def test_dag_binary(self):
+        dag = Dag(B([S('command', 'addu', '$rd', '$r1', '$r2')]))
+        expect = Dag([])
+        expect.nodes = [DagLeaf('$r1'),
+                        DagLeaf('$r2'),
+                        DagNode('addu', '$rd', DagLeaf('$r1'), DagLeaf('$r2'))]
+
+        self.assertEqualDag(dag, expect)
+
+#    def test_dag_combinednode(self):
+#        dag = Dag(B([S('command', 'mult', '$rd1', '$r1', '$r2'),
+#                     S('command', 'mult', '$rd2', '$r1', '$r2')]))
+#        expect = Dag([])
+#        multnode = DagNode('mult',
+#                           DagLeaf('$r1'),
+#                           DagLeaf('$r2'))
+#        multnode.labels = ['$rd1', '$rd2']
+#        expect.nodes = [DagLeaf('$r1'),
+#                        DagLeaf('$r2'),
+#                        multnode]
+#
+#        self.assertEqualDag(dag, expect)
+
+    def assertEqualDag(self, dag1, dag2):
+        self.assertEqual(len(dag1.nodes), len(dag2.nodes))
+
+        for node1, node2 in zip(dag1.nodes, dag2.nodes):
+            self.assertEqualNodes(node1, node2)
+
+    def assertEqualNodes(self, node1, node2):
+        if isinstance(node1, DagLeaf):
+            self.assertIsInstance(node2, DagLeaf)
+            self.assertEqual(node1.reg, node2.reg)
+        elif isinstance(node2, DagLeaf):
+            raise AssertionError
+        else:
+            self.assertEqual(node1.op, node2.op)
+            self.assertEqual(node1.labels, node2.labels)
+            self.assertEqual(len(node1.nodes), len(node2.nodes))
+
+            for child1, child2 in zip(node1.nodes, node2.nodes):
+                self.assertEqualNodes(child1, child2)

+ 1 - 137
tests/test_dataflow.py

@@ -2,8 +2,7 @@ import unittest
 
 
 from src.statement import Statement as S
 from src.statement import Statement as S
 from src.dataflow import BasicBlock as B, find_leaders, find_basic_blocks, \
 from src.dataflow import BasicBlock as B, find_leaders, find_basic_blocks, \
-        generate_flow_graph, Dag, DagNode, DagLeaf, get_defs, \
-        reaching_definitions
+        generate_flow_graph
 
 
 
 
 class TestDataflow(unittest.TestCase):
 class TestDataflow(unittest.TestCase):
@@ -25,90 +24,6 @@ class TestDataflow(unittest.TestCase):
                 [B(s[:2]).statements, B(s[2:4]).statements, \
                 [B(s[:2]).statements, B(s[2:4]).statements, \
                  B(s[4:]).statements])
                  B(s[4:]).statements])
 
 
-    def test_get_defs(self):
-        s1 = S('command', 'add', '$3', '$1', '$2')
-        s2 = S('command', 'move', '$1', '$3')
-        s3 = S('command', 'move', '$3', '$2')
-        s4 = S('command', 'li', '$4', '0x00000001')
-        block = B([s1, s2, s3, s4])
-        self.assertEqual(get_defs([block]), {
-            '$3': set([s1.sid, s3.sid]),
-            '$1': set([s2.sid]),
-            '$4': set([s4.sid])
-        })
-
-    def test_create_gen_kill_simple(self):
-        s1 = S('command', 'addu', '$3', '$1', '$2')
-        s2 = S('command', 'addu', '$1', '$3', 10)
-        s3 = S('command', 'subu', '$3', '$1', 5)
-        s4 = S('command', 'li', '$4', '0x00000001')
-        block = B([s1, s2, s3, s4])
-        block.create_gen_kill(get_defs([block]))
-        self.assertEqual(block.gen_set, set([s2.sid, s3.sid, s4.sid]))
-        self.assertEqual(block.kill_set, set([s1.sid]))
-
-
-    def test_create_gen_kill_between_blocks(self):
-        s11 = S('command', 'li', 'a', 3)
-        s12 = S('command', 'li', 'b', 5)
-        s13 = S('command', 'li', 'd', 4)
-        s14 = S('command', 'li', 'x', 100)
-        s15 = S('command', 'blt', 'a', 'b', 'L1')
-        b1 = B([s11, s12, s13, s14, s15])
-
-        s21 = S('command', 'addu', 'c', 'a', 'b')
-        s22 = S('command', 'li', 'd', 2)
-        b2 = B([s21, s22])
-
-        s31 = S('label', 'L1')
-        s32 = S('command', 'li', 'c', 4)
-        s33 = S('command', 'mult', 'b', 'd')
-        s34 = S('command', 'mflo', 'temp')
-        s35 = S('command', 'addu', 'return', 'temp', 'c')
-        b3 = B([s31, s32, s33, s34, s35])
-
-        defs = get_defs([b1, b2, b3])
-        b1.create_gen_kill(defs)
-        b2.create_gen_kill(defs)
-        b3.create_gen_kill(defs)
-
-        self.assertEqual(b1.gen_set, set([s11.sid, s12.sid, s13.sid, s14.sid]))
-        self.assertEqual(b1.kill_set, set([s22.sid]))
-
-        self.assertEqual(b2.gen_set, set([s21.sid, s22.sid]))
-        self.assertEqual(b2.kill_set, set([s13.sid, s32.sid]))
-
-        self.assertEqual(b3.gen_set, set([s32.sid, s34.sid, s35.sid]))
-        self.assertEqual(b3.kill_set, set([s21.sid]))
-
-
-    def test_reaching_definitions(self):
-        s11 = S('command', 'li', 'a', 3)
-        s12 = S('command', 'li', 'b', 5)
-        s13 = S('command', 'li', 'd', 4)
-        s14 = S('command', 'li', 'x', 100)
-        s15 = S('command', 'blt', 'a', 'b', 'L1')
-        b1 = B([s11, s12, s13, s14, s15])
-
-        s21 = S('command', 'addu', 'c', 'a', 'b')
-        s22 = S('command', 'li', 'd', 2)
-        b2 = B([s21, s22])
-
-        s31 = S('label', 'L1')
-        s32 = S('command', 'li', 'c', 4)
-        s33 = S('command', 'mult', 'b', 'd')
-        s34 = S('command', 'mflo', 'temp')
-        s35 = S('command', 'addu', 'return', 'temp', 'c')
-        b3 = B([s31, s32, s33, s34, s35])
-
-        reaching_definitions([b1, b2, b3])
-        self.assertEqual(b1.in_set, set())
-        self.assertEqual(b1.out_set, set([s11.sid, s12.sid, s13.sid]))
-        self.assertEqual(b2.in_set, set([s11.sid, s12.sid]))
-        self.assertEqual(b2.out_set, set([s12.sid, s22.sid]))
-        self.assertEqual(b3.in_set, set([s12.sid, s22.sid]))
-        self.assertEqual(b3.out_set, set())
-
     def test_generate_flow_graph_simple(self):
     def test_generate_flow_graph_simple(self):
         b1 = B([S('command', 'foo'), S('command', 'j', 'b2')])
         b1 = B([S('command', 'foo'), S('command', 'j', 'b2')])
         b2 = B([S('label', 'b2'), S('command', 'bar')])
         b2 = B([S('label', 'b2'), S('command', 'bar')])
@@ -129,54 +44,3 @@ class TestDataflow(unittest.TestCase):
         self.assertEqual(b2.edges_to, [b3])
         self.assertEqual(b2.edges_to, [b3])
         self.assertIn(b1, b3.edges_from)
         self.assertIn(b1, b3.edges_from)
         self.assertIn(b2, b3.edges_from)
         self.assertIn(b2, b3.edges_from)
-
-    def test_dag_unary(self):
-        dag = Dag(B([S('command', 'neg.d', '$rd', '$rs')]))
-        expect = Dag([])
-        expect.nodes = [DagLeaf('$rs'), DagNode('neg.d', '$rd', \
-                        DagLeaf('$rs'))]
-
-        self.assertEqualDag(dag, expect)
-
-    def test_dag_binary(self):
-        dag = Dag(B([S('command', 'addu', '$rd', '$r1', '$r2')]))
-        expect = Dag([])
-        expect.nodes = [DagLeaf('$r1'),
-                        DagLeaf('$r2'),
-                        DagNode('addu', '$rd', DagLeaf('$r1'), DagLeaf('$r2'))]
-
-        self.assertEqualDag(dag, expect)
-
-#    def test_dag_combinednode(self):
-#        dag = Dag(B([S('command', 'mult', '$rd1', '$r1', '$r2'),
-#                     S('command', 'mult', '$rd2', '$r1', '$r2')]))
-#        expect = Dag([])
-#        multnode = DagNode('mult',
-#                           DagLeaf('$r1'),
-#                           DagLeaf('$r2'))
-#        multnode.labels = ['$rd1', '$rd2']
-#        expect.nodes = [DagLeaf('$r1'),
-#                        DagLeaf('$r2'),
-#                        multnode]
-#
-#        self.assertEqualDag(dag, expect)
-
-    def assertEqualDag(self, dag1, dag2):
-        self.assertEqual(len(dag1.nodes), len(dag2.nodes))
-
-        for node1, node2 in zip(dag1.nodes, dag2.nodes):
-            self.assertEqualNodes(node1, node2)
-
-    def assertEqualNodes(self, node1, node2):
-        if isinstance(node1, DagLeaf):
-            self.assertIsInstance(node2, DagLeaf)
-            self.assertEqual(node1.reg, node2.reg)
-        elif isinstance(node2, DagLeaf):
-            raise AssertionError
-        else:
-            self.assertEqual(node1.op, node2.op)
-            self.assertEqual(node1.labels, node2.labels)
-            self.assertEqual(len(node1.nodes), len(node2.nodes))
-
-            for child1, child2 in zip(node1.nodes, node2.nodes):
-                self.assertEqualNodes(child1, child2)

+ 65 - 0
tests/test_liveness.py

@@ -0,0 +1,65 @@
+import unittest
+
+from src.statement import Statement as S
+from src.dataflow import BasicBlock as B, find_basic_blocks, \
+        generate_flow_graph
+from src.liveness import create_use_def, create_in_out
+
+
+class TestLiveness(unittest.TestCase):
+    # Tests for create_use_def and create_in_out from src.liveness.
+
+    def test_create_use_def(self):
+        # Single block: $1 is read by s1 before s2 writes it, and $3 is
+        # written by s1 before s2 reads it. The expected sets list each
+        # register only under its first kind of access.
+        s1 = S('command', 'addu', '$3', '$1', '$2')
+        s2 = S('command', 'addu', '$1', '$3', 10)
+        s3 = S('command', 'subu', '$3', '$1', 5)
+        s4 = S('command', 'li', '$4', '0x00000001')
+        block = B([s1, s2, s3, s4])
+
+        create_use_def(block)
+
+        self.assertEqual(block.use_set, set(['$1', '$2']))
+        self.assertEqual(block.def_set, set(['$3', '$4']))
+
+    def test_create_in_out(self):
+        # First block: ends with a conditional branch to label L1.
+        s11 = S('command', 'li', 'a', 3)
+        s12 = S('command', 'li', 'b', 5)
+        s13 = S('command', 'li', 'd', 4)
+        s14 = S('command', 'li', 'x', 100)
+        s15 = S('command', 'beq', 'a', 'b', 'L1')
+
+        # Second block: the statements between the branch and the label.
+        s21 = S('command', 'addu', 'c', 'a', 'b')
+        s22 = S('command', 'li', 'd', 2)
+
+        # Third block: starts at label L1.
+        s31 = S('label', 'L1')
+        s32 = S('command', 'li', 'c', 4)
+        s33 = S('command', 'mult', 'b', 'd')
+        s34 = S('command', 'mflo', 'temp')
+        s35 = S('command', 'addu', 'return', 'temp', 'c')
+
+        b1, b2, b3 = find_basic_blocks([s11, s12, s13, s14, s15, s21, s22,
+                                        s31, s32, s33, s34, s35])
+
+        # The flow graph edges must exist before the in/out sets are built.
+        generate_flow_graph([b1, b2, b3])
+        create_in_out([b1, b2, b3])
+
+        # Per-block use/def sets.
+        self.assertEqual(b1.use_set, set())
+        self.assertEqual(b1.def_set, set(['a', 'b', 'd', 'x']))
+
+        self.assertEqual(b2.use_set, set(['a', 'b']))
+        self.assertEqual(b2.def_set, set(['c', 'd']))
+
+        self.assertEqual(b3.use_set, set(['b', 'd']))
+        self.assertEqual(b3.def_set, set(['c', 'temp', 'return']))
+
+        # Live-in/live-out sets. 'x' is defined in b1 but is expected to be
+        # absent from b1.live_out because no later block reads it.
+        self.assertEqual(b1.live_in, set())
+        self.assertEqual(b1.live_out, set(['a', 'b', 'd']))
+        self.assertEqual(b2.live_in, set(['a', 'b']))
+        self.assertEqual(b2.live_out, set(['b', 'd']))
+        self.assertEqual(b3.live_in, set(['b', 'd']))
+        self.assertEqual(b3.live_out, set())

+ 12 - 1
tests/test_optimize.py

@@ -1,10 +1,21 @@
 import unittest
 import unittest
 
 
 from src.optimize.redundancies import remove_redundant_jumps
 from src.optimize.redundancies import remove_redundant_jumps
-from src.optimize import optimize_block
+from src.program import Program
 from src.statement import Statement as S, Block as B
 from src.statement import Statement as S, Block as B
 
 
 
 
+def optimize_block(block):
+    """Optimize a basic block using a Program object.
+
+    Test helper: wraps `block` in a Program created from an empty statement
+    list, runs Program.optimize_blocks() on it and returns the program's
+    `blocks` attribute (not the single block itself).
+    """
+    program = Program([])
+
+    program.blocks = [block]
+    # NOTE(review): presumably Program decides between its `statements` and
+    # `blocks` representation by attribute presence, which is why the
+    # attribute is deleted here -- confirm against src/program.py.
+    del program.statements
+    program.optimize_blocks()
+
+    return program.blocks
+
+
 class TestOptimize(unittest.TestCase):
 class TestOptimize(unittest.TestCase):
 
 
     def setUp(self):
     def setUp(self):

+ 6 - 2
tests/test_optimize_advanced.py

@@ -3,7 +3,9 @@ from copy import copy
 
 
 from src.optimize.advanced import eliminate_common_subexpressions, \
 from src.optimize.advanced import eliminate_common_subexpressions, \
         fold_constants, copy_propagation, algebraic_transformations
         fold_constants, copy_propagation, algebraic_transformations
-from src.statement import Statement as S, Block as B
+from src.statement import Statement as S
+from src.dataflow import BasicBlock as B, generate_flow_graph
+import src.liveness as liveness
 
 
 
 
 class TestOptimizeAdvanced(unittest.TestCase):
 class TestOptimizeAdvanced(unittest.TestCase):
@@ -22,6 +24,7 @@ class TestOptimizeAdvanced(unittest.TestCase):
         e = [S('command', 'addu', '$8', '$regA', '$regB'), \
         e = [S('command', 'addu', '$8', '$regA', '$regB'), \
              S('command', 'move', '$regC', '$8'), \
              S('command', 'move', '$regC', '$8'), \
              S('command', 'move', '$regD', '$8')]
              S('command', 'move', '$regD', '$8')]
+        liveness.create_in_out([b])
         eliminate_common_subexpressions(b)
         eliminate_common_subexpressions(b)
         self.assertEqual(b.statements, e)
         self.assertEqual(b.statements, e)
 
 
@@ -30,6 +33,7 @@ class TestOptimizeAdvanced(unittest.TestCase):
                S('command', 'li', '$regA', '0x00000001'),
                S('command', 'li', '$regA', '0x00000001'),
                S('command', 'addu', '$regD', '$regA', '$regB')])
                S('command', 'addu', '$regD', '$regA', '$regB')])
         e = copy(b.statements)
         e = copy(b.statements)
+        liveness.create_in_out([b])
         eliminate_common_subexpressions(b)
         eliminate_common_subexpressions(b)
         self.assertEqual(b.statements, e)
         self.assertEqual(b.statements, e)
 
 
@@ -49,7 +53,7 @@ class TestOptimizeAdvanced(unittest.TestCase):
                    self.foo,
                    self.foo,
                    S('command', 'addu', '$3', '$2', '$4'),
                    S('command', 'addu', '$3', '$2', '$4'),
                    self.bar])
                    self.bar])
-                   
+
     def test_copy_propagation_other_arg(self):
     def test_copy_propagation_other_arg(self):
         block = B([self.foo,
         block = B([self.foo,
                    S('command', 'move', '$1', '$2'),
                    S('command', 'move', '$1', '$2'),

+ 83 - 0
tests/test_reaching_definitions.py

@@ -0,0 +1,83 @@
+import unittest
+
+from src.statement import Statement as S
+from src.dataflow import BasicBlock as B, find_basic_blocks, \
+        generate_flow_graph
+from src.reaching_definitions import get_defs, create_gen_kill, create_in_out
+
+
+class TestReachingDefinitions(unittest.TestCase):
+    # Tests for get_defs, create_gen_kill and create_in_out from
+    # src.reaching_definitions.
+
+    def test_get_defs(self):
+        # get_defs is expected to map each written register to the set of
+        # statement ids (sid) that write it.
+        s1 = S('command', 'add', '$3', '$1', '$2')
+        s2 = S('command', 'move', '$1', '$3')
+        s3 = S('command', 'move', '$3', '$2')
+        s4 = S('command', 'li', '$4', '0x00000001')
+        block = B([s1, s2, s3, s4])
+
+        self.assertEqual(get_defs([block]), {
+            '$3': set([s1.sid, s3.sid]),
+            '$1': set([s2.sid]),
+            '$4': set([s4.sid])
+        })
+
+    def test_create_gen_kill(self):
+        # $3 is written by both s1 and s3: the expected gen set holds only
+        # the later write (s3) and the expected kill set holds s1.
+        s1 = S('command', 'addu', '$3', '$1', '$2')
+        s2 = S('command', 'addu', '$1', '$3', 10)
+        s3 = S('command', 'subu', '$3', '$1', 5)
+        s4 = S('command', 'li', '$4', '0x00000001')
+        block = B([s1, s2, s3, s4])
+
+        create_gen_kill(block, get_defs([block]))
+
+        self.assertEqual(block.gen_set, set([s2.sid, s3.sid, s4.sid]))
+        self.assertEqual(block.kill_set, set([s1.sid]))
+
+    def test_create_in_out(self):
+        # First block: ends with a conditional branch to label L1.
+        s11 = S('command', 'li', 'a', 3)
+        s12 = S('command', 'li', 'b', 5)
+        s13 = S('command', 'li', 'd', 4)
+        s14 = S('command', 'li', 'x', 100)
+        s15 = S('command', 'beq', 'a', 'b', 'L1')
+
+        # Second block: the statements between the branch and the label.
+        s21 = S('command', 'addu', 'c', 'a', 'b')
+        s22 = S('command', 'li', 'd', 2)
+
+        # Third block: starts at label L1.
+        s31 = S('label', 'L1')
+        s32 = S('command', 'li', 'c', 4)
+        s33 = S('command', 'mult', 'b', 'd')
+        s34 = S('command', 'mflo', 'temp')
+        s35 = S('command', 'addu', 'return', 'temp', 'c')
+
+        b1, b2, b3 = find_basic_blocks([s11, s12, s13, s14, s15, s21, s22,
+                                        s31, s32, s33, s34, s35])
+
+        # The flow graph edges must exist before the in/out sets are built.
+        generate_flow_graph([b1, b2, b3])
+        create_in_out([b1, b2, b3])
+
+        # Gen/kill sets per block. A block is expected to kill the
+        # definitions made in other blocks of registers it writes itself
+        # (e.g. b1 writes 'd', so it kills s22).
+        self.assertEqual(b1.gen_set, set([s11.sid, s12.sid, s13.sid,
+                                            s14.sid]))
+        self.assertEqual(b1.kill_set, set([s22.sid]))
+        self.assertEqual(b2.gen_set, set([s21.sid, s22.sid]))
+        self.assertEqual(b2.kill_set, set([s13.sid, s32.sid]))
+        self.assertEqual(b3.gen_set, set([s32.sid, s34.sid, s35.sid]))
+        self.assertEqual(b3.kill_set, set([s21.sid]))
+
+        # Reaching definitions entering and leaving each block.
+        self.assertEqual(b1.reach_in, set())
+        self.assertEqual(b1.reach_out, set([s11.sid, s12.sid, s13.sid,
+                                            s14.sid]))
+        self.assertEqual(b2.reach_in, set([s11.sid, s12.sid, s13.sid,
+                                            s14.sid]))
+        self.assertEqual(b2.reach_out, set([s21.sid, s22.sid, s11.sid,
+                                            s12.sid, s14.sid]))
+        self.assertEqual(b3.reach_in, set([s21.sid, s22.sid, s11.sid,
+                                            s12.sid, s13.sid, s14.sid]))
+        self.assertEqual(b3.reach_out, set([s32.sid, s34.sid, s35.sid,
+                                            s22.sid, s11.sid, s12.sid,
+                                            s13.sid, s14.sid]))

+ 77 - 20
tests/test_statement.py

@@ -36,9 +36,10 @@ class TestStatement(unittest.TestCase):
         self.assertFalse(S('comment', 'foo', inline=False).is_label())
         self.assertFalse(S('comment', 'foo', inline=False).is_label())
         self.assertFalse(S('directive', 'foo').is_command())
         self.assertFalse(S('directive', 'foo').is_command())
 
 
-    def test_is_inline_comment(self):
-        self.assertTrue(S('comment', 'foo', inline=True).is_inline_comment())
-        self.assertFalse(S('comment', 'foo', inline=False).is_inline_comment())
+    def test_has_inline_comment(self):
+        # has_inline_comment() is expected to be true only for a non-empty
+        # `comment` keyword option; an empty string or a missing option both
+        # count as "no inline comment".
+        self.assertTrue(S('comment', 'foo', comment='a').has_inline_comment())
+        self.assertFalse(S('comment', 'foo', comment='').has_inline_comment())
+        self.assertFalse(S('comment', 'foo').has_inline_comment())
 
 
     def test_jump_target(self):
     def test_jump_target(self):
         self.assertEqual(S('command', 'j', 'foo').jump_target(), 'foo')
         self.assertEqual(S('command', 'j', 'foo').jump_target(), 'foo')
@@ -93,20 +94,76 @@ class TestStatement(unittest.TestCase):
         self.assertTrue(S('command', 'addu', '$1', '$2', '$3').is_arith())
         self.assertTrue(S('command', 'addu', '$1', '$2', '$3').is_arith())
         self.assertFalse(S('command', 'foo').is_arith())
         self.assertFalse(S('command', 'foo').is_arith())
         self.assertFalse(S('label', 'addu').is_arith())
         self.assertFalse(S('label', 'addu').is_arith())
-        
-    def test_get_def(self):
-        self.assertEqual(S('command', 'move', '$1', '$2').get_def(), ['$1'])
-        self.assertEqual(S('command', 'subu', '$1', '$2').get_def(), ['$1'])
-        self.assertEqual(S('command', 'addu','$1','$2','$3').get_def(), ['$1'])
-        self.assertEqual(S('command', 'sll','$1','$2','$3').get_def(), ['$1'])
-        self.assertEqual(S('command', 'srl','$1','$2','$3').get_def(), ['$1'])
-        self.assertEqual(S('command', 'la', '$1','16($fp)').get_def(), ['$1'])
-        self.assertEqual(S('command', 'li', '$1','16($fp)').get_def(), ['$1'])
-        self.assertEqual(S('command','add.d', '$1','$2','$3').get_def(),['$1'])
-        self.assertEqual(S('command','neg.d', '$1','$2').get_def(),['$1'])
-        self.assertEqual(S('command','sub.d','$1','$2', '$3').get_def(),['$1'])
-        self.assertEqual(S('command','slt', '$1','$2').get_def(),['$1'])
-        self.assertEqual(S('command','xori', '$1','$2', '0x0000').get_def(), \
-                                                                     ['$1']) 
-        self.assertEqual(S('command','mov.d', '$1','$2').get_def(), ['$1'])
-        self.assertEqual(S('command','dmfc1', '$1','$f0').get_def(), ['$1'])
+
+    def test_get_def_true(self):
+        # Every command listed here is expected to report 'a' as its only
+        # defined register.
+        expected = ['a']
+        commands = [
+            ('move', 'a', 'b'),
+            ('subu', 'a', 'b'),
+            ('addu', 'a', 'b', 'c'),
+            ('sll', 'a', 'b', 'c'),
+            ('srl', 'a', 'b', 'c'),
+            ('la', 'a', '16($fp)'),
+            ('li', 'a', '16($fp)'),
+            ('lw', 'a', 'b'),
+            ('l.d', 'a', 'b'),
+            ('add.d', 'a', 'b', 'c'),
+            ('neg.d', 'a', 'b'),
+            ('sub.d', 'a', 'b', 'c'),
+            ('slt', 'a', 'b'),
+            ('xori', 'a', 'b', '0x0000'),
+            ('mov.d', 'a', 'b'),
+            ('dmfc1', 'a', '$f0'),
+            # mtc1 is the one entry whose defined register is the second
+            # argument rather than the first.
+            ('mtc1', 'b', 'a'),
+            ('trunc.w.d', 'a', 'b', 'c'),
+        ]
+
+        for arguments in commands:
+            self.assertEqual(S('command', *arguments).get_def(), expected)
+
+    def test_get_def_false(self):
+        # Conditional branches are expected to define no register at all.
+        for branch in ('bne', 'beq'):
+            statement = S('command', branch, 'a', 'b', 'L1')
+            self.assertEqual(statement.get_def(), [])
+
+    def test_get_use_true(self):
+        # Each entry pairs the arguments of a 'command' statement with the
+        # list get_use() is expected to return for it.
+        one = ['$1']
+        two = ['$1', '$2']
+
+        cases = [
+            (('addu', '$3', '$1', '$2'), two),
+            (('subu', '$3', '$1', '$2'), two),
+            (('mult', '$1', '$2'), two),
+            (('div', '$1', '$2'), two),
+            (('move', '$2', '$1'), one),
+            (('beq', '$1', '$2', '$L1'), two),
+            (('bne', '$1', '$2', '$L1'), two),
+            (('sll', '$2', '$1', 2), one),
+            # Loads: the register inside an offset expression is the use.
+            (('lb', '$2', '10($1)'), one),
+            (('lw', '$2', '10($1)'), one),
+            (('la', '$2', '10($1)'), one),
+            (('lb', '$2', 'n.7'), ['n.7']),
+            (('lbu', '$2', '10($1)'), one),
+            (('l.d', '$2', '10($1)'), one),
+            # Stores: both the stored register and the address register.
+            (('s.d', '$1', '10($2)'), two),
+            (('s.s', '$1', '10($2)'), two),
+            (('sb', '$1', '10($2)'), two),
+            (('mtc1', '$1', '$2'), one),
+            (('add.d', '$3', '$1', '$2'), two),
+            (('sub.d', '$3', '$1', '$2'), two),
+            (('div.d', '$3', '$1', '$2'), two),
+            (('mul.d', '$3', '$1', '$2'), two),
+            (('neg.d', '$2', '$1'), one),
+            (('abs.d', '$2', '$1'), one),
+            (('dsz', '10($1)', '$2'), one),
+            (('dsw', '$1', '10($2)'), two),
+            (('c.lt.d', '$1', '$2'), two),
+            (('bgez', '$1', '$2'), one),
+            (('bltz', '$1', '$2'), one),
+            (('trunc.w.d', '$3', '$1', '$2'), two),
+        ]
+
+        for arguments, expected in cases:
+            self.assertEqual(S('command', *arguments).get_use(), expected)

+ 73 - 0
tests/test_writer.py

@@ -0,0 +1,73 @@
+import unittest
+
+from src.writer import write_statements
+from src.statement import Statement as S, Block as B
+
+
+class TestWriter(unittest.TestCase):
+    # Tests for the text that write_statements produces for commands,
+    # labels, comments and directives.
+
+    def setUp(self):
+        self.foo = S('command', 'move', '$regA', '$regB')
+        self.bar = S('command', 'addu', '$regC', '$regA', '$regB')
+
+    def tearDown(self):
+        del self.foo
+        del self.bar
+
+    def test_writer_one(self):
+        # A command is written as: tab, name, tab, comma-separated arguments.
+        output = write_statements([self.foo])
+        expect = "\tmove\t$regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_longname(self):
+        # For this long command name a single space, not a tab, is expected
+        # between the name and its arguments.
+        command = S('command', 'movemovemove', '$regA', '$regB')
+        output = write_statements([command])
+        expect = "\tmovemovemove $regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_several(self):
+        output = write_statements([self.foo, self.bar, self.foo])
+        expect = "\tmove\t$regA,$regB\n" \
+                 + "\taddu\t$regC,$regA,$regB\n" \
+                 + "\tmove\t$regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_with_label(self):
+        # Labels are written unindented and followed by a colon.
+        label = S('label', '$L1')
+        output = write_statements([self.foo, label, self.bar])
+        expect = "\tmove\t$regA,$regB\n" \
+                 + "$L1:\n" \
+                 + "\taddu\t$regC,$regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_with_comment(self):
+        # A stand-alone comment is surrounded by blank lines.
+        comment = S('comment', 'tralala')
+        output = write_statements([self.foo, comment, self.bar])
+        expect = "\tmove\t$regA,$regB\n" \
+                 + "\n#tralala\n\n" \
+                 + "\taddu\t$regC,$regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_with_comment_non_tabbed(self):
+        # Same as above, but with the comment as the very first statement.
+        comment = S('comment', 'tralala')
+        output = write_statements([comment, self.foo, self.bar])
+        expect = "\n#tralala\n\n" \
+                 + "\tmove\t$regA,$regB\n" \
+                 + "\taddu\t$regC,$regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_with_inlinecomment(self):
+        # An inline comment is appended to the command's own line after two
+        # tabs.
+        self.foo.options['comment'] = 'tralala'
+        output = write_statements([self.foo, self.bar])
+        expect = "\tmove\t$regA,$regB" \
+                 + "\t\t#tralala\n" \
+                 + "\taddu\t$regC,$regA,$regB\n"
+        self.assertEqual(output, expect)
+
+    def test_writer_with_directive(self):
+        # Directives are written on their own tab-indented line.
+        directive = S('directive', '.tralala trololo')
+        output = write_statements([self.foo, directive, self.bar])
+        expect = "\tmove\t$regA,$regB\n" \
+                 + "\t.tralala trololo\n" \
+                 + "\taddu\t$regC,$regA,$regB\n"
+        self.assertEqual(output, expect)