Merge branch 'master' of github.com:taddeus/peephole

405dc7a5 · Richard Torenvliet · 41528750 · 58c6acb7 · 41528750 · 41528750
Commit 405dc7a5 authored Dec 29, 2011 by Richard Torenvliet
10 changed files
--- a/benchmarks/optimized/clinpack.s
+++ b/benchmarks/optimized/clinpack.s
--- a/benchmarks/optimized/slalom.s
+++ b/benchmarks/optimized/slalom.s
--- a/report/report.tex
+++ b/report/report.tex
@@ -120,13 +120,27 @@ We now add the instruction above the first use, and write the result in a new
 variable. Then all occurrences of this expression can be replaced by a move
 from the new variable into the original destination variable of the instruction.

-This is a less efficient method then the dag, but because the basic blocks are
+This is a less efficient method than the DAG, but because the basic blocks are
 in general not very large and the execution time of the optimizer is not a
 primary concern, this is not a big problem.

-\subsubsection*{Fold constants}
+\subsubsection*{Constant folding}

+Another optimization is to do constant folding. Constant folding is replacing
+an expensive step like addition with a simpler step like loading a constant.
+Of course, this is not always possible. It is possible in cases where you apply
+an operation to two constants, or to a constant and a variable of which you know
+for sure that it always has a certain value at that point. For example:
+\begin{verbatim}
+li   $regA, 1               li $regA, 1
+addu $regB, $regA, 2    ->  li $regB, 3
+\end{verbatim}
+Of course, if \texttt{\$regA} is not used after this, it can be removed, which
+will be done by the dead code elimination.

+One problem we encountered with this is that after a \texttt{li}, the program
+often also stores the loaded value in memory, so we had to check whether
+this was necessary here as well.

 \subsubsection*{Copy propagation}

@@ -155,8 +169,7 @@ An example would be the following:
 \begin{verbatim}
 move $regA, $regB                   move $regA, $regB
 ...                                 ...
-Code not writing $regA, ->  ...
-$regB                       ...
+Code not writing $regA, $regB   ->  ...
 ...                                 ...
 addu $regC, $regA, ...              addu $regC, $regB, ...
 \end{verbatim}
@@ -166,7 +179,18 @@ removed by the dead code elimination.

 \subsubsection*{Algebraic transformations}

+Some expressions can easily be replaced with simpler ones if you look at
+what they are saying algebraically. An example is the statement $x = y + 0$, or
+in Assembly \texttt{addu \$1, \$2, 0}. This can easily be changed into $x = y$
+or \texttt{move \$1, \$2}.
+
+Another case is the multiplication by a power of two. This can be done much
+more efficiently by shifting left a number of bits. An example:
+\texttt{mult \$regA, \$regB, 4    ->  sll  \$regA, \$regB, 2}. We perform this
+optimization for any multiplication by a power of two.

+There are a number of such cases, all of which are once again stated in
+appendix \ref{opt}. 

 \section{Implementation}

@@ -203,7 +227,7 @@ The optimizations are done in two different steps. First the global
 optimizations are performed, which are only the optimizations on branch-jump
 constructions. This is done repeatedly until there are no more changes.

-After all possible global optimizations are done, the program is seperated into
+After all possible global optimizations are done, the program is separated into
 basic blocks. The algorithm to do this is described earlier, and means all
 jump and branch instructions are called leaders, as are their targets. A basic
 block then goes from leader to leader.
@@ -215,7 +239,7 @@ steps can be done to optimize something.
 \subsection{Writing}

 Once all the optimizations have been done, the IR needs to be rewritten into
-Assembly code, so the xgcc crosscompiler can make binary code out of it.
+Assembly code, so the xgcc cross compiler can make binary code out of it.

 The writer expects a list of statements, so first the blocks have to be
 concatenated again into a list. After this is done, the list is passed on to

--- a/src/dataflow.py
+++ b/src/dataflow.py
-#from copy import copy
+from copy import copy

 from statement import Block

@@ -26,38 +26,83 @@ class BasicBlock(Block):
            self.dominates.append(block)
            block.dominated_by.append(self)

+    def create_gen_kill(self, defs):
+        """Compute the `gen' and `kill' sets of this block.
+
+        `defs' maps each register to the set of ids of all statements that
+        define it. The results are stored in self.gen_set and
+        self.kill_set."""
+        self_defs = {}

-#    def get_gen(self):
-#        for s in self.statements:       
-#            if s.is_arith():
-#                self.gen_set.add(s[0])
-#                print 'added: ', s[0]
-#        
-#        return self.gen_set
-#        
-#    def get_kill(self):
-##        if self.edges_from != []:
-#    
-#        for backw in self.edges_from:
-#            self.kill_set = self.gen_set & backw.kill_set
-#            
-#        self.kill_set = self.kill_set - self.get_gen()
-#        print 'get_kill_set', self.kill_set
-#        return self.kill_set
+        # Walk backwards, so that only the last definition of each register in
+        # this block ends up in the `gen' set
+        self.gen_set = set()
+
+        for s in reversed(self):
+            for reg in s.get_def():
+                if reg not in self_defs:
+                    self_defs[reg] = s.sid
+                    self.gen_set.add(s.sid)
+
+        # A block only kills definitions of registers that it defines itself:
+        # every definition of such a register, except the one it generates.
+        # Definitions of registers this block never writes pass through it.
+        self.kill_set = set()
+
+        for reg, sid in self_defs.items():
+            self.kill_set |= defs.get(reg, set()) - set([sid])
+
+
+def defs(blocks):
+    """Map every register to the set of ids of all statements, in any of the
+    given blocks, that define it."""
+    definitions = {}
+
+    for block in blocks:
+        for statement in block:
+            for reg in statement.get_def():
+                # A fresh set is created the first time a register is seen
+                definitions.setdefault(reg, set()).add(statement.sid)
+
+    return definitions
+
+
+def reaching_definitions(blocks):
+    """Generate the `in' and `out' sets of the given blocks using the iterative
+    algorithm from the slides.
+
+    Each block gets its gen_set and kill_set from create_gen_kill(), and is
+    given an in_set and out_set. The predecessors of a block are read from
+    its edges_from list."""
+    # This local must not be called `defs': that would shadow the defs()
+    # function within this scope and make the call itself fail
+    all_defs = defs(blocks)
+
+    for b in blocks:
+        b.create_gen_kill(all_defs)
+        # Use a copy, so that out_set never aliases gen_set
+        b.out_set = set(b.gen_set)
+
+    change = True
+
+    # Repeat until no `out' set changes anymore
+    while change:
+        change = False
+
+        for b in blocks:
+            b.in_set = set()
+
+            for predecessor in b.edges_from:
+                b.in_set |= predecessor.out_set
+
+            # The assignment below creates a new set, so the old one can be
+            # kept by reference for the comparison
+            oldout = b.out_set
+            b.out_set = b.gen_set | (b.in_set - b.kill_set)
+
+            if b.out_set != oldout:
+                change = True

-#    def get_in(self):
-#        for backw in self.edges_from:
-#            self.in_set = self.in_set | backw.out_set
-#        print 'in_set', self.in_set
-#        return self.in_set

-#    def get_out(self):
-#        print 'gen_set', self.gen_set
-#        print 'get_in', self.get_in()
-#        print 'get_kill', self.get_kill()
-#        self.out_set = self.gen_set | (self.get_in() - self.get_kill())
+def pred(n, known=None):
+    """Recursively find all predecessors of a node.
+
+    Blocks listed in `known' are skipped. Every block is visited at most once,
+    so flow graphs that contain loops are handled and the returned list holds
+    no duplicates."""
+    # Work on a private list; neither the caller's list nor a shared default
+    # value is ever modified
+    seen = list(known) if known is not None else []
+    found = []
+
+    def collect(node):
+        direct = []
+
+        for b in node.edges_from:
+            if b not in seen:
+                seen.append(b)
+                direct.append(b)
+
+        # Direct predecessors first, then their ancestors
+        found.extend(direct)
+
+        for ancestor in direct:
+            collect(ancestor)
+
+    collect(n)
+
+    return found


 def find_leaders(statements):

--- a/src/optimize/__init__.py
+++ b/src/optimize/__init__.py
@@ -3,7 +3,7 @@ from src.dataflow import find_basic_blocks
 from redundancies import remove_redundant_jumps, move_1, move_2, move_3, \
        move_4, load, shift, add
 from advanced import eliminate_common_subexpressions, fold_constants, \
-        copy_propagation, algebraic_transformations
+        copy_propagation, algebraic_transformations, eliminate_dead_code


 def remove_redundancies(block):
@@ -32,7 +32,8 @@ def optimize_block(block):
            | eliminate_common_subexpressions(block) \
            | fold_constants(block) \
            | copy_propagation(block)\
-            | algebraic_transformations(block):
+            | algebraic_transformations(block) \
+            | eliminate_dead_code(block):
        pass


@@ -63,6 +64,6 @@ def optimize(statements, verbose=0):
        print 'After global optimization:       %d' % g
        print 'After basic blocks optimization: %d' % b
        print 'Optimization:                    %d (%d%%)' \
-                % (b - o, int((b - o) / float(o) * 100))
+                % (o - b, int((o - b) / float(b) * 100))

    return opt_blocks
--- a/src/optimize/advanced.py
+++ b/src/optimize/advanced.py
--- a/src/statement.py
+++ b/src/statement.py
@@ -2,12 +2,18 @@ import re


 class Statement:
+    sid = 1
+
    def __init__(self, stype, name, *args, **kwargs):
        self.stype = stype
        self.name = name
        self.args = list(args)
        self.options = kwargs

+        # Assign a unique ID to each statement: take the current value of the
+        # class-level counter and increment it for the next instance
+        self.sid = Statement.sid
+        Statement.sid += 1
+
    def __getitem__(self, n):
        """Get an argument."""
        return self.args[n]
@@ -26,8 +32,8 @@ class Statement:
        return len(self.args)

    def __str__(self):  # pragma: nocover
-        return '<Statement type=%s name=%s args=%s>' \
-                % (self.stype, self.name, self.args)
+        return '<Statement sid=%d type=%s name=%s args=%s>' \
+                % (self.sid, self.stype, self.name, self.args)

    def __repr__(self):  # pragma: nocover
        return str(self)
@@ -62,16 +68,21 @@ class Statement:

    def is_shift(self):
        """Check if the statement is a shift operation."""
-        return self.is_command() and re.match('^s(ll|la|rl|ra)$', self.name)
+        return self.is_command() and re.match('^s(ll|rl|ra)$', self.name)

    def is_load(self):
        """Check if the statement is a load instruction."""
-        return self.is_command() and self.name in ['lw', 'dlw', 'l.s', 'l.d']
+        return self.is_command() and self.name in ['lw', 'li', 'dlw', 'l.s', \
+                                                   'l.d']
                                                   
    def is_arith(self):
        """Check if the statement is an arithmetic operation."""
        return self.is_command() \
-               and re.match('^(add|sub|mult|div|abs|neg)(u|\.d)?$', self.name)
+               and re.match('^s(ll|rl|ra)'
+                            + '|(mfhi|mflo|abs|neg|and|[xn]?or)'
+                            + '|(add|sub|slt)u?'
+                            + '|(add|sub|mult|div|abs|neg|sqrt|c)\.[sd]$', \
+                            self.name)

    def is_monop(self):
        """Check if the statement is an unary operation."""
@@ -81,6 +92,41 @@ class Statement:
        """Check if the statement is an binary operation."""
        return self.is_command() and len(self) == 3 and not self.is_jump()
        
+    def is_load_non_immediate(self):
+        """Check if the statement is a load command other than `li': its name
+        starts with lw, la, lb, l.d or l.s, or is dlw."""
+        # NOTE(review): `^' binds only to the first alternative and `$' only to
+        # `dlw', so longer names such as `lwc1' are matched as well
+        return self.is_command() \
+               and re.match('^l(w|a|b|bu|\.d|\.s)|dlw$', \
+                            self.name)
+    def is_logical(self):
+        """Check if the statement is a logical operation (xor, or, and, or one
+        of their immediate variants)."""
+        command = self.is_command()
+
+        return re.match('^(xor|or|and)i?$', self.name) if command else command
+    
+    def is_double_arithmetic(self):
+        """Check if the statement is a binary arithmetic .d operation (add.d,
+        sub.d, div.d or mul.d)."""
+        return self.is_command() and \
+                re.match('^(add|sub|div|mul)\.d$', self.name)
+
+    # Keep the original (misspelled) name available as an alias, so that both
+    # spellings can be called
+    is_double_aritmethic = is_double_arithmetic
+                
+    def is_double_unary(self):
+        """Check if the statement is a unary operation on doubles (abs.d, neg.d
+        or mov.d)."""
+        command = self.is_command()
+
+        if not command:
+            return command
+
+        return re.match('^(abs|neg|mov)\.d$', self.name)
+                
+    def is_move_from_spec(self):
+        """Check if the statement is a move from one of the result registers
+        (mfhi or mflo)."""
+        # `mthi' moves a value TO the hi register and does not define its
+        # argument, so it does not belong here; `mfhi' is the move FROM hi
+        return self.is_command() and self.name in ['mfhi', 'mflo']
+        
+    def is_set_if_less(self):
+        """Check if the statement is a set-if-less-than operation (slt or
+        sltu)."""
+        command = self.is_command()
+
+        if not command:
+            return command
+
+        return self.name in ('slt', 'sltu')
+        
+    def is_convert(self):
+        """Check if the statement is a convert operation (cvt.*)."""
+        pattern = '^cvt\.[a-z\.]*$'
+
+        return self.is_command() and re.match(pattern, self.name)
+        
+    def is_truncate(self):
+        """Check if the statement is a truncate operation (trunc.*)."""
+        command = self.is_command()
+
+        return re.match('^trunc\.[a-z\.]*$', self.name) if command else command
+        
    def jump_target(self):
        """Get the jump target of this statement."""
        if not self.is_jump():
@@ -88,15 +134,49 @@ class Statement:

        return self[-1]
    
+    def get_def(self):
+        """Get the registers that this statement defines.
+
+        Returns a list holding the first argument if the statement is
+        recognized as one that writes a register, and an empty list
+        otherwise."""
+        instr = ['move', 'addu', 'subu', 'li', 'mtc1', 'dmfc1']
+
+        # The double arithmetic check is available in this class under the
+        # spelling `is_double_aritmethic' (sic)
+        if self.is_load_non_immediate() or self.is_arith() \
+                or self.is_logical() or self.is_double_aritmethic() \
+                or self.is_move_from_spec() or self.is_double_unary() \
+                or self.is_set_if_less() or self.is_convert() \
+                or self.is_truncate() or self.is_load() \
+                or (self.is_command() and self.name in instr):
+            # Always return a list: callers iterate over the result, and
+            # `reg in get_def()' must be an exact match, not a substring test
+            return [self[0]]
+
+        return []
+
+    def get_use(self):
+        """Get the list of arguments that this statement uses (reads)."""
+        # TODO: Finish with ALL the available commands!
+        use = []
+
+        if self.is_binop():
+            use += self[1:]
+        elif self.is_command('move'):
+            use.append(self[1])
+        elif self.is_command('lw', 'sb', 'sw', 'dsw', 's.s', 's.d'):
+            # The address operand has the form `offset(register)', where the
+            # offset may be negative
+            m = re.match('^-?\d+\(([^)]+)\)$', self[1])
+
+            if m:
+                use.append(m.group(1))
+
+            # Every store also reads the register whose value it writes to
+            # memory; only the load `lw' does not use its first argument
+            if self.name != 'lw':
+                use.append(self[0])
+        elif len(self) == 2:  # FIXME: temporary fix, manually add all commands
+            use.append(self[1])
+
+        return use
+
    def defines(self, reg):
        """Check if this statement defines the given register."""
-        # TODO: Finish
-        return (self.is_load() or self.is_arith()) and self[0] == reg
+        return reg in self.get_def()

    def uses(self, reg):
        """Check if this statement uses the given register."""
-        # TODO: Finish
-        return (self.is_load() or self.is_arith()) and reg in self[1:]
+        return reg in self.get_use()


 class Block:

--- a/tests/test_dataflow.py
+++ b/tests/test_dataflow.py
@@ -2,7 +2,7 @@ import unittest

 from src.statement import Statement as S
 from src.dataflow import BasicBlock as B, find_leaders, find_basic_blocks, \
-        generate_flow_graph, Dag, DagNode, DagLeaf
+        generate_flow_graph, Dag, DagNode, DagLeaf, defs, reaching_definitions


 class TestDataflow(unittest.TestCase):
@@ -112,6 +112,46 @@ class TestDataflow(unittest.TestCase):
 #
 #        self.assertEqualDag(dag, expect)

+    def test_defs(self):
+        # $3 is defined twice (by s1 and s3), $1 and $4 are defined once each
+        s1 = S('command', 'addu', '$3', '$1', '$2')
+        s2 = S('command', 'addu', '$1', '$3', 10)
+        s3 = S('command', 'subu', '$3', '$1', 5)
+        s4 = S('command', 'li', '$4', '0x00000001')
+        block = B([s1, s2, s3, s4])
+        self.assertEqual(defs([block]), {
+            '$3': set([s1.sid, s3.sid]),
+            '$1': set([s2.sid]),
+            '$4': set([s4.sid])
+        })
+
+    #def test_defs(self):
+    #    s1 = S('command', 'add', '$3', '$1', '$2')
+    #    s2 = S('command', 'move', '$1', '$3')
+    #    s3 = S('command', 'move', '$3', '$2')
+    #    s4 = S('command', 'li', '$4', '0x00000001')
+    #    block = B([s1, s2, s3, s4])
+    #    self.assertEqual(defs([block]), {
+    #        '$3': set([s1.sid, s3.sid]),
+    #        '$1': set([s2.sid]),
+    #        '$4': set([s4.sid])
+    #    })
+
+    def test_create_gen_kill_gen(self):
+        s1 = S('command', 'addu', '$3', '$1', '$2')
+        s2 = S('command', 'addu', '$1', '$3', 10)
+        s3 = S('command', 'subu', '$3', '$1', 5)
+        s4 = S('command', 'li', '$4', '0x00000001')
+        block = B([s1, s2, s3, s4])
+        block.create_gen_kill(defs([block]))
+        # s1 is not generated: its definition of $3 is overwritten by s3
+        self.assertEqual(block.gen_set, set([s2.sid, s3.sid, s4.sid]))
+
+    #def test_get_kill_used(self):
+    #    block = B([S('command', 'move', '$1', '$3'),
+    #               S('command', 'add', '$3', '$1', '$2'),
+    #               S('command', 'move', '$1', '$3'),
+    #               S('command', 'move', '$2', '$3')])
+    #    self.assertEqual(block.get_kill(), set())
+
    def assertEqualDag(self, dag1, dag2):
        self.assertEqual(len(dag1.nodes), len(dag2.nodes))


--- a/tests/test_optimize_advanced.py
+++ b/tests/test_optimize_advanced.py
@@ -19,9 +19,9 @@ class TestOptimizeAdvanced(unittest.TestCase):
    def test_eliminate_common_subexpressions_simple(self):
        b = B([S('command', 'addu', '$regC', '$regA', '$regB'),
               S('command', 'addu', '$regD', '$regA', '$regB')])
-        e = [S('command', 'addu', '$t0', '$regA', '$regB'), \
-             S('command', 'move', '$regC', '$t0'), \
-             S('command', 'move', '$regD', '$t0')]
+        e = [S('command', 'addu', '$8', '$regA', '$regB'), \
+             S('command', 'move', '$regC', '$8'), \
+             S('command', 'move', '$regD', '$8')]
        eliminate_common_subexpressions(b)
        self.assertEqual(b.statements, e)

@@ -50,6 +50,20 @@ class TestOptimizeAdvanced(unittest.TestCase):
                   S('command', 'addu', '$3', '$2', '$4'),
                   self.bar])
                   
+    def test_copy_propagation_other_arg(self):
+        # The copied register ($1) is the LAST argument of the addu here; it
+        # is expected to be replaced by the source of the move ($2) as well
+        block = B([self.foo,
+                   S('command', 'move', '$1', '$2'),
+                   self.foo,
+                   S('command', 'addu', '$3', '$4', '$1'),
+                   self.bar])
+
+        self.assertTrue(copy_propagation(block))
+        self.assertEqual(block.statements, [self.foo,
+                   S('command', 'move', '$1', '$2'),
+                   self.foo,
+                   S('command', 'addu', '$3', '$4', '$2'),
+                   self.bar])
+
    def test_copy_propagation_overwrite(self):
        block = B([self.foo, \
                    S('command', 'move', '$1', '$2'),
@@ -125,7 +139,8 @@ class TestOptimizeAdvanced(unittest.TestCase):

    def test_algebraic_transforms_mult0(self):
        block = B([self.foo,
-                   S('command', 'mult', '$1', '$2', 0),
+                   S('command', 'mult', '$2', 0),
+                   S('command', 'mflo', '$1'),
                   self.bar])

        self.assertTrue(algebraic_transformations(block))
@@ -135,7 +150,8 @@ class TestOptimizeAdvanced(unittest.TestCase):

    def test_algebraic_transforms_mult1(self):
        block = B([self.foo,
-                   S('command', 'mult', '$1', '$2', 1),
+                   S('command', 'mult', '$2', 1),
+                   S('command', 'mflo', '$1'),
                   self.bar])

        self.assertTrue(algebraic_transformations(block))
@@ -145,7 +161,8 @@ class TestOptimizeAdvanced(unittest.TestCase):

    def test_algebraic_transforms_mult2(self):
        block = B([self.foo,
-                   S('command', 'mult', '$1', '$2', 2),
+                   S('command', 'mult', '$2', 2),
+                   S('command', 'mflo', '$1'),
                   self.bar])

        self.assertTrue(algebraic_transformations(block))
@@ -155,7 +172,8 @@ class TestOptimizeAdvanced(unittest.TestCase):

    def test_algebraic_transforms_mult16(self):
        block = B([self.foo,
-                   S('command', 'mult', '$1', '$2', 16),
+                   S('command', 'mult', '$2', 16),
+                   S('command', 'mflo', '$1'),
                   self.bar])

        self.assertTrue(algebraic_transformations(block))
@@ -165,7 +183,8 @@ class TestOptimizeAdvanced(unittest.TestCase):

    def test_algebraic_transforms_mult3(self):
        arguments = [self.foo,
-                     S('command', 'mult', '$1', '$2', 3),
+                     S('command', 'mult', '$2', 3),
+                     S('command', 'mflo', '$1'),
                     self.bar]
        block = B(arguments)


--- a/tests/test_statement.py
+++ b/tests/test_statement.py
@@ -90,6 +90,6 @@ class TestStatement(unittest.TestCase):
        self.assertFalse(S('label', 'lw').is_load())

    def test_is_arith(self):
-        self.assertTrue(S('command', 'add', '$1', '$2', '$3').is_arith())
+        self.assertTrue(S('command', 'addu', '$1', '$2', '$3').is_arith())
        self.assertFalse(S('command', 'foo').is_arith())
-        self.assertFalse(S('label', 'add').is_arith())
+        self.assertFalse(S('label', 'addu').is_arith())