Merged conflicts.

f60673bd · Taddeus Kroes · 091fc8d0 · acc1a765 · 091fc8d0 · f60673bd
Commit f60673bd authored Dec 29, 2011 by Taddeus Kroes
Show whitespace changes
Inline Side-by-side

Showing with 78 additions and 156 deletions

benchmarks/optimized/pi.s benchmarks/optimized/pi.s +0 -130

report/report.tex report/report.tex +62 -11

tests/test_statement.py tests/test_statement.py +16 -15

No files found.
--- a/benchmarks/optimized/pi.s
+++ b/benchmarks/optimized/pi.s
-	.file	1 "pi.c"
-# GNU C 2.7.2.3 [AL 1.1, MM 40, tma 0.1] SimpleScalar running sstrix compiled by GNU C
-# Cc1 defaults:
-# -mgas -mgpOPT
-# Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
-# -quiet -dumpbase -o
-gcc2_compiled.:
-__gnu_compiled_c:
-	.rdata
-	.align	2
-$LC0:
-	.ascii	"Usage: %s <iterations>\n\000"
-	.sdata
-	.align	2
-$LC3:
-	.ascii	"%.10f\n\000"
-	.align	3
-$LC1:
-	.word	0xffc00000		# 2147483647
-	.word	0x41dfffff
-	.align	3
-$LC2:
-	.word	0x00000000		# 1
-	.word	0x3ff00000
-	.align	3
-$LC4:
-	.word	0x00000000		# 4
-	.word	0x40100000
-	.text
-	.align	2
-	.globl	main
-	.extern	stderr, 4
-	.text
-	.loc	1 5
-	.ent	main
-main:
-	.frame	$fp,56,$31		# vars= 32, regs= 2/0, args= 16, extra= 0
-	.mask	0xc0000000,-4
-	.fmask	0x00000000,0
-	subu	$sp,$sp,56
-	sw	$31,52($sp)
-	sw	$fp,48($sp)
-	move	$fp,$sp
-	sw	$4,56($fp)
-	sw	$5,60($fp)
-	jal	__main
-	sw	$0,24($fp)
-	lw	$2,56($fp)
-	li	$3,0x00000002		# 2
-	beq	$2,$3,$L2
-	lw	$2,60($fp)
-	lw	$4,stderr
-	la	$5,$LC0
-	lw	$6,0($2)
-	jal	fprintf
-	move	$4,$0
-	jal	exit
-$L2:
-	lw	$3,60($fp)
-	addu	$2,$3,4
-	lw	$4,0($2)
-	jal	atoi
-	sw	$2,20($fp)
-	li	$4,0x00000001		# 1
-	jal	srandom
-	sw	$0,16($fp)
-$L3:
-	lw	$2,16($fp)
-	lw	$3,20($fp)
-	slt	$2,$2,$3
-	bne	$2,$0,$L6
-	j	$L4
-$L6:
-	jal	random
-	mtc1	$2,$f0
-	#nop
-	cvt.d.w	$f0,$f0
-	l.d	$f2,$LC1
-	div.d	$f0,$f0,$f2
-	s.d	$f0,32($fp)
-	jal	random
-	mtc1	$2,$f0
-	#nop
-	cvt.d.w	$f0,$f0
-	l.d	$f2,$LC1
-	div.d	$f0,$f0,$f2
-	s.d	$f0,40($fp)
-	l.d	$f0,32($fp)
-	l.d	$f2,32($fp)
-	mul.d	$f0,$f0,$f2
-	l.d	$f2,40($fp)
-	l.d	$f4,40($fp)
-	mul.d	$f2,$f2,$f4
-	add.d	$f0,$f0,$f2
-	l.d	$f2,$LC2
-	c.le.d	$f0,$f2
-	bc1f	$L7
-	lw	$3,24($fp)
-	addu	$2,$3,1
-	move	$3,$2
-	sw	$3,24($fp)
-$L7:
-$L5:
-	lw	$3,16($fp)
-	addu	$2,$3,1
-	move	$3,$2
-	sw	$3,16($fp)
-	j	$L3
-$L4:
-	l.s	$f0,24($fp)
-	#nop
-	cvt.d.w	$f0,$f0
-	l.s	$f2,20($fp)
-	#nop
-	cvt.d.w	$f2,$f2
-	div.d	$f0,$f0,$f2
-	l.d	$f2,$LC4
-	mul.d	$f0,$f0,$f2
-	la	$4,$LC3
-	dmfc1	$6,$f0
-	jal	printf
-	li	$2,0x00000001		# 1
-	j	$L1
-$L1:
-	move	$sp,$fp			# sp not trusted here
-	lw	$31,52($sp)
-	lw	$fp,48($sp)
-	addu	$sp,$sp,56
-	j	$31
-	.end	main
\ No newline at end of file
--- a/report/report.tex
+++ b/report/report.tex
@@ -109,24 +109,24 @@ addu	$5,$4,$3		   mov = $4, $t1

 \end{verbatim}

-
 A standard method for doing this is the creation of a DAG or Directed Acyclic
 Graph. However, this requires a fairly advanced implementation. Our
 implementation is slightly less fancy, but easier to implement.
 We search from the end of the block up for instructions that are eligible for
 CSE. If we find one, we check further up in the code for the same instruction,
 and add that to a temporary storage list. This is done until the beginning of
-the block or until one of the arguments of this expression is assigned.
+the block or until one of the arguments of this expression is assigned. The temporary storage is 

 We now add the instruction above the first use, and write the result in a new
 variable. Then all occurrences of this expression can be replaced by a move
 from the new variable into the original destination variable of the instruction.

-This is a less efficient method then the dag, but because the basic blocks are
+This is a less efficient method than the DAG, but because the basic blocks are
 in general not very large and the execution time of the optimizer is not a
 primary concern, this is not a big problem.

 \subsubsection*{Fold constants}
+Constant folding is an optimization where the outcome of arithmetic is calculated at compile time. If a variable \texttt{x} is assigned a certain value, let's say 10, then all subsequent occurrences of \texttt{x} are replaced by 10 until a redefinition of \texttt{x}. Arithmetic in Assembly is always performed between two constants; if this is not the case, the calculation is not possible. See the example for a clearer explanation of constant folding (will come). In other words, until the current definition of \texttt{x} becomes dead. Therefore reaching definitions analysis is needed.



@@ -168,7 +168,18 @@ removed by the dead code elimination.

 \subsubsection*{Algebraic transformations}

+Some expressions can easily be replaced with simpler ones if you look at
+what they are saying algebraically. An example is the statement $x = y + 0$, or
+in Assembly \texttt{addu \$1, \$2, 0}. This can easily be changed into $x = y$
+or \texttt{move \$1, \$2}.
+
+Another case is the multiplication with a power of two. This can be done way
+more efficiently by shifting left a number of times. An example:
+\texttt{mult \$regA, \$regB, 4    ->  sll  \$regA, \$regB, 2}. We perform this
+optimization for any multiplication with a power of two.

+There are a number of such cases, all of which are once again stated in
+appendix \ref{opt}. 

 \section{Implementation}

@@ -205,7 +216,7 @@ The optimizations are done in two different steps. First the global
 optimizations are performed, which are only the optimizations on branch-jump
 constructions. This is done repeatedly until there are no more changes.

-After all possible global optimizations are done, the program is seperated into
+After all possible global optimizations are done, the program is separated into
 basic blocks. The algorithm to do this is described earlier, and means all
 jump and branch instructions are called leaders, as are their targets. A basic
 block then goes from leader to leader.
@@ -225,17 +236,57 @@ concatenated again into a list. After this is done, the list is passed on to
 the writer, which writes the instructions back to Assembly and saves the file
 so we can let xgcc compile it.

-\section{Results}
+\section{Testing}
+
+Of course, it has to be guaranteed that the optimized code still functions
+exactly the same as the non-optimized code. To do this, testing is an
+important part of our program. We have two stages of testing. The first stage
+is unit testing. The second stage is to test whether the compiled code has
+exactly the same output.

-\subsection{pi.c}
+\subsection{Unit testing}

-\subsection{acron.c}
+For almost every piece of important code, unit tests are available. Unit tests
+give the possibility to check whether each small part of the program, for
+instance each small function, is performing as expected. This way bugs are
+found early and very exactly. Otherwise, one would only see that there is a
+mistake in the program, not knowing where this bug is. Naturally, this means
+debugging is a lot easier.

-\subsection{whet.c}
+The unit tests can be run by executing \texttt{make test} in the root folder of
+the project. This does require the \texttt{textrunner} module.

-\subsection{slalom.c}
+Also available is a coverage report. This report shows how much of the code has
+been unit tested. To make this report, the command \texttt{make coverage} can
+be run in the root folder. The report is then added as a folder \emph{coverage}
+in which an \emph{index.html} can be used to see the entire report.
+
+\subsection{Output comparison}
+
+In order to check whether the optimization does not change the functioning of
+the program, the output of the provided benchmark programs has to be compared
+to the output after optimization. If any of these outputs is not equal to the
+original output, our optimizations are too aggressive, or there is a bug
+somewhere in the code.
+
+\section{Results}

-\subsection{clinpack.c}
+The following results have been obtained:\\
+\begin{tabular}{|c|c|c|c|c|c|}
+\hline
+Benchmark & Original     & Optimized    & Original & Optimized & Performance \\
+        & Instructions & instructions & cycles   & cycles    &  boost(cycles)\\
+\hline
+pi        &          134 &              &    13011 &           &             \\
+acron     &              &              &  4435687 &           &             \\
+dhrystone &              &              &  2887710 &           &             \\
+whet      &              &              &  2864089 &           &             \\
+slalom    &              &              &    27270 &           &             \\
+clinpack  &              &              &  1547941 &           &             \\
+\hline
+\end{tabular}\\
+\\
+The input for slalom was 1000 seconds and a minimum of $n = 100$.

 \section{Conclusion}


--- a/tests/test_statement.py
+++ b/tests/test_statement.py
@@ -95,18 +95,19 @@ class TestStatement(unittest.TestCase):
        self.assertFalse(S('label', 'addu').is_arith())

    def test_get_def(self):
-        self.assertEqual(S('command', 'move', 'a', 'b').get_def(), ['a'])
-        self.assertEqual(S('command', 'subu', 'a', 'b').get_def(), ['a'])
-        self.assertEqual(S('command', 'addu', 'a', 'b', 'c').get_def(), ['a'])
-        self.assertEqual(S('command', 'sll', 'a', 'b', 'c').get_def(), ['a'])
-        self.assertEqual(S('command', 'srl', 'a', 'b', 'c').get_def(), ['a'])
-        self.assertEqual(S('command', 'lb', 'a', '16($fp)').get_def(), ['a'])
-        self.assertEqual(S('command', 'li', 'a', '16($fp)').get_def(), ['a'])
-        self.assertEqual(S('command', 'add.d', 'a', 'b', 'c').get_def(), ['a'])
-        self.assertEqual(S('command', 'neg.d', 'a', 'b').get_def(), ['a'])
-        self.assertEqual(S('command', 'sub.d', 'a', 'b', 'c').get_def(), ['a'])
-        self.assertEqual(S('command', 'slt', 'a', 'b').get_def(), ['a'])
-        self.assertEqual(S('command', 'xori', 'a', 'b', '0x0000').get_def(),
-                         ['a'])
-        self.assertEqual(S('command', 'mov.d', 'a', 'b').get_def(), ['a'])
-        self.assertEqual(S('command', 'dmfc1', 'a', '$f0').get_def(), ['a'])
+        a = ['a']
+
+        self.assertEqual(S('command', 'move', 'a', 'b').get_def(), a)
+        self.assertEqual(S('command', 'subu', 'a', 'b').get_def(), a)
+        self.assertEqual(S('command', 'addu', 'a', 'b', 'c').get_def(), a)
+        self.assertEqual(S('command', 'sll', 'a', 'b', 'c').get_def(), a)
+        self.assertEqual(S('command', 'srl', 'a', 'b', 'c').get_def(), a)
+        self.assertEqual(S('command', 'la', 'a', '16($fp)').get_def(), a)
+        self.assertEqual(S('command', 'li', 'a', '16($fp)').get_def(), a)
+        self.assertEqual(S('command', 'add.d', 'a', 'b', 'c').get_def(), a)
+        self.assertEqual(S('command', 'neg.d', 'a', 'b').get_def(), a)
+        self.assertEqual(S('command', 'sub.d', 'a', 'b', 'c').get_def(), a)
+        self.assertEqual(S('command', 'slt', 'a', 'b').get_def(), a)
+        self.assertEqual(S('command', 'xori', 'a', 'b', '0x0000').get_def(), a)
+        self.assertEqual(S('command', 'mov.d', 'a', 'b').get_def(), a)
+        self.assertEqual(S('command', 'dmfc1', 'a', '$f0').get_def(), a)