Merge branch 'master' of github.com:taddeus/peephole

Conflicts: src/optimize.py

Merge branch 'master' of github.com:taddeus/peephole
Conflicts: src/optimize.py
870d25ee · Richard Torenvliet · eea69c60 · 61b61a39 · 870d25ee · 870d25ee
Commit 870d25ee authored Dec 28, 2011 by Richard Torenvliet
27 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,13 @@
 *.pdf
 *.pyc
 *~
+*.aux
+*.log
+*.out
+*.toc
 .coverage
+parser.out
+parsetab.py
 coverage/
 build/
 src/Makefile_old
--- a/Makefile
+++ b/Makefile
 BUILD=build/
+CLEAN=src/*.pyc src/optimize/*.pyc
 # Fix pdflatex search path
 TGT_DIR :=

--- a/QUESTIONS
+++ b/QUESTIONS
--- a/TODO CSE.txt
+++ b/TODO CSE.txt
+Common subexpression elimination
+Loop through statements of each block
+    for each binary operator, look back for usage of rs and rt. If rs or rt are
+    assigned, break. If exact same operator is found, add it to the list of
+    common subexpressions. If you reach the end of the block, or rs or rt are
+    assigned, make new destination address, and change each occurrence of this
+    expression with a move from the new register address.
--- a/benchmarks/.gitignore
+++ b/benchmarks/.gitignore
+acron
+clinpack
+dhrystone
+pi
+slalom
+whet
--- a/benchmarks/build/hello.s
+++ b/benchmarks/build/hello.s
@@ -6,33 +6,65 @@
 # -mgas -mgpOPT
 # Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
- # -quiet -dumpbase -o
+ # -quiet -dumpbase -O0 -o
 gcc2_compiled.:
 __gnu_compiled_c:
+	.sdata
+	.align	2
+$LC2:
+	.ascii	"e: %d\n\000"
+	.align	3
+$LC0:
+	.word	0x00000000		# 2
+	.word	0x40000000
+	.align	3
+$LC1:
+	.word	0x00000000		# 3.5
+	.word	0x400c0000
 	.text
 	.align	2
 	.globl	main
 	.text
-	.loc	1 2
+	.loc	1 3
 	.ent	main
 main:
-	.frame	$fp,24,$31		# vars= 0, regs= 2/0, args= 16, extra= 0
+	.frame	$fp,64,$31		# vars= 40, regs= 2/0, args= 16, extra= 0
 	.mask	0xc0000000,-4
 	.fmask	0x00000000,0
-	subu	$sp,$sp,24
+	subu	$sp,$sp,64
-	sw	$31,20($sp)
+	sw	$31,60($sp)
-	sw	$fp,16($sp)
+	sw	$fp,56($sp)
 	move	$fp,$sp
 	jal	__main
+	li	$2,0x00000001		# 1
+	sw	$2,16($fp)
+	li	$2,0x00000005		# 5
+	sw	$2,20($fp)
+	lw	$2,16($fp)
+	lw	$3,20($fp)
+	addu	$2,$2,$3
+	sw	$2,24($fp)
+	lw	$2,16($fp)
+	addu	$3,$2,10
+	sw	$3,28($fp)
+	l.d	$f0,$LC0
+	s.d	$f0,32($fp)
+	l.d	$f0,$LC1
+	s.d	$f0,40($fp)
+	li	$2,0x00000061		# 97
+	sb	$2,48($fp)
+	la	$4,$LC2
+	lw	$5,28($fp)
+	jal	printf
 	move	$2,$0
 	j	$L1
 $L1:
 	move	$sp,$fp			# sp not trusted here
-	lw	$31,20($sp)
+	lw	$31,60($sp)
-	lw	$fp,16($sp)
+	lw	$fp,56($sp)
-	addu	$sp,$sp,24
+	addu	$sp,$sp,64
 	j	$31
 	.end	main
--- a/benchmarks/hello.c
+++ b/benchmarks/hello.c
-int main(void)
+#include <stdio.h>
-{
+int main(void) {
+    int x = 1, b = 5, d = x + b, e = x + 10;
+    double y = 2., z = 3.5;
+    char c = 'a';
+	printf("e: %d\n", e);  // 11
    return 0;
 }
--- a/benchmarks/optimized/.gitignore
+++ b/benchmarks/optimized/.gitignore
-*.s
--- a/benchmarks/optimized/acron.s
+++ b/benchmarks/optimized/acron.s
+	.file	1 "acron.c"
+# GNU C 2.7.2.3 [AL 1.1, MM 40, tma 0.1] SimpleScalar running sstrix compiled by GNU C
+# Cc1 defaults:
+# -mgas -mgpOPT
+# Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
+# -quiet -dumpbase -O0 -o
+gcc2_compiled.:
+__gnu_compiled_c:
+	.globl	w
+	.data
+	.align	2
+w:
+	.word	$LC0
+	.word	$LC1
+	.word	$LC2
+	.word	$LC3
+	.word	$LC4
+	.word	$LC5
+	.sdata
+	.align	2
+$LC5:
+	.ascii	"Eephole\000"
+	.rdata
+	.align	2
+$LC4:
+	.ascii	"Peephole\000"
+	.align	2
+$LC3:
+	.ascii	"Optimization\000"
+	.align	2
+$LC2:
+	.ascii	"Practicum\000"
+	.align	2
+$LC1:
+	.ascii	"Ertalerbouw\000"
+	.align	2
+$LC0:
+	.ascii	"Vertalerbouw\000"
+	.text
+	.align	2
+	.globl	is_vowel
+	.sdata
+	.align	2
+$LC6:
+	.ascii	"%s\000"
+	.align	2
+$LC7:
+	.ascii	" %s\000"
+	.align	2
+$LC8:
+	.ascii	"\n\000"
+	.text
+	.align	2
+	.globl	do_perm
+	.align	2
+	.globl	main
+	.comm	acron,12
+	.comm	command,100
+	.comm	done,24
+	.comm	pindex,28
+	.text
+	.loc	1 10
+	.ent	is_vowel
+is_vowel:
+	.frame	$fp,16,$31		# vars= 8, regs= 1/0, args= 0, extra= 0
+	.mask	0x40000000,-8
+	.fmask	0x00000000,0
+	subu	$sp,$sp,16
+	sw	$fp,8($sp)
+	move	$fp,$sp
+	move	$3,$4
+	sb	$3,0($fp)
+	move	$2,$0
+	lb	$4,0($fp)
+	li	$5,0x00000041		# 65
+	beq	$4,$5,$L3
+	lb	$4,0($fp)
+	li	$5,0x00000045		# 69
+	beq	$4,$5,$L3
+	lb	$4,0($fp)
+	li	$5,0x00000049		# 73
+	beq	$4,$5,$L3
+	lb	$4,0($fp)
+	li	$5,0x0000004f		# 79
+	beq	$4,$5,$L3
+	lb	$4,0($fp)
+	li	$5,0x00000055		# 85
+	beq	$4,$5,$L3
+	lb	$4,0($fp)
+	li	$5,0x00000059		# 89
+	bne	$4,$5,$L2
+$L3:
+	li	$2,0x00000001		# 1
+$L2:
+	j	$L1
+$L1:
+	move	$sp,$fp			# sp not trusted here
+	lw	$fp,8($sp)
+	addu	$sp,$sp,16
+	j	$31
+	.end	is_vowel
+	.loc	1 15
+	.ent	do_perm
+do_perm:
+	.frame	$fp,56,$31		# vars= 24, regs= 4/0, args= 16, extra= 0
+	.mask	0xc0030000,-4
+	.fmask	0x00000000,0
+	subu	$sp,$sp,56
+	sw	$31,52($sp)
+	sw	$fp,48($sp)
+	sw	$17,44($sp)
+	sw	$16,40($sp)
+	move	$fp,$sp
+	sw	$4,56($fp)
+	sw	$5,60($fp)
+	sw	$6,64($fp)
+	sw	$7,68($fp)
+	sw	$0,24($fp)
+	lw	$2,64($fp)
+	li	$3,0x00000001		# 1
+	bne	$2,$3,$L5
+	lw	$2,pindex
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,w
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	lb	$4,0($3)
+	jal	is_vowel
+	bne	$2,$0,$L5
+	lw	$2,56($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,w
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	lb	$4,0($3)
+	jal	is_vowel
+	beq	$2,$0,$L4
+$L5:
+	lw	$2,64($fp)
+	slt	$3,$2,2
+	bne	$3,$0,$L6
+	lw	$2,64($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,pindex-8
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	move	$2,$3
+	sll	$3,$2,2
+	la	$4,w
+	addu	$2,$3,$4
+	lw	$3,0($2)
+	lb	$4,0($3)
+	jal	is_vowel
+	move	$16,$2
+	lw	$2,64($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,pindex-4
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	move	$2,$3
+	sll	$3,$2,2
+	la	$4,w
+	addu	$2,$3,$4
+	lw	$3,0($2)
+	lb	$4,0($3)
+	jal	is_vowel
+	move	$17,$2
+	lw	$2,56($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,w
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	lb	$4,0($3)
+	jal	is_vowel
+	addu	$3,$16,$17
+	addu	$2,$3,$2
+	sw	$2,24($fp)
+	lw	$2,24($fp)
+	beq	$2,$0,$L8
+	lw	$2,24($fp)
+	li	$3,0x00000003		# 3
+	bne	$2,$3,$L7
+$L8:
+	j	$L4
+$L7:
+$L6:
+	lw	$2,64($fp)
+	addu	$3,$2,1
+	sw	$3,64($fp)
+	sll	$3,$2,2
+	la	$4,pindex
+	addu	$2,$3,$4
+	lw	$3,56($fp)
+	sw	$3,0($2)
+	lw	$2,64($fp)
+	slt	$3,$2,6
+	beq	$3,$0,$L9
+	lw	$3,68($fp)
+	subu	$2,$3,1
+	move	$3,$2
+	sw	$3,68($fp)
+	beq	$3,$0,$L9
+	sw	$0,16($fp)
+$L10:
+	lw	$2,16($fp)
+	slt	$3,$2,6
+	beq	$3,$0,$L11
+$L13:
+	lw	$2,16($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	lw	$3,60($fp)
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	bne	$3,$0,$L14
+	lw	$2,16($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	lw	$3,60($fp)
+	addu	$2,$2,$3
+	li	$3,0x00000001		# 1
+	sw	$3,0($2)
+	lw	$4,16($fp)
+	lw	$5,60($fp)
+	lw	$6,64($fp)
+	lw	$7,68($fp)
+	jal	do_perm
+	lw	$2,16($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	lw	$3,60($fp)
+	addu	$2,$2,$3
+	sw	$0,0($2)
+$L14:
+$L12:
+	lw	$3,16($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,16($fp)
+	j	$L10
+$L11:
+	j	$L15
+$L9:
+	sw	$0,28($fp)
+	sw	$0,20($fp)
+$L16:
+	lw	$2,20($fp)
+	lw	$3,64($fp)
+	slt	$2,$2,$3
+	beq	$2,$0,$L17
+$L19:
+	sw	$0,32($fp)
+$L20:
+	lw	$2,20($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,pindex
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	move	$2,$3
+	sll	$3,$2,2
+	la	$4,w
+	addu	$2,$3,$4
+	lw	$3,0($2)
+	lw	$4,32($fp)
+	addu	$2,$3,$4
+	lb	$4,0($2)
+	jal	isupper
+	beq	$2,$0,$L21
+$L22:
+	lw	$2,28($fp)
+	addu	$3,$2,1
+	sw	$3,28($fp)
+	lw	$3,20($fp)
+	move	$4,$3
+	sll	$3,$4,2
+	la	$4,pindex
+	addu	$3,$3,$4
+	lw	$4,0($3)
+	move	$3,$4
+	sll	$4,$3,2
+	la	$5,w
+	addu	$3,$4,$5
+	lw	$4,32($fp)
+	addu	$5,$4,1
+	sw	$5,32($fp)
+	lw	$5,0($3)
+	addu	$3,$4,$5
+	lbu	$4,0($3)
+	sb	$4,acron($2)
+	j	$L20
+$L21:
+$L18:
+	lw	$3,20($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,20($fp)
+	j	$L16
+$L17:
+	lw	$2,28($fp)
+	la	$3,acron
+	addu	$2,$2,$3
+	sb	$0,0($2)
+	la	$4,$LC6
+	la	$5,acron
+	jal	printf
+	sw	$0,20($fp)
+$L23:
+	lw	$2,20($fp)
+	lw	$3,64($fp)
+	slt	$2,$2,$3
+	beq	$2,$0,$L24
+$L26:
+	lw	$2,20($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,pindex
+	addu	$2,$2,$3
+	lw	$3,0($2)
+	move	$2,$3
+	sll	$3,$2,2
+	la	$4,w
+	addu	$2,$3,$4
+	la	$4,$LC7
+	lw	$5,0($2)
+	jal	printf
+$L25:
+	lw	$3,20($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,20($fp)
+	j	$L23
+$L24:
+	la	$4,$LC8
+	jal	printf
+$L15:
+$L4:
+	move	$sp,$fp			# sp not trusted here
+	lw	$31,52($sp)
+	lw	$fp,48($sp)
+	lw	$17,44($sp)
+	lw	$16,40($sp)
+	addu	$sp,$sp,56
+	j	$31
+	.end	do_perm
+	.loc	1 53
+	.ent	main
+main:
+	.frame	$fp,32,$31		# vars= 8, regs= 2/0, args= 16, extra= 0
+	.mask	0xc0000000,-4
+	.fmask	0x00000000,0
+	subu	$sp,$sp,32
+	sw	$31,28($sp)
+	sw	$fp,24($sp)
+	move	$fp,$sp
+	jal	__main
+	li	$2,0x00000004		# 4
+	sw	$2,20($fp)
+$L28:
+	lw	$2,20($fp)
+	slt	$3,$2,7
+	beq	$3,$0,$L29
+$L31:
+	sw	$0,16($fp)
+$L32:
+	lw	$2,16($fp)
+	slt	$3,$2,6
+	beq	$3,$0,$L33
+$L35:
+	lw	$2,16($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,done
+	addu	$2,$2,$3
+	li	$3,0x00000001		# 1
+	sw	$3,0($2)
+	lw	$4,16($fp)
+	la	$5,done
+	move	$6,$0
+	lw	$7,20($fp)
+	jal	do_perm
+	lw	$2,16($fp)
+	move	$3,$2
+	sll	$2,$3,2
+	la	$3,done
+	addu	$2,$2,$3
+	sw	$0,0($2)
+$L34:
+	lw	$3,16($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,16($fp)
+	j	$L32
+$L33:
+$L30:
+	lw	$3,20($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,20($fp)
+	j	$L28
+$L29:
+$L27:
+	move	$sp,$fp			# sp not trusted here
+	lw	$31,28($sp)
+	lw	$fp,24($sp)
+	addu	$sp,$sp,32
+	j	$31
+	.end	main
\ No newline at end of file
--- a/benchmarks/optimized/clinpack.s
+++ b/benchmarks/optimized/clinpack.s
--- a/benchmarks/optimized/dhrystone.s
+++ b/benchmarks/optimized/dhrystone.s
--- a/benchmarks/optimized/pi.s
+++ b/benchmarks/optimized/pi.s
+	.file	1 "pi.c"
+# GNU C 2.7.2.3 [AL 1.1, MM 40, tma 0.1] SimpleScalar running sstrix compiled by GNU C
+# Cc1 defaults:
+# -mgas -mgpOPT
+# Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
+# -quiet -dumpbase -o
+gcc2_compiled.:
+__gnu_compiled_c:
+	.rdata
+	.align	2
+$LC0:
+	.ascii	"Usage: %s <iterations>\n\000"
+	.sdata
+	.align	2
+$LC3:
+	.ascii	"%.10f\n\000"
+	.align	3
+$LC1:
+	.word	0xffc00000		# 2147483647
+	.word	0x41dfffff
+	.align	3
+$LC2:
+	.word	0x00000000		# 1
+	.word	0x3ff00000
+	.align	3
+$LC4:
+	.word	0x00000000		# 4
+	.word	0x40100000
+	.text
+	.align	2
+	.globl	main
+	.extern	stderr, 4
+	.text
+	.loc	1 5
+	.ent	main
+main:
+	.frame	$fp,56,$31		# vars= 32, regs= 2/0, args= 16, extra= 0
+	.mask	0xc0000000,-4
+	.fmask	0x00000000,0
+	subu	$sp,$sp,56
+	sw	$31,52($sp)
+	sw	$fp,48($sp)
+	move	$fp,$sp
+	sw	$4,56($fp)
+	sw	$5,60($fp)
+	jal	__main
+	sw	$0,24($fp)
+	lw	$2,56($fp)
+	li	$3,0x00000002		# 2
+	beq	$2,$3,$L2
+	lw	$2,60($fp)
+	lw	$4,stderr
+	la	$5,$LC0
+	lw	$6,0($2)
+	jal	fprintf
+	move	$4,$0
+	jal	exit
+$L2:
+	lw	$3,60($fp)
+	addu	$2,$3,4
+	lw	$4,0($2)
+	jal	atoi
+	sw	$2,20($fp)
+	li	$4,0x00000001		# 1
+	jal	srandom
+	sw	$0,16($fp)
+$L3:
+	lw	$2,16($fp)
+	lw	$3,20($fp)
+	slt	$2,$2,$3
+	bne	$2,$0,$L6
+	j	$L4
+$L6:
+	jal	random
+	mtc1	$2,$f0
+	#nop
+	cvt.d.w	$f0,$f0
+	l.d	$f2,$LC1
+	div.d	$f0,$f0,$f2
+	s.d	$f0,32($fp)
+	jal	random
+	mtc1	$2,$f0
+	#nop
+	cvt.d.w	$f0,$f0
+	l.d	$f2,$LC1
+	div.d	$f0,$f0,$f2
+	s.d	$f0,40($fp)
+	l.d	$f0,32($fp)
+	l.d	$f2,32($fp)
+	mul.d	$f0,$f0,$f2
+	l.d	$f2,40($fp)
+	l.d	$f4,40($fp)
+	mul.d	$f2,$f2,$f4
+	add.d	$f0,$f0,$f2
+	l.d	$f2,$LC2
+	c.le.d	$f0,$f2
+	bc1f	$L7
+	lw	$3,24($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,24($fp)
+$L7:
+$L5:
+	lw	$3,16($fp)
+	addu	$2,$3,1
+	move	$3,$2
+	sw	$3,16($fp)
+	j	$L3
+$L4:
+	l.s	$f0,24($fp)
+	#nop
+	cvt.d.w	$f0,$f0
+	l.s	$f2,20($fp)
+	#nop
+	cvt.d.w	$f2,$f2
+	div.d	$f0,$f0,$f2
+	l.d	$f2,$LC4
+	mul.d	$f0,$f0,$f2
+	la	$4,$LC3
+	dmfc1	$6,$f0
+	jal	printf
+	li	$2,0x00000001		# 1
+	j	$L1
+$L1:
+	move	$sp,$fp			# sp not trusted here
+	lw	$31,52($sp)
+	lw	$fp,48($sp)
+	addu	$sp,$sp,56
+	j	$31
+	.end	main
\ No newline at end of file
--- a/benchmarks/optimized/slalom.s
+++ b/benchmarks/optimized/slalom.s
--- a/src/main.py
+++ b/src/main.py
 #!/usr/bin/python
-from parser import parse_file
+from src.parser import parse_file
-from optimize import optimize
+from src.optimize import optimize
-from writer import write_statements
+from src.writer import write_statements
 if __name__ == '__main__':
    from sys import argv, exit

--- a/report/Makefile
+++ b/report/Makefile
+RM=rm -rf
+all: report.pdf
+%.pdf: %.tex
+	pdflatex $^
+	pdflatex $^
+clean:
+	$(RM) *.pdf *.aux *.log *.out *.toc *.snm *.nav
--- a/report/report.tex
+++ b/report/report.tex
@@ -11,39 +11,230 @@
 \usepackage{hyperref}
 \title{Peephole Optimizer}
-\author{Jayke Meijer (6049885), Richard Torenvliet (6138861), Taddeus Kroes (6054129)}
+\author{Jayke Meijer (6049885), Richard Torenvliet (6138861), Tadde\"us Kroes
+    (6054129)}
 \begin{document}
 \maketitle
+\tableofcontents
+\pagebreak
 \section{Introduction}
-The goal of the assignment is to implement the optimization stage of the compiler. To reach this goal the parser part of the compiler has to be implemented.
-The output of the gcc cross compiler on a c program is our input, the output of the gcc cross compiler is in the form of Assembly code, but not optimized. Our assignment includes a number of c programs, an important part of the assignment is parsing the data. Parsing the data is done with lex and yacc. The lexer is a program that finds keywords that meets the regular expression provided in the lexer. After the lexer, the yaccer takes over. Yaccer can turn the keywords in to an action.
+The goal of the assignment is to implement the optimization stage of the
+compiler. To reach this goal the parser and the optimizer part of the compiler
+have to be implemented.
+The output of the xgcc cross compiler on a C program is our input. The output
+of the xgcc cross compiler is in the form of Assembly code, but not optimized.
+Our assignment includes a number of C programs. An important part of the
+assignment is parsing the data. Parsing the data is done with Lex and Yacc. The
+Lexer is a program that finds keywords that meet the regular expression
+provided in the Lexer. After the Lexer, the Yaccer takes over. Yacc can turn
+the keywords into an action.
+\section{Design}
+There are two general types of optimizations of the assembly code, global
+optimizations and optimizations on a so-called basic block. These optimizations
+will be discussed separately.
+\subsection{Global optimizations}
+We only perform one global optimization, which is optimizing branch-jump
+statements. The unoptimized Assembly code contains sequences of code of the
+following structure:
+\begin{verbatim}
+    beq ...,$Lx
+    j $Ly
+$Lx:   ...
+\end{verbatim}
+This is inefficient, since there is a jump to a label that follows this code.
+It would be more efficient to replace the branch statement with a \texttt{bne}
+(the opposite case) to the label used in the jump statement. This way the jump
+statement can be eliminated, since the next label follows anyway. The same can
+of course be done for the opposite case, where a \texttt{bne} is changed into a
+\texttt{beq}.
+Since this optimization is done between two series of codes with jumps and
+labels, we cannot perform it during the basic block optimizations. The
+reason for this will become clearer in the following section.
+\subsection{Basic Block Optimizations}
+Optimizations on basic blocks are a more important part of the optimizer.
+First, what is a basic block? A basic block is a sequence of statements
+guaranteed to be executed in that order, and that order alone. This is the case
+for a piece of code not containing any branches or jumps.
+To create a basic block, you need to define what is the leader of a basic
+block. We call a statement a leader if it is either a jump/branch statement, or
+the target of such a statement. Then a basic block runs from one leader until
+the next leader.
+There are quite a few optimizations we perform on these basic blocks, so we
+will describe the types of optimizations here instead of each optimization.
+\subsubsection*{Standard peephole optimizations}
+These are optimizations that simply look for a certain statement or pattern of
+statements, and optimize these. For example,
+\begin{verbatim}
+mov $regA,$regB
+instr $regA, $regA,... 
+\end{verbatim}
+can be optimized into
+\begin{verbatim}
+instr $regA, $regB,...
+\end{verbatim}
+since the register \texttt{\$regA} gets overwritten by the second instruction
+anyway, and the instruction can easily use \texttt{\$regB} instead of
+\texttt{\$regA}. There are a few more of these cases, which are the same as
+those described on the practicum page
+\footnote{\url{http://staff.science.uva.nl/~andy/compiler/prac.html}} and in
+Appendix \ref{opt}.
+\subsubsection*{Common subexpression elimination}
+A more advanced optimization is common subexpression elimination. This means
+that expensive operations such as multiplication or addition are performed only
+once and the result is then `copied' into variables where needed.
+A standard method for doing this is the creation of a DAG or Directed Acyclic
+Graph. However, this requires a fairly advanced implementation. Our
+implementation is slightly less fancy, but easier to implement.
+We search from the end of the block up for instructions that are eligible for
+CSE. If we find one, we check further up in the code for the same instruction,
+and add that to a temporary storage list. This is done until the beginning of
+the block or until one of the arguments of this expression is assigned.
+We now add the instruction above the first use, and write the result in a new
+variable. Then all occurrences of this expression can be replaced by a move
+from the new variable into the original destination variable of the instruction.
+This is a less efficient method than the DAG, but because the basic blocks are
+in general not very large and the execution time of the optimizer is not a
+primary concern, this is not a big problem.
+\section{Implementation}
+We decided to implement the optimization in Python. We chose this programming
+language because Python is an easy language to manipulate strings, work
+object-oriented etc.
+It turns out that Lex and Yacc are also available as a Python module,
+named PLY (Python Lex-Yacc). This allows us to use one language, Python, instead
+of two, i.e. C and Python. Also no debugging is needed in C, only in Python
+which makes our assignment more feasible.
+The program has three steps, parsing the Assembly code into a datastructure we
+can use, the so-called Intermediate Representation, performing optimizations on
+this IR and writing the IR back to Assembly.
+\subsection{Parsing}
+The parsing is done with PLY, which allows us to perform Lex-Yacc tasks in
+Python by using a Lex-Yacc like syntax. This way there is no need to combine
+languages like we should do otherwise since Lex and Yacc are coupled with C.
+The decision was made to not recognize exactly every possible instruction in
+the parser, but only if something is for example a command, a comment or a gcc
+directive. We then transform each line into an object called a Statement. A
+statement has a type, a name and optionally a list of arguments. These
+statements together form a statement list, which is placed in another object
+called a Block. In the beginning there is one block for the entire program, but
+after global optimizations this will be separated in several blocks that are
+the basic blocks.
+\subsection{Optimizations}
+The optimizations are done in two different steps. First the global
+optimizations are performed, which are only the optimizations on branch-jump
+constructions. This is done repeatedly until there are no more changes.
+After all possible global optimizations are done, the program is separated into
+basic blocks. The algorithm to do this is described earlier, and means all
+jump and branch instructions are called leaders, as are their targets. A basic
+block then goes from leader to leader.
+After the division in basic blocks, optimizations are performed on each of
+these basic blocks. This is also done repeatedly, since sometimes several
+steps can be done to optimize something.
+\subsection{Writing}
+Once all the optimizations have been done, the IR needs to be rewritten into
+Assembly code, so the xgcc cross compiler can make binary code out of it.
+The writer expects a list of statements, so first the blocks have to be
+concatenated again into a list. After this is done, the list is passed on to
+the writer, which writes the instructions back to Assembly and saves the file
+so we can let xgcc compile it.
-\section{Design \& Implementation}
+\section{Results}
-We decided to implement the optimization in python. We chose this programming language because python is an easy language to manipulate strings, work objective ori\"ented etc.
-It turns out that a lex and yacc are also implemented in a python version, named PLY(Python Lex-Yacc). This allows us to use one language, Python, instead of two i.e. C and Python. Also no debugging is needed in C, only in Python which makes our assignment more feasible.
-\subsection{Design}
+\subsection{pi.c}
+\subsection{acron.c}
-\subsection*{Implementation}
+\subsection{whet.c}
-This 
-\subsubsection*{PLY}
+\subsection{slalom.c}
+\subsection{clinpack.c}
+\section{Conclusion}
+\appendix
+\section{List of all optimizations}
+\label{opt}
+\textbf{Global optimizations}
+\begin{verbatim}
+    beq ...,$Lx             bne ...,$Ly
+    j $Ly               ->  $Lx:   ...
+$Lx:   ...
+    bne ...,$Lx             beq ...,$Ly
+    j $Ly               ->  $Lx:   ...
+$Lx:   ...
+\end{verbatim}
+\textbf{Standard basic block optimizations}
+\begin{verbatim}
+mov $regA,$regA         ->  --- // remove it
+mov $regA,$regB         ->  instr $regA, $regB,...
+instr $regA, $regA,...
+instr $regA,...             instr $4,...
+mov [$4-$7], $regA      ->  jal XXX
+jal  XXX
-\section{Results}
-\subsection*{pi.c}
+sw $regA,XXX            ->  sw $regA, XXX
+ld $regA,XXX
-\subsection*{arcron.c}
-\subsection*{whet.c}
+shift $regA,$regA,0     ->  --- // remove it
-\subsection*{slalom.c}
-\subsection*{clinpack.c}
+add $regA,$regA,X       ->  lw ...,X($regA)
+lw ...,0($regA)
+\end{verbatim}
+\textbf{Advanced basic block optimizations}
-\section{conclusion}
+\begin{verbatim}
+# Common subexpression elimination
+addu $regA, $regB, 4        addu $regD, $regB, 4
+...                         move $regA, $regD
+Code not writing $regB  ->  ...
+...                         ...
+addu $regC, $regB, 4        move $regC, $regD
+\end{verbatim}
 \end{document}
--- a/src/dataflow.py
+++ b/src/dataflow.py
-from copy import copy
+#from copy import copy
 from statement import Block
@@ -122,55 +122,102 @@ def generate_flow_graph(blocks):
            b.add_edge_to(blocks[i + 1])
-def generate_dominator_tree(nodes):
+#def generate_dominator_tree(nodes):
-    """Add dominator administration to the given flow graph nodes."""
+#    """Add dominator administration to the given flow graph nodes."""
-    # Dominator of the start node is the start itself
+#    # Dominator of the start node is the start itself
-    nodes[0].dom = set([nodes[0]])
+#    nodes[0].dom = set([nodes[0]])
+#
-    # For all other nodes, set all nodes as the dominators
+#    # For all other nodes, set all nodes as the dominators
-    for n in nodes[1:]:
+#    for n in nodes[1:]:
-        n.dom = set(copy(nodes))
+#        n.dom = set(copy(nodes))
+#
-    def pred(n, known=[]):
+#    def pred(n, known=[]):
-        """Recursively find all predecessors of a node."""
+#        """Recursively find all predecessors of a node."""
-        direct = filter(lambda x: x not in known, n.edges_from)
+#        direct = filter(lambda x: x not in known, n.edges_from)
-        p = copy(direct)
+#        p = copy(direct)
+#
-        for ancestor in direct:
+#        for ancestor in direct:
-            p += pred(ancestor, direct)
+#            p += pred(ancestor, direct)
+#
-        return p
+#        return p
+#
-    # Iteratively eliminate nodes that are not dominators
+#    # Iteratively eliminate nodes that are not dominators
-    changed = True
+#    changed = True
+#
-    while changed:
+#    while changed:
-        changed = False
+#        changed = False
+#
-        for n in nodes[1:]:
+#        for n in nodes[1:]:
-            old_dom = n.dom
+#            old_dom = n.dom
-            intersection = lambda p1, p2: p1.dom & p2.dom
+#            intersection = lambda p1, p2: p1.dom & p2.dom
-            n.dom = set([n]) | reduce(intersection, pred(n), set([]))
+#            n.dom = set([n]) | reduce(intersection, pred(n), set([]))
+#
-            if n.dom != old_dom:
+#            if n.dom != old_dom:
-                changed = True
+#                changed = True
+#
-    def idom(d, n):
+#    def idom(d, n):
-        """Check if d immediately dominates n."""
+#        """Check if d immediately dominates n."""
-        for b in n.dom:
+#        for b in n.dom:
-            if b != d and b != n and b in n.dom:
+#            if b != d and b != n and b in n.dom:
-                return False
+#                return False
+#
-        return True
+#        return True
+#
-    # Build tree using immediate dominators
+#    # Build tree using immediate dominators
-    for n in nodes:
+#    for n in nodes:
-        for d in n.dom:
+#        for d in n.dom:
-            if idom(d, n):
+#            if idom(d, n):
-                d.set_dominates(n)
+#                d.set_dominates(n)
-                break
+#                break
-# statements = parse_file(...)
-# b = find_basic_blocks(statements)
+class Dag:
-# generate_flow_graph(b)  # nodes now have edges
+    def __init__(self, block):
-# generate_dominator_tree(b)  # nodes now have dominators
+        """Create the Directed Acyclic Graph of all binary operations in a
+        basic block."""
+        self.nodes = []
+        for s in block:
+            if s.is_command('move') or s.is_monop():
+                rd, rs = s
+                y = self.find_reg_node(rs)
+                self.find_op_node(s.name, rd, y)
+            elif s.is_binop():
+                rd, rs, rt = s
+                y = self.find_reg_node(rs)
+                z = self.find_reg_node(rt)
+                self.find_op_node(s.name, rd, y, z)
+    def find_reg_node(self, reg):
+        for n in self.nodes:
+            if reg in n.reg:
+                return n
+        node = DagLeaf(reg)
+        self.nodes.append(node)
+        return node
+    def find_op_node(self, op, rd, *args):
+        for n in self.nodes:
+            if not isinstance(n, DagLeaf) and n.op == op and n.nodes == args:
+                n.labels.append(rd)
+                return n
+        node = DagNode(op, rd, *args)
+        self.nodes.append(node)
+        return node
+class DagNode:
+    def __init__(self, op, label, *args):
+        self.op = op
+        self.labels = [label]
+        self.nodes = args
+class DagLeaf:
+    def __init__(self, reg):
+        self.reg = reg
--- a/src/optimize.py
+++ b/src/optimize.py
--- a/src/optimize/advanced.py
+++ b/src/optimize/advanced.py
--- a/src/optimize/standard.py
+++ b/src/optimize/standard.py
+import re
+def redundant_move_1(mov, statements):
+    """
+    mov $regA, $regA          ->  --- remove it
+    """
+    if mov.is_command('move') and mov[0] == mov[1]:
+        statements.replace(1, [])
+        return True
+def redundant_move_2(mov, statements):
+    """
+    mov $regA, $regB          ->  instr $regA, $regB, ...
+    instr $regA, $regA, ...
+    """
+    if mov.is_command('move'):
+        ins = statements.peek()
+        if ins and len(ins) >= 2 and ins[0] == mov[0] and ins[1] == mov[0]:
+            ins[1] = mov[1]
+            statements.replace(2, [ins])
+            return True
+def redundant_move_3(ins, statements):
+    """
+    instr $regA, ...          ->  instr $4, ...
+    mov $4, $regA                 jal XX
+    jal XX
+    """
+    if ins.is_command() and len(ins):
+        following = statements.peek(2)
+        if len(following) == 2:
+            mov, jal = following
+            if mov.is_command('move') and mov[1] == ins[0] \
+                    and re.match('^\$[4-7]$', mov[0]) \
+                    and jal.is_command('jal'):
+                ins[0] = mov[0]
+                statements.replace(2, [ins])
+                return True
+def redundant_move_4(mov1, statements):
+    """
+    mov $RegA, $RegB         ->  move $RegA, $RegB
+    mov $RegB, $RegA
+    """
+    if mov1.is_command('move'):
+        mov2 = statements.peek()
+        if mov2.is_command('move') and mov2[0] == mov1[1] and \
+                mov2[1] == mov1[0]:
+            statements.replace(2, [mov1])
+            return True
+def redundant_load(sw, statements):
+    """
+    sw $regA, XX              ->  sw $regA, XX
+    ld $regA, XX
+    """
+    if sw.is_command('sw'):
+        ld = statements.peek()
+        if ld.is_command('lw') and ld.args == sw.args:
+            statements.replace(2, [sw])
+            return True
+def redundant_shift(shift, statements):
+    """
+    shift $regA, $regA, 0     ->  --- remove it
+    """
+    if shift.is_shift() and shift[0] == shift[1] and shift[2] == 0:
+        statements.replace(1, [])
+        return True
+def redundant_add(add, statements):
+    """
+    add $regA, $regA, X       ->  lw ..., X($regA)
+    lw ..., 0($regA)
+    """
+    if add.is_command('addu') and add[0] == add[1] and isinstance(add[2], int):
+        lw = statements.peek()
+        if lw.is_load() and lw[-1] == '0(%s)' % add[0]:
+            lw[-1] = '%s(%s)' % (add[2], add[0])
+            statements.replace(2, [lw])
+            return True
--- a/src/parser.py
+++ b/src/parser.py
@@ -3,11 +3,13 @@ import ply.yacc as yacc
 from statement import Statement as S, Block
 # Global statements administration
 statements = []
 tokens = ('NEWLINE', 'WORD', 'COMMENT', 'DIRECTIVE', 'COMMA', 'COLON')
 # Tokens
 def t_NEWLINE(t):
    r'\n+'
@@ -32,7 +34,7 @@ def t_DIRECTIVE(t):
    return t
 def t_hex_word(t):
-    r'0x[0-9a-fA-F]{8}'
+    r'0x([0-9a-fA-F]{8}|[0-9a-fA-F]{4})'
    t.type = 'WORD'
    return t
@@ -47,9 +49,10 @@ def t_int(t):
    return t
 def t_WORD(t):
-    r'[a-zA-Z0-9$_.+()]+'
+    r'[a-zA-Z0-9$_.+()-]+'
    return t
 # Ignore whitespaces
 t_ignore = ' \t'
@@ -57,9 +60,11 @@ def t_error(t):
    print('Illegal character "%s"' % t.value[0])
    t.lexer.skip(1)
 # Build the lexer
 lexer = lex.lex()
 # Parsing rules
 start = 'input'
@@ -102,9 +107,14 @@ def p_command(p):
 def p_error(p):
    print 'Syntax error at "%s" on line %d' % (p.value, lexer.lineno)
+# Build YACC
 yacc.yacc()
 def parse_file(filename):
+    """Parse a given Assembly file, return a Block with Statement objects
+    containing the parsed instructions."""
    global statements
    statements = []

--- a/src/statement.py
+++ b/src/statement.py
--- a/src/writer.py
+++ b/src/writer.py
--- a/tests/__init__.pyc
+++ b/tests/__init__.pyc
--- a/tests/test_dataflow.py
+++ b/tests/test_dataflow.py
--- a/tests/test_optimize.py
+++ b/tests/test_optimize.py
--- a/tests/test_optimize_advanced.py
+++ b/tests/test_optimize_advanced.py