Commit 870d25ee authored by Richard Torenvliet's avatar Richard Torenvliet

Merge branch 'master' of github.com:taddeus/peephole

Conflicts:
	src/optimize.py
parents eea69c60 61b61a39
...@@ -2,7 +2,13 @@ ...@@ -2,7 +2,13 @@
*.pdf *.pdf
*.pyc *.pyc
*~ *~
*.aux
*.log
*.out
*.toc
.coverage .coverage
parser.out
parsetab.py
coverage/ coverage/
build/ build/
src/Makefile_old src/Makefile_old
BUILD=build/ BUILD=build/
CLEAN=src/*.pyc src/optimize/*.pyc
# Fix pdflatex search path # Fix pdflatex search path
TGT_DIR := TGT_DIR :=
......
Common subexpression elimination
Loop through statements of each block
for each binary operator, look back for usage of rs and rt. If rs or rt are
assigned, break. If exact same operator is found, add it to the list of
common subexpressions. If you reach the end of the block, or rs or rt are
assigned, make new destination address, and change each occurrence of this
expression with a move from the new register address.
acron
clinpack
dhrystone
pi
slalom
whet
...@@ -6,33 +6,65 @@ ...@@ -6,33 +6,65 @@
# -mgas -mgpOPT # -mgas -mgpOPT
# Cc1 arguments (-G value = 8, Cpu = default, ISA = 1): # Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
# -quiet -dumpbase -o # -quiet -dumpbase -O0 -o
gcc2_compiled.: gcc2_compiled.:
__gnu_compiled_c: __gnu_compiled_c:
.sdata
.align 2
$LC2:
.ascii "e: %d\n\000"
.align 3
$LC0:
.word 0x00000000 # 2
.word 0x40000000
.align 3
$LC1:
.word 0x00000000 # 3.5
.word 0x400c0000
.text .text
.align 2 .align 2
.globl main .globl main
.text .text
.loc 1 2 .loc 1 3
.ent main .ent main
main: main:
.frame $fp,24,$31 # vars= 0, regs= 2/0, args= 16, extra= 0 .frame $fp,64,$31 # vars= 40, regs= 2/0, args= 16, extra= 0
.mask 0xc0000000,-4 .mask 0xc0000000,-4
.fmask 0x00000000,0 .fmask 0x00000000,0
subu $sp,$sp,24 subu $sp,$sp,64
sw $31,20($sp) sw $31,60($sp)
sw $fp,16($sp) sw $fp,56($sp)
move $fp,$sp move $fp,$sp
jal __main jal __main
li $2,0x00000001 # 1
sw $2,16($fp)
li $2,0x00000005 # 5
sw $2,20($fp)
lw $2,16($fp)
lw $3,20($fp)
addu $2,$2,$3
sw $2,24($fp)
lw $2,16($fp)
addu $3,$2,10
sw $3,28($fp)
l.d $f0,$LC0
s.d $f0,32($fp)
l.d $f0,$LC1
s.d $f0,40($fp)
li $2,0x00000061 # 97
sb $2,48($fp)
la $4,$LC2
lw $5,28($fp)
jal printf
move $2,$0 move $2,$0
j $L1 j $L1
$L1: $L1:
move $sp,$fp # sp not trusted here move $sp,$fp # sp not trusted here
lw $31,20($sp) lw $31,60($sp)
lw $fp,16($sp) lw $fp,56($sp)
addu $sp,$sp,24 addu $sp,$sp,64
j $31 j $31
.end main .end main
int main(void) #include <stdio.h>
{
int main(void) {
int x = 1, b = 5, d = x + b, e = x + 10;
double y = 2., z = 3.5;
char c = 'a';
printf("e: %d\n", e); // 11
return 0; return 0;
} }
.file 1 "acron.c"
# GNU C 2.7.2.3 [AL 1.1, MM 40, tma 0.1] SimpleScalar running sstrix compiled by GNU C
# Cc1 defaults:
# -mgas -mgpOPT
# Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
# -quiet -dumpbase -O0 -o
gcc2_compiled.:
__gnu_compiled_c:
.globl w
.data
.align 2
w:
.word $LC0
.word $LC1
.word $LC2
.word $LC3
.word $LC4
.word $LC5
.sdata
.align 2
$LC5:
.ascii "Eephole\000"
.rdata
.align 2
$LC4:
.ascii "Peephole\000"
.align 2
$LC3:
.ascii "Optimization\000"
.align 2
$LC2:
.ascii "Practicum\000"
.align 2
$LC1:
.ascii "Ertalerbouw\000"
.align 2
$LC0:
.ascii "Vertalerbouw\000"
.text
.align 2
.globl is_vowel
.sdata
.align 2
$LC6:
.ascii "%s\000"
.align 2
$LC7:
.ascii " %s\000"
.align 2
$LC8:
.ascii "\n\000"
.text
.align 2
.globl do_perm
.align 2
.globl main
.comm acron,12
.comm command,100
.comm done,24
.comm pindex,28
.text
.loc 1 10
.ent is_vowel
is_vowel:
.frame $fp,16,$31 # vars= 8, regs= 1/0, args= 0, extra= 0
.mask 0x40000000,-8
.fmask 0x00000000,0
subu $sp,$sp,16
sw $fp,8($sp)
move $fp,$sp
move $3,$4
sb $3,0($fp)
move $2,$0
lb $4,0($fp)
li $5,0x00000041 # 65
beq $4,$5,$L3
lb $4,0($fp)
li $5,0x00000045 # 69
beq $4,$5,$L3
lb $4,0($fp)
li $5,0x00000049 # 73
beq $4,$5,$L3
lb $4,0($fp)
li $5,0x0000004f # 79
beq $4,$5,$L3
lb $4,0($fp)
li $5,0x00000055 # 85
beq $4,$5,$L3
lb $4,0($fp)
li $5,0x00000059 # 89
bne $4,$5,$L2
$L3:
li $2,0x00000001 # 1
$L2:
j $L1
$L1:
move $sp,$fp # sp not trusted here
lw $fp,8($sp)
addu $sp,$sp,16
j $31
.end is_vowel
.loc 1 15
.ent do_perm
do_perm:
.frame $fp,56,$31 # vars= 24, regs= 4/0, args= 16, extra= 0
.mask 0xc0030000,-4
.fmask 0x00000000,0
subu $sp,$sp,56
sw $31,52($sp)
sw $fp,48($sp)
sw $17,44($sp)
sw $16,40($sp)
move $fp,$sp
sw $4,56($fp)
sw $5,60($fp)
sw $6,64($fp)
sw $7,68($fp)
sw $0,24($fp)
lw $2,64($fp)
li $3,0x00000001 # 1
bne $2,$3,$L5
lw $2,pindex
move $3,$2
sll $2,$3,2
la $3,w
addu $2,$2,$3
lw $3,0($2)
lb $4,0($3)
jal is_vowel
bne $2,$0,$L5
lw $2,56($fp)
move $3,$2
sll $2,$3,2
la $3,w
addu $2,$2,$3
lw $3,0($2)
lb $4,0($3)
jal is_vowel
beq $2,$0,$L4
$L5:
lw $2,64($fp)
slt $3,$2,2
bne $3,$0,$L6
lw $2,64($fp)
move $3,$2
sll $2,$3,2
la $3,pindex-8
addu $2,$2,$3
lw $3,0($2)
move $2,$3
sll $3,$2,2
la $4,w
addu $2,$3,$4
lw $3,0($2)
lb $4,0($3)
jal is_vowel
move $16,$2
lw $2,64($fp)
move $3,$2
sll $2,$3,2
la $3,pindex-4
addu $2,$2,$3
lw $3,0($2)
move $2,$3
sll $3,$2,2
la $4,w
addu $2,$3,$4
lw $3,0($2)
lb $4,0($3)
jal is_vowel
move $17,$2
lw $2,56($fp)
move $3,$2
sll $2,$3,2
la $3,w
addu $2,$2,$3
lw $3,0($2)
lb $4,0($3)
jal is_vowel
addu $3,$16,$17
addu $2,$3,$2
sw $2,24($fp)
lw $2,24($fp)
beq $2,$0,$L8
lw $2,24($fp)
li $3,0x00000003 # 3
bne $2,$3,$L7
$L8:
j $L4
$L7:
$L6:
lw $2,64($fp)
addu $3,$2,1
sw $3,64($fp)
sll $3,$2,2
la $4,pindex
addu $2,$3,$4
lw $3,56($fp)
sw $3,0($2)
lw $2,64($fp)
slt $3,$2,6
beq $3,$0,$L9
lw $3,68($fp)
subu $2,$3,1
move $3,$2
sw $3,68($fp)
beq $3,$0,$L9
sw $0,16($fp)
$L10:
lw $2,16($fp)
slt $3,$2,6
beq $3,$0,$L11
$L13:
lw $2,16($fp)
move $3,$2
sll $2,$3,2
lw $3,60($fp)
addu $2,$2,$3
lw $3,0($2)
bne $3,$0,$L14
lw $2,16($fp)
move $3,$2
sll $2,$3,2
lw $3,60($fp)
addu $2,$2,$3
li $3,0x00000001 # 1
sw $3,0($2)
lw $4,16($fp)
lw $5,60($fp)
lw $6,64($fp)
lw $7,68($fp)
jal do_perm
lw $2,16($fp)
move $3,$2
sll $2,$3,2
lw $3,60($fp)
addu $2,$2,$3
sw $0,0($2)
$L14:
$L12:
lw $3,16($fp)
addu $2,$3,1
move $3,$2
sw $3,16($fp)
j $L10
$L11:
j $L15
$L9:
sw $0,28($fp)
sw $0,20($fp)
$L16:
lw $2,20($fp)
lw $3,64($fp)
slt $2,$2,$3
beq $2,$0,$L17
$L19:
sw $0,32($fp)
$L20:
lw $2,20($fp)
move $3,$2
sll $2,$3,2
la $3,pindex
addu $2,$2,$3
lw $3,0($2)
move $2,$3
sll $3,$2,2
la $4,w
addu $2,$3,$4
lw $3,0($2)
lw $4,32($fp)
addu $2,$3,$4
lb $4,0($2)
jal isupper
beq $2,$0,$L21
$L22:
lw $2,28($fp)
addu $3,$2,1
sw $3,28($fp)
lw $3,20($fp)
move $4,$3
sll $3,$4,2
la $4,pindex
addu $3,$3,$4
lw $4,0($3)
move $3,$4
sll $4,$3,2
la $5,w
addu $3,$4,$5
lw $4,32($fp)
addu $5,$4,1
sw $5,32($fp)
lw $5,0($3)
addu $3,$4,$5
lbu $4,0($3)
sb $4,acron($2)
j $L20
$L21:
$L18:
lw $3,20($fp)
addu $2,$3,1
move $3,$2
sw $3,20($fp)
j $L16
$L17:
lw $2,28($fp)
la $3,acron
addu $2,$2,$3
sb $0,0($2)
la $4,$LC6
la $5,acron
jal printf
sw $0,20($fp)
$L23:
lw $2,20($fp)
lw $3,64($fp)
slt $2,$2,$3
beq $2,$0,$L24
$L26:
lw $2,20($fp)
move $3,$2
sll $2,$3,2
la $3,pindex
addu $2,$2,$3
lw $3,0($2)
move $2,$3
sll $3,$2,2
la $4,w
addu $2,$3,$4
la $4,$LC7
lw $5,0($2)
jal printf
$L25:
lw $3,20($fp)
addu $2,$3,1
move $3,$2
sw $3,20($fp)
j $L23
$L24:
la $4,$LC8
jal printf
$L15:
$L4:
move $sp,$fp # sp not trusted here
lw $31,52($sp)
lw $fp,48($sp)
lw $17,44($sp)
lw $16,40($sp)
addu $sp,$sp,56
j $31
.end do_perm
.loc 1 53
.ent main
main:
.frame $fp,32,$31 # vars= 8, regs= 2/0, args= 16, extra= 0
.mask 0xc0000000,-4
.fmask 0x00000000,0
subu $sp,$sp,32
sw $31,28($sp)
sw $fp,24($sp)
move $fp,$sp
jal __main
li $2,0x00000004 # 4
sw $2,20($fp)
$L28:
lw $2,20($fp)
slt $3,$2,7
beq $3,$0,$L29
$L31:
sw $0,16($fp)
$L32:
lw $2,16($fp)
slt $3,$2,6
beq $3,$0,$L33
$L35:
lw $2,16($fp)
move $3,$2
sll $2,$3,2
la $3,done
addu $2,$2,$3
li $3,0x00000001 # 1
sw $3,0($2)
lw $4,16($fp)
la $5,done
move $6,$0
lw $7,20($fp)
jal do_perm
lw $2,16($fp)
move $3,$2
sll $2,$3,2
la $3,done
addu $2,$2,$3
sw $0,0($2)
$L34:
lw $3,16($fp)
addu $2,$3,1
move $3,$2
sw $3,16($fp)
j $L32
$L33:
$L30:
lw $3,20($fp)
addu $2,$3,1
move $3,$2
sw $3,20($fp)
j $L28
$L29:
$L27:
move $sp,$fp # sp not trusted here
lw $31,28($sp)
lw $fp,24($sp)
addu $sp,$sp,32
j $31
.end main
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
.file 1 "pi.c"
# GNU C 2.7.2.3 [AL 1.1, MM 40, tma 0.1] SimpleScalar running sstrix compiled by GNU C
# Cc1 defaults:
# -mgas -mgpOPT
# Cc1 arguments (-G value = 8, Cpu = default, ISA = 1):
# -quiet -dumpbase -o
gcc2_compiled.:
__gnu_compiled_c:
.rdata
.align 2
$LC0:
.ascii "Usage: %s <iterations>\n\000"
.sdata
.align 2
$LC3:
.ascii "%.10f\n\000"
.align 3
$LC1:
.word 0xffc00000 # 2147483647
.word 0x41dfffff
.align 3
$LC2:
.word 0x00000000 # 1
.word 0x3ff00000
.align 3
$LC4:
.word 0x00000000 # 4
.word 0x40100000
.text
.align 2
.globl main
.extern stderr, 4
.text
.loc 1 5
.ent main
main:
.frame $fp,56,$31 # vars= 32, regs= 2/0, args= 16, extra= 0
.mask 0xc0000000,-4
.fmask 0x00000000,0
subu $sp,$sp,56
sw $31,52($sp)
sw $fp,48($sp)
move $fp,$sp
sw $4,56($fp)
sw $5,60($fp)
jal __main
sw $0,24($fp)
lw $2,56($fp)
li $3,0x00000002 # 2
beq $2,$3,$L2
lw $2,60($fp)
lw $4,stderr
la $5,$LC0
lw $6,0($2)
jal fprintf
move $4,$0
jal exit
$L2:
lw $3,60($fp)
addu $2,$3,4
lw $4,0($2)
jal atoi
sw $2,20($fp)
li $4,0x00000001 # 1
jal srandom
sw $0,16($fp)
$L3:
lw $2,16($fp)
lw $3,20($fp)
slt $2,$2,$3
bne $2,$0,$L6
j $L4
$L6:
jal random
mtc1 $2,$f0
#nop
cvt.d.w $f0,$f0
l.d $f2,$LC1
div.d $f0,$f0,$f2
s.d $f0,32($fp)
jal random
mtc1 $2,$f0
#nop
cvt.d.w $f0,$f0
l.d $f2,$LC1
div.d $f0,$f0,$f2
s.d $f0,40($fp)
l.d $f0,32($fp)
l.d $f2,32($fp)
mul.d $f0,$f0,$f2
l.d $f2,40($fp)
l.d $f4,40($fp)
mul.d $f2,$f2,$f4
add.d $f0,$f0,$f2
l.d $f2,$LC2
c.le.d $f0,$f2
bc1f $L7
lw $3,24($fp)
addu $2,$3,1
move $3,$2
sw $3,24($fp)
$L7:
$L5:
lw $3,16($fp)
addu $2,$3,1
move $3,$2
sw $3,16($fp)
j $L3
$L4:
l.s $f0,24($fp)
#nop
cvt.d.w $f0,$f0
l.s $f2,20($fp)
#nop
cvt.d.w $f2,$f2
div.d $f0,$f0,$f2
l.d $f2,$LC4
mul.d $f0,$f0,$f2
la $4,$LC3
dmfc1 $6,$f0
jal printf
li $2,0x00000001 # 1
j $L1
$L1:
move $sp,$fp # sp not trusted here
lw $31,52($sp)
lw $fp,48($sp)
addu $sp,$sp,56
j $31
.end main
\ No newline at end of file
This diff is collapsed.
#!/usr/bin/python #!/usr/bin/python
from parser import parse_file from src.parser import parse_file
from optimize import optimize from src.optimize import optimize
from writer import write_statements from src.writer import write_statements
if __name__ == '__main__': if __name__ == '__main__':
from sys import argv, exit from sys import argv, exit
......
RM=rm -rf
all: report.pdf
%.pdf: %.tex
pdflatex $^
pdflatex $^
clean:
$(RM) *.pdf *.aux *.log *.out *.toc *.snm *.nav
...@@ -11,39 +11,230 @@ ...@@ -11,39 +11,230 @@
\usepackage{hyperref} \usepackage{hyperref}
\title{Peephole Optimizer} \title{Peephole Optimizer}
\author{Jayke Meijer (6049885), Richard Torenvliet (6138861), Taddeus Kroes (6054129)} \author{Jayke Meijer (6049885), Richard Torenvliet (6138861), Tadde\"us Kroes
(6054129)}
\begin{document} \begin{document}
\maketitle \maketitle
\tableofcontents
\pagebreak
\section{Introduction} \section{Introduction}
The goal of the assignment is to implement the optimization stage of the compiler. To reach this goal the parser part of the compiler has to be implemented.
The output of the gcc cross compiler on a c program is our input, the output of the gcc cross compiler is in the form of Assembly code, but not optimized. Our assignment includes a number of c programs, an important part of the assignment is parsing the data. Parsing the data is done with lex and yacc. The lexer is a program that finds keywords that meets the regular expression provided in the lexer. After the lexer, the yaccer takes over. Yaccer can turn the keywords in to an action. The goal of the assignment is to implement the optimization stage of the
compiler. To reach this goal the parser and the optimizer part of the compiler
have to be implemented.
The output of the xgcc cross compiler on a C program is our input. The output
of the xgcc cross compiler is in the form of Assembly code, but not optimized.
Our assignment includes a number of C programs. An important part of the
assignment is parsing the data. Parsing the data is done with Lex and Yacc. The
Lexer is a program that finds keywords that meet the regular expressions
provided in the Lexer. After the Lexer, Yacc takes over. Yacc can turn
the keywords into an action.
\section{Design}
There are two general types of optimizations of the assembly code, global
optimizations and optimizations on a so-called basic block. These optimizations
will be discussed separately.
\subsection{Global optimizations}
We only perform one global optimization, which is optimizing branch-jump
statements. The unoptimized Assembly code contains sequences of code of the
following structure:
\begin{verbatim}
beq ...,$Lx
j $Ly
$Lx: ...
\end{verbatim}
This is inefficient, since there is a jump to a label that follows this code.
It would be more efficient to replace the branch statement with a \texttt{bne}
(the opposite case) to the label used in the jump statement. This way the jump
statement can be eliminated, since the next label follows anyway. The same can
of course be done for the opposite case, where a \texttt{bne} is changed into a
\texttt{beq}.
Since this optimization is done between two series of codes with jumps and
labels, we cannot perform this optimization during the basic block
optimizations. The reason for this will become clearer in the following section.
\subsection{Basic Block Optimizations}
Optimizations on basic blocks are a more important part of the optimizer.
First, what is a basic block? A basic block is a sequence of statements
guaranteed to be executed in that order, and that order alone. This is the case
for a piece of code not containing any branches or jumps.
To create a basic block, you need to define what is the leader of a basic
block. We call a statement a leader if it is either a jump/branch statement, or
the target of such a statement. Then a basic block runs from one leader until
the next leader.
There are quite a few optimizations we perform on these basic blocks, so we
will describe the types of optimizations here instead of each optimization.
\subsubsection*{Standard peephole optimizations}
These are optimizations that simply look for a certain statement or pattern of
statements, and optimize these. For example,
\begin{verbatim}
mov $regA,$regB
instr $regA, $regA,...
\end{verbatim}
can be optimized into
\begin{verbatim}
instr $regA, $regB,...
\end{verbatim}
since the register \texttt{\$regA} gets overwritten by the second instruction
anyway, and the instruction can easily use \texttt{\$regB} instead of
\texttt{\$regA}. There are a few more of these cases, which are the same as
those described on the practicum page
\footnote{\url{http://staff.science.uva.nl/~andy/compiler/prac.html}} and in
Appendix \ref{opt}.
\subsubsection*{Common subexpression elimination}
A more advanced optimization is common subexpression elimination. This means
that expensive operations such as a multiplication or addition are performed only
once and the result is then `copied' into variables where needed.
A standard method for doing this is the creation of a DAG or Directed Acyclic
Graph. However, this requires a fairly advanced implementation. Our
implementation is slightly less fancy, but easier to implement.
We search from the end of the block up for instructions that are eligible for
CSE. If we find one, we check further up in the code for the same instruction,
and add that to a temporary storage list. This is done until the beginning of
the block or until one of the arguments of this expression is assigned.
We now add the instruction above the first use, and write the result in a new
variable. Then all occurrences of this expression can be replaced by a move
from the new variable into the original destination variable of the instruction.
This is a less efficient method than the DAG, but because the basic blocks are
in general not very large and the execution time of the optimizer is not a
primary concern, this is not a big problem.
\section{Implementation}
We decided to implement the optimization in Python. We chose this programming
language because Python is an easy language to manipulate strings, work
object-oriented etc.
It turns out that Lex and Yacc are also available as a Python module,
named PLY (Python Lex-Yacc). This allows us to use one language, Python, instead
of two, i.e. C and Python. Also no debugging is needed in C, only in Python
which makes our assignment more feasible.
The program has three steps, parsing the Assembly code into a datastructure we
can use, the so-called Intermediate Representation, performing optimizations on
this IR and writing the IR back to Assembly.
\subsection{Parsing}
The parsing is done with PLY, which allows us to perform Lex-Yacc tasks in
Python by using a Lex-Yacc like syntax. This way there is no need to combine
languages like we should do otherwise since Lex and Yacc are coupled with C.
The decision was made to not recognize exactly every possible instruction in
the parser, but only if something is for example a command, a comment or a gcc
directive. We then transform each line into an object called a Statement. A
statement has a type, a name and optionally a list of arguments. These
statements together form a statement list, which is placed in another object
called a Block. In the beginning there is one block for the entire program, but
after global optimizations this will be separated in several blocks that are
the basic blocks.
\subsection{Optimizations}
The optimizations are done in two different steps. First the global
optimizations are performed, which are only the optimizations on branch-jump
constructions. This is done repeatedly until there are no more changes.
After all possible global optimizations are done, the program is separated into
basic blocks. The algorithm to do this is described earlier, and means all
jump and branch instructions are called leaders, as are their targets. A basic
block then goes from leader to leader.
After the division in basic blocks, optimizations are performed on each of
these basic blocks. This is also done repeatedly, since sometimes several
steps can be done to optimize something.
\subsection{Writing}
Once all the optimizations have been done, the IR needs to be rewritten into
Assembly code, so the xgcc crosscompiler can make binary code out of it.
The writer expects a list of statements, so first the blocks have to be
concatenated again into a list. After this is done, the list is passed on to
the writer, which writes the instructions back to Assembly and saves the file
so we can let xgcc compile it.
\section{Design \& Implementation} \section{Results}
We decided to implement the optimization in python. We chose this programming language because python is an easy language to manipulate strings, work objective ori\"ented etc.
It turns out that a lex and yacc are also implemented in a python version, named PLY(Python Lex-Yacc). This allows us to use one language, Python, instead of two i.e. C and Python. Also no debugging is needed in C, only in Python which makes our assignment more feasible.
\subsection{Design} \subsection{pi.c}
\subsection{acron.c}
\subsection*{Implementation} \subsection{whet.c}
This
\subsubsection*{PLY} \subsection{slalom.c}
\subsection{clinpack.c}
\section{Conclusion}
\appendix
\section{List of all optimizations}
\label{opt}
\textbf{Global optimizations}
\begin{verbatim}
beq ...,$Lx bne ...,$Ly
j $Ly -> $Lx: ...
$Lx: ...
bne ...,$Lx beq ...,$Ly
j $Ly -> $Lx: ...
$Lx: ...
\end{verbatim}
\textbf{Standard basic block optimizations}
\begin{verbatim}
mov $regA,$regA -> --- // remove it
mov $regA,$regB -> instr $regA, $regB,...
instr $regA, $regA,...
instr $regA,... instr $4,...
mov [$4-$7], $regA -> jal XXX
jal XXX
\section{Results}
\subsection*{pi.c} sw $regA,XXX -> sw $regA, XXX
ld $regA,XXX
\subsection*{arcron.c}
\subsection*{whet.c} shift $regA,$regA,0 -> --- // remove it
\subsection*{slalom.c}
\subsection*{clinpack.c} add $regA,$regA,X -> lw ...,X($regA)
lw ...,0($regA)
\end{verbatim}
\textbf{Advanced basic block optimizations}
\section{conclusion} \begin{verbatim}
# Common subexpression elimination
addu $regA, $regB, 4 addu $regD, $regB, 4
... move $regA, $regD
Code not writing $regB -> ...
... ...
addu $regC, $regB, 4 move $regC, $regD
\end{verbatim}
\end{document} \end{document}
from copy import copy #from copy import copy
from statement import Block from statement import Block
...@@ -122,55 +122,102 @@ def generate_flow_graph(blocks): ...@@ -122,55 +122,102 @@ def generate_flow_graph(blocks):
b.add_edge_to(blocks[i + 1]) b.add_edge_to(blocks[i + 1])
def generate_dominator_tree(nodes): #def generate_dominator_tree(nodes):
"""Add dominator administration to the given flow graph nodes.""" # """Add dominator administration to the given flow graph nodes."""
# Dominator of the start node is the start itself # # Dominator of the start node is the start itself
nodes[0].dom = set([nodes[0]]) # nodes[0].dom = set([nodes[0]])
#
# For all other nodes, set all nodes as the dominators # # For all other nodes, set all nodes as the dominators
for n in nodes[1:]: # for n in nodes[1:]:
n.dom = set(copy(nodes)) # n.dom = set(copy(nodes))
#
def pred(n, known=[]): # def pred(n, known=[]):
"""Recursively find all predecessors of a node.""" # """Recursively find all predecessors of a node."""
direct = filter(lambda x: x not in known, n.edges_from) # direct = filter(lambda x: x not in known, n.edges_from)
p = copy(direct) # p = copy(direct)
#
for ancestor in direct: # for ancestor in direct:
p += pred(ancestor, direct) # p += pred(ancestor, direct)
#
return p # return p
#
# Iteratively eliminate nodes that are not dominators # # Iteratively eliminate nodes that are not dominators
changed = True # changed = True
#
while changed: # while changed:
changed = False # changed = False
#
for n in nodes[1:]: # for n in nodes[1:]:
old_dom = n.dom # old_dom = n.dom
intersection = lambda p1, p2: p1.dom & p2.dom # intersection = lambda p1, p2: p1.dom & p2.dom
n.dom = set([n]) | reduce(intersection, pred(n), set([])) # n.dom = set([n]) | reduce(intersection, pred(n), set([]))
#
if n.dom != old_dom: # if n.dom != old_dom:
changed = True # changed = True
#
def idom(d, n): # def idom(d, n):
"""Check if d immediately dominates n.""" # """Check if d immediately dominates n."""
for b in n.dom: # for b in n.dom:
if b != d and b != n and b in n.dom: # if b != d and b != n and b in n.dom:
return False # return False
#
return True # return True
#
# Build tree using immediate dominators # # Build tree using immediate dominators
for n in nodes: # for n in nodes:
for d in n.dom: # for d in n.dom:
if idom(d, n): # if idom(d, n):
d.set_dominates(n) # d.set_dominates(n)
break # break
# statements = parse_file(...)
# b = find_basic_blocks(statements)
# generate_flow_graph(b) # nodes now have edges
# generate_dominator_tree(b) # nodes now have dominators


class Dag:
    """Directed Acyclic Graph of the operations in one basic block.

    `self.nodes` holds, in creation order, DagLeaf objects (operands that are
    read without having been produced by an earlier operation in the block)
    and DagNode objects (results of move/unary/binary operations).
    """

    def __init__(self, block):
        """Create the Directed Acyclic Graph of all binary operations in a
        basic block.

        `block` is iterated once; every statement must offer `is_command()`,
        `is_monop()`, `is_binop()`, a `name`, and unpack into its arguments.
        Statements that are neither a move, a unary nor a binary operation are
        ignored.
        NOTE(review): ignored statements that write a register (loads, for
        instance) therefore do not invalidate that register here - confirm
        this is acceptable for the callers.
        """
        self.nodes = []

        for s in block:
            if s.is_command('move') or s.is_monop():
                rd, rs = s
                y = self.find_reg_node(rs)
                self.find_op_node(s.name, rd, y)
            elif s.is_binop():
                rd, rs, rt = s
                y = self.find_reg_node(rs)
                z = self.find_reg_node(rt)
                self.find_op_node(s.name, rd, y, z)

    def find_reg_node(self, reg):
        """Return the node that currently holds operand `reg`.

        Operation nodes are searched first (by label), so a register that was
        written earlier in the block resolves to the operation that produced
        it. Otherwise the matching leaf is returned, and a new leaf is created
        and registered when there is none yet. Matching is by equality, so
        '$1' never matches '$10' and integer immediates are accepted.
        """
        for n in self.nodes:
            if not isinstance(n, DagLeaf) and reg in n.labels:
                return n

        for n in self.nodes:
            if isinstance(n, DagLeaf) and n.reg == reg:
                return n

        node = DagLeaf(reg)
        self.nodes.append(node)

        return node

    def find_op_node(self, op, rd, *args):
        """Return the node for operation `op` applied to the nodes `args`,
        and make `rd` a label of that node.

        An existing node with the same operator and the same operand nodes is
        reused (this is the common subexpression case); otherwise a new
        DagNode is appended. Because `rd` is being (re)assigned, it is removed
        from the labels of every other operation node.
        """
        match = None

        for n in self.nodes:
            if isinstance(n, DagLeaf):
                continue

            if match is None and n.op == op and n.nodes == args:
                match = n
            elif rd in n.labels:
                # rd is overwritten, so it no longer names this older value
                n.labels.remove(rd)

        if match is None:
            match = DagNode(op, rd, *args)
            self.nodes.append(match)
        elif rd not in match.labels:
            match.labels.append(rd)

        return match


class DagNode:
    """Interior DAG node: the result of one operation."""

    def __init__(self, op, label, *args):
        # Operator name of the statement that created this node
        self.op = op
        # Destination registers that hold this result
        self.labels = [label]
        # Operand nodes, kept as the tuple of positional arguments
        self.nodes = args


class DagLeaf:
    """Leaf DAG node: an operand that no operation node in the block labels."""

    def __init__(self, reg):
        # The operand exactly as it appeared in the statement
        self.reg = reg
This diff is collapsed.
import re
def redundant_move_1(mov, statements):
    """
    move $regA, $regA          ->  --- remove it

    Returns True when the statement was removed; otherwise nothing is
    changed and None is returned.
    """
    moves_onto_itself = mov.is_command('move') and mov[0] == mov[1]

    if not moves_onto_itself:
        return

    # Replace the current statement by nothing
    statements.replace(1, [])

    return True
def redundant_move_2(mov, statements):
    """
    move $regA, $regB          ->  instr $regA, $regB, ...
    instr $regA, $regA, ...

    The move is dropped and the following instruction reads $regB directly.
    Every operand after the first that equals $regA is rewritten, not only
    the second one: once the move is gone $regA no longer holds the value of
    $regB, so `addu $a, $a, $a` has to become `addu $a, $b, $b`.

    Returns True when the pair was rewritten; otherwise nothing is changed
    and None is returned.

    NOTE(review): this treats the first argument of the instruction as its
    destination - confirm that no instruction with a different operand
    layout can reach this pattern.
    """
    if not mov.is_command('move'):
        return

    ins = statements.peek()

    # `ins` is tested for truthiness first - presumably peek() has nothing to
    # return when the move is the last statement of the block
    if ins and len(ins) >= 2 and ins[0] == mov[0] and ins[1] == mov[0]:
        dest, src = mov[0], mov[1]

        for i in range(1, len(ins)):
            if ins[i] == dest:
                ins[i] = src

        statements.replace(2, [ins])

        return True
def redundant_move_3(ins, statements):
    """
    instr $regA, ...           ->  instr $4, ...
    move $4, $regA                 jal XX
    jal XX

    When an instruction is followed by a move of its first argument into one
    of the registers $4-$7 and then a `jal`, the instruction is changed to
    use that register directly and the move is dropped.

    Returns True when the statements were rewritten; otherwise nothing is
    changed and None is returned.

    NOTE(review): the first argument of `ins` is assumed to be its
    destination, and $regA is assumed not to be needed after the call -
    confirm both against the instructions that can reach this pattern.
    """
    if not (ins.is_command() and len(ins)):
        return

    following = statements.peek(2)

    # Fewer than two statements left: the pattern cannot match
    if not following or len(following) != 2:
        return

    mov, jal = following

    # Raw string: `\$` is not a valid escape sequence in a normal literal
    if mov.is_command('move') and mov[1] == ins[0] \
            and re.match(r'^\$[4-7]$', mov[0]) \
            and jal.is_command('jal'):
        ins[0] = mov[0]
        statements.replace(2, [ins])

        return True
def redundant_move_4(mov1, statements):
    """
    move $regA, $regB          ->  move $regA, $regB
    move $regB, $regA

    The second move copies the value straight back and is dropped.

    Returns True when the second move was removed; otherwise nothing is
    changed and None is returned.
    """
    if not mov1.is_command('move'):
        return

    mov2 = statements.peek()

    # Same truthiness guard as redundant_move_2 uses on peek(): presumably
    # there is nothing to look at when mov1 is the last statement of the block
    if mov2 and mov2.is_command('move') and mov2[0] == mov1[1] \
            and mov2[1] == mov1[0]:
        statements.replace(2, [mov1])

        return True
def redundant_load(sw, statements):
    """
    sw $regA, XX               ->  sw $regA, XX
    lw $regA, XX

    A load that directly follows a store with exactly the same arguments
    reads back the value that is already in the register, so it is dropped.

    Returns True when the load was removed; otherwise nothing is changed and
    None is returned.
    """
    if not sw.is_command('sw'):
        return

    ld = statements.peek()

    # Same truthiness guard as redundant_move_2 uses on peek(): presumably
    # there is nothing to look at when the store ends the block
    if ld and ld.is_command('lw') and ld.args == sw.args:
        statements.replace(2, [sw])

        return True
def redundant_shift(shift, statements):
    """
    shift $regA, $regA, 0      ->  --- remove it

    Returns True when the statement was removed; otherwise nothing is
    changed and None is returned.
    """
    if not shift.is_shift():
        return

    # Shifting a register into itself by zero positions changes nothing
    if shift[0] == shift[1] and shift[2] == 0:
        statements.replace(1, [])

        return True
def redundant_add(add, statements):
    """
    addu $regA, $regA, X       ->  lw ..., X($regA)
    lw ..., 0($regA)

    An `addu` of an integer constant onto a register, followed by a load
    from offset 0 of that register, is folded into a single load with the
    constant as its offset.

    Returns True when the pair was rewritten; otherwise nothing is changed
    and None is returned.

    NOTE(review): the addu is dropped, so $regA keeps its old value after the
    load - presumably this is only valid when $regA is not read again;
    confirm.
    """
    if not (add.is_command('addu') and add[0] == add[1]
            and isinstance(add[2], int)):
        return

    lw = statements.peek()

    # Same truthiness guard as redundant_move_2 uses on peek(): presumably
    # there is nothing to look at when the addu ends the block
    if lw and lw.is_load() and lw[-1] == '0(%s)' % add[0]:
        lw[-1] = '%s(%s)' % (add[2], add[0])
        statements.replace(2, [lw])

        return True
...@@ -3,11 +3,13 @@ import ply.yacc as yacc ...@@ -3,11 +3,13 @@ import ply.yacc as yacc
from statement import Statement as S, Block from statement import Statement as S, Block
# Global statements administration # Global statements administration
statements = [] statements = []
tokens = ('NEWLINE', 'WORD', 'COMMENT', 'DIRECTIVE', 'COMMA', 'COLON') tokens = ('NEWLINE', 'WORD', 'COMMENT', 'DIRECTIVE', 'COMMA', 'COLON')
# Tokens # Tokens
def t_NEWLINE(t): def t_NEWLINE(t):
r'\n+' r'\n+'
...@@ -32,7 +34,7 @@ def t_DIRECTIVE(t): ...@@ -32,7 +34,7 @@ def t_DIRECTIVE(t):
return t return t
def t_hex_word(t): def t_hex_word(t):
r'0x[0-9a-fA-F]{8}' r'0x([0-9a-fA-F]{8}|[0-9a-fA-F]{4})'
t.type = 'WORD' t.type = 'WORD'
return t return t
...@@ -47,9 +49,10 @@ def t_int(t): ...@@ -47,9 +49,10 @@ def t_int(t):
return t return t
def t_WORD(t): def t_WORD(t):
r'[a-zA-Z0-9$_.+()]+' r'[a-zA-Z0-9$_.+()-]+'
return t return t
# Ignore whitespaces # Ignore whitespaces
t_ignore = ' \t' t_ignore = ' \t'
...@@ -57,9 +60,11 @@ def t_error(t): ...@@ -57,9 +60,11 @@ def t_error(t):
print('Illegal character "%s"' % t.value[0]) print('Illegal character "%s"' % t.value[0])
t.lexer.skip(1) t.lexer.skip(1)
# Build the lexer # Build the lexer
lexer = lex.lex() lexer = lex.lex()
# Parsing rules # Parsing rules
start = 'input' start = 'input'
...@@ -102,9 +107,14 @@ def p_command(p): ...@@ -102,9 +107,14 @@ def p_command(p):
def p_error(p): def p_error(p):
print 'Syntax error at "%s" on line %d' % (p.value, lexer.lineno) print 'Syntax error at "%s" on line %d' % (p.value, lexer.lineno)
# Build YACC
yacc.yacc() yacc.yacc()
def parse_file(filename): def parse_file(filename):
"""Parse a given Assembly file, return a Block with Statement objects
containing the parsed instructions."""
global statements global statements
statements = [] statements = []
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment