advanced.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. from src.statement import Statement as S
  2. from math import log
  3. def reg_can_be_used_in(reg, block, start, end):
  4. """Check if a register addres safely be used in a block section using local
  5. dataflow analysis."""
  6. # Check if the register used or defined in the block section
  7. for s in block[start:end]:
  8. if s.uses(reg) or s.defines(reg):
  9. return False
  10. # Check if the register is used inside the block after the specified
  11. # section, without having been re-assigned first
  12. for s in block[end:]:
  13. if s.uses(reg):
  14. return False
  15. elif s.defines(reg):
  16. return True
  17. return True
  18. def find_free_reg(block, start, end):
  19. """Find a temporary register that is free in a given list of statements."""
  20. for i in xrange(8, 16):
  21. tmp = '$%d' % i
  22. if reg_can_be_used_in(tmp, block, start, end):
  23. return tmp
  24. raise Exception('No temporary register is available.')
  25. def eliminate_common_subexpressions(block):
  26. """
  27. Common subexpression elimination:
  28. x = a + b -> u = a + b
  29. y = a + b x = u
  30. y = u
  31. The algorithm used is as follows:
  32. - Traverse through the statements.
  33. - If the statement can be possibly be eliminated, walk further collecting
  34. all other occurrences of the expression until one of the arguments is
  35. assigned in a statement, or the start of the block has been reached.
  36. - If one or more occurrences were changed, insert the expression with a new
  37. destination address before the last changed occurrence and change all
  38. occurrences to a move instruction from that address.
  39. """
  40. changed = False
  41. while not block.end():
  42. s = block.read()
  43. if s.is_arith():
  44. pointer = block.pointer
  45. occurrences = [pointer - 1]
  46. args = s[1:]
  47. # Collect similar statements
  48. while not block.end():
  49. s2 = block.read()
  50. if not s2.is_command():
  51. continue
  52. # Stop if one of the arguments is assigned
  53. if len(s2) and s2[0] in args:
  54. break
  55. # Replace a similar expression by a move instruction
  56. if s2.name == s.name and s2[1:] == args:
  57. occurrences.append(block.pointer - 1)
  58. if len(occurrences) > 1:
  59. new_reg = find_free_reg(block, occurrences[0], occurrences[-1])
  60. # Replace all occurrences with a move statement
  61. for occurrence in occurrences:
  62. rd = block[occurrence][0]
  63. block.replace(1, [S('command', 'move', rd, new_reg)], \
  64. start=occurrence)
  65. # Insert the calculation before the original with the new
  66. # destination address
  67. block.insert(S('command', s.name, *([new_reg] + args)), \
  68. index=occurrences[0])
  69. changed = True
  70. # Reset pointer to continue from the original statement
  71. block.pointer = pointer
  72. return changed
  73. def to_hex(value):
  74. """Create the hexadecimal string of an integer."""
  75. return '0x%08x' % value
  76. def fold_constants(block):
  77. """
  78. Constant folding:
  79. x = 3 + 5 -> x = 8
  80. y = x * 2 y = 16
  81. To keep track of constant values, the following assumptions are made:
  82. - An immediate load defines a register value:
  83. li $reg, XX -> register[$reg] = XX
  84. - Integer variable definition is of the following form:
  85. li $reg, XX -> constants[VAR] = XX
  86. sw $reg, VAR -> register[$reg] = XX
  87. - When a variable is used, the following happens:
  88. lw $reg, VAR -> register[$reg] = constants[VAR]
  89. """
  90. changed = False
  91. # Variable values
  92. constants = {}
  93. # Current known values in register
  94. register = {}
  95. while not block.end():
  96. s = block.read()
  97. if not s.is_command():
  98. continue
  99. if s.name == 'li':
  100. # Save value in register
  101. register[s[0]] = int(s[1], 16)
  102. elif s.name == 'move' and s[0] in register:
  103. reg_to, reg_from = s
  104. if reg_from in register:
  105. # Other value is also known, copy its value
  106. register[reg_to] = register[reg_from]
  107. else:
  108. # Other value is unknown, delete the value
  109. del register[reg_to]
  110. elif s.name == 'sw' and s[0] in register:
  111. # Constant variable definition, e.g. 'int a = 1;'
  112. constants[s[1]] = register[s[0]]
  113. elif s.name == 'lw' and s[1] in constants:
  114. # Usage of variable with constant value
  115. register[s[0]] = constants[s[1]]
  116. elif s.name == 'mflo':
  117. # Move of `Lo' register to another register
  118. register[s[0]] = register['$lo']
  119. elif s.name == 'mfhi':
  120. # Move of `Hi' register to another register
  121. register[s[0]] = register['$hi']
  122. elif s.name in ['mult', 'div'] \
  123. and s[0]in register and s[1] in register:
  124. # Multiplication/division with constants
  125. rs, rt = s
  126. a, b = register[rs], register[rt]
  127. if s.name == 'mult':
  128. if not a or not b:
  129. # Multiplication by 0
  130. hi = lo = to_hex(0)
  131. elif a == 1:
  132. # Multiplication by 1
  133. hi = to_hex(0)
  134. lo = to_hex(b)
  135. elif b == 1:
  136. # Multiplication by 1
  137. hi = to_hex(0)
  138. lo = to_hex(a)
  139. else:
  140. # Calculate result and fill Hi/Lo registers
  141. binary = bin(a * b)[2:]
  142. binary = '0' * (64 - len(binary)) + binary
  143. hi = int(binary[:32], base=2)
  144. lo = int(binary[32:], base=2)
  145. # Replace the multiplication with two immidiate loads to the
  146. # Hi/Lo registers
  147. block.replace(1, [S('command', 'li', '$hi', hi),
  148. S('command', 'li', '$lo', li)])
  149. elif s.name == 'div':
  150. lo, hi = divmod(rs, rt)
  151. register['$lo'], register['$hi'] = lo, hi
  152. changed = True
  153. elif s.name in ['addu', 'subu']:
  154. # Addition/subtraction with constants
  155. rd, rs, rt = s
  156. rs_known = rs in register
  157. rt_known = rt in register
  158. if rs_known and rt_known:
  159. # a = 5 -> b = 15
  160. # c = 10
  161. # b = a + c
  162. rs_val = register[rs]
  163. rt_val = register[rt]
  164. if s.name == 'addu':
  165. result = to_hex(rs_val + rt_val)
  166. if s.name == 'subu':
  167. result = to_hex(rs_val - rt_val)
  168. block.replace(1, [S('command', 'li', rd, result)])
  169. register[rd] = result
  170. changed = True
  171. continue
  172. if rt_known:
  173. # a = 10 -> b = c + 10
  174. # b = c + a
  175. s[2] = register[rt]
  176. changed = True
  177. elif rs_known and s.name == 'addu':
  178. # c = 10 -> b = a + 10
  179. # b = c + a
  180. s[1] = rt
  181. s[2] = register[rs]
  182. changed = True
  183. if s[2] == 0:
  184. # Addition/subtraction with 0
  185. block.replace(1, [S('command', 'move', rd, s[1])])
  186. else:
  187. for reg in s.get_def():
  188. if reg in register:
  189. # Known register is overwritten, remove its value
  190. del register[reg]
  191. return changed
  192. def copy_propagation(block):
  193. """
  194. Unpack a move instruction, by replacing its destination
  195. address with its source address in the code following the move instruction.
  196. This way, the move statement might be a target for dead code elimination.
  197. move $regA, $regB move $regA, $regB
  198. ... ...
  199. Code not writing $regA, -> ...
  200. $regB ...
  201. ... ...
  202. addu $regC, $regA, ... addu $regC, $regB, ...
  203. """
  204. moves_from = []
  205. moves_to = []
  206. changed = False
  207. while not block.end():
  208. s = block.read()
  209. if s.is_command('move') and s[0] not in moves_to:
  210. # Add this move to the lists, because it is not yet there.
  211. moves_from.append(s[1])
  212. moves_to.append(s[0])
  213. elif s.is_command('move') and s[0] in moves_to:
  214. # This move is already in the lists, so only update it
  215. for i in xrange(len(moves_to)):
  216. if moves_to[i] == s[0]:
  217. moves_from[i] = s[1]
  218. continue
  219. elif (len(s) == 3 or s.is_command('mlfo') or s.is_load()) \
  220. and (s[0] in moves_to or s[0] in moves_from):
  221. # One of the registers gets overwritten, so remove the data from
  222. # the list.
  223. i = 0
  224. while i < len(moves_to):
  225. if moves_to[i] == s[0] or moves_to[i] == s[1]:
  226. del moves_to[i]
  227. del moves_from[i]
  228. else:
  229. i += 1
  230. elif len(s) == 3 and (s[1] in moves_to or s[2] in moves_to):
  231. # Check where the result of the move is used and replace it with
  232. # the original variable.
  233. for i in xrange(len(moves_to)):
  234. if s[1] == moves_to[i]:
  235. s[1] = moves_from[i]
  236. continue
  237. if s[2] == moves_to[i]:
  238. s[2] = moves_from[i]
  239. continue
  240. changed = True
  241. return changed
  242. def algebraic_transformations(block):
  243. """
  244. Change ineffective or useless algebraic expressions. Handled are:
  245. - x = y + 0 -> x = y
  246. - x = y - 0 -> x = y
  247. - x = y * 1 -> x = y
  248. - x = y * 0 -> x = 0
  249. - x = y * 2 -> x = x << 1
  250. """
  251. changed = False
  252. while not block.end():
  253. s = block.read()
  254. if (s.is_command('addu') or s.is_command('subu')) and s[2] == 0:
  255. block.replace(1, [S('command', 'move', s[0], s[1])])
  256. changed = True
  257. elif s.is_command('mult'):
  258. mflo = block.peek()
  259. if mflo.is_command('mflo'):
  260. if s[1] == 1:
  261. block.replace(2, [S('command', 'move', mflo[0], s[0])])
  262. changed = True
  263. continue
  264. elif s[1] == 0:
  265. block.replace(2, [S('command', 'li', '$1', to_hex(0))])
  266. changed = True
  267. continue
  268. shift_amount = log(s[1], 2)
  269. if shift_amount.is_integer():
  270. new_command = S('command', 'sll', \
  271. mflo[0], s[0], \
  272. int(shift_amount))
  273. block.replace(2, [new_command])
  274. changed = True
  275. return changed
  276. def eliminate_dead_code(block):
  277. """
  278. Dead code elimination:
  279. TODO: example...
  280. The algorithm used is as follows:
  281. - Traverse through the statements in reverse order.
  282. - If the statement definition is dead, remove it. A variable is dead if it
  283. is not used in the rest of the block, and is not in the `out' set of the
  284. block.
  285. """
  286. # TODO: Finish
  287. changed = False
  288. block.reverse_statements()
  289. unused = set()
  290. while not block.end():
  291. s = block.read()
  292. for reg in s.get_def():
  293. if reg in unused:
  294. # Statement is redefined later, so this statement is useless
  295. s.remove = True
  296. #print 'reg %s is in %s, remove:' % (reg, unused), \
  297. # block.pointer - 1, s
  298. else:
  299. unused.add(reg)
  300. unused -= set(s.get_use())
  301. block.apply_filter(lambda s: not hasattr(s, 'remove'))
  302. block.reverse_statements()
  303. return changed