| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460 |
- #!/usr/bin/env python
- #
- # Optimizing brainfuck compiler
- #
- # Copyright (c) 2014 Project Nayuki
- # All rights reserved. Contact Nayuki for licensing.
- # http://www.nayuki.io/page/optimizing-brainfuck-compiler
- #
- # This script translates brainfuck source code into C/Java/Python source code.
- # Usage: python bfc.py BrainfuckFile OutputFile.c/java/py
- #
- import os, re, sys
- # ---- Main ----
def main(args):
    """Translate a Brainfuck source file into C, Java or Python source code.

    args must hold exactly two strings: the path of the Brainfuck input file
    and the path of the output file. The output file's extension (.c, .java
    or .py) selects the target language.

    Returns None on success, or a message string explaining why the
    arguments were rejected. Exceptions raised by parse() or by file I/O are
    not caught here.
    """
    if len(args) != 2:
        return "Usage: python bfc.py BrainfuckFile OutputFile.c/java/py"
    source_path, target_path = args

    # Validate the input path
    if not os.path.exists(source_path):
        return source_path + ": File does not exist"
    if not os.path.isfile(source_path):
        return source_path + ": Not a file"

    # Select the output formatter from the output file's extension
    emitters = (
        (".c", commands_to_c),
        (".java", commands_to_java),
        (".py", commands_to_python),
    )
    emit = None
    for suffix, candidate in emitters:
        if target_path.endswith(suffix):
            emit = candidate
            break
    if emit is None:
        return target_path + ": Unknown output type"

    # Read and parse the Brainfuck program
    with open(source_path, "r") as source_file:
        program_text = source_file.read()
    commands = parse(program_text)
    # NOTE: the optimize() passes are not applied here; the parsed
    # commands are emitted as they are.

    # The output file's base name (without extension) is handed to the
    # formatter as the program name.
    program_name = os.path.splitext(os.path.basename(target_path))[0]
    generated = emit(commands, program_name)
    with open(target_path, "w") as target_file:
        target_file.write(generated)
- # ---- Parser ----
- # Parses the given raw code string, returning a list of Command objects.
def parse(codestr):
    """Parse raw Brainfuck text and return the program as a list of commands.

    Every character other than the eight Brainfuck instructions is removed
    before parsing. Unbalanced brackets make _parse() raise ValueError.
    """
    # Keep only the 8 Brainfuck characters
    filtered = re.sub(r"[^+\-<>.,\[\]]", "", codestr)

    def symbols():
        # Yield each instruction character, then the empty string forever
        # as the end-of-stream marker that _parse() looks for.
        remaining = iter(filtered)
        while True:
            yield next(remaining, "")

    return _parse(symbols(), True)
- def _parse(chargen, maincall):
- result = []
- for c in chargen:
- if c == "+": result.append(Add(0, +1))
- elif c == "-": result.append(Add(0, -1))
- elif c == "<": result.append(Right(-1))
- elif c == ">": result.append(Right(+1))
- elif c == ",": result.append(Input (0))
- elif c == ".": result.append(Output(0))
- elif c == "[": result.append(Loop(_parse(chargen, False)))
- elif c == "]":
- if maincall: raise ValueError("Extra loop closing")
- else: return result
- elif c == "":
- if maincall: return result
- else: raise ValueError("Unclosed loop")
- else:
- raise AssertionError("Illegal code character")
- # ---- Optimizers ----
- # Optimizes the given list of Commands, returning a new list of Commands.
def optimize(commands):
    """Return a new, optimized list of commands equivalent to the given one.

    Pointer movements (Right) are folded into the offsets of the commands
    that follow them and are only emitted before a Loop/If and at the end of
    the block. Neighbouring commands that write the same cell are fused, and
    loops are replaced by straight-line code or by an If when
    optimize_simple_loop() / optimize_complex_loop() accept them.

    Raises AssertionError if a command of an unknown type is encountered.
    """
    optimized = []
    shift = 0  # Pointer movement accumulated so far but not yet emitted
    for cmd in commands:
        # Most recently emitted command: the only candidate for fusion
        last = optimized[-1] if optimized else None

        if isinstance(cmd, Assign):
            target = cmd.offset + shift
            same_cell = isinstance(last, (Add, Assign)) and last.offset == target
            same_dest = isinstance(last, (MultAdd, MultAssign)) and last.destOff == target
            # A previous write to the same cell is dead once it is overwritten
            if same_cell or same_dest:
                optimized.pop()
            optimized.append(Assign(target, cmd.value))

        elif isinstance(cmd, MultAssign):
            optimized.append(MultAssign(cmd.srcOff + shift, cmd.destOff + shift, cmd.value))

        elif isinstance(cmd, Add):
            target = cmd.offset + shift
            if isinstance(last, (Add, Assign)) and last.offset == target:
                # Fold the increment into the previous Add/Assign in place
                last.value = (last.value + cmd.value) & 0xFF
            else:
                optimized.append(Add(target, cmd.value))

        elif isinstance(cmd, MultAdd):
            source = cmd.srcOff + shift
            target = cmd.destOff + shift
            if isinstance(last, Assign) and last.offset == target and last.value == 0:
                # Clearing a cell and then multiply-adding into it is a plain
                # multiply-assign
                optimized[-1] = MultAssign(source, target, cmd.value)
            else:
                optimized.append(MultAdd(source, target, cmd.value))

        elif isinstance(cmd, Right):
            shift += cmd.offset

        elif isinstance(cmd, Input):
            optimized.append(Input(cmd.offset + shift))

        elif isinstance(cmd, Output):
            optimized.append(Output(cmd.offset + shift))

        else:
            # Commit the pointer movement before starting a loop/if
            if shift != 0:
                optimized.append(Right(shift))
                shift = 0

            if isinstance(cmd, Loop):
                replacement = optimize_simple_loop(cmd.commands)
                if replacement is not None:
                    optimized.extend(replacement)
                    continue
                replacement = optimize_complex_loop(cmd.commands)
                if replacement is None:
                    # Neither loop optimizer applies: keep the loop and
                    # optimize its body on its own
                    replacement = Loop(optimize(cmd.commands))
                optimized.append(replacement)
            elif isinstance(cmd, If):
                optimized.append(If(optimize(cmd.commands)))
            else:
                raise AssertionError("Unknown command")

    # Commit the pointer movement before exiting this block
    if shift != 0:
        optimized.append(Right(shift))
    return optimized
- # Tries to optimize the given list of looped commands into a list that would be executed without looping. Returns None if not possible.
def optimize_simple_loop(commands):
    """Try to turn a loop body into equivalent straight-line commands.

    Only bodies made up entirely of Add and Right commands are handled, and
    only when one iteration has no net pointer movement and changes the
    tested cell (offset 0) by exactly -1. The result is one MultAdd sourced
    from offset 0 per other touched cell, in ascending offset order,
    followed by Assign(0, 0). Returns None when the body does not qualify.
    """
    per_cell = {}  # per_cell[i] = v: each iteration adds v to mem[p + i]
    position = 0
    for cmd in commands:
        if isinstance(cmd, Add):
            cell = cmd.offset + position
            per_cell[cell] = per_cell.get(cell, 0) + cmd.value
        elif isinstance(cmd, Right):
            position += cmd.offset
        else:
            # Anything other than Add/Right is beyond this optimization
            return None

    if position != 0:
        return None  # An iteration has a net pointer movement
    if per_cell.get(0, 0) != -1:
        return None  # The tested cell is not decremented by exactly 1

    # The tested cell is the multiplier for all the others and ends up zero
    del per_cell[0]
    straight = [MultAdd(0, cell, per_cell[cell]) for cell in sorted(per_cell)]
    straight.append(Assign(0, 0))
    return straight
# Attempts to convert the body of a while-loop into an if-statement whose body runs once.
# Returns an If command, or None if the conversion is not known to be safe. All of these must hold:
# - There are no commands other than Add/Assign/MultAdd (in particular, no net movement, I/O, embedded loops or MultAssign)
# - The value at offset 0 is decremented by 1 per iteration and is not otherwise read or written
# - No cell is the target of an Add or MultAdd, or the source of a MultAdd, after it was Assigned earlier in the body
# - Every MultAdd reads from an offset other than 0 that the body leaves cleared (its last write is Assign(offset, 0))
def optimize_complex_loop(commands):
    result = []
    origindelta = 0
    assigned = set()  # Offsets overwritten by an Assign earlier in the body
    clears = set()    # Offsets whose most recent Assign in the body stored 0
    sources = set()   # Offsets read by the MultAdd commands of the body
    for cmd in commands:
        if isinstance(cmd, Add):
            if cmd.offset == 0:
                origindelta += cmd.value
            elif cmd.offset in assigned:
                # After an Assign the cell restarts from the same constant in
                # every iteration, so the additions do not accumulate across
                # iterations and cannot be scaled by the loop count.
                return None
            else:
                # Executed once per iteration -> added (loop count) times
                result.append(MultAdd(0, cmd.offset, cmd.value))
        elif isinstance(cmd, MultAdd):
            if cmd.srcOff == 0 or cmd.destOff == 0:
                # The loop counter changes every iteration, so it can be
                # neither read nor written by a command kept in the if-body.
                return None
            if cmd.srcOff in assigned or cmd.destOff in assigned:
                # Same reasoning as for Add: the contribution would be
                # repeated or reset in every iteration.
                return None
            sources.add(cmd.srcOff)
            result.append(cmd)
        elif isinstance(cmd, Assign):
            if cmd.offset == 0:
                return None
            assigned.add(cmd.offset)
            if cmd.value == 0:
                clears.add(cmd.offset)
            else:
                clears.discard(cmd.offset)
            result.append(cmd)
        else:
            # This also rejects MultAssign: from the second iteration on its
            # source is already cleared, so the loop leaves 0 in the
            # destination while a single execution would leave the product.
            return None
    if origindelta != -1:
        return None
    # A source that is cleared by the end of every iteration contributes only
    # in the first iteration, which is what the single execution reproduces.
    if not sources <= clears:
        return None
    result.append(Assign(0, 0))
    return If(result)
- # ---- Output formatters ----
def commands_to_c(commands, name, maincall=True, indentlevel=1):
    """Render a list of commands as C source code and return it as a string.

    commands    -- the commands to translate
    name        -- program name; not used in the C output, only passed along
                   to the recursive calls
    maincall    -- True for the top-level call, which wraps the code in the
                   includes, the read() helper and main(); False for the
                   body of a loop/if
    indentlevel -- number of tabs put in front of each generated statement

    Raises AssertionError if a command of an unknown type is encountered.
    """
    def indent(line, level=indentlevel):
        return "\t" * level + line + "\n"

    def sign(value):
        return "+" if value >= 0 else "-"

    parts = []
    if maincall:
        parts.append(indent("#include <stdint.h>", 0))
        parts.append(indent("#include <stdio.h>", 0))
        parts.append(indent("", 0))
        parts.append(indent("static uint8_t read() {", 0))
        parts.append(indent("int temp = getchar();", 1))
        parts.append(indent("return (uint8_t)(temp != EOF ? temp : 0);", 1))
        parts.append(indent("}", 0))
        parts.append(indent("", 0))
        parts.append(indent("int main(int argc, char **argv) {", 0))
        # "= {}" is not a valid initializer in ISO C before C23, hence {0}.
        # The array is static so that the 1 MB tape does not live on the
        # stack of main().
        parts.append(indent("static uint8_t mem[1000000] = {0};"))
        parts.append(indent("uint8_t *p = &mem[1000];"))
        parts.append(indent(""))

    for cmd in commands:
        if isinstance(cmd, Assign):
            parts.append(indent("p[{}] = {};".format(cmd.offset, cmd.value)))
        elif isinstance(cmd, Add):
            cell = "p[{}]".format(cmd.offset)
            if cmd.value == 1:
                parts.append(indent(cell + "++;"))
            elif cmd.value == -1:
                parts.append(indent(cell + "--;"))
            else:
                parts.append(indent(cell + " {}= {};".format(sign(cmd.value), abs(cmd.value))))
        elif isinstance(cmd, MultAssign):
            if cmd.value == 1:
                parts.append(indent("p[{}] = p[{}];".format(cmd.destOff, cmd.srcOff)))
            else:
                parts.append(indent("p[{}] = p[{}] * {};".format(cmd.destOff, cmd.srcOff, cmd.value)))
        elif isinstance(cmd, MultAdd):
            if abs(cmd.value) == 1:
                parts.append(indent("p[{}] {}= p[{}];".format(cmd.destOff, sign(cmd.value), cmd.srcOff)))
            else:
                parts.append(indent("p[{}] {}= p[{}] * {};".format(cmd.destOff, sign(cmd.value), cmd.srcOff, abs(cmd.value))))
        elif isinstance(cmd, Right):
            if cmd.offset == 1:
                parts.append(indent("p++;"))
            elif cmd.offset == -1:
                parts.append(indent("p--;"))
            else:
                parts.append(indent("p {}= {};".format(sign(cmd.offset), abs(cmd.offset))))
        elif isinstance(cmd, Input):
            parts.append(indent("p[{}] = read();".format(cmd.offset)))
        elif isinstance(cmd, Output):
            parts.append(indent("putchar(p[{}]);".format(cmd.offset)))
        elif isinstance(cmd, If):
            parts.append(indent("if (*p != 0) {"))
            parts.append(commands_to_c(cmd.commands, name, False, indentlevel + 1))
            parts.append(indent("}"))
        elif isinstance(cmd, Loop):
            parts.append(indent("while (*p != 0) {"))
            parts.append(commands_to_c(cmd.commands, name, False, indentlevel + 1))
            parts.append(indent("}"))
        else:
            raise AssertionError("Unknown command")

    if maincall:
        parts.append(indent(""))
        parts.append(indent("return 0;"))
        parts.append(indent("}", 0))
    return "".join(parts)
def commands_to_java(commands, name, maincall=True, indentlevel=2):
    """Render a list of commands as Java source code and return it as a string.

    commands    -- the commands to translate
    name        -- name given to the generated public class
    maincall    -- True for the top-level call, which wraps the code in the
                   import, the class and main(); False for the body of a
                   loop/if
    indentlevel -- number of tabs put in front of each generated statement

    Raises AssertionError if a command of an unknown type is encountered.
    """
    def indent(line, level=indentlevel):
        return "\t" * level + line + "\n"

    def format_memory(off):
        # Java expression for the cell at the given offset from the pointer
        if off == 0:
            return "mem[i]"
        return "mem[i {} {}]".format("+" if off >= 0 else "-", abs(off))

    def operator_for(amount):
        return "+" if amount >= 0 else "-"

    pieces = []
    emit = pieces.append

    if maincall:
        emit(indent("import java.io.IOException;", 0))
        emit(indent("", 0))
        emit(indent("public class " + name + " {", 0))
        emit(indent("public static void main(String[] args) throws IOException {", 1))
        emit(indent("byte[] mem = new byte[1000000];"))
        emit(indent("int i = 1000;"))
        emit(indent(""))

    for cmd in commands:
        if isinstance(cmd, Assign):
            # Map the value onto the range of a signed Java byte (-128..127)
            as_byte = (cmd.value & 0xFF) - ((cmd.value & 0x80) << 1)
            emit(indent("{} = {};".format(format_memory(cmd.offset), as_byte)))
        elif isinstance(cmd, Add):
            cell = format_memory(cmd.offset)
            if cmd.value == 1:
                emit(indent("{}++;".format(cell)))
            elif cmd.value == -1:
                emit(indent("{}--;".format(cell)))
            else:
                emit(indent("{} {}= {};".format(cell, operator_for(cmd.value), abs(cmd.value))))
        elif isinstance(cmd, MultAssign):
            dest = format_memory(cmd.destOff)
            src = format_memory(cmd.srcOff)
            if cmd.value == 1:
                emit(indent("{} = {};".format(dest, src)))
            else:
                emit(indent("{} = (byte)({} * {});".format(dest, src, cmd.value)))
        elif isinstance(cmd, MultAdd):
            dest = format_memory(cmd.destOff)
            src = format_memory(cmd.srcOff)
            if abs(cmd.value) == 1:
                emit(indent("{} {}= {};".format(dest, operator_for(cmd.value), src)))
            else:
                emit(indent("{} {}= {} * {};".format(dest, operator_for(cmd.value), src, abs(cmd.value))))
        elif isinstance(cmd, Right):
            if cmd.offset == 1:
                emit(indent("i++;"))
            elif cmd.offset == -1:
                emit(indent("i--;"))
            else:
                emit(indent("i {}= {};".format(operator_for(cmd.offset), abs(cmd.offset))))
        elif isinstance(cmd, Input):
            emit(indent("{} = (byte)Math.max(System.in.read(), 0);".format(format_memory(cmd.offset))))
        elif isinstance(cmd, Output):
            emit(indent("System.out.write({});".format(format_memory(cmd.offset))))
            emit(indent("System.out.flush();"))
        elif isinstance(cmd, If):
            emit(indent("if (mem[i] != 0) {"))
            emit(commands_to_java(cmd.commands, name, False, indentlevel + 1))
            emit(indent("}"))
        elif isinstance(cmd, Loop):
            emit(indent("while (mem[i] != 0) {"))
            emit(commands_to_java(cmd.commands, name, False, indentlevel + 1))
            emit(indent("}"))
        else:
            raise AssertionError("Unknown command")

    if maincall:
        emit(indent("}", 1))
        emit(indent("}", 0))
    return "".join(pieces)
def commands_to_python(commands, name, maincall=True, indentlevel=0):
    """Render a list of commands as Python source code and return it as a string.

    commands    -- the commands to translate
    name        -- program name; not used in the Python output, only passed
                   along to the recursive calls
    maincall    -- True for the top-level call, which emits the import and
                   the memory/pointer set-up; False for the body of a
                   loop/if
    indentlevel -- number of tabs put in front of each generated statement

    Raises AssertionError if a command of an unknown type is encountered.
    """
    def indent(line, level=indentlevel):
        return "\t" * level + line + "\n"

    def format_memory(off):
        # Python expression for the cell at the given offset from the pointer
        if off == 0:
            return "mem[i]"
        return "mem[i {} {}]".format("+" if off >= 0 else "-", abs(off))

    def block(header, body_commands):
        # A compound statement needs at least one statement in its suite, so
        # an empty body (e.g. the Brainfuck loop "[]") is rendered as "pass".
        body = commands_to_python(body_commands, name, False, indentlevel + 1)
        if not body:
            body = indent("pass", indentlevel + 1)
        return indent(header) + body

    parts = []
    if maincall:
        parts.append(indent("import sys"))
        parts.append(indent(""))
        parts.append(indent("mem = [0] * 1000000"))
        parts.append(indent("i = 1000"))
        parts.append(indent(""))

    for cmd in commands:
        if isinstance(cmd, Assign):
            parts.append(indent("{} = {}".format(format_memory(cmd.offset), cmd.value)))
        elif isinstance(cmd, Add):
            cell = format_memory(cmd.offset)
            parts.append(indent("{} = ({} {} {}) & 0xFF".format(cell, cell, "+" if cmd.value >= 0 else "-", abs(cmd.value))))
        elif isinstance(cmd, MultAssign):
            if cmd.value == 1:
                parts.append(indent("{} = {}".format(format_memory(cmd.destOff), format_memory(cmd.srcOff))))
            else:
                parts.append(indent("{} = ({} * {}) & 0xFF".format(format_memory(cmd.destOff), format_memory(cmd.srcOff), cmd.value)))
        elif isinstance(cmd, MultAdd):
            dest = format_memory(cmd.destOff)
            parts.append(indent("{} = ({} + {} * {}) & 0xFF".format(dest, dest, format_memory(cmd.srcOff), cmd.value)))
        elif isinstance(cmd, Right):
            parts.append(indent("i {}= {}".format("+" if cmd.offset >= 0 else "-", abs(cmd.offset))))
        elif isinstance(cmd, Input):
            parts.append(indent("{} = ord((sys.stdin.read(1) + chr(0))[0])".format(format_memory(cmd.offset))))
        elif isinstance(cmd, Output):
            parts.append(indent("sys.stdout.write(chr({}))".format(format_memory(cmd.offset))))
        elif isinstance(cmd, If):
            parts.append(block("if mem[i] != 0:", cmd.commands))
        elif isinstance(cmd, Loop):
            parts.append(block("while mem[i] != 0:", cmd.commands))
        else:
            raise AssertionError("Unknown command")
    return "".join(parts)
- # ---- Intermediate representation (IR) ----
class Command(object):
    """Common superclass of all intermediate-representation commands.

    Offsets are relative to the current memory pointer.
    """

    # Names of the constructor arguments/attributes, in order. Used by
    # __repr__ so that command lists can be inspected while debugging.
    _fields = ()

    def __repr__(self):
        shown = ", ".join(
            "{}={!r}".format(field, getattr(self, field)) for field in self._fields)
        return "{}({})".format(type(self).__name__, shown)


class Assign(Command):
    """Store the constant value into the cell at offset."""

    _fields = ("offset", "value")

    def __init__(self, offset, value):
        self.offset = offset
        self.value = value


class Add(Command):
    """Add the constant value to the cell at offset."""

    _fields = ("offset", "value")

    def __init__(self, offset, value):
        self.offset = offset
        self.value = value


class MultAssign(Command):
    """Store (cell at srcOff) * value into the cell at destOff."""

    _fields = ("srcOff", "destOff", "value")

    def __init__(self, srcOff, destOff, value):
        self.srcOff = srcOff
        self.destOff = destOff
        self.value = value


class MultAdd(Command):
    """Add (cell at srcOff) * value to the cell at destOff."""

    _fields = ("srcOff", "destOff", "value")

    def __init__(self, srcOff, destOff, value):
        self.srcOff = srcOff
        self.destOff = destOff
        self.value = value


class Right(Command):
    """Move the memory pointer by offset cells (negative moves left)."""

    _fields = ("offset",)

    def __init__(self, offset):
        self.offset = offset


class Input(Command):
    """Read one input byte into the cell at offset."""

    _fields = ("offset",)

    def __init__(self, offset):
        self.offset = offset


class Output(Command):
    """Write the cell at offset to the output."""

    _fields = ("offset",)

    def __init__(self, offset):
        self.offset = offset


class If(Command):
    """Run commands once if the current cell is nonzero."""

    _fields = ("commands",)

    def __init__(self, commands):
        self.commands = commands


class Loop(Command):
    """Run commands repeatedly while the current cell is nonzero."""

    _fields = ("commands",)

    def __init__(self, commands):
        self.commands = commands
- # ---- Miscellaneous ----
if __name__ == "__main__":
    # main() returns None on success or a message describing the problem
    errmsg = main(sys.argv[1:])
    if errmsg is not None:
        # Written with sys.stderr.write rather than the Python 2 only
        # "print >>sys.stderr" statement, so that the message is reported
        # the same way on Python 2 and Python 3.
        sys.stderr.write(errmsg + "\n")
        sys.exit(1)
|