| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- open Llvm
- type program = command list
- and command =
- | Shift of int
- | Add of int
- | Set of int
- | Out | In
- | Loop of program
- | Offset of int * command
- | Mul of int * int
- let read_program ic =
- let rec next cur stack =
- try
- match input_char ic, stack with
- | '>', _ -> next (Shift 1 :: cur) stack
- | '<', _ -> next (Shift (-1) :: cur) stack
- | '+', _ -> next (Add 1 :: cur) stack
- | '-', _ -> next (Add (-1) :: cur) stack
- | '.', _ -> next (Out :: cur) stack
- | ',', _ -> next (In :: cur) stack
- | '[', _ -> next [] (cur :: stack)
- | ']', [] -> failwith "unmatched ']'"
- | ']', (hd :: tl) -> next (Loop (List.rev cur) :: hd) tl
- | _ -> next cur stack
- with
- | End_of_file when List.length stack > 0 -> failwith "unmatched '['"
- | End_of_file -> List.rev cur
- in
- next [] []
- let rec string_of_program program =
- let rec cat buf = function
- | [] -> buf
- | cmd :: tl -> cat (buf ^ string_of_command cmd) tl
- in
- cat "" program
- and string_of_command = function
- | Shift 1 -> ">"
- | Shift -1 -> "<"
- | Shift n when n < 0 -> "(<" ^ string_of_int (-n) ^ ")"
- | Shift n -> "(>" ^ string_of_int n ^ ")"
- | Add 1 -> "+"
- | Add -1 -> "-"
- | Add n when n < 0 -> "(-" ^ string_of_int (-n) ^ ")"
- | Add n -> "(+" ^ string_of_int n ^ ")"
- | Set n -> "(=" ^ string_of_int n ^ ")"
- | Mul (x, y) -> "(*" ^ string_of_int x ^ "," ^ string_of_int y ^ ")"
- | Out -> "."
- | In -> ","
- | Loop p -> "[" ^ string_of_program p ^ "]"
- | Offset (o, cmd) ->
- "(" ^ string_of_command cmd ^ "@" ^ string_of_int o ^ ")"
- let compile_llvm memsize program =
- let ctx = global_context () in
- let m = create_module ctx "brainfucker" in
- let byte_ty = i8_type ctx in
- let byteptr_ty = pointer_type byte_ty in
- let bool_ty = i1_type ctx in
- let i32_ty = i32_type ctx in
- let void_ty = void_type ctx in
- let memset =
- let arg_types = [|byteptr_ty; byte_ty; i32_ty; i32_ty; bool_ty|] in
- declare_function "llvm.memset.p0i8.i32" (function_type void_ty arg_types) m
- in
- let putchar = declare_function "putchar" (function_type i32_ty [|byte_ty|]) m in
- let getchar = declare_function "getchar" (function_type byte_ty [||]) m in
- let cexit = declare_function "exit" (function_type void_ty [|i32_ty|]) m in
- (* use custom _start symbol rather than main function to reduce complexity *)
- let f = define_function "_start" (function_type void_ty [||]) m in
- let bb_cur = ref (entry_block f) in
- let b = builder_at_end ctx !bb_cur in
- let set_cur_bb bb =
- position_at_end bb b;
- bb_cur := bb
- in
- let i w n = const_int (integer_type ctx w) n in
- let i8 = i 8 in
- let i32 = i 32 in
- let mem = build_alloca (array_type byte_ty memsize) "mem" b in
- let ptr = build_alloca byteptr_ty "ptr" b in
- let load p = build_load p "" b in
- let store p value = ignore (build_store value p b) in
- let gep n = build_in_bounds_gep (load ptr) [|i32 n|] "" b in
- let rec compile_command = function
- | Shift n ->
- store ptr (gep n)
- | Offset (o, Add n) ->
- build_add (load (gep o)) (i8 n) "" b |> store (gep o)
- | Offset (o, Set n) ->
- store (gep o) (i8 n)
- | Offset (o, Out) ->
- build_call putchar [|load (gep o)|] "" b |> ignore
- | Offset (o, In) ->
- build_call getchar [||] "" b |> store (gep o)
- | Offset (o, Mul (x, y)) ->
- let mul = build_mul (load (gep (o + x))) (i8 y) "" b in
- build_add (load (gep o)) mul "" b |> store (gep o)
- | (Add _ | Set _ | Out | In | Mul _) as cmd ->
- compile_command (Offset (0, cmd))
- | Loop p ->
- let bb_end = append_block ctx "" f in
- move_block_after !bb_cur bb_end;
- let bb_body = insert_block ctx "" bb_end in
- let bb_cond = insert_block ctx "" bb_body in
- build_br bb_cond b |> ignore;
- position_at_end bb_cond b;
- let cond = build_icmp Icmp.Eq (load (gep 0)) (i8 0) "" b in
- build_cond_br cond bb_end bb_body b |> ignore;
- set_cur_bb bb_body;
- List.iter compile_command p;
- build_br bb_cond b |> ignore;
- set_cur_bb bb_end
- | cmd ->
- failwith ("cannot compile: " ^ string_of_command cmd)
- in
- (* zero-initialize memory (use intrinsic for optimization assumptions) *)
- set_data_layout "e" m; (* little-endian, needed for optimization *)
- let memptr = build_bitcast mem byteptr_ty "" b in
- build_call memset [|memptr; i8 0; i32 memsize; i32 0; i 1 0|] "" b |> ignore;
- (* set pivot to index 0 and compile program commands *)
- build_in_bounds_gep mem [|i32 0; i32 0|] "" b |> store ptr;
- List.iter compile_command program;
- (* exit gracefully *)
- build_call cexit [|i32 0|] "" b |> ignore;
- build_ret_void b |> ignore;
- m
- let compile_c memsize program =
- let indent = Str.global_replace (Str.regexp "^\\(.\\)") " \\1" in
- let ptr o = "p[" ^ string_of_int o ^ "]" in
- let add = function
- | 1 -> "++"
- | -1 -> "--"
- | n when n < 0 -> " -= " ^ string_of_int (-n)
- | n -> " += " ^ string_of_int n
- in
- let rec compile_commands buf = function
- | [] -> buf
- | cmd :: tl -> compile_commands (buf ^ compile_command cmd ^ "\n") tl
- and compile_command = function
- | Loop p ->
- "while (*p != 0) {\n" ^ indent (compile_commands "" p) ^ "}"
- | Offset (o, cmd) ->
- begin
- match cmd with
- | Shift n -> "p" ^ add n
- | Add n -> ptr o ^ add n
- | Set n -> ptr o ^ " = " ^ string_of_int n
- | Out -> "putchar(" ^ ptr o ^ ")"
- | In -> ptr o ^ " = getchar()"
- | Mul (x, 1) -> ptr o ^ " += " ^ ptr (o + x)
- | Mul (x, -1) -> ptr o ^ " -= " ^ ptr (o + x)
- | Mul (x, y) -> ptr o ^ " += " ^ ptr (o + x) ^ " * " ^ string_of_int y
- | _ -> failwith "cannot compile: " ^ string_of_command cmd
- end ^ ";"
- | cmd ->
- compile_command (Offset (0, cmd))
- in
- "#include <stdio.h>\n" ^
- "#include <stdlib.h>\n" ^
- "void _start() {\n" ^
- " unsigned char mem[" ^ string_of_int memsize ^ "] = {}, *p = mem;\n" ^
- indent (compile_commands "" program) ^
- " exit(0);\n" ^
- "}\n"
- let optimize program =
- let should_swap i a j b =
- i > j && begin
- match a, b with
- | Mul (xa, _), Mul (xb, _) -> i + xa <> j && j + xb <> i
- | (Add _ | Set _ | Out | In), Mul (x, _) -> j + x <> i
- | Mul (x, _), (Add _ | Set _ | Out | In) -> i + x <> j
- | In, In | Out, Out -> false
- | (Add _ | Set _ | Out | In), (Add _ | Set _ | Out | In) -> true
- | _ -> false
- end
- in
- let rec opt = function
- (* combine shift/add commands *)
- | Shift a :: Shift b :: tl ->
- Shift (a + b) :: tl |> opt
- | Add a :: Add b :: tl ->
- Add (a + b) :: tl |> opt
- | Set a :: Add b :: tl ->
- Set (a + b) :: tl |> opt
- (* remove dead code *)
- | Set 0 :: Loop _ :: tl ->
- Set 0 :: tl |> opt
- | (Offset (i, Set 0) as off) :: Shift j :: Loop _ :: tl when i = j ->
- off :: Shift j :: tl |> opt
- | (Shift 0 | Add 0) :: tl
- | (Add _ | Set _ | Mul _) :: (Set _ :: _ as tl) ->
- opt tl
- (* postpone shifts by wrapping commands in offset nodes *)
- | Shift o :: (Add _ | Set _ | Out | In | Mul _ as cmd) :: tl ->
- Offset (o, cmd) :: Shift o :: tl |> opt
- | Shift i :: Offset (j, cmd) :: tl ->
- Offset (i + j, cmd) :: Shift i :: tl |> opt
- | Offset (a, Offset (b, cmd)) :: tl ->
- Offset (a + b, cmd) :: tl |> opt
- | Offset (0, cmd) :: tl ->
- cmd :: tl |> opt
- (* combine adjacent commands that work on the same offset *)
- | Offset (i, a) :: Offset (j, b) :: tl when i = j ->
- begin match opt [a; b] with
- | [cmd] -> Offset (i, cmd) :: tl |> opt
- | _ -> Offset (i, a) :: opt (Offset (j, b) :: tl)
- end
- (* sort offset operations so that they are likely to be combined *)
- | Offset (i, a) :: Offset (j, b) :: tl when should_swap i a j b ->
- Offset (j, b) :: Offset (i, a) :: tl |> opt
- | Offset (i, a) :: b :: tl when should_swap i a 0 b ->
- b :: Offset (i, a) :: tl |> opt
- (* flatten reset loops *)
- | Loop [Add (1 | -1)] :: tl ->
- Set 0 :: tl |> opt
- (* unfold multiplication loops *)
- | Loop p :: tl ->
- let p = check_change p in
- let rec next buf counter_found = function
- | Add -1 :: tl when not counter_found ->
- next buf true tl
- | Offset (o, Add n) :: tl ->
- next (Offset (o, Mul (-o, n)) :: buf) counter_found tl
- | [] when counter_found ->
- List.rev (Offset (0, Set 0) :: buf) @ tl |> opt
- | _ ->
- Loop p :: opt tl
- in
- next [] false p
- | hd :: tl -> hd :: opt tl
- | [] -> []
- and check_change program =
- match opt program with
- | p when p <> program -> check_change p
- | p -> p
- in
- match check_change (Set 0 :: program) with Set 0 :: p | p -> p
- let () =
- let args = List.tl (Array.to_list Sys.argv) in
- stdin |> read_program
- |> (if List.mem "-o" args then optimize else fun p -> p)
- |> fun program ->
- if List.mem "-e" args then
- program |> string_of_program |> print_endline
- else if List.mem "-c" args then
- program |> compile_c 30000 |> print_string
- else
- program |> compile_llvm 30000 |> string_of_llmodule |> print_string
|