bf.ml 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. open Llvm
  2. type program = command list
  3. and command =
  4. | Shift of int
  5. | Add of int
  6. | Set of int
  7. | Out | In
  8. | Loop of program
  9. | Offset of int * command
  10. | Mul of int * int
  11. let read_program ic =
  12. let rec next cur stack =
  13. try
  14. match input_char ic, stack with
  15. | '>', _ -> next (Shift 1 :: cur) stack
  16. | '<', _ -> next (Shift (-1) :: cur) stack
  17. | '+', _ -> next (Add 1 :: cur) stack
  18. | '-', _ -> next (Add (-1) :: cur) stack
  19. | '.', _ -> next (Out :: cur) stack
  20. | ',', _ -> next (In :: cur) stack
  21. | '[', _ -> next [] (cur :: stack)
  22. | ']', [] -> failwith "unmatched ']'"
  23. | ']', (hd :: tl) -> next (Loop (List.rev cur) :: hd) tl
  24. | _ -> next cur stack
  25. with
  26. | End_of_file when List.length stack > 0 -> failwith "unmatched '['"
  27. | End_of_file -> List.rev cur
  28. in
  29. next [] []
  30. let rec string_of_program program =
  31. let rec cat buf = function
  32. | [] -> buf
  33. | cmd :: tl -> cat (buf ^ string_of_command cmd) tl
  34. in
  35. cat "" program
  36. and string_of_command = function
  37. | Shift 1 -> ">"
  38. | Shift -1 -> "<"
  39. | Shift n when n < 0 -> "(<" ^ string_of_int (-n) ^ ")"
  40. | Shift n -> "(>" ^ string_of_int n ^ ")"
  41. | Add 1 -> "+"
  42. | Add -1 -> "-"
  43. | Add n when n < 0 -> "(-" ^ string_of_int (-n) ^ ")"
  44. | Add n -> "(+" ^ string_of_int n ^ ")"
  45. | Set n -> "(=" ^ string_of_int n ^ ")"
  46. | Mul (x, y) -> "(*" ^ string_of_int x ^ "," ^ string_of_int y ^ ")"
  47. | Out -> "."
  48. | In -> ","
  49. | Loop p -> "[" ^ string_of_program p ^ "]"
  50. | Offset (o, cmd) ->
  51. "(" ^ string_of_command cmd ^ "@" ^ string_of_int o ^ ")"
  52. let compile_llvm memsize program =
  53. let ctx = global_context () in
  54. let m = create_module ctx "brainfucker" in
  55. let byte_ty = i8_type ctx in
  56. let byteptr_ty = pointer_type byte_ty in
  57. let bool_ty = i1_type ctx in
  58. let i32_ty = i32_type ctx in
  59. let void_ty = void_type ctx in
  60. let memset =
  61. let arg_types = [|byteptr_ty; byte_ty; i32_ty; i32_ty; bool_ty|] in
  62. declare_function "llvm.memset.p0i8.i32" (function_type void_ty arg_types) m
  63. in
  64. let putchar = declare_function "putchar" (function_type i32_ty [|byte_ty|]) m in
  65. let getchar = declare_function "getchar" (function_type byte_ty [||]) m in
  66. let cexit = declare_function "exit" (function_type void_ty [|i32_ty|]) m in
  67. (* use custom _start symbol rather than main function to reduce complexity *)
  68. let f = define_function "_start" (function_type void_ty [||]) m in
  69. let bb_cur = ref (entry_block f) in
  70. let b = builder_at_end ctx !bb_cur in
  71. let set_cur_bb bb =
  72. position_at_end bb b;
  73. bb_cur := bb
  74. in
  75. let i w n = const_int (integer_type ctx w) n in
  76. let i8 = i 8 in
  77. let i32 = i 32 in
  78. let mem = build_alloca (array_type byte_ty memsize) "mem" b in
  79. let ptr = build_alloca byteptr_ty "ptr" b in
  80. let load p = build_load p "" b in
  81. let store p value = ignore (build_store value p b) in
  82. let gep n = build_in_bounds_gep (load ptr) [|i32 n|] "" b in
  83. let rec compile_command = function
  84. | Shift n ->
  85. store ptr (gep n)
  86. | Offset (o, Add n) ->
  87. build_add (load (gep o)) (i8 n) "" b |> store (gep o)
  88. | Offset (o, Set n) ->
  89. store (gep o) (i8 n)
  90. | Offset (o, Out) ->
  91. build_call putchar [|load (gep o)|] "" b |> ignore
  92. | Offset (o, In) ->
  93. build_call getchar [||] "" b |> store (gep o)
  94. | Offset (o, Mul (x, y)) ->
  95. let mul = build_mul (load (gep (o + x))) (i8 y) "" b in
  96. build_add (load (gep o)) mul "" b |> store (gep o)
  97. | (Add _ | Set _ | Out | In | Mul _) as cmd ->
  98. compile_command (Offset (0, cmd))
  99. | Loop p ->
  100. let bb_end = append_block ctx "" f in
  101. move_block_after !bb_cur bb_end;
  102. let bb_body = insert_block ctx "" bb_end in
  103. let bb_cond = insert_block ctx "" bb_body in
  104. build_br bb_cond b |> ignore;
  105. position_at_end bb_cond b;
  106. let cond = build_icmp Icmp.Eq (load (gep 0)) (i8 0) "" b in
  107. build_cond_br cond bb_end bb_body b |> ignore;
  108. set_cur_bb bb_body;
  109. List.iter compile_command p;
  110. build_br bb_cond b |> ignore;
  111. set_cur_bb bb_end
  112. | cmd ->
  113. failwith ("cannot compile: " ^ string_of_command cmd)
  114. in
  115. (* zero-initialize memory (use intrinsic for optimization assumptions) *)
  116. set_data_layout "e" m; (* little-endian, needed for optimization *)
  117. let memptr = build_bitcast mem byteptr_ty "" b in
  118. build_call memset [|memptr; i8 0; i32 memsize; i32 0; i 1 0|] "" b |> ignore;
  119. (* set pivot to index 0 and compile program commands *)
  120. build_in_bounds_gep mem [|i32 0; i32 0|] "" b |> store ptr;
  121. List.iter compile_command program;
  122. (* exit gracefully *)
  123. build_call cexit [|i32 0|] "" b |> ignore;
  124. build_ret_void b |> ignore;
  125. m
  126. let compile_c memsize program =
  127. let indent = Str.global_replace (Str.regexp "^\\(.\\)") " \\1" in
  128. let ptr o = "p[" ^ string_of_int o ^ "]" in
  129. let add = function
  130. | 1 -> "++"
  131. | -1 -> "--"
  132. | n when n < 0 -> " -= " ^ string_of_int (-n)
  133. | n -> " += " ^ string_of_int n
  134. in
  135. let rec compile_commands buf = function
  136. | [] -> buf
  137. | cmd :: tl -> compile_commands (buf ^ compile_command cmd ^ "\n") tl
  138. and compile_command = function
  139. | Loop p ->
  140. "while (*p != 0) {\n" ^ indent (compile_commands "" p) ^ "}"
  141. | Offset (o, cmd) ->
  142. begin
  143. match cmd with
  144. | Shift n -> "p" ^ add n
  145. | Add n -> ptr o ^ add n
  146. | Set n -> ptr o ^ " = " ^ string_of_int n
  147. | Out -> "putchar(" ^ ptr o ^ ")"
  148. | In -> ptr o ^ " = getchar()"
  149. | Mul (x, 1) -> ptr o ^ " += " ^ ptr (o + x)
  150. | Mul (x, -1) -> ptr o ^ " -= " ^ ptr (o + x)
  151. | Mul (x, y) -> ptr o ^ " += " ^ ptr (o + x) ^ " * " ^ string_of_int y
  152. | _ -> failwith "cannot compile: " ^ string_of_command cmd
  153. end ^ ";"
  154. | cmd ->
  155. compile_command (Offset (0, cmd))
  156. in
  157. "#include <stdio.h>\n" ^
  158. "#include <stdlib.h>\n" ^
  159. "void _start() {\n" ^
  160. " unsigned char mem[" ^ string_of_int memsize ^ "] = {}, *p = mem;\n" ^
  161. indent (compile_commands "" program) ^
  162. " exit(0);\n" ^
  163. "}\n"
  164. let optimize program =
  165. let should_swap i a j b =
  166. i > j && begin
  167. match a, b with
  168. | Mul (xa, _), Mul (xb, _) -> i + xa <> j && j + xb <> i
  169. | (Add _ | Set _ | Out | In), Mul (x, _) -> j + x <> i
  170. | Mul (x, _), (Add _ | Set _ | Out | In) -> i + x <> j
  171. | In, In | Out, Out -> false
  172. | (Add _ | Set _ | Out | In), (Add _ | Set _ | Out | In) -> true
  173. | _ -> false
  174. end
  175. in
  176. let rec opt = function
  177. (* combine shift/add commands *)
  178. | Shift a :: Shift b :: tl ->
  179. Shift (a + b) :: tl |> opt
  180. | Add a :: Add b :: tl ->
  181. Add (a + b) :: tl |> opt
  182. | Set a :: Add b :: tl ->
  183. Set (a + b) :: tl |> opt
  184. (* remove dead code *)
  185. | Set 0 :: Loop _ :: tl ->
  186. Set 0 :: tl |> opt
  187. | (Offset (i, Set 0) as off) :: Shift j :: Loop _ :: tl when i = j ->
  188. off :: Shift j :: tl |> opt
  189. | (Shift 0 | Add 0) :: tl
  190. | (Add _ | Set _ | Mul _) :: (Set _ :: _ as tl) ->
  191. opt tl
  192. (* postpone shifts by wrapping commands in offset nodes *)
  193. | Shift o :: (Add _ | Set _ | Out | In | Mul _ as cmd) :: tl ->
  194. Offset (o, cmd) :: Shift o :: tl |> opt
  195. | Shift i :: Offset (j, cmd) :: tl ->
  196. Offset (i + j, cmd) :: Shift i :: tl |> opt
  197. | Offset (a, Offset (b, cmd)) :: tl ->
  198. Offset (a + b, cmd) :: tl |> opt
  199. | Offset (0, cmd) :: tl ->
  200. cmd :: tl |> opt
  201. (* combine adjacent commands that work on the same offset *)
  202. | Offset (i, a) :: Offset (j, b) :: tl when i = j ->
  203. begin match opt [a; b] with
  204. | [cmd] -> Offset (i, cmd) :: tl |> opt
  205. | _ -> Offset (i, a) :: opt (Offset (j, b) :: tl)
  206. end
  207. (* sort offset operations so that they are likely to be combined *)
  208. | Offset (i, a) :: Offset (j, b) :: tl when should_swap i a j b ->
  209. Offset (j, b) :: Offset (i, a) :: tl |> opt
  210. | Offset (i, a) :: b :: tl when should_swap i a 0 b ->
  211. b :: Offset (i, a) :: tl |> opt
  212. (* flatten reset loops *)
  213. | Loop [Add (1 | -1)] :: tl ->
  214. Set 0 :: tl |> opt
  215. (* unfold multiplication loops *)
  216. | Loop p :: tl ->
  217. let p = check_change p in
  218. let rec next buf counter_found = function
  219. | Add -1 :: tl when not counter_found ->
  220. next buf true tl
  221. | Offset (o, Add n) :: tl ->
  222. next (Offset (o, Mul (-o, n)) :: buf) counter_found tl
  223. | [] when counter_found ->
  224. List.rev (Offset (0, Set 0) :: buf) @ tl |> opt
  225. | _ ->
  226. Loop p :: opt tl
  227. in
  228. next [] false p
  229. | hd :: tl -> hd :: opt tl
  230. | [] -> []
  231. and check_change program =
  232. match opt program with
  233. | p when p <> program -> check_change p
  234. | p -> p
  235. in
  236. match check_change (Set 0 :: program) with Set 0 :: p | p -> p
  237. let () =
  238. let args = List.tl (Array.to_list Sys.argv) in
  239. stdin |> read_program
  240. |> (if List.mem "-o" args then optimize else fun p -> p)
  241. |> fun program ->
  242. if List.mem "-e" args then
  243. program |> string_of_program |> print_endline
  244. else if List.mem "-c" args then
  245. program |> compile_c 30000 |> print_string
  246. else
  247. program |> compile_llvm 30000 |> string_of_llmodule |> print_string