bf.ml 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. open Llvm
  2. type program = command list
  3. and command =
  4. | Shift of int
  5. | Goto of int
  6. | Add of int
  7. | Set of int
  8. | Out | In
  9. | Loop of program
  10. | Offset of int * command
  11. | Mul of int * int
  12. let read_program ic =
  13. let rec next cur stack =
  14. try
  15. match input_char ic, stack with
  16. | '>', _ -> next (Shift 1 :: cur) stack
  17. | '<', _ -> next (Shift (-1) :: cur) stack
  18. | '+', _ -> next (Add 1 :: cur) stack
  19. | '-', _ -> next (Add (-1) :: cur) stack
  20. | '.', _ -> next (Out :: cur) stack
  21. | ',', _ -> next (In :: cur) stack
  22. | '[', _ -> next [] (cur :: stack)
  23. | ']', [] -> failwith "unmatched ']'"
  24. | ']', (hd :: tl) -> next (Loop (List.rev cur) :: hd) tl
  25. | _ -> next cur stack
  26. with End_of_file ->
  27. if List.length stack > 0 then failwith "unmatched '['";
  28. List.rev cur
  29. in
  30. next [] []
  31. let rec string_of_program program =
  32. let rec cat buf = function
  33. | [] -> buf
  34. | cmd :: tl -> cat (buf ^ string_of_command cmd) tl
  35. in
  36. cat "" program
  37. and string_of_command = function
  38. | Shift 1 -> ">"
  39. | Shift -1 -> "<"
  40. | Shift n when n < 0 -> "(<" ^ string_of_int (-n) ^ ")"
  41. | Shift n -> "(+" ^ string_of_int n ^ ")"
  42. | Goto n -> "(_" ^ string_of_int n ^ ")"
  43. | Add 1 -> "+"
  44. | Add -1 -> "-"
  45. | Add n when n < 0 -> "(-" ^ string_of_int (-n) ^ ")"
  46. | Add n -> "(+" ^ string_of_int n ^ ")"
  47. | Set n -> "(=" ^ string_of_int n ^ ")"
  48. | Mul (x, y) -> "(>" ^ string_of_int x ^ "*" ^ string_of_int y ^ ")"
  49. | Out -> "."
  50. | In -> ","
  51. | Loop p -> "[" ^ string_of_program p ^ "]"
  52. | Offset (o, cmd) ->
  53. "(" ^ string_of_int o ^ ":" ^ string_of_command cmd ^ ")"
  54. let compile_llvm memsize program =
  55. let ctx = global_context () in
  56. let m = create_module ctx "brainfucker" in
  57. let byte_ty = i8_type ctx in
  58. let byteptr_ty = pointer_type byte_ty in
  59. let bool_ty = i1_type ctx in
  60. let i32_ty = i32_type ctx in
  61. let void_ty = void_type ctx in
  62. let memset =
  63. let arg_types = [|byteptr_ty; byte_ty; i32_ty; i32_ty; bool_ty|] in
  64. declare_function "llvm.memset.p0i8.i32" (function_type void_ty arg_types) m
  65. in
  66. let putchar = declare_function "putchar" (function_type i32_ty [|byte_ty|]) m in
  67. let getchar = declare_function "getchar" (function_type byte_ty [||]) m in
  68. let cexit = declare_function "exit" (function_type void_ty [|i32_ty|]) m in
  69. (* use custom _start symbol rather than main function to reduce complexity *)
  70. let f = define_function "_start" (function_type void_ty [||]) m in
  71. let bb_cur = ref (entry_block f) in
  72. let b = builder_at_end ctx !bb_cur in
  73. let set_cur_bb bb =
  74. position_at_end bb b;
  75. bb_cur := bb
  76. in
  77. let i w n = const_int (integer_type ctx w) n in
  78. let i8 = i 8 in
  79. let i32 = i 32 in
  80. let mem = build_alloca (array_type byte_ty memsize) "mem" b in
  81. let ptr = build_alloca byteptr_ty "ptr" b in
  82. let load p = build_load p "" b in
  83. let store p value = ignore (build_store value p b) in
  84. let gep n = build_in_bounds_gep (load ptr) [|i32 n|] "" b in
  85. let rec compile_command = function
  86. | Shift n ->
  87. store ptr (gep n)
  88. | Goto n ->
  89. let memptr = build_in_bounds_gep mem [|i32 0; i32 n|] "" b in
  90. build_bitcast memptr byteptr_ty "" b |> store ptr
  91. | Offset (o, Add n) ->
  92. build_add (load (gep o)) (i8 n) "" b |> store (gep o)
  93. | Offset (o, Set n) ->
  94. store (gep o) (i8 n)
  95. | Offset (o, Out) ->
  96. build_call putchar [|load (gep o)|] "" b |> ignore
  97. | Offset (o, In) ->
  98. build_call getchar [||] "" b |> store (gep o)
  99. | Offset (o, Mul (x, y)) ->
  100. let mul = build_mul (load (gep (o + x))) (i8 y) "" b in
  101. build_add (load (gep o)) mul "" b |> store (gep o)
  102. | (Add _ | Set _ | Out | In | Mul _) as cmd ->
  103. compile_command (Offset (0, cmd))
  104. | Loop p ->
  105. let bb_end = append_block ctx "" f in
  106. move_block_after !bb_cur bb_end;
  107. let bb_body = insert_block ctx "" bb_end in
  108. let bb_cond = insert_block ctx "" bb_body in
  109. build_br bb_cond b |> ignore;
  110. position_at_end bb_cond b;
  111. let cond = build_icmp Icmp.Eq (load (gep 0)) (i8 0) "" b in
  112. build_cond_br cond bb_end bb_body b |> ignore;
  113. set_cur_bb bb_body;
  114. List.iter compile_command p;
  115. build_br bb_cond b |> ignore;
  116. set_cur_bb bb_end
  117. | cmd ->
  118. failwith ("cannot compile: " ^ string_of_command cmd)
  119. in
  120. (* zero-initialize memory (use intrinsic for optimization assumptions) *)
  121. set_data_layout "e" m; (* little-endian, needed for optimization *)
  122. let memptr = build_bitcast mem byteptr_ty "" b in
  123. build_call memset [|memptr; i8 0; i32 memsize; i32 0; i 1 0|] "" b |> ignore;
  124. (* set pivot to index 0 and compile program commands *)
  125. build_in_bounds_gep mem [|i32 0; i32 0|] "" b |> store ptr;
  126. List.iter compile_command program;
  127. (* exit gracefully *)
  128. build_call cexit [|i32 0|] "" b |> ignore;
  129. build_ret_void b |> ignore;
  130. m
  131. let compile_c memsize program =
  132. let indent = Str.global_replace (Str.regexp "^\\(.\\)") " \\1" in
  133. let ptr o = "p[" ^ string_of_int o ^ "]" in
  134. let add = function
  135. | 1 -> "++"
  136. | -1 -> "--"
  137. | n when n < 0 -> " -= " ^ string_of_int (-n)
  138. | n -> " += " ^ string_of_int n
  139. in
  140. let rec compile_commands buf = function
  141. | [] -> buf
  142. | cmd :: tl -> compile_commands (buf ^ compile_command cmd ^ "\n") tl
  143. and compile_command = function
  144. | Loop p ->
  145. "while (*p) {\n" ^ indent (compile_commands "" p) ^ "}"
  146. | Offset (o, cmd) ->
  147. begin
  148. match cmd with
  149. | Shift n -> "p" ^ add n
  150. | Add n -> ptr o ^ add n
  151. | Goto 0 -> "p = mem"
  152. | Goto n -> "p = mem + " ^ string_of_int n
  153. | Set n -> ptr o ^ " = " ^ string_of_int n
  154. | Out -> "putchar(" ^ ptr o ^ ")"
  155. | In -> ptr o ^ " = getchar()"
  156. | Mul (x, 1) -> ptr o ^ " += " ^ ptr (o + x)
  157. | Mul (x, -1) -> ptr o ^ " -= " ^ ptr (o + x)
  158. | Mul (x, y) -> ptr o ^ " += " ^ ptr (o + x) ^ " * " ^ string_of_int y
  159. | _ -> failwith "cannot compile: " ^ string_of_command cmd
  160. end ^ ";"
  161. | cmd ->
  162. compile_command (Offset (0, cmd))
  163. in
  164. "#include <stdio.h>\n" ^
  165. "#include <stdlib.h>\n" ^
  166. "void _start() {\n" ^
  167. " unsigned char mem[" ^ string_of_int memsize ^ "] = {};\n" ^
  168. " unsigned char *p = mem;\n" ^
  169. indent (compile_commands "" program) ^
  170. " exit(0);\n" ^
  171. "}\n"
  172. let optimize program =
  173. let opt_loop p =
  174. let rec next buf counter_found = function
  175. | Add -1 :: tl when not counter_found ->
  176. next buf true tl
  177. | Offset (o, Add n) :: tl ->
  178. next (Offset (o, Mul (-o, n)) :: buf) counter_found tl
  179. | [] when counter_found ->
  180. List.rev (Offset (0, Set 0) :: buf)
  181. | _ ->
  182. [Loop p]
  183. in
  184. next [] false p
  185. in
  186. let can_offset = function
  187. | Add _ | Set _ | Out | In | Mul _ -> true
  188. | _ -> false
  189. in
  190. let rec opt = function
  191. | Shift a :: Shift b :: tl ->
  192. Shift (a + b) :: tl |> opt
  193. | Add a :: Add b :: tl ->
  194. Add (a + b) :: tl |> opt
  195. | Set 0 :: Loop _ :: tl ->
  196. Set 0 :: tl |> opt
  197. | Goto a :: Shift b :: tl ->
  198. Goto (a + b) :: tl |> opt
  199. | Set a :: Add b :: tl ->
  200. Set (a + b) :: tl |> opt
  201. | (Shift 0 | Add 0) :: tl
  202. | (Shift _ | Goto _) :: (Goto _ :: _ as tl)
  203. | (Add _ | Set _) :: (Set _ :: _ as tl) ->
  204. opt tl
  205. | Goto o :: cmd :: tl when can_offset cmd ->
  206. Offset (o, cmd) :: Goto o :: tl |> opt
  207. | Shift o :: cmd :: tl when can_offset cmd ->
  208. Offset (o, cmd) :: Shift o :: tl |> opt
  209. | Offset (a, Offset (b, cmd)) :: tl ->
  210. Offset (a + b, cmd) :: tl |> opt
  211. | Offset (0, cmd) :: tl ->
  212. cmd :: tl |> opt
  213. | Offset (i, a) :: Offset (j, b) :: tl when i = j ->
  214. begin match opt [a; b] with
  215. | [cmd] -> Offset (i, cmd) :: tl |> opt
  216. | _ -> Offset (i, a) :: opt (Offset (j, b) :: tl)
  217. end
  218. | Shift i :: Offset (j, cmd) :: tl ->
  219. Offset (i + j, cmd) :: Shift i :: tl |> opt
  220. | Shift i :: cmd :: tl when can_offset cmd ->
  221. Offset (i, cmd) :: Shift i :: tl |> opt
  222. | Loop [Add (1 | -1)] :: tl ->
  223. Set 0 :: tl |> opt
  224. | Loop p :: tl ->
  225. begin match opt_loop (check_change p) with
  226. | [Loop _ as loop] -> loop :: opt tl
  227. | replacement -> opt (replacement @ tl)
  228. end
  229. | hd :: tl -> hd :: opt tl
  230. | [] -> []
  231. and check_change program =
  232. match opt program with
  233. | p when p <> program -> check_change p
  234. | p -> p
  235. in
  236. match check_change (Set 0 :: program) with Set 0 :: p | p -> p
  237. let () =
  238. let args = List.tl (Array.to_list Sys.argv) in
  239. stdin |> read_program
  240. |> (if List.mem "-o" args then optimize else fun p -> p)
  241. |> fun program ->
  242. if List.mem "-e" args then
  243. program |> string_of_program |> print_endline
  244. else if List.mem "-c" args then
  245. program |> compile_c 30000 |> print_string
  246. else
  247. program |> compile_llvm 30000 |> string_of_llmodule |> print_string