bf.ml 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. open Llvm
  2. type program = command list
  3. and command =
  4. | Incptr | Decptr
  5. | Incdata | Decdata
  6. | Output | Input
  7. | Loop of program
  8. | Addptr of int
  9. | Adddata of int
  10. | Setptr of int
  11. | Setdata of int
  12. let read_program ic =
  13. let rec next cur stack =
  14. try
  15. match input_char ic, stack with
  16. | '>', _ -> next (Incptr :: cur) stack
  17. | '<', _ -> next (Decptr :: cur) stack
  18. | '+', _ -> next (Incdata :: cur) stack
  19. | '-', _ -> next (Decdata :: cur) stack
  20. | '.', _ -> next (Output :: cur) stack
  21. | ',', _ -> next (Input :: cur) stack
  22. | '[', _ -> next [] (cur :: stack)
  23. | ']', [] -> failwith "unmatched ']'"
  24. | ']', (hd :: tl) -> next (Loop (List.rev cur) :: hd) tl
  25. | _ -> next cur stack
  26. with End_of_file ->
  27. if List.length stack > 0 then failwith "unmatched '['";
  28. List.rev cur
  29. in
  30. next [] []
  31. let rec string_of_program program =
  32. let rec cat buf = function
  33. | [] -> buf
  34. | cmd :: tl -> cat (buf ^ string_of_command cmd) tl
  35. in
  36. cat "" program
  37. and string_of_command = function
  38. | Incptr -> ">"
  39. | Decptr -> "<"
  40. | Incdata -> "+"
  41. | Decdata -> "-"
  42. | Output -> "."
  43. | Input -> ","
  44. | Loop p -> "[" ^ string_of_program p ^ "]"
  45. | Addptr n when n < 0 -> "(<" ^ string_of_int (-n) ^ ")"
  46. | Addptr n -> "(>" ^ string_of_int n ^ ")"
  47. | Adddata n when n < 0 -> "(" ^ string_of_int n ^ ")"
  48. | Adddata n -> "(+" ^ string_of_int n ^ ")"
  49. | Setptr n -> "(<>" ^ string_of_int n ^ ")"
  50. | Setdata n -> "(=" ^ string_of_int n ^ ")"
  51. let compile memsize program =
  52. let ctx = global_context () in
  53. let m = create_module ctx "brainfucker" in
  54. let byte_ty = i8_type ctx in
  55. let byteptr_ty = pointer_type byte_ty in
  56. let bool_ty = i1_type ctx in
  57. let i32_ty = i32_type ctx in
  58. let void_ty = void_type ctx in
  59. let putchar = declare_function "putchar" (function_type i32_ty [|byte_ty|]) m in
  60. let getchar = declare_function "getchar" (function_type byte_ty [||]) m in
  61. let cexit = declare_function "exit" (function_type void_ty [|i32_ty|]) m in
  62. (* use custom _start symbol rather than main function to reduce complexity *)
  63. let f = define_function "_start" (function_type void_ty [||]) m in
  64. let bb_cur = ref (entry_block f) in
  65. let b = builder_at_end ctx !bb_cur in
  66. let set_cur_bb bb =
  67. position_at_end bb b;
  68. bb_cur := bb
  69. in
  70. let i w n = const_int (integer_type ctx w) n in
  71. let i8 = i 8 in
  72. let i32 = i 32 in
  73. let mem = build_alloca (array_type byte_ty memsize) "mem" b in
  74. let idx = build_alloca i32_ty "idx" b in
  75. let load ptr = build_load ptr "" b in
  76. let store ptr value = ignore (build_store value ptr b) in
  77. let gep () = build_in_bounds_gep mem [|i32 0; load idx|] "" b in
  78. let rec compile_command = function
  79. | Incptr ->
  80. build_add (load idx) (i32 1) "" b |> store idx
  81. | Decptr ->
  82. build_sub (load idx) (i32 1) "" b |> store idx
  83. | Incdata ->
  84. build_add (load (gep ())) (i8 1) "" b |> store (gep ())
  85. | Decdata ->
  86. build_sub (load (gep ())) (i8 1) "" b |> store (gep ())
  87. | Output ->
  88. build_call putchar [|load (gep ())|] "" b |> ignore
  89. | Input ->
  90. build_call getchar [||] "" b |> store (gep ())
  91. | Loop p ->
  92. let bb_end = append_block ctx "" f in
  93. move_block_after !bb_cur bb_end;
  94. let bb_body = insert_block ctx "" bb_end in
  95. let bb_cond = insert_block ctx "" bb_body in
  96. build_br bb_cond b |> ignore;
  97. position_at_end bb_cond b;
  98. let cond = build_icmp Icmp.Eq (load (gep ())) (i8 0) "" b in
  99. build_cond_br cond bb_end bb_body b |> ignore;
  100. set_cur_bb bb_body;
  101. List.iter compile_command p;
  102. build_br bb_cond b |> ignore;
  103. set_cur_bb bb_end
  104. | Addptr n ->
  105. build_add (load idx) (i32 n) "" b |> store idx
  106. | Adddata n ->
  107. build_add (load (gep ())) (i8 n) "" b |> store (gep ())
  108. | Setptr n when n >= 0 ->
  109. store idx (i32 n)
  110. | Setdata n when n >= 0 ->
  111. store (gep ()) (i8 n)
  112. | cmd -> failwith ("invalid command: " ^ string_of_command cmd)
  113. in
  114. (* zero-initialize memory (use intrinsic for optimization assumptions) *)
  115. set_data_layout "e" m; (* little-endian, needed for optimization *)
  116. let memset =
  117. let arg_types = [|byteptr_ty; byte_ty; i32_ty; i32_ty; bool_ty|] in
  118. declare_function "llvm.memset.p0i8.i32" (function_type void_ty arg_types) m
  119. in
  120. let ptr = build_bitcast mem byteptr_ty "" b in
  121. build_call memset [|ptr; i8 0; i32 memsize; i32 0; i 1 0|] "" b |> ignore;
  122. (* set pivot to index 0 and compile program commands *)
  123. store idx (i32 0);
  124. List.iter compile_command program;
  125. (* exit gracefully *)
  126. build_call cexit [|i32 0|] "" b |> ignore;
  127. build_ret_void b |> ignore;
  128. m
  129. let compile_to_c memsize program =
  130. let indent = Str.global_replace (Str.regexp "^\\(.\\)") " \\1" in
  131. let rec compile_commands buf = function
  132. | [] -> buf
  133. | cmd :: tl -> compile_commands (buf ^ compile_command cmd ^ "\n") tl
  134. and compile_command = function
  135. | Incptr -> "idx++;"
  136. | Decptr -> "idx--;"
  137. | Incdata -> "mem[idx]++;"
  138. | Decdata -> "mem[idx]--;"
  139. | Output -> "putchar(mem[idx]);"
  140. | Input -> "mem[idx] = getchar();"
  141. | Loop p -> "while (mem[idx] != 0) {\n" ^ indent (compile_commands "" p) ^ "}"
  142. | Addptr n -> "idx += " ^ string_of_int n ^ ";"
  143. | Adddata n -> "mem[idx] += " ^ string_of_int n ^ ";"
  144. | Setptr n -> "idx = " ^ string_of_int n ^ ";"
  145. | Setdata n -> "mem[idx] = " ^ string_of_int n ^ ";"
  146. in
  147. "#include <stdio.h>\n" ^
  148. "#include <stdlib.h>\n" ^
  149. "void _start() {\n" ^
  150. " unsigned char mem[" ^ string_of_int memsize ^ "] = {};\n" ^
  151. " unsigned idx = 0;\n" ^
  152. indent (compile_commands "" program) ^
  153. " exit(0);\n" ^
  154. "}\n"
  155. let rec optimize program =
  156. let rec opt = function
  157. | Incptr :: tl -> opt (Addptr 1 :: tl)
  158. | Decptr :: tl -> opt (Addptr (-1) :: tl)
  159. | Incdata :: tl -> opt (Adddata 1 :: tl)
  160. | Decdata :: tl -> opt (Adddata (-1) :: tl)
  161. | Addptr a :: Addptr b :: tl -> opt (Addptr (a + b) :: tl)
  162. | Adddata a :: Adddata b :: tl -> opt (Adddata (a + b) :: tl)
  163. | Loop [Addptr -1] :: tl -> opt (Setptr 0 :: tl)
  164. | Loop [Adddata (1 | -1)] :: tl -> opt (Setdata 0 :: tl)
  165. | (Addptr 0 | Adddata 0) :: tl
  166. | (Addptr _ | Setptr _) :: (Setptr _ :: _ as tl)
  167. | (Adddata _ | Setdata _) :: (Setdata _ :: _ as tl) -> opt tl
  168. | Loop p :: tl -> Loop (optimize p) :: opt tl
  169. | hd :: tl -> hd :: opt tl
  170. | [] -> []
  171. in
  172. match opt program with
  173. | p when p <> program -> optimize p
  174. | p -> p
  175. let rec flatten = function
  176. | Addptr 1 :: tl -> Incptr :: flatten tl
  177. | Addptr (-1) :: tl -> Decptr :: flatten tl
  178. | Adddata 1 :: tl -> Incdata :: flatten tl
  179. | Adddata (-1) :: tl -> Decdata :: flatten tl
  180. | Loop p :: tl -> Loop (flatten p) :: flatten tl
  181. | hd :: tl -> hd :: flatten tl
  182. | [] -> []
  183. let () =
  184. let args = List.tl (Array.to_list Sys.argv) in
  185. stdin |> read_program
  186. |> (if List.mem "-o" args then optimize else fun p -> p)
  187. |> fun program ->
  188. if List.mem "-e" args then
  189. program |> flatten |> string_of_program |> print_endline
  190. else if List.mem "-c" args then
  191. program |> flatten |> compile_to_c 30000 |> print_string
  192. else
  193. program |> compile 30000 |> string_of_llmodule |> print_string