Răsfoiți Sursa

Started implementing node locations + some work on desugaring

Taddeus Kroes 12 ani în urmă
părinte
comite
0c538f2ee3
12 a modificat fișierele cu 220 adăugiri și 148 ștergeri
  1. 1 1
      Makefile
  2. 65 56
      ast.ml
  3. 4 3
      lexer.mll
  4. 14 16
      parser.mly
  5. 81 19
      phases/desug.ml
  6. 9 2
      phases/parse.ml
  7. 22 15
      stringify.ml
  8. 6 0
      test/array_init.cvc
  9. 4 0
      test/test.cvc
  10. 2 0
      test/var_init.cvc
  11. 11 36
      util.ml
  12. 1 0
      util.mli

+ 1 - 1
Makefile

@@ -1,5 +1,5 @@
 RESULT := civicc
-SOURCES := ast.ml lexer.mll parser.mly trav.mli trav.ml stringify.mli \
+SOURCES := ast.ml lexer.mll parser.mly util.mli util.ml stringify.mli \
 	stringify.ml \
 	phases/parse.ml  phases/print.ml phases/desug.ml \
 	main.ml

+ 65 - 56
ast.ml

@@ -1,3 +1,5 @@
+type loc = string * int * int * int * int
+
 type monop = Neg | Not
 type binop = Add | Sub | Mul | Div | Mod
            | Eq | Ne | Lt | Le | Gt | Ge
@@ -6,44 +8,49 @@ type ctype = Void | Bool | Int | Float
            | ArrayDec of ctype * string list
            | ArrayDef of ctype * node list
 and node =
-    (* Global *)
-    | Program of node list
-    | Param of ctype * string
-    | FunDec of ctype * string * node list
-    | FunDef of bool * ctype * string * node list * node list
-    | GlobalDec of ctype * string
-    | GlobalDef of bool * ctype * string * node option
+    (* global *)
+    | Program of node list * loc
+    | Param of ctype * string * loc
+    | FunDec of ctype * string * node list * loc
+    | FunDef of bool * ctype * string * node list * node list * loc
+    | GlobalDec of ctype * string * loc
+    | GlobalDef of bool * ctype * string * node option * loc
 
-    (* Statements *)
-    | VarDec of ctype * string * node option
-    | Assign of string * node
-    | Return of node
-    | If of node * node list
-    | IfElse of node * node list * node list
-    | While of node * node list
-    | DoWhile of node * node list
-    | For of string * node * node * node * node list
-    | Expr of node
+    (* statements *)
+    | VarDec of ctype * string * node option * loc
+    | Assign of string * node * loc
+    | Return of node * loc
+    | If of node * node list * loc
+    | IfElse of node * node list * node list * loc
+    | While of node * node list * loc
+    | DoWhile of node * node list * loc
+    | For of string * node * node * node * node list * loc
+    | Allocate of string * node list * loc
+    | Expr of node * loc
+    | Statements of node list * loc
 
-    (* Expressions *)
-    | BoolConst of bool
-    | IntConst of int
-    | FloatConst of float
-    | ArrayConst of node list
-    | Var of string
-    | Deref of string * node list
-    | Monop of monop * node
-    | Binop of binop * node * node
-    | Cond of node * node * node
-    | TypeCast of ctype * node
-    | FunCall of string * node list
+    (* expressions *)
+    | BoolConst of bool * loc
+    | IntConst of int * loc
+    | FloatConst of float * loc
+    | ArrayConst of node list * loc
+    | ArrayScalar of node * loc
+    | Var of string * loc
+    | Deref of string * node list * loc
+    | Monop of monop * node * loc
+    | Binop of binop * node * node * loc
+    | Cond of node * node * node * loc
+    | TypeCast of ctype * node * loc
+    | FunCall of string * node list * loc
 
-(* Intermediate representations between phases *)
+(* intermediate representations between phases *)
 type repr =
     | Inputfile of string option * int  (* filename, verbose *)
     | Node of node * int                (* ast, verbose *)
     | Assembly of string list * int     (* instructions *)
 
+exception LocError of string * loc
+
 exception CompileError of string
 
 
@@ -51,34 +58,36 @@ exception CompileError of string
  * Template for node matching follows below.
  *
  * let rec visit = function
- *     | Program (decls) ->
- *     | Param (ctype, name) ->
- *     | FunDec (ret_type, name, params) ->
- *     | FunDef (export, ret_type, name, params, body) ->
- *     | GlobalDec (ctype, name) ->
- *     | GlobalDef (export, ctype, name, None) ->
- *     | GlobalDef (export, ctype, name, Some init) ->
+ *     | Program (decls, loc) ->
+ *     | Param (ctype, name, loc) ->
+ *     | FunDec (ret_type, name, params, loc) ->
+ *     | FunDef (export, ret_type, name, params, body, loc) ->
+ *     | GlobalDec (ctype, name, loc) ->
+ *     | GlobalDef (export, ctype, name, None, loc) ->
+ *     | GlobalDef (export, ctype, name, Some init, loc) ->
+ *
+ *     | VarDec (ctype, name, None, loc) ->
+ *     | VarDec (ctype, name, Some init, loc) ->
+ *     | Assign (name, value, loc) ->
+ *     | Return (value, loc) ->
+ *     | If (cond, body, loc) ->
+ *     | IfElse (cond, true_body, false_body, loc) ->
+ *     | While (cond, body, loc) ->
+ *     | DoWhile (cond, body, loc) ->
+ *     | For (counter, start, stop, step, body, loc) ->
+ *     | Expr (value, loc) ->
  *
- *     | VarDec (ctype, name, None) ->
- *     | VarDec (ctype, name, Some init) ->
- *     | Assign (name, value) ->
- *     | Return (value) ->
- *     | If (cond, body) ->
- *     | IfElse (cond, true_body, false_body) ->
- *     | While (cond, body) ->
- *     | DoWhile (cond, body) ->
- *     | For (counter, start, stop, step, body) ->
- *     | Expr (value) ->
+ *     | BoolConst (value, loc) ->
+ *     | IntConst (value, loc) ->
+ *     | FloatConst (value, loc) ->
+ *     | Var (name, loc) ->
+ *     | Monop (op, value, loc) ->
+ *     | Binop (op, left, right, loc) ->
+ *     | Cond (cond, true_expr, false_expr, loc) ->
+ *     | TypeCast (ctype, value, loc) ->
+ *     | FunCall (name, args, loc) ->
  *
- *     | BoolConst (value) ->
- *     | IntConst (value) ->
- *     | FloatConst (value) ->
- *     | Var (name) ->
- *     | Monop (op, value) ->
- *     | Binop (op, left, right) ->
- *     | Cond (cond, true_expr, false_expr) ->
- *     | TypeCast (ctype, value) ->
- *     | FunCall (name, args) ->
+ *     | Statements (stats, loc) ->
  *
  *     | node -> transform visit node
  *

+ 4 - 3
lexer.mll

@@ -80,9 +80,10 @@ rule token = parse
     | ['0'-'9']+'.'['0'-'9']+ as f { FLOAT_CONST (float_of_string f) }
     | ['A'-'Z''a'-'z']['A'-'Z''a'-'z''0'-'9''_']* as id { ID id }
 
-    | '\r'|'\n'|"\r\n"    { next_line lexbuf; token lexbuf }
-    | [' ''\t']+          { token lexbuf }
-    | "//"_* | "/*"_*"*/" { token lexbuf }
+    | '\r'|'\n'|"\r\n"  { next_line lexbuf; token lexbuf }
+    | [' ''\t']+        { token lexbuf }
+    | "//"[^'\n']*      { token lexbuf }
+    | "/*"_*"*/"        { token lexbuf }
 
     | eof       { EOF }
     | _ as chr  { raise (SyntaxError ("Unexpected char: " ^ Char.escaped chr)) }

+ 14 - 16
parser.mly

@@ -1,5 +1,10 @@
 %{
 open Ast
+
+let loc lexbuf =
+    (0, 0, 0, 0)
+    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
+                       (pos.pos_cnum - pos.pos_bol + 1)
 %}
 
 /* Tokens */
@@ -17,12 +22,6 @@ open Ast
 %token <int> INT_CONST
 %token <string> ID
 
-/* Types */
-
-%type <Ast.node> program decl fun_header var_dec param statement expr
-%type <Ast.node list> return_statement array_const
-%type <Ast.ctype> basic_type
-
 /* Precedence */
 
 %right ASSIGN
@@ -37,6 +36,9 @@ open Ast
 %nonassoc IF
 %nonassoc ELSE
 
+/* Start symbol */
+
+%type <Ast.node> program
 %start program
 
 %%
@@ -49,18 +51,15 @@ program : decl*; EOF
           { Program $1 }
 
 decl : EXTERN; fun_header; SEMICOL
-       { $2 }
+       { let (t, n, p) = $2 in FunDec(t, n, p) }
      | boption(EXPORT); fun_header; LBRACE; fun_body; RBRACE
-       { match $2 with FunDec (t, n, p) -> FunDef ($1, t, n, p, $4)
-         | _ -> raise _eRR }
+       { let (t, n, p) = $2 in FunDef ($1, t, n, p, $4) }
 
      | EXTERN; basic_type; ID; SEMICOL
        { GlobalDec ($2, $3) }
-
-         | EXTERN; t=basic_type; LBRACK; d=separated_list(COMMA, ID); RBRACK; n=ID; SEMICOL
+     | EXTERN; t=basic_type; LBRACK; d=separated_list(COMMA, ID); RBRACK; n=ID; SEMICOL
        { GlobalDec (ArrayDec (t, d), n) }
 
-
      | boption(EXPORT); basic_type; ID; SEMICOL
        { GlobalDef ($1, $2, $3, None) }
      | boption(EXPORT); basic_type; ID; ASSIGN; expr; SEMICOL
@@ -72,9 +71,9 @@ decl : EXTERN; fun_header; SEMICOL
        { GlobalDef (e, ArrayDef (t, d), n, Some v) }
 
 fun_header : ret=basic_type; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
-             { FunDec (ret, name, params) }
+             { (ret, name, params) }
            | VOID; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
-             { FunDec (Void, name, params) }
+             { (Void, name, params) }
 
 param : basic_type; ID  { Param ($1, $2) }
 
@@ -82,8 +81,7 @@ fun_body : var_dec* local_fun_dec* statement* loption(return_statement)
            { $1 @ $2 @ $3 }
 
 local_fun_dec : fun_header; LBRACE; fun_body; RBRACE
-                { match $1 with FunDec (t, n, p) -> FunDef (false, t, n, p, $3)
-                  | _ -> raise _eRR }
+                { let (t, n, p) = $1 in FunDef (false, t, n, p, $3) }
 
 var_dec : basic_type; ID; SEMICOL
           { VarDec ($1, $2, None) }

+ 81 - 19
phases/desug.ml

@@ -1,25 +1,87 @@
 open Ast
-open Trav
-open Stringify
-
-let rec var_init node =
-    let move_inits body =
-        let rec trav inits = function
-            (* local declarations: collect initialisations *)
-            | VarDec (ctype, name, Some init) :: t ->
-                VarDec (ctype, name, None) :: (trav (inits @ [Assign (name, init)]) t)
-            | (VarDec (_, _, None) as h) :: t
-            | (FunDef (_, _, _, _, _) as h) :: t ->
-                 h :: (trav inits t)
-
-            (* rest of function body: recurse *)
-            | rest -> inits @ (List.map var_init rest)
-        in trav [] body
-    in
-    match node with
+open Util
+
+let rec flatten = function
+    | [] -> []
+    | Statements nodes :: t -> (flatten nodes) @ (flatten t)
+    | h :: t -> h :: (flatten t)
+
+let rec var_init = function
+    (* Split local variable initialisations into declaration and assignment *)
     | FunDef (export, ret_type, name, params, body) ->
+        let move_inits body =
+            let rec trav inits = function
+                (* translate scalar array initialisation to ArrayScalar node,
+                 * for easy replacement later on *)
+                | VarDec (ArrayDef (_, _) as vtype, name, Some (BoolConst _ as v)) :: t
+                | VarDec (ArrayDef (_, _) as vtype, name, Some (FloatConst _ as v)) :: t
+                | VarDec (ArrayDef (_, _) as vtype, name, Some (IntConst _ as v)) :: t ->
+                    trav inits (VarDec (vtype, name, Some (ArrayScalar v)) :: t)
+
+                | VarDec (ctype, name, init) :: t ->
+                    (* array definition: create __allocate statement *)
+                    let alloc = match ctype with
+                        | ArrayDef (_, dims) -> [Allocate (name, dims)]
+                        | _ -> []
+                    in
+                    (* variable initialisation: create assign statement *)
+                    let stats = match init with
+                        | Some value -> alloc @ [Assign (name, value)]
+                        | None -> alloc
+                    in
+                    VarDec (ctype, name, None) :: (trav (inits @ stats) t)
+
+                (* initialisations need to be placed after local functions *)
+                | (FunDef (_, _, _, _, _) as h) :: t ->
+                    (var_init h) :: (trav inits t)
+
+                (* rest of function body: recurse *)
+                | rest -> inits @ (List.map var_init rest)
+            in trav [] body
+        in
         FunDef (export, ret_type, name, params, move_inits body)
-    | _ -> transform var_init node
+
+    (* Move global variable initialisations to exported __init function *)
+    | GlobalDef (export, ctype, name, Some init) ->
+        Statements [GlobalDef (export, ctype, name, None); Assign (name, init)]
+
+    | Program decls ->
+        let decls = flatten (List.map var_init decls) in
+        let rec trav assigns = function
+            | [] -> (assigns, [])
+            | (Assign (_, _) as h) :: t -> trav (assigns @ [h]) t
+            | h :: t ->
+                let (assigns, decls) = trav assigns t in
+                (assigns, (h :: decls))
+        in
+        let (assigns, decls) = trav [] decls in
+        (match assigns with
+            | [] -> Program decls
+            | assigns ->
+                let init_func = FunDef (true, Void, "__init", [], assigns) in
+                Program (init_func :: decls)
+            )
+
+    | node -> transform var_init node
+
+(*
+let rec array_init = function
+    (* transform scalar assignment into nested for loops *)
+    | Assign (name, ArrayScalar (value)) ->
+        let rec add_loop indices = function
+            | [] ->
+                Assign (Deref (name, indices), value)
+            | dim :: rest ->
+                let counter = fresh_var "counter" in
+                let ind = (indices @ [Var counter]) in
+                For (counter, IntConst 0, dim, IntConst 1, add_loop ind rest)
+        in
+        add_loop [] dims
+
+    | Assign (name, ArrayConst (dims)) -> Statements []
+
+    | node -> transform array_init node
+*)
 
 let rec phase repr =
     let _ = print_endline "- Var init" in

+ 9 - 2
phases/parse.ml

@@ -7,12 +7,19 @@ let get_position lexbuf =
     sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
                        (pos.pos_cnum - pos.pos_bol + 1)
 
+let get_loc lexbuf =
+    let pos = lexbuf.lex_curr_p in
+    let colnum = (pos.pos_cnum - pos.pos_bol + 1) in
+    Loc (pos.pos_fname, pos.pos_lnum, , colnum, )
+    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
+                       (pos.pos_cnum - pos.pos_bol + 1)
+
 let parse_with_error lexbuf =
     try Some (Parser.program Lexer.token lexbuf) with
     | Lexer.SyntaxError msg ->
-        raise (CompileError (sprintf "%s: %s" (get_position lexbuf) msg))
+        raise (CompileError (sprintf "%s: %s" (get_loc lexbuf) msg))
     | Parser.Error ->
-        raise (CompileError (sprintf "%s: syntax error" (get_position lexbuf)))
+        raise (LocError ("syntax error" (get_loc lexbuf)))
 
 let phase repr =
     print_endline "- Parse input";

+ 22 - 15
stringify.ml

@@ -45,21 +45,21 @@ and node2lines node =
     (* Decls *)
     | FunDec (ret_type, name, params) ->
         let params = String.concat ", " (all_str params) in
-        ["extern " ^ (type2str ret_type) ^ " " ^ name ^ "(" ^ params ^ ");"]
+        ["extern " ^ type2str ret_type ^ " " ^ name ^ "(" ^ params ^ ");"]
     | FunDef (export, ret_type, name, params, body) ->
         let export = if export then "export " else "" in
         let params = String.concat ", " (all_str params) in
-        let header = (type2str ret_type) ^ " " ^ name ^ "(" ^ params ^ ")" in
+        let header = type2str ret_type ^ " " ^ name ^ "(" ^ params ^ ")" in
         let body = indent (List.concat (all_lines body)) in
         [export ^ header ^ " {"] @
             body @
         ["}"]
     | GlobalDec (var_type, name) ->
-        ["extern " ^ (type2str var_type) ^ " " ^ name ^ ";"]
+        ["extern " ^ type2str var_type ^ " " ^ name ^ ";"]
     | GlobalDef (export, ret_type, name, init) ->
         let export = if export then "export " else "" in
         let init = match init with
-            | Some value -> " = " ^ (node2str value)
+            | Some value -> " = " ^ node2str value
             | None -> ""
         in
         [export ^ (type2str ret_type) ^ " " ^ name ^ init ^ ";"]
@@ -77,43 +77,49 @@ and node2lines node =
         ["return " ^ (node2str value) ^ ";"]
     | If (cond, body) ->
         let body = indent (List.concat (all_lines body)) in
-        ["if (" ^ (node2str cond) ^ ") {"] @
+        ["if (" ^ node2str cond ^ ") {"] @
             body @
         ["}"]
     | IfElse (cond, true_body, false_body) ->
         let true_body = indent (List.concat (all_lines true_body)) in
         let false_body = indent (List.concat (all_lines false_body)) in
-        ["if (" ^ (node2str cond) ^ ") {"] @
+        ["if (" ^ node2str cond ^ ") {"] @
             true_body @
         ["} else {"] @
             false_body @
         ["}"]
     | While (cond, body) ->
         let body = indent (List.concat (all_lines body)) in
-        ["while (" ^ (node2str cond) ^ ") {"] @
+        ["while (" ^ node2str cond ^ ") {"] @
             body @
         ["}"]
     | DoWhile (cond, body) ->
         let body = indent (List.concat (all_lines body)) in
         ["do {"] @
             body @
-        ["} while (" ^ (node2str cond) ^ ");"]
+        ["} while (" ^ node2str cond ^ ");"]
     | For (counter, start, stop, step, body) ->
         let step = match step with
             | IntConst 1 -> ""
-            | value -> ", " ^ (node2str value)
+            | value -> ", " ^ node2str value
         in
-        let range = (node2str start) ^ ", " ^ (node2str stop) ^ step in
+        let range = node2str start ^ ", " ^ node2str stop ^ step in
         let body = indent (List.concat (all_lines body)) in
         ["for (int " ^ counter ^ " = " ^ range ^ ") {"] @
             body @
         ["}"]
+    | Allocate (name, dims) ->
+        [name ^ " = __allocate(" ^ String.concat ", " (List.map node2str dims) ^ ");"]
+
+    | Statements stats -> List.concat (List.map node2lines stats)
 
     (* Catch-all, should never happen *)
     | _ -> failwith "invalid node"
 
 (* node -> string *)
-and node2str = function
+and node2str node =
+    let concat sep nodes = String.concat sep (List.map node2str nodes) in
+    match node with
     (* Global *)
     | Program decls ->
         let decl2str decl = String.concat "\n" (node2lines decl) in
@@ -124,17 +130,18 @@ and node2str = function
     | BoolConst b -> string_of_bool b
     | IntConst i -> string_of_int i
     | FloatConst f -> string_of_float f
-    | ArrayConst dims -> "[" ^ (String.concat ", " (List.map node2str dims)) ^ "]"
+    | ArrayConst dims -> "[" ^ concat ", " dims ^ "]"
+    | ArrayScalar value -> node2str value
     | Var v -> v
     | Deref (name, dims) -> name ^ (node2str (ArrayConst dims))
     | Monop (op, opnd) -> monop2str op ^ node2str opnd
     | Binop (op, left, right) ->
         "(" ^ node2str left ^ binop2str op ^ node2str right ^ ")"
     | Cond (cond, t, f) ->
-        (node2str cond) ^ " ? " ^ (node2str t) ^ " : " ^ (node2str f)
+        (node2str cond) ^ " ? " ^ node2str t ^ " : " ^ node2str f
     | TypeCast (ctype, value) ->
-        "(" ^ (type2str ctype) ^ ")" ^ (node2str value)
+        "(" ^ type2str ctype ^ ")" ^ node2str value
     | FunCall (name, args) ->
-        name ^ "(" ^ (String.concat ", " (List.map node2str args)) ^ ")"
+        name ^ "(" ^ (concat ", " args) ^ ")"
 
     | node -> String.concat "\n" (node2lines node)

+ 6 - 0
test/array_init.cvc

@@ -25,3 +25,9 @@ export int main() {
 
     return 0;
 }
+
+void foo() {
+    void bar() {
+        int[4, 5] arr = 1;
+    }
+}

+ 4 - 0
test/test.cvc

@@ -16,6 +16,10 @@ export int main() {
     int i = 0;
     int a = i + 1;
 
+    /* ad asd
+     *
+     * test
+     * */
     for (int j = 0, i)
         a = a + 1;
 

+ 2 - 0
test/var_init.cvc

@@ -1,3 +1,5 @@
+int glob_a = 1;
+
 void foo() {
     int a = 1;
 }

+ 11 - 36
trav.ml → util.ml

@@ -1,5 +1,11 @@
 include Ast
 
+let var_counter = ref 0
+
+let fresh_var prefix =
+    var_counter := !var_counter + 1;
+    prefix ^ "$" ^ string_of_int !var_counter
+
 (* Default tree transformation
  * (node -> node) -> node -> node *)
 let rec transform visitor node =
@@ -47,40 +53,9 @@ let rec transform visitor node =
     | FunCall (name, args) ->
         FunCall (name, trav_all args)
 
-    | _ -> node
+    | Statements (stats) ->
+        Statements (trav_all stats)
+    | Loc (node, loc) ->
+        Loc (trav node, loc)
 
-(*
-(* Visit
- *  *)
-let rec visit visitor = function
-    let trav = visit visitor in
-    let trav_all nodes = List.map trav nodes in
-    | Program (decls) -> List.map visitor decls;
-    | Param (ctype, name) ->
-    | FunDec (ret_type, name, params) ->
-    | FunDef (export, ret_type, name, params, body) ->
-    | GlobalDec (ctype, name) ->
-    | GlobalDef (export, ctype, name, None) ->
-    | GlobalDef (export, ctype, name, Some init) ->
-
-    | VarDec (ctype, name, None) ->
-    | VarDec (ctype, name, Some init) ->
-    | Assign (name, value) ->
-    | Return (value) ->
-    | If (cond, body) ->
-    | IfElse (cond, true_body, false_body) ->
-    | While (cond, body) ->
-    | DoWhile (cond, body) ->
-    | For (counter, start, stop, step, body) ->
-    | Expr (value) ->
-
-    | BoolConst (value) ->
-    | IntConst (value) ->
-    | FloatConst (value) ->
-    | Var (name) ->
-    | Monop (op, value) ->
-    | Binop (op, left, right) ->
-    | Cond (cond, true_expr, false_expr) ->
-    | TypeCast (ctype, value) ->
-    | FunCall (name, args) ->
-*)
+    | _ -> node

+ 1 - 0
trav.mli → util.mli

@@ -1 +1,2 @@
+val fresh_var : string -> string
 val transform : (Ast.node -> Ast.node) -> Ast.node -> Ast.node