Selaa lähdekoodia

Implemented new scoping rules, moved up context analysys, tweaked where extern variables are handled, fixed some bugs (see updated html docs) (needs new test cases!)

Taddeus Kroes 11 vuotta sitten
vanhempi
sitoutus
7867875804
17 muutettua tiedostoa jossa 605 lisäystä ja 486 poistoa
  1. 2 2
      Makefile
  2. 3 3
      main.ml
  3. 1 1
      main.mli
  4. 1 1
      phases/constprop.mli
  5. 218 118
      phases/context.ml
  6. 114 19
      phases/context.mli
  7. 157 261
      phases/desug.ml
  8. 10 18
      phases/desug.mli
  9. 27 10
      phases/dimreduce.ml
  10. 30 11
      phases/dimreduce.mli
  11. 1 16
      phases/index.ml
  12. 4 4
      phases/index.mli
  13. 2 2
      phases/typecheck.ml
  14. 2 2
      stringify.ml
  15. 1 3
      types.mli
  16. 18 12
      util.ml
  17. 14 3
      util.mli

+ 2 - 2
Makefile

@@ -2,7 +2,7 @@
 BIN_DIR := bin
 RESULT := $(BIN_DIR)/civcc
 GLOBALS := types globals stringify util
-PHASES := load parse print desug context typecheck dimreduce boolop constprop \
+PHASES := load parse print context desug typecheck dimreduce boolop constprop \
 	unroll index assemble peephole output
 SOURCES := $(addsuffix .mli,$(GLOBALS)) $(addsuffix .ml,$(GLOBALS)) \
 	lexer.mll parser.mly main.mli \
@@ -23,7 +23,7 @@ TESTSUITE_TGT := testsuite.tar.gz
 TOOLCHAIN_TGT := toolchain.tar.gz
 
 # Set debugging flag to enable exception backtraces for OCAMLRUNPARAM=b
-#OCAMLFLAGS := -g
+OCAMLFLAGS := -g
 
 OCAMLYACC := menhir
 YFLAGS := --infer --explain

+ 3 - 3
main.ml

@@ -21,8 +21,8 @@ let parse_args () =
               Possible options are (in order of execution):
                 load      : Load input file and run C preprocessor
                 parse     : Parse input
-                desug     : Desugaring
                 context   : Context analysis
+                desug     : Desugaring
                 typecheck : Type checking
                 dimreduce : Array dimension reduction
                 boolop    : Convert boolean operations
@@ -110,10 +110,10 @@ let () =
     |> print_ir  always   false           "load"
     |> run_phase always   Parse.phase     "Parse input"
     |> print_ir  always   false           "parse"
+    |> run_phase always   Context.phase   "Context analysis"
+    |> print_ir  always   false           "context"
     |> run_phase always   Desug.phase     "Desugaring"
     |> print_ir  always   false           "desug"
-    |> run_phase always   Context.phase   "Context analysis"
-    |> print_ir  always   true            "context"
     |> run_phase always   Typecheck.phase "Type checking"
     |> print_ir  always   true            "typecheck"
     |> run_phase always   Dimreduce.phase "Array dimension reduction"

+ 1 - 1
main.mli

@@ -6,5 +6,5 @@
     accordingly. *)
 
 (** Main function of a phase. Each phase exports a function of this signature
-    that is called by the {!main}. *)
+    that is called by the main module. *)
 type phase_func = Types.intermediate -> Types.intermediate

+ 1 - 1
phases/constprop.mli

@@ -80,5 +80,5 @@ Constant propagation reduces this to:
 \} v}
     *)
 
-(** Main phase function, called by {!Main}. Calls {!propagate_consts}. *)
+(** Main phase function, called by {!Main}. *)
 val phase : Main.phase_func

+ 218 - 118
phases/context.ml

@@ -2,156 +2,256 @@ open Printf
 open Types
 open Util
 
-type nametype = Varname of string | Funcname of string
+let rec add_depth depth node =
+  match node with
+  | GlobalDec (ArrayDims (ctype, dims), name, ann) ->
+    (* XXX: traversal should traverse into Dim nodes *)
+    let dims = List.map (add_depth depth) dims in
+    GlobalDec (ArrayDims (ctype, dims), name, Depth depth :: ann)
+  | Param (ArrayDims (ctype, dims), name, ann) ->
+    let dims = List.map (add_depth depth) dims in
+    Param (ArrayDims (ctype, dims), name, Depth depth :: ann)
+  | GlobalDec _
+  | GlobalDef _
+  | Param _
+  | Dim _
+  | VarDec _
+  | Var _
+  | FunCall _
+  | Assign _
+  | For _
+  | VarUse _
+  | FunUse _
+  | VarLet _ ->
+    annotate (Depth depth) node |> traverse_unit (add_depth depth)
+  | FunDec _ ->
+    annotate (Depth depth) node
+  | FunDef (export, ret_type, name, params, body, ann) ->
+    let params = List.map (add_depth (depth + 1)) params in
+    let body = add_depth (depth + 1) body in
+    FunDef (export, ret_type, name, params, body, Depth depth :: ann)
+  | _ ->
+    traverse_unit (add_depth depth) node
 
-let type2str = function Funcname _ -> "function" | Varname _ -> "variable"
+type identifier_type = Funcname | Varname
 
-let mapfind name tbl =
-  if Hashtbl.mem tbl name then Some (Hashtbl.find tbl name) else None
+let typename = function Varname -> "variable" | Funcname -> "function"
 
-let check_in_scope name errnode scope err =
-  let vars, funs = scope in
-  let name, tbl, other_map, desired_type = match name with
-  | Varname  name -> (name, vars, funs, "variable")
-  | Funcname name -> (name, funs, vars, "function")
-  in
-  match mapfind name tbl with
-  | Some (dec, dec_depth, _) ->
-    (dec, dec_depth)
-  | None ->
-    let msg = match mapfind name other_map with
-    | Some _ -> sprintf "\"%s\" is not a %s" name desired_type
-    | None   -> sprintf "undefined %s \"%s\"" desired_type name
-    in
-    err := !err @ [NodeMsg (errnode, msg)];
-    (DummyNode, -1)
+let tblfind tbl name = try Some (Hashtbl.find tbl name) with Not_found -> None
 
-let add_to_scope name dec depth (vars, funs) =
-  let name, tbl, name_type = match name with
-  | Varname name  -> name, vars, "variable"
-  | Funcname name -> name, funs, "function"
-  in
-  match mapfind name tbl with
-  (* Identifiers of lower depth may be overwritten, but idenetifiers at
-    * the same depth must be unique for consistency *)
-  | Some (orig, orig_depth, _) when orig_depth >= depth ->
-    (* For generated variables, don't gove an error, since the error variable
-     * is a derived array dimension of a redefined array, which will yield an
-     * error later on *)
-    if is_generated_id name then
-      Hashtbl.replace tbl name (dec, depth, name_type)
-    else
-      let msg = sprintf "Error: cannot redeclare %s \"%s\"" name_type name in
-      prerr_loc_msg (locof dec) msg;
-      prerr_loc_msg (locof orig) "Previously declared here:";
-      raise (FatalError NoMsg)
+let add_to_scope scope name dec namety err =
+  match tblfind scope name with
+  | Some orig when depthof orig >= depthof dec ->
+    err := NodeMsg (orig, "\rPreviously declared here:") ::
+           NodeMsg (dec, sprintf "Error: cannot redeclare %s \"%s\""
+                         (typename namety) name) :: !err
   | Some _ ->
-    Hashtbl.replace tbl name (dec, depth, name_type)
+    Hashtbl.replace scope name dec
   | None ->
-    Hashtbl.add tbl name (dec, depth, name_type)
+    Hashtbl.add scope name dec
 
-let rec analyse scope depth node err =
-  let rec collect node = match node with
-    (* For extern array declarations, add the dimension names as well *)
-    | GlobalDec (ArrayDims (ctype, dims), name, ann) ->
-      let t = ArrayDims (ctype, List.map (annotate (Depth depth)) dims) in
-      let node = GlobalDec (t, name, Depth depth :: ann) in
-      add_to_scope (Varname name) node depth scope;
-      node
+let check_in_scope scope name errnode namety err =
+  match tblfind scope name with
+  | Some dec -> dec
+  | None ->
+    let msg = sprintf "undefined %s \"%s\"" (typename namety) name in
+    err := NodeMsg (errnode, msg) :: !err;
+    DummyNode
 
-    (* For variables, add the name (array dimensions are added
-     * implicitly, since they have separate VarDec nodes which were added
-     * during the desugaring phase *)
-    | VarDec (_, name, _, _)
-    | GlobalDec (_, name, _)
-    | GlobalDef (_, _, name, _, _) ->
-      let node = annotate (Depth depth) node in
-      add_to_scope (Varname name) node depth scope;
-      node
+let prt_vars vars =
+  let prt name dec = prerr_string (name ^ ", ") in
+  Hashtbl.iter prt vars;
+  prerr_endline "(end)"
+
+let analyse do_rename program =
+  let err = ref [] in
 
-    (* Functions are traversed later on, for now only add the name *)
+  (* Add functions at the current depth to the function scope, do not traverse
+   * into nested functions *)
+  let rec collect_funs funs node =
+    match node with
     | FunDec (_, name, _, _)
     | FunDef (_, _, name, _, _, _) ->
-      let node = annotate (Depth depth) node in
-      add_to_scope (Funcname name) node depth scope;
+      (* TODO: don't copy function body to save memory *)
+      add_to_scope funs name node Funcname err;
       node
+    | _ -> traverse_unit (collect_funs funs) node
+  in
 
-    (* For a variable or function call, look for its declaration in the
-     * current scope and save a its type/depth information  *)
-    | Var (name, dims, ann) ->
-      let dec, dec_depth = check_in_scope (Varname name) node scope err in
-      VarUse (dec, optmap collect dims, Depth depth :: ann)
+  (* Traverse through statements in the current scope, checking and replacing
+   * variable occurrences. Add newly declared variables to the variable scope
+   * on-the-fly. *)
+  let rec traverse scope node =
+    let trav = traverse scope in
+    let trav_dims = function
+      | ArrayDims (ctype, dims) ->
+        ArrayDims (ctype, List.map trav dims)
+      | ctype -> ctype
+    in
+    let vars, funs, repl = scope in
+    let check_rename node rename =
+      let name = nameof node in
+      let shadows_higher_scope =
+        do_rename && (Hashtbl.mem vars name || Hashtbl.mem repl name)
+      in
 
-    | FunCall (name, args, ann) ->
-      let dec, dec_depth = check_in_scope (Funcname name) node scope err in
-      FunUse (dec, List.map collect args, Depth depth :: ann)
+      (* Trigger duplication error and make sure following duplication errors
+       * will refer to a non-generated variable name *)
+      add_to_scope vars name node Varname err;
 
-    (* Assign statements are replaced with VarLet nodes, which stores the
-     * declaration of the assigned variable *)
-    | Assign (name, dims, value, ann) ->
-      let dec, dec_depth = check_in_scope (Varname name) node scope err in
-      VarLet (dec, optmap collect dims, collect value, Depth depth :: ann)
+      if shadows_higher_scope then begin
+        let newname = fresh_id name in
+        Hashtbl.replace repl name newname;
+        let newnode = rename newname in
+        add_to_scope vars newname newnode Varname err;
+        newnode
+      end else
+        node
+    in
+    let add_dims = function
+      | ArrayDims (ctype, dims) ->
+        let rec add = function
+          | [] -> []
+          | (Dim (name, ann) as dim) :: tl ->
+            check_rename dim (fun name -> Dim (name, ann)) :: add tl
+          | _ -> raise InvalidNode
+        in
+        ArrayDims (ctype, add dims)
+      | ctype -> ctype
+    in
+    match node with
+    | Program (decls, ann) ->
+      Program (Block decls |> trav |> trav_funs scope |> block_body, ann)
 
-    | Allocate (dec, dims, ann) ->
-      let dec, dec_depth = check_in_scope (Varname (nameof dec)) node scope err in
-      Allocate (dec, List.map collect dims, Depth depth :: ann)
+    | FunDec _ | FunDef _ -> node
 
-    | _ -> traverse_unit collect node
-  in
+    | GlobalDef (export, ctype, name, init, ann) ->
+      let node = GlobalDef (export, trav_dims ctype, name, optdo trav init, ann) in
+      add_to_scope vars name node Varname err;
+      node
 
-  let rec traverse scope depth = function
-    (* Increase nesting level when entering function *)
-    | FunDef (export, ret_type, name, params, body, ann) ->
-      let vars, funs = scope in
-      let local_scope = (Hashtbl.copy vars, Hashtbl.copy funs) in
-      let params = List.map (traverse local_scope (depth + 1)) params in
-      let body = analyse local_scope (depth + 1) body err in
-      FunDef (export, ret_type, name, params, body, ann)
+    | VarDec (ctype, name, init, ann) ->
+      let ctype = trav_dims ctype in
+      let init = optdo trav init in
+      let node = VarDec (ctype, name, init, ann) in
+      check_rename node (fun name -> VarDec (ctype, name, init, ann))
 
-    | Param (ArrayDims (ctype, dims), name, ann) ->
-      let rec add_dims = function
+    | GlobalDec (ArrayDims (ctype, dims), name, ann) ->
+      let rec replace_dims i = function
         | [] -> []
-        | Dim (name, ann) :: tl ->
-          let dim = Dim (name, Depth depth :: ann) in
-          add_to_scope (Varname name) dim depth scope;
-          dim :: (add_dims tl)
+        | Dim (dimname, ann) :: tl ->
+          let newname = generate_array_dim name i in
+          Hashtbl.add repl dimname newname;
+          Dim (newname, ann) :: replace_dims (i + 1) tl
         | _ -> raise InvalidNode
       in
-      let node = Param (ArrayDims (ctype, add_dims dims), name, ann) in
-      add_to_scope (Varname name) node depth scope;
+      let ctype = ArrayDims (ctype, replace_dims 0 dims) in
+      let node = GlobalDec (add_dims ctype, name, ann) in
+      add_to_scope vars name node Varname err;
       node
 
-    | VarDec _ as node -> node
+    | GlobalDec (ctype, name, ann) ->
+      add_to_scope vars name node Varname err;
+      node
+
+    | Param (ctype, name, ann) ->
+      let ctype = add_dims ctype in
+      let node = Param (ctype, name, ann) in
+      check_rename node (fun name -> Param (ctype, name, ann))
+
+    | For (name, start, stop, step, body, ann) ->
+      let start, stop, step = trav start, trav stop, trav step in
+
+      (* For-loops are a special case: the loop counter defines a new scope
+       * which is allowed to shadow existing local variables, and allows for
+       * nested loops with the same induction variables. Replace the loop
+       * counter with a fresh variable to enforce this behaviour, and avoid
+       * having to replace variables during desigaring. *)
+      (* FIXME: only create new variable if necessary *)
+      let newname = fresh_id name in
+      let node = For (newname, start, stop, step, body, ann) in
+      Hashtbl.add vars name node;
+      Hashtbl.add vars newname node;
+      Hashtbl.add repl name newname;
 
-    | Param (_, name, _) as node ->
-      let node = annotate (Depth depth) node in
-      add_to_scope (Varname name) node depth scope;
+      let node = For (newname, start, stop, step, trav body, ann) in
+      Hashtbl.remove repl name;
+      Hashtbl.remove vars newname;
+      Hashtbl.remove vars name;
       node
 
+    (* Perform renaming *)
+    | Var (name, dims, ann) when Hashtbl.mem repl name ->
+      trav (Var (Hashtbl.find repl name, dims, ann))
+
+    | Assign (name, dims, value, ann) when Hashtbl.mem repl name ->
+      trav (Assign (Hashtbl.find repl name, dims, value, ann) )
+
+    (* Replace variables or function calls with use-nodes which contain the
+     * entire declaration   *)
+    | Var (name, dims, ann) ->
+      let dec = check_in_scope vars name node Varname err in
+      VarUse (dec, optmap trav dims, ann)
+
+    | FunCall (name, args, ann) ->
+      let dec = check_in_scope funs name node Funcname err in
+      FunUse (dec, List.map trav args, ann)
+
+    (* Assign statements are replaced with VarLet nodes, which stores the
+     * declaration of the assigned variable *)
+    | Assign (name, dims, value, ann) ->
+      begin
+        match check_in_scope vars name node Varname err with
+        | For _ ->
+          err := NodeMsg (node, "cannot assign to induction variable") :: !err;
+          node
+        | dec ->
+          VarLet (dec, optmap trav dims, trav value, ann)
+      end
+
+    (* Also support intermediary nodes because context analysis is re-run later
+     * on to propagate new declaration properties *)
+    | VarUse (dec, dims, ann) ->
+      VarUse (Hashtbl.find vars (nameof dec), optmap trav dims, ann)
+
+    | FunUse (dec, args, ann) ->
+      FunUse (Hashtbl.find funs (nameof dec), List.map trav args, ann)
+
+    | VarLet (dec, dims, value, ann) ->
+      VarLet (Hashtbl.find vars (nameof dec), optmap trav dims, trav value, ann)
+
+    | Allocate (dec, dims, ann) ->
+      Allocate (Hashtbl.find vars (nameof dec), List.map trav dims, ann)
+
     (* Do not traverse into external function declarations, since their
      * parameters must not be added to the namespace *)
-    | FunDec _ as node -> node
+    | FunDec _ -> node
 
-    | node  -> traverse_unit (traverse scope depth) node
+    | _ -> traverse_unit trav node
+
+  and trav_funs scope = function
+    (* Copy scope when entering a function body *)
+    | FunDef (export, ret_type, name, params, body, ann) ->
+      let vars, funs, repl = scope in
+      let locfuns = Hashtbl.copy funs in
+      let locscope = (Hashtbl.copy vars, locfuns, Hashtbl.copy repl) in
+      let params = List.map (traverse locscope) params in
+      let body =
+        collect_funs locfuns body |> traverse locscope |> trav_funs locscope
+      in
+      FunDef (export, ret_type, name, params, body, ann)
+
+    | node -> traverse_unit (trav_funs scope) node
   in
 
-  (*
-   * First collect all definitions at the current depth. Then, traverse into
-   * functions with a copy of the current scope. This is needed because
-   * functions can access all identifiers in their surrounding scope.
-   * E.g., the following is allowed:
-   *
-   * void foo() { glob = 1; }
-   * int glob;
-   *)
-  collect node |> traverse scope depth
-
-let analyse_context program =
-  let scope = (Hashtbl.create 20, Hashtbl.create 20) in
-  let err = ref [] in
-  let node = analyse scope 0 program err in
-  quit_on_error node !err
+  let vars = Hashtbl.create 32 in
+  let funs = Hashtbl.create 16 in
+  let repl = Hashtbl.create 16 in
+  add_depth 0 program |>
+  collect_funs funs |>
+  traverse (vars, funs, repl) |>
+  quit_on_error (List.rev !err)
 
 let phase = function
-  | Ast node -> Ast (analyse_context node)
+  | Ast node -> Ast (analyse true node)
   | _ -> raise InvalidInput

+ 114 - 19
phases/context.mli

@@ -1,28 +1,123 @@
 (** Context analysis: find declarations of used variables and functions. *)
 
-(** The desugared CiviC code contains [Var], [FunCall] and [Assign] nodes. These
+(** {2 Overview }
+
+    The desugared CiviC code contains [Var], [FunCall] and [Assign] nodes. These
     all use variables or functions identified by a [string] name. The context
     analysis phase links each variable occurrence to its declaration: a
-    [VarDec], [Param], [Dim], [GlobalDe[cf]] or [FunDe[cf]].  Since the original
-    nodes only have a [string] field to save the declaration, new node types
-    have been added which replace the name with a declaration node: [VarUse],
-    [FunUse], and [VarLet].
-
-    The whole analysis is done in one traversal. When a declaration node is
-    encountered, its name and declaration are added to the currect scope (a
-    mutable hash table). When a variable of fuction use is encountered, the name
-    and declaration are looked up in the current scope. The scope is duplicated
-    when entering a function, and restored when exiting the function, so that
-    functions that are not subroutines of each other, do not share inner variable
-    definitions. Note that the traversal traverses into functions AFTER it has
-    found all declarations in the outer scope of the function, since functions
-    can use any function of variable that is defined within the same scope (also
-    those defined after the function itself).
+    [VarDec], [Param], [Dim], [GlobalDe[cf]], [FunDe[cf]] or [For] node. Since
+    the original nodes only have a [string] field to save the declaration, new
+    node types have been added which replace the name with a declaration node:
+    [VarUse], [FunUse], and [VarLet].
+
+    Context analysis can be broken down into the following steps: {ol
+    {li Find all function declarations in the current scope. }
+    {li Traverse the current scope without traversing into nested functions,
+        adding variable declarations as they are declared, and applying renaming
+        rules as described below. Error messages for duplicate varaible
+        declarations, undefined variable uses, and assignments to loop induction
+        variables are generated here. }
+    {li Traverse into (nested) functions and go to step 1, while copying the
+        current scope for each function (so that the higher scope is not
+        polluted with local variables of the nested function). }
+    }
+
+    {2 Renaming variables }
+
+    One task of context analysis is resolvement of scoping conclicts.
+    Specifically, when a function parameter or local variable shadows a variable
+    in a higher scope (either an ancestor function or the global scope),
+    initializations in the corresponding function that reference the shadowed
+    variable will be invalidated when the initialization moved down after
+    desugaring. This fixed by first traversing into the initialization part of a
+    variable declaration before adding the newly declared variable to the scope,
+    and then renaming the declared variable and all of its future uses. An
+    example follows:
+
+{v int a = 1;
+int foo() \{
+  int a = a + 1;  // 'a' is already in the scope, so rename it
+  return a;
+\} v}
+    becomes:
+{v int a = 1;
+int foo() \{
+  int _a_1 = a + 1;
+  return _a_1;    // uses of local 'a' must be renamed as well
+\} v}
+
+    Note that the renaming scheme also solves the case in which a variable with
+    the name of another local variable that is declared later on, is referenced
+    in an initialization expression:
+
+{v int a = 1;
+int foo() \{
+  int b = a + 1;  // should reference global 'a', not local 'a' below
+  int a = 2;      // 'a' is already in the scope, so rename it
+  return a;
+\} v}
+    After desugaring, this becomes:
+{v int a = 1;
+int foo() \{
+  int b;
+  int _a_1;
+  b = a + 1;      // 'a' correctly references to global variable
+  _a_1 = 2;
+  return _a_1;
+\} v}
+
+    {3 For-loop counters }
+
+    Induction variables in for-loop counters form a special case since they may
+    shadow a local variable in the current scope, and even the induction
+    variable of a parent for-loop. Since for-loops will be rewritten to
+    while-loops during desugaring and these semantics only apply to for-loops,
+    induction variables are renamed to fresh variables as needed to avoid
+    scoping conflicts:
+{v
+void foo() \{                     |   void foo() \{
+    int i;                       |       int i;
+    printInt(i);                 |       printInt(i);
+    for (int i = 1, 10) \{        |       for (int _i_1 = 1, 10) \{
+        printInt(i);             |           printInt(_i_1);
+        for (int i = 1, 10) \{    |           for (int _i_2 = 1, 10) \{
+            printInt(i);         |               printInt(_i_2);
+        \}                        |           \}
+        printInt(i);             |           printInt(_i_1);
+    \}                            |       \}
+    printInt(i);                 |       printInt(i);
+\}                                |   \} v}
+
+    {3 Imported array dimensions }
+
+    Another special case are dimensions of imported arrays. In [extern int[n]
+    a], [n] is just a name given locally to the first dimension of [a]. Since
+    this name is unknown the module where the dimension is exported, this needs
+    to be consistently transformed into a format which both modules can decide
+    on independently.
+
+    Our implementation changes the snippet into [extern int[__a_0] a], where the
+    [__] prefix indicates an array dimension and [a_0] indicates the first
+    dimension of [a]. The double underscore prefix is necessary to assert
+    uniqueness of the variable, since the suffix is based on the dimension index
+    rather than the global counter that is used for other generated variables
+    (we cannot use this global counter since it may differ between compilation
+    instances of the different modules).
+
+    This way, exporting dimensions global arrays in another module can be done
+    in the same way while asserting uniqeness of the imported/exported
+    variables.
+
+    For exported variables, this renaming is done in the {!Desug} phase. The
+    reason for this is that no separate variable exist yet for exported array
+    dimensions at this stage, they are generated during desugaring.
     *)
 
 (** Traversal that replaces names with declarations. Exported for use in other
-    phases. *)
-val analyse_context : Types.node -> Types.node
+    phases. The boolean argument toggles renaming, which is only [true] the
+    first time the phase is run (after that it is not necessary since the
+    compiler does not generate conflicting variables). *)
+val analyse : bool -> Types.node -> Types.node
 
-(** Main phase function, called by {!Main}. Calls {!analyse_context}. *)
+(** Main phase function, called by {!Main}. Calls {!analyse}. *)
 val phase : Main.phase_func

+ 157 - 261
phases/desug.ml

@@ -2,171 +2,66 @@ open Printf
 open Types
 open Util
 
-(* Check if a function defines a variable name *)
-let defines var = function
-  | FunDef (export, ret_type, name, params, Block (VarDecs decs :: tl), ann) ->
-    let rec trav_decs = function
-      | [] -> false
-      | Param (ArrayDims (_, dims), name, _) :: tl ->
-        name = var || trav_decs dims || trav_decs tl
-      | (Dim (name, _) | VarDec (_, name, _, _) | Param (_, name, _)) :: _
-        when name = var -> true
-      | _ :: tl -> trav_decs tl
-    in
-    trav_decs params || trav_decs decs
-  | _ -> raise InvalidNode
-
-(* Replace all occurences of a variable name with another name *)
-let rec replace_var var replacement node =
-  let trav = (replace_var var replacement) in
-  let trav_all = List.map trav in
-  let trav_opt = function None -> None | Some node -> Some (trav node) in
-  match node with
-  (* Replace variable name on match *)
-  | Var (name, ind, ann) when name = var ->
-    let ind = match ind with None -> None | Some ind -> Some (trav_all ind) in
-    Var (replacement, ind, ann)
-
-  (* Don't enter a function body if it redefines the variable *)
-  | FunDef _ when defines var node -> node
-
-  (* Don't traverse into a for-loop body if the loop counter redefines var *)
-  | For (counter, start, stop, step, body, ann) when counter = var ->
-    For (counter, trav start, trav stop, trav step, body, ann)
-
-  (* At this point, array dimension expressions may not have been moved to new
-   * variables yet, so traverse them explicitly *)
-  | VarDec (ArrayDims (ctype, dims), name, init, ann) ->
-    VarDec (ArrayDims (ctype, trav_all dims), name, trav_opt init, ann)
-
-  | node -> traverse_unit trav node
-
-(* Create new constant variables for scalar initialisations on arrays so that
- * they are only evaluated once *)
-let rec move_scalars = function
-  (* Prevent next match for ArrayConst initialisations *)
-  | VarDec (ArrayDims _, _, Some (ArrayConst _), _) as node ->
-    node
-
-  (* Add vardec for scalar value *)
-  | VarDec (ArrayDims _ as ctype, name, Some value, ann) as node ->
-    let scalar_name = fresh_const "scalar" in
-    Block [
-      VarDec (basetypeof node, scalar_name, Some value, ann);
-      VarDec (ctype, name, Some (Var (scalar_name, None, annof value)), ann);
-    ]
-
-  | node -> traverse_unit move_scalars node
-
 (* Generate new variables for array dimensions, to avoid re-evalutation when
  * array dimensions are used (e.g., after array dimension reduction). *)
-let array_dims node =
-  (*
-  let make_dimname basename i = generate_const basename (i + 1) in
-  *)
-  let make_dimname = generate_const in
-  let patch_dims basename values make_decs =
+let move_array_dims node =
+  let patch_dims basename values make_decs make_dimname =
     let names = mapi (fun i _ -> make_dimname basename i) values in
-
     let decs = List.concat (List.map2 make_decs values names) in
-
     let make_dim value name = Dim (name, annof value) in
     let dims = List.map2 make_dim values names in
-
     (decs, dims)
   in
-
-  (* Save dimension replacements in one global hash table (we are not replacing
-   * local vars, so everything is in the global scope) *)
-  let replacements = Hashtbl.create 10 in
-
+  let fresh_dim name _ = fresh_const name in
   let rec trav = function
     | VarDec (ArrayDims (ctype, values), name, init, ann) ->
       let make_decs value name = [VarDec (Int, name, Some value, [])] in
-      let decs, dims = patch_dims name values make_decs in
+      let decs, dims = patch_dims name values make_decs fresh_dim in
       Block (decs @ [VarDec (ArrayDims (ctype, dims), name, init, ann)])
 
+    (* Omit the trailing "_" for exported variables since they should not be
+     * pruned by optimisations *)
     | GlobalDef (export, ArrayDims (ctype, values), name, init, ann) ->
-      (* Move array dimensions into new variables to avoid double evaluation of
-      * expressions with side effects (i.e. function calls) *)
+      let make_dimname = if export then generate_array_dim else fresh_dim in
       let make_decs value name = [GlobalDef (export, Int, name, Some value, [])] in
-      let decs, dims = patch_dims name values make_decs in
+      let decs, dims = patch_dims name values make_decs make_dimname in
       Block (decs @ [GlobalDef (export, ArrayDims (ctype, dims), name, init, ann)])
 
-    | GlobalDec (ArrayDims (ctype, dims), name, ann) ->
-      (* Create an 'extern int ...' definition for each dimension with a
-      * consistent import name, and replace all local uses with the imported
-      * variable name *)
-      let make_decs dim impname =
-        match dim with
-        | Dim (dimname, _) ->
-          (* Fix name clashes (needed because context analysis has not been done
-           * yet) *)
-          if Hashtbl.mem replacements dimname then begin
-            raise (FatalError (NodeMsg (dim, "duplicate dimension name")))
-          end;
-
-          (* Occurences of dimension names are replaced after the traversal *)
-          Hashtbl.add replacements dimname impname;
-
-          [GlobalDec (Int, impname, [])]
-        | _ -> raise InvalidNode
-      in
-      let decs, dims = patch_dims name dims make_decs in
-      Block (decs @ [GlobalDec (ArrayDims (ctype, dims), name, ann)])
-
-      (*
-      let make_decs i = function
-        | Dim (dimname, dimann) ->
-          let impname = generate_id name (i + 1) in
-          let decs = [
-            GlobalDec (Int, impname, []);
-            GlobalDef (false, Int, dimname, Some (Var (impname, None, [])), [])
-          ] in
-          (decs, Dim (impname, dimann))
-        | _ -> raise InvalidNode
-      in
-      let decs, dims = List.split (mapi make_decs dims) in
-      Block (List.concat decs @ [GlobalDec (ArrayDims (ctype, dims), name, ann)])
-      *)
-
     | node -> traverse_unit trav node
   in
-  Hashtbl.fold replace_var replacements (trav node)
+  trav node
 
-(* Split variable initialisation into declaration and assignment *)
-let rec split_inits = function
-  (* Wrap array initialisation in ArrayInit to pass dimensions *)
-  | VarDec (ArrayDims (_, dims) as ctype, name, Some value, ann) ->
-    Block [
-      VarDec (ctype, name, None, ann);
-      Assign (name, None, ArrayInit (value, dims), ann);
-    ]
+(* Create new constant variables for scalar initialisations on arrays so that
+ * they are only evaluated once *)
+let rec move_scalar_inits = function
+  (* Prevent next match for ArrayConst initialisations *)
+  | VarDec (ArrayDims _, _, Some (ArrayConst _), _) as node ->
+    node
 
-  | GlobalDef (export, (ArrayDims (_, dims) as ctype), name, Some value, ann) ->
-    Block [
-      GlobalDef (export, ctype, name, None, ann);
-      Assign (name, None, ArrayInit (value, dims), ann);
-    ]
+  (* Add vardec for scalar value *)
+  | VarDec (ArrayDims (ctype, dims) as atype, name, Some value, ann) as node ->
+    let scalar_dec = VarDec (ctype, fresh_const "scalar", Some value, ann) in
+    let scalar_use = VarUse (scalar_dec, None, annof value) in
+    Block [scalar_dec; VarDec (atype, name, Some scalar_use, ann)]
 
+  | node -> traverse_unit move_scalar_inits node
+
+(* Split variable initialisation into declaration and assignment *)
+let rec split_inits = function
   | VarDec (ctype, name, Some init, ann) ->
-    Block [
-      VarDec (ctype, name, None, ann);
-      Assign (name, None, init, ann);
-    ]
+    let dec = VarDec (ctype, name, None, ann) in
+    Block [dec; VarLet (dec, None, init, ann)]
 
   | GlobalDef (export, ctype, name, Some init, ann) ->
-    Block [
-      GlobalDef (export, ctype, name, None, ann);
-      Assign (name, None, init, ann);
-    ]
+    let dec = GlobalDef (export, ctype, name, None, ann) in
+    Block [dec; VarLet (dec, None, init, ann)]
 
   | node -> traverse_unit split_inits node
 
 (* Add __allocate statements after array declarations *)
 let rec add_allocs node =
   let create_dimvar = function
-    | Dim (name, _) -> Var (name, None, [])
+    | Dim (name, _) as dim -> VarUse (dim, None, [])
     | _ -> raise InvalidNode
   in
   match node with
@@ -178,103 +73,18 @@ let rec add_allocs node =
 
   | node -> traverse_unit add_allocs node
 
-let extract_inits lst =
-  let rec trav inits = function
-    | [] ->
-      (List.rev inits, [])
-    | (Assign _ as hd) :: tl
-    | (Allocate _ as hd) :: tl ->
-      trav (hd :: inits) tl
-    | hd :: tl ->
-      let inits, tl = trav inits tl in
-      (inits, (hd :: tl))
-  in trav [] lst
-
-let rec move_inits = function
-  (* Move global initialisations to __init function *)
-  | Program (decls, ann) ->
-    let decls = List.map move_inits decls in
-    begin match extract_inits decls with
-    | ([], _) -> Program (decls, ann)
-    | (inits, decls) ->
-      let body = Block (VarDecs [] :: LocalFuns [] :: inits) in
-      let init_func = FunDef (true, Void, "__init", [], body, []) in
-      Program (init_func :: decls, ann)
-    end
-
-  (* Split local variable initialisations in declaration and assignment *)
-  | FunDef (export, ret_type, name, params, Block body, ann) ->
-    let rec place_inits inits = function
-      | VarDecs lst :: tl ->
-        let inits, decs = extract_inits lst in
-        VarDecs decs :: (place_inits inits tl)
-      | LocalFuns _ as hd :: tl ->
-        hd :: inits @ tl
-      | _ -> raise InvalidNode
-    in
-    let body = Block (place_inits [] body) in
-    FunDef (export, ret_type, name, params, body, ann)
-
-  | node -> traverse_unit move_inits node
-
-let for_to_while node =
-  let rec trav new_vars = function
-    | FunDef (export, ret_type, name, params, body, ann) ->
-      let rec place_decs decs = function
-        | Block (VarDecs lst :: tl) -> Block (VarDecs (decs @ lst) :: tl)
-        | _ -> raise InvalidNode
-      in
-      let new_vars = ref [] in
-      let body = trav new_vars body in
-      let create_vardec name = VarDec (Int, name, None, []) in
-      let new_vardecs = List.map create_vardec !new_vars in
-      let body = place_decs new_vardecs body in
-      FunDef (export, ret_type, name, params, body, ann)
-
-    (* Transform for-loops to while-loops *)
-    | For (counter, start, stop, step, body, ann) ->
-      let _i = fresh_id counter in
-      let _stop = fresh_const "stop" in
-      let _step = fresh_const "step" in
-      new_vars := !new_vars @ [_i; _stop; _step];
-
-      let vi = Var (_i, None, []) in
-      let vstop = Var (_stop, None, annof stop) in
-      let vstep = Var (_step, None, annof step) in
-      let cond = Cond (
-        Binop (Gt, vstep, Const (IntVal 0l, []), []),
-        Binop (Lt, vi, vstop, []),
-        Binop (Gt, vi, vstop, []),
-        []
-      ) in
-      Block [
-        Assign (_i, None, start, annof start);
-        Assign (_stop, None, stop, annof stop);
-        Assign (_step, None, step, annof step);
-        trav new_vars (While (cond, (Block (
-          block_body (replace_var counter _i body) @
-          [Assign (_i, None, Binop (Add, vi, vstep, []), [])]
-        )), ann));
-      ]
-
-    (* Transform while-loops to do-while loops in if-statements *)
-    (* DISABLED, while-loops are explicitly supported by the assembly phase
-    | While (cond, body, ann) ->
-      let cond = trav new_vars cond in
-      let body = trav new_vars body in
-      Block [If (cond, Block [DoWhile (cond, body, ann)], ann)]
-    *)
-
-    | node -> traverse_unit (trav new_vars) node
-  in
-  trav (ref []) node
+let dimsof = function
+  | GlobalDef (_, ArrayDims (_, dims), _, _, _)
+  | VarDec (ArrayDims (_, dims), _, _, _) -> dims
+  | _ -> raise InvalidNode
 
 let rec array_init = function
   (* Transform array constant initialisation into separate assign statements
    * for all entries in the array literal *)
-  | Assign (name, None, ArrayInit (ArrayConst _ as value, dims), ann) ->
+  | VarLet (dec, None, (ArrayConst _ as value), ann) ->
+    let name = nameof dec in
     let intconst i = Const (IntVal (Int32.of_int i), []) in
-    let ndims = List.length dims in
+    let ndims = List.length (dimsof dec) in
     let rec make_assigns depth i indices = function
       | [] -> []
       | hd :: tl ->
@@ -285,14 +95,7 @@ let rec array_init = function
         make_assigns (depth + 1) 0 indices values
       | value when depth = ndims ->
         let indices = List.map intconst indices in
-        [Assign (name, Some (List.rev indices), value, ann)]
-      (* DISABLED: nesting level must now be equal to number of dimensions
-      | value when depth < ndims ->
-        (* Use for-loops for scalar assignment on sub-array *)
-        let value = ArrayInit (value, dims) in
-        let indices = List.map intconst indices in
-        [array_init (Assign (name, Some (List.rev indices), value, ann))]
-      *)
+        [VarLet (dec, Some (List.rev indices), value, ann)]
       | node ->
         raise (FatalError (NodeMsg (node, sprintf
           "dimension mismatch: expected %d nesting levels, got %d"
@@ -300,51 +103,144 @@ let rec array_init = function
     in
     Block (List.rev (trav 0 [] value))
 
-  (* Replace no indices with empty indices to have a list below *)
-  | Assign (name, None, (ArrayInit _ as value), ann) ->
-    array_init (Assign (name, Some [], value, ann))
-
-  | Assign (name, Some indices, ArrayInit (value, dims), ann) ->
-    let rec add_loop indices = function
-      | [] ->
-        array_init (Assign (name, Some indices, value, ann))
-      | dim :: rest ->
+  (* Scalar initialisation *)
+  | VarLet (dec, None, scalar, ann) when is_array dec ->
+    let create_loop dim body =
+      let counter = fresh_id "i" in
+      let start = Const (IntVal 0l, []) in
+      let stop = VarUse (dim, None, ann) in
+      let step = Const (IntVal 1l, []) in
+      For (counter, start, stop, step, body, [])
+    in
+    let rec nest_loops indices = function
+      | [] -> Block [VarLet (dec, Some (List.rev indices), scalar, [])]
+      | dim :: tl ->
         let counter = fresh_id "i" in
         let start = Const (IntVal 0l, []) in
+        let stop = VarUse (dim, None, ann) in
         let step = Const (IntVal 1l, []) in
-        let body = Block [add_loop (indices @ [Var (counter, None, [])]) rest] in
-        let stop = match dim with
-        | Dim (name, ann) -> Var (name, None, ann)
-        | _ -> dim
-        in
+        let body = nest_loops (Var (counter, None, []) :: indices) tl in
         For (counter, start, stop, step, body, [])
     in
-    let rec sublist n = function
-      | [] when n > 0  -> raise (Invalid_argument "n")
-      | []             -> []
-      | lst when n = 0 -> lst
-      | _ :: tl        -> sublist (n - 1) tl
-    in
-    let dims_left = sublist (List.length indices) dims in
-    add_loop indices dims_left
+    nest_loops [] (dimsof dec)
 
   | node -> traverse_unit array_init node
 
+let rec for_to_while = function
+  (* Transform for-loops to while-loops *)
+  | For (counter, start, stop, step, body, ann) ->
+    let dec name init = VarDec (Int, name, Some init, []) in
+    let _i = dec counter start in
+    let _stop = dec (fresh_const "stop") stop in
+    let _step = dec (fresh_const "step") step in
+
+    let vi = VarUse (_i, None, []) in
+    let vstop = VarUse (_stop, None, annof stop) in
+    let vstep = VarUse (_step, None, annof step) in
+    let cond =
+      Cond (
+        Binop (Gt, vstep, Const (IntVal 0l, []), []),
+        Binop (Lt, vi, vstop, []),
+        Binop (Gt, vi, vstop, []),
+      [])
+    in
+    Block [
+      _i; _stop; _step;
+      While (cond, (Block (
+        [body; VarLet (_i, None, Binop (Add, vi, vstep, []), [])]
+      )), ann) |> for_to_while;
+    ]
+
+  (* Transform while-loops to do-while loops in if-statements *)
+  (* DISABLED, while-loops are explicitly supported by the assembly phase
+  | While (cond, body, ann) ->
+    If (cond, DoWhile (cond, for_to_while body, ann), ann)
+  *)
+
+  | node -> traverse_unit for_to_while node
+
+let rec move_vardecs = function
+  | FunDef (export, ret_type, name, params, body, ann) ->
+    let rec trav = function
+      | FunDef _ as node -> (move_vardecs node, [])
+      | VarDec _ as node -> (DummyNode, [node])
+      | node -> traverse_list trav node
+    in
+    let body, decs = traverse_list trav body in
+    let body = Block (decs @ (block_body body)) in
+    FunDef (export, ret_type, name, params, body, ann)
+
+  | node -> traverse_unit move_vardecs node
+
+let rec move_global_inits = function
+  (* Move global initialisations to __init function *)
+  | Program (decls, ann) ->
+    let decls = List.map move_global_inits decls in
+    let rec extract_inits inits = function
+      | [] ->
+        (List.rev inits, [])
+      | (VarLet _ as hd) :: tl
+      | (Allocate _ as hd) :: tl ->
+        extract_inits (hd :: inits) tl
+      | hd :: tl ->
+        let inits, tl = extract_inits inits tl in
+        (inits, (hd :: tl))
+    in
+    begin match extract_inits [] decls with
+    | ([], _) -> Program (decls, ann)
+    | (inits, decls) ->
+      let init_func = FunDef (true, Void, "__init", [], Block inits, []) in
+      Program (init_func :: decls, ann)
+    end
+
+  | node -> traverse_unit move_global_inits node
+
+let rec group_vardecs = function
+  | FunDef (export, ret_type, name, params, Block body, ann) ->
+    let rec create = function
+      | (VarDec _ as hd) :: tl -> VarDecs [hd] :: create tl
+      | tl -> tl
+    in
+    let rec merge = function
+      | VarDecs [a] :: VarDecs b :: tl -> merge (VarDecs (a :: b) :: tl)
+      | VarDecs a :: VarDecs b :: tl -> merge (VarDecs (a @ b) :: tl)
+      | tl -> tl
+    in
+    let body = Block (create body |> merge) |> group_vardecs in
+    FunDef (export, ret_type, name, params, body, ann)
+
+  | node -> traverse_unit group_vardecs node
+
 let phase = function
   | Ast node ->
     Ast begin
       (* Move array dimensions and scalar initialisations into new variables as
        * initialisations, so that they are evaluated exactly once, and so that
        * dimension names are consistent with the array name *)
-      array_dims node |> move_scalars
+      move_array_dims node |> move_scalar_inits |>
 
       (* Split variable initialisations into declarations and assignments, and
        * move the assignments to the function body *)
-      |> split_inits |> add_allocs |> move_inits
+      split_inits |> add_allocs |>
+
+      (* Transform ArrayConst assignment to assignments into for-loops *)
+      array_init |>
+
+      (* Transform for-loops to while-loops *)
+      for_to_while |> split_inits |>
+
+      (* Move variable declarations to the beginning of the function *)
+      move_vardecs |>
+
+      (* Move global initialisation assignments to __init *)
+      move_global_inits |>
+
+      (* Create and merge VarDecs nodes at the start of each function *)
+      group_vardecs |>
 
-      (* Transform ArrayConst assignment to assignments in for-loops, and
-       * transform all for-loops to while-loops afterwards *)
-      |> array_init |> for_to_while
+      (* Propagate new declaration properties to uses (since we have no
+       * pointers) *)
+      Context.analyse false
     end
 
   | _ -> raise InvalidInput

+ 10 - 18
phases/desug.mli

@@ -4,12 +4,11 @@
 (** {2 Split variable initialisations}
 
     Variable initialisations are split into declarations and assignments, to
-    generalize the AST format. This makes context analysis easier, since
-    initialisations need not be considered. The assignments are placed in the
-    function body, after local fuction declarations (which are not in the
-    example below). Initialisations fo global variables are moved to a new
-    function called "__init", which is a reserved function that is called by the
-    VM before the main function is called.
+    generalize the AST format. The assignments are placed in the function body,
+    after local fuction declarations (which are not in the example below).
+    Initialisations fo global variables are moved to a new function called
+    "__init", which is a reserved function that is called by the VM before the
+    main function is called.
 
 {v int glob = 1;
 void foo() \{
@@ -115,19 +114,12 @@ resulting in:
             a[_i_2, _i_3] = _scalar_1_;
         \}
     \}
-    ...  v}
+    ... v}
 
-    The transformation described above is applied to all array definitions,
-    including extern arrays. Although dimensions of extern arrays are not
-    expressions (but identifiers), the transformation is necessary in order to
-    generate consistent names to be imported/exported. E.g. in [extern int[n]
-    a], [n] is just a name given locally to the first dimension of [a].
-    Therefore it is transformed into:
-{v     extern int _a_0_;
-    extern int[_a_0_] a; v}
-    Also, all occurrences of [n] in the rest of the module are replaced by
-    [_a_0_]. For exported arrays, the generated dimension variables need to be
-    exported as well.
+    The transformation described above is applied to both local and global array
+    definitions. For exported global arrays, however, dimensions are renamed
+    slightly differently to be able to consistently the dimensions in another
+    module. The renaming rules for this case are described in {!Context}.
 
     {2 Transforming for-loops to while-loops}
 

+ 27 - 10
phases/dimreduce.ml

@@ -18,6 +18,18 @@ let flatten_type = function
 
 (* Pass array dimensions explicitly to functions *)
 let rec expand_dims = function
+  (* For extern arrays, also add a new variables for each dimension with a
+   * consistent naming scheme so that they can be exported with the same name by
+   * another module *)
+  | GlobalDec (ArrayDims (_, dims) as ctype, name, ann) ->
+    let rec gendims decs = function
+      | [] -> decs
+      | Dim (name, ann) as dim :: tl ->
+        gendims (GlobalDec (Int, name, ann) :: decs) tl
+      | _ -> raise InvalidNode
+    in
+    Block (List.rev (GlobalDec (ctype, name, ann) :: gendims [] dims))
+
   (* Flatten Block nodes returned by transformations below *)
   | FunDef (export, ret_type, name, params, body, ann) ->
     let params = flatten_blocks (List.map expand_dims params) in
@@ -31,9 +43,9 @@ let rec expand_dims = function
     FunUse (dec, flatten_blocks (List.map expand_dims params), ann)
 
   (* Add additional parameters for array dimensions *)
-  | Param (ArrayDims (ctype, dims), name, ann) ->
+  | Param (ArrayDims (_, dims) as ctype, name, ann) ->
     let rec do_expand = function
-      | [] -> [Param (Array ctype, name, ann)]
+      | [] -> [Param (ctype, name, ann)]
       | Dim (name, ann) :: tail ->
         Param (Int, name, ann) :: (do_expand tail)
       | _ -> raise InvalidNode
@@ -54,11 +66,12 @@ let rec expand_dims = function
       | hd :: tl ->
         (* A declaration has been added for each dimension during earlier
          * phases, so we can safely reconstruct it here *)
-        Arg (VarUse (make_dimdec hd, None, [])) :: (do_expand tl)
+        Arg (VarUse (make_dimdec hd, None, [])) :: do_expand tl
     in
-    let dims = match typeof dec with
-    | ArrayDims (_, dims) -> dims
-    | _ -> raise InvalidNode
+    let dims =
+      match typeof dec with
+      | ArrayDims (_, dims) -> dims
+      | _ -> raise InvalidNode
     in
     Block (do_expand dims)
 
@@ -87,9 +100,6 @@ let rec expand depth dims =
 (* Transform multi-dimensional arrays into one-dimensional arrays in row-major
  * order *)
 and dim_reduce depth = function
-  | Allocate (dec, dims, ann) ->
-    Allocate (dec, [multiply dims], ann)
-
   (* Simplify array types in declarations *)
   | GlobalDef (export, ArrayDims (ctype, _), name, None, ann) ->
     GlobalDef (export, Array ctype, name, None, ann)
@@ -97,9 +107,16 @@ and dim_reduce depth = function
   | GlobalDec (ArrayDims (ctype, _), name, ann) ->
     GlobalDec (Array ctype, name, ann)
 
+  | Param (ArrayDims (ctype, _), name, ann) ->
+    Param (Array ctype, name, ann)
+
   | VarDec (ArrayDims (ctype, _), name, None, ann) ->
     VarDec (Array ctype, name, None, ann)
 
+  (* Allocate in rw-major order with the array size being the cartegian product
+   * of all dimensions *)
+  | Allocate (dec, dims, ann) -> Allocate (dec, [multiply dims], ann)
+
   (* Increase nesting depth when going into function *)
   | FunDef (export, ret_type, name, params, body, ann) ->
     let trav = dim_reduce (depth + 1) in
@@ -126,5 +143,5 @@ and dim_reduce depth = function
   | node -> traverse_unit (dim_reduce depth) node
 
 let phase = function
-  | Ast node -> Ast (dim_reduce 0 (expand_dims node))
+  | Ast node -> Ast (expand_dims node |> dim_reduce 0)
   | _ -> raise InvalidInput

+ 30 - 11
phases/dimreduce.mli

@@ -9,10 +9,8 @@
     For function calls, array dimensions are passed as explicit arguments before
     the array argument itself. Naturally, function declarations need to be
     modified accordingly, adding explicit parameters for array dimensions.
-
-    Note that we need not create new variable declarations for array
-    declarations here. This is already done during the desugaring phase
-    ({!Desug} explains the reason for this).
+    Similarly, dimensions of imported arrays are moved into new integer
+    variables.
 
     In the second step, statements and expressions involving multi-dimensional
     array referencing are transformed such that they reference a one-dimensional
@@ -22,7 +20,22 @@
     is the product of all array dimensions. Indexing multi-dimensional arrays is
     changed such that arrays are accessed in row-major order.
 
-{v void foo(int[a, b, c] p) \{
+    Original code ([x], [y] and [z] are defined elsewhere):
+
+{v extern int[u] ext;
+
+void foo(int[a, b, c] p) \{
+    int[5, 10, 3] q;
+    q[x, y, z] = p[x, y, z];
+    foo(q);
+\} v}
+
+    Input for dimension reduction (we use [n], [m] and [k] for readability but
+    in reality these are generated variables):
+
+{v extern int[__ext_0] ext;  // dimensions are encoded in type
+
+void foo(int[a, b, c] p) \{
     int n; int m; int k; int[n, m, k] q;  // n, m, k are added during desugaring
     n = 5;
     m = 10;
@@ -34,25 +47,31 @@
 
     step 1:
 
-{v void foo(int a, int b, int c, int[] p) \{  // Array parameter
+{v extern int __ext_0;  // imported dimensions moved to new variables
+extern int[__ext_0] ext;
+
+void foo(int a, int b, int c, int[] p) \{  // array parameter
     int n; int m; int k; int[n, m, k] q;
     n = 5;
     m = 10;
     k = 3;
     q = __allocate(n, m, k);
     q[x, y, z] = p[x, y, z];
-    foo(n, m, k, q);                      // Array argument
+    foo(n, m, k, q);                      // array argument
 \} v}
 
     step 2:
 
-{v void foo(int[a, b, c] p) \{
-    int n; int m; int k; int[] q;  // Removing dimension information
+{v extern int __ext_0;
+extern int[] ext;                  // removing dimension information
+
+void foo(int[a, b, c] p) \{
+    int n; int m; int k; int[] q;  // removing dimension information
     n = 5;
     m = 10;
     k = 3;
-    q = __allocate(n * m * k);                            // Allocation
-    q[((x * m) + y) * k + z] = p[((x * b) + y) * c + z];  // Indexing
+    q = __allocate(n * m * k);                            // allocation
+    q[((x * m) + y) * k + z] = p[((x * b) + y) * c + z];  // indexing
     foo(n, m, k, q);
 \} v}
 

+ 1 - 16
phases/index.ml

@@ -67,21 +67,6 @@ let tag_index program =
     | _ -> traverse_unit trav node
   in tag (ref 0) [] program
 
-(* Undo context analysis *)
-let rec strip_context = function
-  | VarUse (dec, dims, ann) ->
-    Var (nameof dec, optmap strip_context dims, ann)
-
-  | VarLet (dec, dims, value, ann) ->
-    Assign (nameof dec, optmap strip_context dims, strip_context value, ann)
-
-  | FunUse (dec, args, ann) ->
-    FunCall (nameof dec, List.map strip_context args, ann)
-
-  | node -> traverse_unit strip_context node
-
 let phase = function
-  | Ast node ->
-    let tagged = tag_index (strip_context node) in
-    Ast (Context.analyse_context tagged)
+  | Ast node -> Ast (tag_index node |> Context.analyse false)
   | _ -> raise InvalidInput

+ 4 - 4
phases/index.mli

@@ -10,10 +10,10 @@
     Therefore, this phase first strips all context analysis by replacing
     variable/function uses with the original [Var|Assign|FunCall] nodes. Then,
     index analysis is performed on declarations. Finally, the
-    {!Context.analyse_context} traversal is re-run to carry the [Index]
-    annotations to the variable/function uses. Note that we can safely assume
-    that no errors will occur during this context analysis, since incorrect uses
-    would have been identified by the earlier context analysis already.
+    {!Context.analyse} traversal is re-run to carry the [Index] annotations to
+    the variable/function uses. Note that we can safely assume that no errors
+    will occur during this context analysis, since incorrect uses would have
+    been identified by the earlier context analysis already.
     *)
 
 (** Main phase function, called by {!Main}. *)

+ 2 - 2
phases/typecheck.ml

@@ -247,7 +247,7 @@ let rec typecheck node =
     add_error node "cannot assign value to array pointer after initialisation"
 
   (* Assigned values must match variable declaration *)
-  | VarLet (dec, None, value, ann) ->
+  | VarLet (dec, None, value, ann) as node ->
     let value, err = typecheck value in
     let err = err @ check_type (typeof dec) value in
     (VarLet (dec, None, value, ann), err)
@@ -276,5 +276,5 @@ let rec typecheck node =
 let phase = function
   | Ast node ->
     let node, err = typecheck node in
-    Ast (quit_on_error node err)
+    Ast (quit_on_error err node)
   | _ -> raise InvalidInput

+ 2 - 2
stringify.ml

@@ -21,7 +21,8 @@ let nameof = function
   | FunDef (_, _, name, _, _, _)
   | VarDec (_, name, _, _)
   | Param (_, name, _)
-  | Dim (name, _) -> name
+  | Dim (name, _)
+  | For (name, _, _, _, _, _) -> name
   | _ -> raise InvalidNode
 
 (* operator -> string *)
@@ -162,7 +163,6 @@ and node2str node =
   | FunUse (dec, args, _) ->
     node2str (FunCall (nameof dec, args, []))
   | Dim (name, _) -> name
-  | ArrayInit (node, _)
   | Arg node -> str node
 
   | VarDecs nodes

+ 1 - 3
types.mli

@@ -133,8 +133,6 @@ and node =
     (** Replacement for [FunCall] with declaration. *)
   | VarLet of node * node list option * node * ann
     (** Replacement for [Assign] with declaration. *)
-  | ArrayInit of node * node list
-    (** Wrapper for array initalisation with dimensions, used by {!Desug}. *)
   | Cond of node * node * node * ann
     (** cond, true_expr, false_expr [<cond> ? <true_expr> : <false_expr>]
         Used for short-circuit evaluation. *)
@@ -229,7 +227,7 @@ type args_record = {
   mutable optimize : bool;
   (** Run optimization phases? *)
   mutable endphase : string;
-  (** Stop at the phase which has the given identifier (see {!Main.phases}). *)
+  (** Stop at the phase which has the given identifier (see [civcc -h]). *)
 }
 
 (** {2 Exceptions} *)

+ 18 - 12
util.ml

@@ -54,6 +54,8 @@ let is_const_id id =
   && id.[0] = '_'
   && id.[String.length id - 1] = '_'
 
+let generate_array_dim name index = "_" ^ generate_id name index
+
 let loc_from_lexpos pstart pend =
   let fname, ystart, yend, xstart, xend =
     pstart.pos_fname,
@@ -203,9 +205,6 @@ let rec traverse u ( *: ) trav node =
     let value, res_value = trav value in
     (Arg value, res_value)
 
-  | ArrayInit (value, dims) ->
-    let value, res_value = trav value in
-    (ArrayInit (value, dims), res_value)
   | Var (dec, Some dims, ann) ->
     let dims, res_dims = trav_all dims in
     (Var (dec, Some dims, ann), res_dims)
@@ -318,7 +317,6 @@ let rec annof = function
   | FunUse (_, _, ann)
   | FunCall (_, _, ann) -> ann
 
-  | ArrayInit (value, _)
   | Expr value
   | Arg value -> annof value
 
@@ -364,7 +362,7 @@ let typeof = function
 
   (* Dim nodes are always type Int, and are copied by context analysis before
    * they are annotated with Type Int, so this match is necessary *)
-  | Dim _ -> Int
+  | Dim _ | For _ -> Int
 
   (* Other nodes must be annotated during typechecking *)
   | node ->
@@ -466,6 +464,7 @@ let prerr_loc_msg loc msg =
 
 let block_body = function
   | Block nodes -> nodes
+  (*| node -> [node]*)
   | _ -> raise InvalidNode
 
 let basetypeof node = match typeof node with
@@ -480,12 +479,15 @@ let nameof = function
   | FunDef (_, _, name, _, _, _)
   | VarDec (_, name, _, _)
   | Param (_, name, _)
-  | Dim (name, _) -> name
+  | Dim (name, _)
+  | For (name, _, _, _, _, _) -> name
   | _ -> raise InvalidNode
 
-let optmap f = function
+let optdo f = function
   | None -> None
-  | Some lst -> Some (List.map f lst)
+  | Some value -> Some (f value)
+
+let optmap f = optdo (List.map f)
 
 let optmapl f = function
   | None -> []
@@ -533,14 +535,18 @@ let print_error = function
   | NodeMsg (node, msg) ->
     (* If no location is given, just stringify the node to at least give
       * some information *)
-    let msg = if locof node = noloc then
-      msg ^ "\nnode: " ^ Stringify.node2str node
-    else msg in
+    let msg =
+      if locof node = noloc then
+        msg ^ "\nnode: " ^ Stringify.node2str node
+      else
+        msg
+    in
     node_error node msg
 
   | NoMsg -> ()
 
-let quit_on_error node = function
+let quit_on_error err node =
+  match err with
   | [] -> node
   | errors ->
     List.iter print_error errors;

+ 14 - 3
util.mli

@@ -28,6 +28,11 @@ val generate_id : string -> int -> string
     E.g., [generate_id "foo" 1] returns ["_foo_1_"]*)
 val generate_const : string -> int -> string
 
+(** Array dimensions are a special case sine they are suffixed by a non-fresh
+    number (the dimension index), therefore an additional underscore is prefixed
+    to assert uniqueness *)
+val generate_array_dim : string -> int -> string
+
 (** {2 AST traversal} *)
 
 (** Default transformation traversal for AST nodes of arbitrary constructor:
@@ -138,8 +143,8 @@ val prt_node : node -> unit
 (** Output a line to stderr if the verbosity level in {!Globals.args} is at
     least as high as the specified verbosity level. The line is indented with a
     number of spaces to match the longest phase identifier (so that logged lines
-    align with ideitifiers logged by {!Main.main}). A newline is added
-    automatically. *)
+    align with identifiers logged by {!Main}). A newline is added automatically.
+    *)
 val log_line : int -> string -> unit
 
 (** Print a line to [stderr] without indent (but do add a newline). *)
@@ -179,7 +184,7 @@ val print_error : error_msg -> unit
 
 (** Raise a {!Types.FatalError} if the given error list is not empty, and
     print the errors before quitting. *)
-val quit_on_error : node -> error_msg list -> node
+val quit_on_error : error_msg list -> node -> node
 
 (** {2 String utilities} *)
 
@@ -204,3 +209,9 @@ val optmapl : ('a -> 'b) -> 'a list option -> 'b list
     to a list like [List.map] does, but the iterator function is called with the
     element's index as an additional argument. *)
 val mapi : (int -> 'a -> 'b) -> 'a list -> 'b list
+
+(** {2 List utilities} *)
+
+(** [optdo f opt] applies [f] to the value of [opt] if [opt] exists, and
+    returns [None] otherwise. *)
+val optdo : ('a -> 'b) -> 'a option -> 'b option