Explorar el Código

Removed preprocessor from parser, improved some error messages, moved some functions to common files

Taddeus Kroes hace 12 años
padre
commit
2126ae7cee
Se han modificado 11 ficheros con 80 adiciones y 81 borrados
  1. 0 1
      .gitignore
  2. 2 7
      Makefile
  3. 1 2
      README.md
  4. 3 1
      ast.ml
  5. 7 1
      lexer.mll
  6. 6 6
      main.ml
  7. 37 44
      parser.mly
  8. 3 4
      phases/context_analysis.ml
  9. 3 14
      phases/parse.ml
  10. 15 1
      util.ml
  11. 3 0
      util.mli

+ 0 - 1
.gitignore

@@ -9,4 +9,3 @@ civicc
 lexer.ml
 lexer.ml
 parser.ml
 parser.ml
 parser.mli
 parser.mli
-parser.mly

+ 2 - 7
Makefile

@@ -1,10 +1,10 @@
 RESULT := civicc
 RESULT := civicc
-SOURCES := ast.ml lexer.mll parser.mly util.mli util.ml stringify.mli \
+SOURCES := ast.ml util.mli util.ml lexer.mll parser.mly stringify.mli \
 	stringify.ml \
 	stringify.ml \
 	phases/parse.ml phases/print.ml phases/desug.ml \
 	phases/parse.ml phases/print.ml phases/desug.ml \
 	phases/context_analysis.ml phases/test.ml \
 	phases/context_analysis.ml phases/test.ml \
 	main.ml
 	main.ml
-PRE_TARGETS := ast.cmi
+PRE_TARGETS := ast.cmi util.cmi
 LIBS := str
 LIBS := str
 
 
 OCAMLFLAGS := -g
 OCAMLFLAGS := -g
@@ -14,11 +14,6 @@ YFLAGS := --infer
 
 
 all: native-code
 all: native-code
 
 
-parser.mly: parser.cpp.mly
-	cpp -o $@ $<
-	line=`grep -m 1 -n '%{' $@ | head -c -4`; \
-	sed -i "1,$$(($$line - 1))d" $@
-
 clean:: myclean
 clean:: myclean
 
 
 .PHONY: myclean
 .PHONY: myclean

+ 1 - 2
README.md

@@ -7,7 +7,6 @@ CiviCaml is a compiler for the CiviC language, written in OCaml.
 Issues & TODO
 Issues & TODO
 -------------
 -------------
 
 
-- Advance newlines in multi-line comments.
+- Run C preprocessor in parse phase
 - Create arguments record to pass to phases, instead of separate variables
 - Create arguments record to pass to phases, instead of separate variables
   (e.g. verbose -> args.verbose).
   (e.g. verbose -> args.verbose).
-- Clean up position getter in parser and remove C preprocessor.

+ 3 - 1
ast.ml

@@ -51,7 +51,9 @@ type repr =
     | Node of node * int                (* ast, verbose *)
     | Node of node * int                (* ast, verbose *)
     | Assembly of string list * int     (* instructions *)
     | Assembly of string list * int     (* instructions *)
 
 
-exception LocError of string * loc
+exception LocError of loc * string
+
+exception NodeError of node * string
 
 
 exception CompileError of string
 exception CompileError of string
 
 

+ 7 - 1
lexer.mll

@@ -86,7 +86,13 @@ rule token = parse
     | '\r' | '\n' | "\r\n"  { next_line lexbuf; token lexbuf }
     | '\r' | '\n' | "\r\n"  { next_line lexbuf; token lexbuf }
     | [' ''\t']+            { token lexbuf }
     | [' ''\t']+            { token lexbuf }
     | "//"[^'\n']*          { token lexbuf }
     | "//"[^'\n']*          { token lexbuf }
-    | "/*"_*"*/"            { token lexbuf }
+    | "/*"                  { comment lexbuf }
 
 
     | eof       { EOF }
     | eof       { EOF }
     | _ as chr  { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }
     | _ as chr  { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }
+
+(* Multi-line comments *)
+and comment = parse
+    | '\r' | '\n' | "\r\n"  { next_line lexbuf; comment lexbuf }
+    | _                     { comment lexbuf }
+    | "*/"                  { token lexbuf }

+ 6 - 6
main.ml

@@ -27,16 +27,13 @@ let compile infile verbose =
         *)
         *)
     ]
     ]
 
 
-let rec repeat str n =
-    if n = 0 then "" else str ^ (repeat str (n - 1))
-
 let print_fancy_error msg loc verbose =
 let print_fancy_error msg loc verbose =
     let (fname, ystart, yend, xstart, xend) = loc in
     let (fname, ystart, yend, xstart, xend) = loc in
-    let line_s = if yend > ystart
+    let line_s = if yend != ystart
         then sprintf "lines %d-%d" ystart yend
         then sprintf "lines %d-%d" ystart yend
         else sprintf "line %d" ystart
         else sprintf "line %d" ystart
     in
     in
-    let char_s = if xend > xstart || yend > ystart
+    let char_s = if xend != xstart || yend != ystart
         then sprintf "characters %d-%d" xstart xend
         then sprintf "characters %d-%d" xstart xend
         else sprintf "character %d" xstart
         else sprintf "character %d" xstart
     in
     in
@@ -82,7 +79,10 @@ let main () =
     | CompileError msg ->
     | CompileError msg ->
         prerr_endline ("Error: " ^ msg);
         prerr_endline ("Error: " ^ msg);
         1
         1
-    | LocError (msg, loc) ->
+    | NodeError (node, msg) ->
+        print_fancy_error msg (Util.locof node) !verbose;
+        1
+    | LocError (loc, msg) ->
         print_fancy_error msg loc !verbose;
         print_fancy_error msg loc !verbose;
         1
         1
 
 

+ 37 - 44
parser.cpp.mly → parser.mly

@@ -1,15 +1,8 @@
-#define LOC (let pstart = $startpos in \
-             let pend = $endpos in ( \
-                pstart.pos_fname, \
-                pstart.pos_lnum, \
-                pend.pos_lnum, \
-                (pstart.pos_cnum - pstart.pos_bol + 1), \
-                (pend.pos_cnum - pend.pos_bol) \
-            ))
-
 %{
 %{
 open Lexing
 open Lexing
 open Ast
 open Ast
+
+let loc = Util.loc_from_lexpos
 %}
 %}
 
 
 /* Tokens */
 /* Tokens */
@@ -53,88 +46,88 @@ basic_type : FLOAT      { Float }
            | BOOL       { Bool }
            | BOOL       { Bool }
 
 
 program : decl*; EOF
 program : decl*; EOF
-          { Program ($1, LOC) }
+          { Program ($1, loc $startpos $endpos) }
 
 
 decl : EXTERN; fun_header; SEMICOL
 decl : EXTERN; fun_header; SEMICOL
-       { let (t, n, p) = $2 in FunDec(t, n, p, LOC) }
+       { let (t, n, p) = $2 in FunDec(t, n, p, loc $startpos $endpos) }
      | boption(EXPORT); fun_header; LBRACE; fun_body; RBRACE
      | boption(EXPORT); fun_header; LBRACE; fun_body; RBRACE
-       { let (t, n, p) = $2 in FunDef ($1, t, n, p, Block $4, LOC) }
+       { let (t, n, p) = $2 in FunDef ($1, t, n, p, Block $4, loc $startpos $endpos) }
 
 
      | EXTERN; basic_type; ID; SEMICOL
      | EXTERN; basic_type; ID; SEMICOL
-       { GlobalDec ($2, $3, LOC) }
+       { GlobalDec ($2, $3, loc $startpos $endpos) }
      | EXTERN; t=basic_type; LBRACK; d=separated_list(COMMA, ID); RBRACK; n=ID; SEMICOL
      | EXTERN; t=basic_type; LBRACK; d=separated_list(COMMA, ID); RBRACK; n=ID; SEMICOL
-       { GlobalDec (ArrayDec (t, d), n, LOC) }
+       { GlobalDec (ArrayDec (t, d), n, loc $startpos $endpos) }
 
 
      | boption(EXPORT); basic_type; ID; SEMICOL
      | boption(EXPORT); basic_type; ID; SEMICOL
-       { GlobalDef ($1, $2, $3, None, LOC) }
+       { GlobalDef ($1, $2, $3, None, loc $startpos $endpos) }
      | boption(EXPORT); basic_type; ID; ASSIGN; expr; SEMICOL
      | boption(EXPORT); basic_type; ID; ASSIGN; expr; SEMICOL
-       { GlobalDef ($1, $2, $3, Some $5, LOC) }
+       { GlobalDef ($1, $2, $3, Some $5, loc $startpos $endpos) }
 
 
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
             RBRACK; n=ID; SEMICOL
             RBRACK; n=ID; SEMICOL
-       { GlobalDef (e, ArrayDef (t, d), n, None, LOC) }
+       { GlobalDef (e, ArrayDef (t, d), n, None, loc $startpos $endpos) }
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
             RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
             RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
-       { GlobalDef (e, ArrayDef (t, d), n, Some v, LOC) }
+       { GlobalDef (e, ArrayDef (t, d), n, Some v, loc $startpos $endpos) }
 
 
 fun_header : ret=basic_type; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
 fun_header : ret=basic_type; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
              { (ret, name, params) }
              { (ret, name, params) }
            | VOID; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
            | VOID; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
              { (Void, name, params) }
              { (Void, name, params) }
 
 
-param : basic_type; ID  { Param ($1, $2, LOC) }
+param : basic_type; ID  { Param ($1, $2, loc $startpos $endpos) }
 
 
 fun_body : var_dec* local_fun_dec* statement* loption(return_statement)
 fun_body : var_dec* local_fun_dec* statement* loption(return_statement)
            { $1 @ $2 @ $3 @ $4 }
            { $1 @ $2 @ $3 @ $4 }
 
 
 local_fun_dec : fun_header; LBRACE; fun_body; RBRACE
 local_fun_dec : fun_header; LBRACE; fun_body; RBRACE
-                { let (t, n, p) = $1 in FunDef (false, t, n, p, Block $3, LOC) }
+                { let (t, n, p) = $1 in FunDef (false, t, n, p, Block $3, loc $startpos $endpos) }
 
 
 var_dec : basic_type; ID; SEMICOL
 var_dec : basic_type; ID; SEMICOL
-          { VarDec ($1, $2, None, LOC) }
+          { VarDec ($1, $2, None, loc $startpos $endpos) }
         | basic_type; ID; ASSIGN; expr; SEMICOL
         | basic_type; ID; ASSIGN; expr; SEMICOL
-          { VarDec ($1, $2, Some $4, LOC) }
+          { VarDec ($1, $2, Some $4, loc $startpos $endpos) }
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; SEMICOL
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; SEMICOL
-          { VarDec (ArrayDef (t, d), n, None, LOC) }
+          { VarDec (ArrayDef (t, d), n, None, loc $startpos $endpos) }
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
-          { VarDec (ArrayDef (t, d), n, Some v, LOC) }
+          { VarDec (ArrayDef (t, d), n, Some v, loc $startpos $endpos) }
 
 
 statement : ID; ASSIGN; expr; SEMICOL
 statement : ID; ASSIGN; expr; SEMICOL
-            { Assign ($1, $3, LOC) }
+            { Assign ($1, $3, loc $startpos $endpos) }
           | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN; SEMICOL
           | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN; SEMICOL
-            { Expr (FunCall (name, params, LOC)) }
+            { Expr (FunCall (name, params, loc $startpos $endpos)) }
           | IF; LPAREN; expr; RPAREN; block
           | IF; LPAREN; expr; RPAREN; block
-            { If ($3, Block $5, LOC) }                      %prec IF
+            { If ($3, Block $5, loc $startpos $endpos) }                      %prec IF
           | IF; LPAREN; expr; RPAREN; block; ELSE; block
           | IF; LPAREN; expr; RPAREN; block; ELSE; block
-            { IfElse ($3, Block $5, Block $7, LOC) }        %prec ELSE
+            { IfElse ($3, Block $5, Block $7, loc $startpos $endpos) }        %prec ELSE
           | WHILE; LPAREN; expr; RPAREN; block
           | WHILE; LPAREN; expr; RPAREN; block
-            { While ($3, Block $5, LOC) }
+            { While ($3, Block $5, loc $startpos $endpos) }
           | DO; block; WHILE; LPAREN; expr; RPAREN; SEMICOL
           | DO; block; WHILE; LPAREN; expr; RPAREN; SEMICOL
-            { DoWhile ($5, Block $2, LOC) }
+            { DoWhile ($5, Block $2, loc $startpos $endpos) }
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr; RPAREN; body=block
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr; RPAREN; body=block
-            { For (id, start, stop, IntConst (1, noloc), Block body, LOC) }
+            { For (id, start, stop, IntConst (1, noloc), Block body, loc $startpos $endpos) }
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr;
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr;
                 COMMA; step=expr; RPAREN; body=block
                 COMMA; step=expr; RPAREN; body=block
-            { For (id, start, stop, step, Block body, LOC) }
+            { For (id, start, stop, step, Block body, loc $startpos $endpos) }
 
 
-return_statement : RETURN; expr; SEMICOL  { [Return ($2, LOC)] }
+return_statement : RETURN; expr; SEMICOL  { [Return ($2, loc $startpos $endpos)] }
 
 
 block : LBRACE; statement*; RBRACE  { $2 }
 block : LBRACE; statement*; RBRACE  { $2 }
       | statement                   { [$1] }
       | statement                   { [$1] }
 
 
 expr : LPAREN; expr; RPAREN                 { $2 }
 expr : LPAREN; expr; RPAREN                 { $2 }
      | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN
      | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN
-       { FunCall (name, params, LOC) }
-     | ID                                   { Var ($1, LOC) }
-     | l=expr; op=binop; r=expr             { Binop (op, l, r, LOC) }
-     | SUB; expr                            { Monop (Neg, $2, LOC) }    %prec NEG
-     | NOT; expr                            { Monop (Not, $2, LOC) }
-     | LPAREN; basic_type; RPAREN; expr     { TypeCast ($2, $4, LOC) }  %prec CAST
-     | FLOAT_CONST                          { FloatConst ($1, LOC) }
-     | INT_CONST                            { IntConst ($1, LOC) }
-     | BOOL_CONST                           { BoolConst ($1, LOC) }
-     | ID; array_const                      { Deref ($1, $2, LOC) }
-     | array_const                          { ArrayConst ($1, LOC) }
+       { FunCall (name, params, loc $startpos $endpos) }
+     | ID                                { Var ($1, loc $startpos $endpos) }
+     | l=expr; op=binop; r=expr          { Binop (op, l, r, loc $startpos $endpos) }
+     | SUB; expr                         { Monop (Neg, $2, loc $startpos $endpos) }    %prec NEG
+     | NOT; expr                         { Monop (Not, $2, loc $startpos $endpos) }
+     | LPAREN; basic_type; RPAREN; expr  { TypeCast ($2, $4, loc $startpos $endpos) }  %prec CAST
+     | FLOAT_CONST                       { FloatConst ($1, loc $startpos $endpos) }
+     | INT_CONST                         { IntConst ($1, loc $startpos $endpos) }
+     | BOOL_CONST                        { BoolConst ($1, loc $startpos $endpos) }
+     | ID; array_const                   { Deref ($1, $2, loc $startpos $endpos) }
+     | array_const                       { ArrayConst ($1, loc $startpos $endpos) }
 
 
 array_const : LBRACK; values=separated_list(COMMA, expr); RBRACK  { values }
 array_const : LBRACK; values=separated_list(COMMA, expr); RBRACK  { values }
 
 

+ 3 - 4
phases/context_analysis.ml

@@ -8,8 +8,7 @@ let analyse_context node =
     let scope = ref StrMap.empty in
     let scope = ref StrMap.empty in
     let add_to_scope name decl depth desc =
     let add_to_scope name decl depth desc =
         if StrMap.mem name !scope then(
         if StrMap.mem name !scope then(
-            eprintf "node: %s\n" (Stringify.node2str !decl);
-            raise (LocError (sprintf "cannot redeclare %s \"%s\"" desc name, (locof !decl)))
+            raise (NodeError (!decl, sprintf "cannot redeclare %s \"%s\"" desc name))
         ) else
         ) else
             scope := StrMap.add name (decl, depth) !scope
             scope := StrMap.add name (decl, depth) !scope
     in
     in
@@ -27,12 +26,12 @@ let analyse_context node =
 
 
         (* for a variable, look for its declaration in the current scope and
         (* for a variable, look for its declaration in the current scope and
          * save a reference with the relative nesting depth *)
          * save a reference with the relative nesting depth *)
-        | Var (name, loc) as node ->
+        | Var (name, _) as node ->
             if StrMap.mem name !scope then
             if StrMap.mem name !scope then
                 let (decl, decl_depth) = StrMap.find name !scope in
                 let (decl, decl_depth) = StrMap.find name !scope in
                 VarUse (node, decl, depth - decl_depth)
                 VarUse (node, decl, depth - decl_depth)
             else
             else
-                raise (LocError (sprintf "undefined variable \"%s\"" name, loc))
+                raise (NodeError (node, (sprintf "undefined variable \"%s\"" name)))
 
 
         (*
         (*
         (* increase nesting level when entering function *)
         (* increase nesting level when entering function *)

+ 3 - 14
phases/parse.ml

@@ -2,26 +2,15 @@ open Lexing
 open Printf
 open Printf
 open Ast
 open Ast
 
 
-let get_position lexbuf =
-    let pos = lexbuf.lex_curr_p in
-    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
-                       (pos.pos_cnum - pos.pos_bol + 1)
-
-(*
 let get_loc lexbuf =
 let get_loc lexbuf =
-    let pos = lexbuf.lex_curr_p in
-    let colnum = (pos.pos_cnum - pos.pos_bol + 1) in
-    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
-                       (pos.pos_cnum - pos.pos_bol + 1)
-*)
+    Util.loc_from_lexpos lexbuf.lex_curr_p lexbuf.lex_curr_p
 
 
 let parse_with_error lexbuf =
 let parse_with_error lexbuf =
     try Some (Parser.program Lexer.token lexbuf) with
     try Some (Parser.program Lexer.token lexbuf) with
     | Lexer.SyntaxError msg ->
     | Lexer.SyntaxError msg ->
-        raise (CompileError (sprintf "%s: %s" (get_position lexbuf) msg))
+        raise (LocError (get_loc lexbuf, msg))
     | Parser.Error ->
     | Parser.Error ->
-        raise (CompileError (sprintf "%s: syntax error" (get_position lexbuf)))
-        (*raise (LocError ("syntax error" (get_loc lexbuf)))*)
+        raise (LocError (get_loc lexbuf, "syntax error"))
 
 
 let phase repr =
 let phase repr =
     let _ = print_endline "- Parse input" in
     let _ = print_endline "- Parse input" in

+ 15 - 1
util.ml

@@ -1,4 +1,5 @@
-include Ast
+open Lexing
+open Ast
 
 
 let var_counter = ref 0
 let var_counter = ref 0
 
 
@@ -6,6 +7,19 @@ let fresh_var prefix =
     var_counter := !var_counter + 1;
     var_counter := !var_counter + 1;
     prefix ^ "$" ^ string_of_int !var_counter
     prefix ^ "$" ^ string_of_int !var_counter
 
 
+let loc_from_lexpos pstart pend =
+    let (fname, ystart, yend, xstart, xend) = (
+        pstart.pos_fname,
+        pstart.pos_lnum,
+        pend.pos_lnum,
+        (pstart.pos_cnum - pstart.pos_bol + 1),
+        (pend.pos_cnum - pend.pos_bol)
+    ) in
+    if ystart = yend && xend < xstart then
+        (fname, ystart, yend, xstart, xstart)
+    else
+        (fname, ystart, yend, xstart, xend)
+
 (* Default tree transformation
 (* Default tree transformation
  * (node -> node) -> node -> node *)
  * (node -> node) -> node -> node *)
 let transform_children trav node =
 let transform_children trav node =

+ 3 - 0
util.mli

@@ -1,6 +1,9 @@
 (* Generate a fresh variable from a given prefix, e.g. "counter" -> "counter$1"  *)
 (* Generate a fresh variable from a given prefix, e.g. "counter" -> "counter$1"  *)
 val fresh_var : string -> string
 val fresh_var : string -> string
 
 
+(* Generate an Ast.loc tuple from Lexing data structures *)
+val loc_from_lexpos : Lexing.position -> Lexing.position -> Ast.loc
+
 (* Default transformation traversal for AST nodes *)
 (* Default transformation traversal for AST nodes *)
 val transform_children : (Ast.node -> Ast.node) -> Ast.node -> Ast.node
 val transform_children : (Ast.node -> Ast.node) -> Ast.node -> Ast.node