Ver Fonte

Removed preprocessor from parser, improved some error messages, moved some functions to common files

Taddeus Kroes há 12 anos atrás
pai
commit
2126ae7cee
11 ficheiros alterados com 80 adições e 81 exclusões
  1. 0 1
      .gitignore
  2. 2 7
      Makefile
  3. 1 2
      README.md
  4. 3 1
      ast.ml
  5. 7 1
      lexer.mll
  6. 6 6
      main.ml
  7. 37 44
      parser.mly
  8. 3 4
      phases/context_analysis.ml
  9. 3 14
      phases/parse.ml
  10. 15 1
      util.ml
  11. 3 0
      util.mli

+ 0 - 1
.gitignore

@@ -9,4 +9,3 @@ civicc
 lexer.ml
 parser.ml
 parser.mli
-parser.mly

+ 2 - 7
Makefile

@@ -1,10 +1,10 @@
 RESULT := civicc
-SOURCES := ast.ml lexer.mll parser.mly util.mli util.ml stringify.mli \
+SOURCES := ast.ml util.mli util.ml lexer.mll parser.mly stringify.mli \
 	stringify.ml \
 	phases/parse.ml phases/print.ml phases/desug.ml \
 	phases/context_analysis.ml phases/test.ml \
 	main.ml
-PRE_TARGETS := ast.cmi
+PRE_TARGETS := ast.cmi util.cmi
 LIBS := str
 
 OCAMLFLAGS := -g
@@ -14,11 +14,6 @@ YFLAGS := --infer
 
 all: native-code
 
-parser.mly: parser.cpp.mly
-	cpp -o $@ $<
-	line=`grep -m 1 -n '%{' $@ | head -c -4`; \
-	sed -i "1,$$(($$line - 1))d" $@
-
 clean:: myclean
 
 .PHONY: myclean

+ 1 - 2
README.md

@@ -7,7 +7,6 @@ CiviCaml is a compiler for the CiviC language, written in OCaml.
 Issues & TODO
 -------------
 
-- Advance newlines in multi-line comments.
+- Run C preprocessor in parse phase
 - Create arguments record to pass to phases, instead of separate variables
   (e.g. verbose -> args.verbose).
-- Clean up position getter in parser and remove C preprocessor.

+ 3 - 1
ast.ml

@@ -51,7 +51,9 @@ type repr =
     | Node of node * int                (* ast, verbose *)
     | Assembly of string list * int     (* instructions *)
 
-exception LocError of string * loc
+exception LocError of loc * string
+
+exception NodeError of node * string
 
 exception CompileError of string
 

+ 7 - 1
lexer.mll

@@ -86,7 +86,13 @@ rule token = parse
     | '\r' | '\n' | "\r\n"  { next_line lexbuf; token lexbuf }
     | [' ''\t']+            { token lexbuf }
     | "//"[^'\n']*          { token lexbuf }
-    | "/*"_*"*/"            { token lexbuf }
+    | "/*"                  { comment lexbuf }
 
     | eof       { EOF }
     | _ as chr  { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }
+
+(* Multi-line comments *)
+and comment = parse
+    | '\r' | '\n' | "\r\n"  { next_line lexbuf; comment lexbuf }
+    | _                     { comment lexbuf }
+    | "*/"                  { token lexbuf }

+ 6 - 6
main.ml

@@ -27,16 +27,13 @@ let compile infile verbose =
         *)
     ]
 
-let rec repeat str n =
-    if n = 0 then "" else str ^ (repeat str (n - 1))
-
 let print_fancy_error msg loc verbose =
     let (fname, ystart, yend, xstart, xend) = loc in
-    let line_s = if yend > ystart
+    let line_s = if yend != ystart
         then sprintf "lines %d-%d" ystart yend
         else sprintf "line %d" ystart
     in
-    let char_s = if xend > xstart || yend > ystart
+    let char_s = if xend != xstart || yend != ystart
         then sprintf "characters %d-%d" xstart xend
         else sprintf "character %d" xstart
     in
@@ -82,7 +79,10 @@ let main () =
     | CompileError msg ->
         prerr_endline ("Error: " ^ msg);
         1
-    | LocError (msg, loc) ->
+    | NodeError (node, msg) ->
+        print_fancy_error msg (Util.locof node) !verbose;
+        1
+    | LocError (loc, msg) ->
         print_fancy_error msg loc !verbose;
         1
 

+ 37 - 44
parser.cpp.mly → parser.mly

@@ -1,15 +1,8 @@
-#define LOC (let pstart = $startpos in \
-             let pend = $endpos in ( \
-                pstart.pos_fname, \
-                pstart.pos_lnum, \
-                pend.pos_lnum, \
-                (pstart.pos_cnum - pstart.pos_bol + 1), \
-                (pend.pos_cnum - pend.pos_bol) \
-            ))
-
 %{
 open Lexing
 open Ast
+
+let loc = Util.loc_from_lexpos
 %}
 
 /* Tokens */
@@ -53,88 +46,88 @@ basic_type : FLOAT      { Float }
            | BOOL       { Bool }
 
 program : decl*; EOF
-          { Program ($1, LOC) }
+          { Program ($1, loc $startpos $endpos) }
 
 decl : EXTERN; fun_header; SEMICOL
-       { let (t, n, p) = $2 in FunDec(t, n, p, LOC) }
+       { let (t, n, p) = $2 in FunDec(t, n, p, loc $startpos $endpos) }
      | boption(EXPORT); fun_header; LBRACE; fun_body; RBRACE
-       { let (t, n, p) = $2 in FunDef ($1, t, n, p, Block $4, LOC) }
+       { let (t, n, p) = $2 in FunDef ($1, t, n, p, Block $4, loc $startpos $endpos) }
 
      | EXTERN; basic_type; ID; SEMICOL
-       { GlobalDec ($2, $3, LOC) }
+       { GlobalDec ($2, $3, loc $startpos $endpos) }
      | EXTERN; t=basic_type; LBRACK; d=separated_list(COMMA, ID); RBRACK; n=ID; SEMICOL
-       { GlobalDec (ArrayDec (t, d), n, LOC) }
+       { GlobalDec (ArrayDec (t, d), n, loc $startpos $endpos) }
 
      | boption(EXPORT); basic_type; ID; SEMICOL
-       { GlobalDef ($1, $2, $3, None, LOC) }
+       { GlobalDef ($1, $2, $3, None, loc $startpos $endpos) }
      | boption(EXPORT); basic_type; ID; ASSIGN; expr; SEMICOL
-       { GlobalDef ($1, $2, $3, Some $5, LOC) }
+       { GlobalDef ($1, $2, $3, Some $5, loc $startpos $endpos) }
 
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
             RBRACK; n=ID; SEMICOL
-       { GlobalDef (e, ArrayDef (t, d), n, None, LOC) }
+       { GlobalDef (e, ArrayDef (t, d), n, None, loc $startpos $endpos) }
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
             RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
-       { GlobalDef (e, ArrayDef (t, d), n, Some v, LOC) }
+       { GlobalDef (e, ArrayDef (t, d), n, Some v, loc $startpos $endpos) }
 
 fun_header : ret=basic_type; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
              { (ret, name, params) }
            | VOID; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
              { (Void, name, params) }
 
-param : basic_type; ID  { Param ($1, $2, LOC) }
+param : basic_type; ID  { Param ($1, $2, loc $startpos $endpos) }
 
 fun_body : var_dec* local_fun_dec* statement* loption(return_statement)
            { $1 @ $2 @ $3 @ $4 }
 
 local_fun_dec : fun_header; LBRACE; fun_body; RBRACE
-                { let (t, n, p) = $1 in FunDef (false, t, n, p, Block $3, LOC) }
+                { let (t, n, p) = $1 in FunDef (false, t, n, p, Block $3, loc $startpos $endpos) }
 
 var_dec : basic_type; ID; SEMICOL
-          { VarDec ($1, $2, None, LOC) }
+          { VarDec ($1, $2, None, loc $startpos $endpos) }
         | basic_type; ID; ASSIGN; expr; SEMICOL
-          { VarDec ($1, $2, Some $4, LOC) }
+          { VarDec ($1, $2, Some $4, loc $startpos $endpos) }
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; SEMICOL
-          { VarDec (ArrayDef (t, d), n, None, LOC) }
+          { VarDec (ArrayDef (t, d), n, None, loc $startpos $endpos) }
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
-          { VarDec (ArrayDef (t, d), n, Some v, LOC) }
+          { VarDec (ArrayDef (t, d), n, Some v, loc $startpos $endpos) }
 
 statement : ID; ASSIGN; expr; SEMICOL
-            { Assign ($1, $3, LOC) }
+            { Assign ($1, $3, loc $startpos $endpos) }
           | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN; SEMICOL
-            { Expr (FunCall (name, params, LOC)) }
+            { Expr (FunCall (name, params, loc $startpos $endpos)) }
           | IF; LPAREN; expr; RPAREN; block
-            { If ($3, Block $5, LOC) }                      %prec IF
+            { If ($3, Block $5, loc $startpos $endpos) }                      %prec IF
           | IF; LPAREN; expr; RPAREN; block; ELSE; block
-            { IfElse ($3, Block $5, Block $7, LOC) }        %prec ELSE
+            { IfElse ($3, Block $5, Block $7, loc $startpos $endpos) }        %prec ELSE
           | WHILE; LPAREN; expr; RPAREN; block
-            { While ($3, Block $5, LOC) }
+            { While ($3, Block $5, loc $startpos $endpos) }
           | DO; block; WHILE; LPAREN; expr; RPAREN; SEMICOL
-            { DoWhile ($5, Block $2, LOC) }
+            { DoWhile ($5, Block $2, loc $startpos $endpos) }
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr; RPAREN; body=block
-            { For (id, start, stop, IntConst (1, noloc), Block body, LOC) }
+            { For (id, start, stop, IntConst (1, noloc), Block body, loc $startpos $endpos) }
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr;
                 COMMA; step=expr; RPAREN; body=block
-            { For (id, start, stop, step, Block body, LOC) }
+            { For (id, start, stop, step, Block body, loc $startpos $endpos) }
 
-return_statement : RETURN; expr; SEMICOL  { [Return ($2, LOC)] }
+return_statement : RETURN; expr; SEMICOL  { [Return ($2, loc $startpos $endpos)] }
 
 block : LBRACE; statement*; RBRACE  { $2 }
       | statement                   { [$1] }
 
 expr : LPAREN; expr; RPAREN                 { $2 }
      | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN
-       { FunCall (name, params, LOC) }
-     | ID                                   { Var ($1, LOC) }
-     | l=expr; op=binop; r=expr             { Binop (op, l, r, LOC) }
-     | SUB; expr                            { Monop (Neg, $2, LOC) }    %prec NEG
-     | NOT; expr                            { Monop (Not, $2, LOC) }
-     | LPAREN; basic_type; RPAREN; expr     { TypeCast ($2, $4, LOC) }  %prec CAST
-     | FLOAT_CONST                          { FloatConst ($1, LOC) }
-     | INT_CONST                            { IntConst ($1, LOC) }
-     | BOOL_CONST                           { BoolConst ($1, LOC) }
-     | ID; array_const                      { Deref ($1, $2, LOC) }
-     | array_const                          { ArrayConst ($1, LOC) }
+       { FunCall (name, params, loc $startpos $endpos) }
+     | ID                                { Var ($1, loc $startpos $endpos) }
+     | l=expr; op=binop; r=expr          { Binop (op, l, r, loc $startpos $endpos) }
+     | SUB; expr                         { Monop (Neg, $2, loc $startpos $endpos) }    %prec NEG
+     | NOT; expr                         { Monop (Not, $2, loc $startpos $endpos) }
+     | LPAREN; basic_type; RPAREN; expr  { TypeCast ($2, $4, loc $startpos $endpos) }  %prec CAST
+     | FLOAT_CONST                       { FloatConst ($1, loc $startpos $endpos) }
+     | INT_CONST                         { IntConst ($1, loc $startpos $endpos) }
+     | BOOL_CONST                        { BoolConst ($1, loc $startpos $endpos) }
+     | ID; array_const                   { Deref ($1, $2, loc $startpos $endpos) }
+     | array_const                       { ArrayConst ($1, loc $startpos $endpos) }
 
 array_const : LBRACK; values=separated_list(COMMA, expr); RBRACK  { values }
 

+ 3 - 4
phases/context_analysis.ml

@@ -8,8 +8,7 @@ let analyse_context node =
     let scope = ref StrMap.empty in
     let add_to_scope name decl depth desc =
         if StrMap.mem name !scope then(
-            eprintf "node: %s\n" (Stringify.node2str !decl);
-            raise (LocError (sprintf "cannot redeclare %s \"%s\"" desc name, (locof !decl)))
+            raise (NodeError (!decl, sprintf "cannot redeclare %s \"%s\"" desc name))
         ) else
             scope := StrMap.add name (decl, depth) !scope
     in
@@ -27,12 +26,12 @@ let analyse_context node =
 
         (* for a variable, look for its declaration in the current scope and
          * save a reference with the relative nesting depth *)
-        | Var (name, loc) as node ->
+        | Var (name, _) as node ->
             if StrMap.mem name !scope then
                 let (decl, decl_depth) = StrMap.find name !scope in
                 VarUse (node, decl, depth - decl_depth)
             else
-                raise (LocError (sprintf "undefined variable \"%s\"" name, loc))
+                raise (NodeError (node, (sprintf "undefined variable \"%s\"" name)))
 
         (*
         (* increase nesting level when entering function *)

+ 3 - 14
phases/parse.ml

@@ -2,26 +2,15 @@ open Lexing
 open Printf
 open Ast
 
-let get_position lexbuf =
-    let pos = lexbuf.lex_curr_p in
-    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
-                       (pos.pos_cnum - pos.pos_bol + 1)
-
-(*
 let get_loc lexbuf =
-    let pos = lexbuf.lex_curr_p in
-    let colnum = (pos.pos_cnum - pos.pos_bol + 1) in
-    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
-                       (pos.pos_cnum - pos.pos_bol + 1)
-*)
+    Util.loc_from_lexpos lexbuf.lex_curr_p lexbuf.lex_curr_p
 
 let parse_with_error lexbuf =
     try Some (Parser.program Lexer.token lexbuf) with
     | Lexer.SyntaxError msg ->
-        raise (CompileError (sprintf "%s: %s" (get_position lexbuf) msg))
+        raise (LocError (get_loc lexbuf, msg))
     | Parser.Error ->
-        raise (CompileError (sprintf "%s: syntax error" (get_position lexbuf)))
-        (*raise (LocError ("syntax error" (get_loc lexbuf)))*)
+        raise (LocError (get_loc lexbuf, "syntax error"))
 
 let phase repr =
     let _ = print_endline "- Parse input" in

+ 15 - 1
util.ml

@@ -1,4 +1,5 @@
-include Ast
+open Lexing
+open Ast
 
 let var_counter = ref 0
 
@@ -6,6 +7,19 @@ let fresh_var prefix =
     var_counter := !var_counter + 1;
     prefix ^ "$" ^ string_of_int !var_counter
 
+let loc_from_lexpos pstart pend =
+    let (fname, ystart, yend, xstart, xend) = (
+        pstart.pos_fname,
+        pstart.pos_lnum,
+        pend.pos_lnum,
+        (pstart.pos_cnum - pstart.pos_bol + 1),
+        (pend.pos_cnum - pend.pos_bol)
+    ) in
+    if ystart = yend && xend < xstart then
+        (fname, ystart, yend, xstart, xstart)
+    else
+        (fname, ystart, yend, xstart, xend)
+
 (* Default tree transformation
  * (node -> node) -> node -> node *)
 let transform_children trav node =

+ 3 - 0
util.mli

@@ -1,6 +1,9 @@
 (* Generate a fresh variable from a given prefix, e.g. "counter" -> "counter$1"  *)
 val fresh_var : string -> string
 
+(* Generate an Ast.loc tuple from Lexing data structures *)
+val loc_from_lexpos : Lexing.position -> Lexing.position -> Ast.loc
+
 (* Default transformation traversal for AST nodes *)
 val transform_children : (Ast.node -> Ast.node) -> Ast.node -> Ast.node