Przeglądaj źródła

Removed preprocessor from parser, improved some error messages, moved some functions to common files

Taddeus Kroes 12 lat temu
rodzic
commit
2126ae7cee
11 zmienionych plików z 80 dodań i 81 usunięć
  1. 0 1
      .gitignore
  2. 2 7
      Makefile
  3. 1 2
      README.md
  4. 3 1
      ast.ml
  5. 7 1
      lexer.mll
  6. 6 6
      main.ml
  7. 37 44
      parser.mly
  8. 3 4
      phases/context_analysis.ml
  9. 3 14
      phases/parse.ml
  10. 15 1
      util.ml
  11. 3 0
      util.mli

+ 0 - 1
.gitignore

@@ -9,4 +9,3 @@ civicc
 lexer.ml
 parser.ml
 parser.mli
-parser.mly

+ 2 - 7
Makefile

@@ -1,10 +1,10 @@
 RESULT := civicc
-SOURCES := ast.ml lexer.mll parser.mly util.mli util.ml stringify.mli \
+SOURCES := ast.ml util.mli util.ml lexer.mll parser.mly stringify.mli \
 	stringify.ml \
 	phases/parse.ml phases/print.ml phases/desug.ml \
 	phases/context_analysis.ml phases/test.ml \
 	main.ml
-PRE_TARGETS := ast.cmi
+PRE_TARGETS := ast.cmi util.cmi
 LIBS := str
 
 OCAMLFLAGS := -g
@@ -14,11 +14,6 @@ YFLAGS := --infer
 
 all: native-code
 
-parser.mly: parser.cpp.mly
-	cpp -o $@ $<
-	line=`grep -m 1 -n '%{' $@ | head -c -4`; \
-	sed -i "1,$$(($$line - 1))d" $@
-
 clean:: myclean
 
 .PHONY: myclean

+ 1 - 2
README.md

@@ -7,7 +7,6 @@ CiviCaml is a compiler for the CiviC language, written in OCaml.
 Issues & TODO
 -------------
 
-- Advance newlines in multi-line comments.
+- Run C preprocessor in parse phase
 - Create arguments record to pass to phases, instead of separate variables
   (e.g. verbose -> args.verbose).
-- Clean up position getter in parser and remove C preprocessor.

+ 3 - 1
ast.ml

@@ -51,7 +51,9 @@ type repr =
     | Node of node * int                (* ast, verbose *)
     | Assembly of string list * int     (* instructions *)
 
-exception LocError of string * loc
+exception LocError of loc * string
+
+exception NodeError of node * string
 
 exception CompileError of string
 

+ 7 - 1
lexer.mll

@@ -86,7 +86,13 @@ rule token = parse
     | '\r' | '\n' | "\r\n"  { next_line lexbuf; token lexbuf }
     | [' ''\t']+            { token lexbuf }
     | "//"[^'\n']*          { token lexbuf }
-    | "/*"_*"*/"            { token lexbuf }
+    | "/*"                  { comment lexbuf }
 
     | eof       { EOF }
     | _ as chr  { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }
+
+(* Multi-line comments *)
+and comment = parse
+    | '\r' | '\n' | "\r\n"  { next_line lexbuf; comment lexbuf }
+    | _                     { comment lexbuf }
+    | "*/"                  { token lexbuf }

+ 6 - 6
main.ml

@@ -27,16 +27,13 @@ let compile infile verbose =
         *)
     ]
 
-let rec repeat str n =
-    if n = 0 then "" else str ^ (repeat str (n - 1))
-
 let print_fancy_error msg loc verbose =
     let (fname, ystart, yend, xstart, xend) = loc in
-    let line_s = if yend > ystart
+    let line_s = if yend != ystart
         then sprintf "lines %d-%d" ystart yend
         else sprintf "line %d" ystart
     in
-    let char_s = if xend > xstart || yend > ystart
+    let char_s = if xend != xstart || yend != ystart
         then sprintf "characters %d-%d" xstart xend
         else sprintf "character %d" xstart
     in
@@ -82,7 +79,10 @@ let main () =
     | CompileError msg ->
         prerr_endline ("Error: " ^ msg);
         1
-    | LocError (msg, loc) ->
+    | NodeError (node, msg) ->
+        print_fancy_error msg (Util.locof node) !verbose;
+        1
+    | LocError (loc, msg) ->
         print_fancy_error msg loc !verbose;
         1
 

+ 37 - 44
parser.cpp.mly → parser.mly

@@ -1,15 +1,8 @@
-#define LOC (let pstart = $startpos in \
-             let pend = $endpos in ( \
-                pstart.pos_fname, \
-                pstart.pos_lnum, \
-                pend.pos_lnum, \
-                (pstart.pos_cnum - pstart.pos_bol + 1), \
-                (pend.pos_cnum - pend.pos_bol) \
-            ))
-
 %{
 open Lexing
 open Ast
+
+let loc = Util.loc_from_lexpos
 %}
 
 /* Tokens */
@@ -53,88 +46,88 @@ basic_type : FLOAT      { Float }
            | BOOL       { Bool }
 
 program : decl*; EOF
-          { Program ($1, LOC) }
+          { Program ($1, loc $startpos $endpos) }
 
 decl : EXTERN; fun_header; SEMICOL
-       { let (t, n, p) = $2 in FunDec(t, n, p, LOC) }
+       { let (t, n, p) = $2 in FunDec(t, n, p, loc $startpos $endpos) }
      | boption(EXPORT); fun_header; LBRACE; fun_body; RBRACE
-       { let (t, n, p) = $2 in FunDef ($1, t, n, p, Block $4, LOC) }
+       { let (t, n, p) = $2 in FunDef ($1, t, n, p, Block $4, loc $startpos $endpos) }
 
      | EXTERN; basic_type; ID; SEMICOL
-       { GlobalDec ($2, $3, LOC) }
+       { GlobalDec ($2, $3, loc $startpos $endpos) }
      | EXTERN; t=basic_type; LBRACK; d=separated_list(COMMA, ID); RBRACK; n=ID; SEMICOL
-       { GlobalDec (ArrayDec (t, d), n, LOC) }
+       { GlobalDec (ArrayDec (t, d), n, loc $startpos $endpos) }
 
      | boption(EXPORT); basic_type; ID; SEMICOL
-       { GlobalDef ($1, $2, $3, None, LOC) }
+       { GlobalDef ($1, $2, $3, None, loc $startpos $endpos) }
      | boption(EXPORT); basic_type; ID; ASSIGN; expr; SEMICOL
-       { GlobalDef ($1, $2, $3, Some $5, LOC) }
+       { GlobalDef ($1, $2, $3, Some $5, loc $startpos $endpos) }
 
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
             RBRACK; n=ID; SEMICOL
-       { GlobalDef (e, ArrayDef (t, d), n, None, LOC) }
+       { GlobalDef (e, ArrayDef (t, d), n, None, loc $startpos $endpos) }
      | e=boption(EXPORT); t=basic_type; LBRACK; d=separated_list(COMMA, expr);
             RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
-       { GlobalDef (e, ArrayDef (t, d), n, Some v, LOC) }
+       { GlobalDef (e, ArrayDef (t, d), n, Some v, loc $startpos $endpos) }
 
 fun_header : ret=basic_type; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
              { (ret, name, params) }
            | VOID; name=ID; LPAREN; params=separated_list(COMMA, param); RPAREN
              { (Void, name, params) }
 
-param : basic_type; ID  { Param ($1, $2, LOC) }
+param : basic_type; ID  { Param ($1, $2, loc $startpos $endpos) }
 
 fun_body : var_dec* local_fun_dec* statement* loption(return_statement)
            { $1 @ $2 @ $3 @ $4 }
 
 local_fun_dec : fun_header; LBRACE; fun_body; RBRACE
-                { let (t, n, p) = $1 in FunDef (false, t, n, p, Block $3, LOC) }
+                { let (t, n, p) = $1 in FunDef (false, t, n, p, Block $3, loc $startpos $endpos) }
 
 var_dec : basic_type; ID; SEMICOL
-          { VarDec ($1, $2, None, LOC) }
+          { VarDec ($1, $2, None, loc $startpos $endpos) }
         | basic_type; ID; ASSIGN; expr; SEMICOL
-          { VarDec ($1, $2, Some $4, LOC) }
+          { VarDec ($1, $2, Some $4, loc $startpos $endpos) }
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; SEMICOL
-          { VarDec (ArrayDef (t, d), n, None, LOC) }
+          { VarDec (ArrayDef (t, d), n, None, loc $startpos $endpos) }
         | t=basic_type; LBRACK; d=separated_list(COMMA, expr); RBRACK; n=ID; ASSIGN; v=expr; SEMICOL
-          { VarDec (ArrayDef (t, d), n, Some v, LOC) }
+          { VarDec (ArrayDef (t, d), n, Some v, loc $startpos $endpos) }
 
 statement : ID; ASSIGN; expr; SEMICOL
-            { Assign ($1, $3, LOC) }
+            { Assign ($1, $3, loc $startpos $endpos) }
           | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN; SEMICOL
-            { Expr (FunCall (name, params, LOC)) }
+            { Expr (FunCall (name, params, loc $startpos $endpos)) }
           | IF; LPAREN; expr; RPAREN; block
-            { If ($3, Block $5, LOC) }                      %prec IF
+            { If ($3, Block $5, loc $startpos $endpos) }                      %prec IF
           | IF; LPAREN; expr; RPAREN; block; ELSE; block
-            { IfElse ($3, Block $5, Block $7, LOC) }        %prec ELSE
+            { IfElse ($3, Block $5, Block $7, loc $startpos $endpos) }        %prec ELSE
           | WHILE; LPAREN; expr; RPAREN; block
-            { While ($3, Block $5, LOC) }
+            { While ($3, Block $5, loc $startpos $endpos) }
           | DO; block; WHILE; LPAREN; expr; RPAREN; SEMICOL
-            { DoWhile ($5, Block $2, LOC) }
+            { DoWhile ($5, Block $2, loc $startpos $endpos) }
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr; RPAREN; body=block
-            { For (id, start, stop, IntConst (1, noloc), Block body, LOC) }
+            { For (id, start, stop, IntConst (1, noloc), Block body, loc $startpos $endpos) }
           | FOR; LPAREN; INT; id=ID; ASSIGN; start=expr; COMMA; stop=expr;
                 COMMA; step=expr; RPAREN; body=block
-            { For (id, start, stop, step, Block body, LOC) }
+            { For (id, start, stop, step, Block body, loc $startpos $endpos) }
 
-return_statement : RETURN; expr; SEMICOL  { [Return ($2, LOC)] }
+return_statement : RETURN; expr; SEMICOL  { [Return ($2, loc $startpos $endpos)] }
 
 block : LBRACE; statement*; RBRACE  { $2 }
       | statement                   { [$1] }
 
 expr : LPAREN; expr; RPAREN                 { $2 }
      | name=ID; LPAREN; params=separated_list(COMMA, expr); RPAREN
-       { FunCall (name, params, LOC) }
-     | ID                                   { Var ($1, LOC) }
-     | l=expr; op=binop; r=expr             { Binop (op, l, r, LOC) }
-     | SUB; expr                            { Monop (Neg, $2, LOC) }    %prec NEG
-     | NOT; expr                            { Monop (Not, $2, LOC) }
-     | LPAREN; basic_type; RPAREN; expr     { TypeCast ($2, $4, LOC) }  %prec CAST
-     | FLOAT_CONST                          { FloatConst ($1, LOC) }
-     | INT_CONST                            { IntConst ($1, LOC) }
-     | BOOL_CONST                           { BoolConst ($1, LOC) }
-     | ID; array_const                      { Deref ($1, $2, LOC) }
-     | array_const                          { ArrayConst ($1, LOC) }
+       { FunCall (name, params, loc $startpos $endpos) }
+     | ID                                { Var ($1, loc $startpos $endpos) }
+     | l=expr; op=binop; r=expr          { Binop (op, l, r, loc $startpos $endpos) }
+     | SUB; expr                         { Monop (Neg, $2, loc $startpos $endpos) }    %prec NEG
+     | NOT; expr                         { Monop (Not, $2, loc $startpos $endpos) }
+     | LPAREN; basic_type; RPAREN; expr  { TypeCast ($2, $4, loc $startpos $endpos) }  %prec CAST
+     | FLOAT_CONST                       { FloatConst ($1, loc $startpos $endpos) }
+     | INT_CONST                         { IntConst ($1, loc $startpos $endpos) }
+     | BOOL_CONST                        { BoolConst ($1, loc $startpos $endpos) }
+     | ID; array_const                   { Deref ($1, $2, loc $startpos $endpos) }
+     | array_const                       { ArrayConst ($1, loc $startpos $endpos) }
 
 array_const : LBRACK; values=separated_list(COMMA, expr); RBRACK  { values }
 

+ 3 - 4
phases/context_analysis.ml

@@ -8,8 +8,7 @@ let analyse_context node =
     let scope = ref StrMap.empty in
     let add_to_scope name decl depth desc =
         if StrMap.mem name !scope then(
-            eprintf "node: %s\n" (Stringify.node2str !decl);
-            raise (LocError (sprintf "cannot redeclare %s \"%s\"" desc name, (locof !decl)))
+            raise (NodeError (!decl, sprintf "cannot redeclare %s \"%s\"" desc name))
         ) else
             scope := StrMap.add name (decl, depth) !scope
     in
@@ -27,12 +26,12 @@ let analyse_context node =
 
         (* for a variable, look for its declaration in the current scope and
          * save a reference with the relative nesting depth *)
-        | Var (name, loc) as node ->
+        | Var (name, _) as node ->
             if StrMap.mem name !scope then
                 let (decl, decl_depth) = StrMap.find name !scope in
                 VarUse (node, decl, depth - decl_depth)
             else
-                raise (LocError (sprintf "undefined variable \"%s\"" name, loc))
+                raise (NodeError (node, (sprintf "undefined variable \"%s\"" name)))
 
         (*
         (* increase nesting level when entering function *)

+ 3 - 14
phases/parse.ml

@@ -2,26 +2,15 @@ open Lexing
 open Printf
 open Ast
 
-let get_position lexbuf =
-    let pos = lexbuf.lex_curr_p in
-    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
-                       (pos.pos_cnum - pos.pos_bol + 1)
-
-(*
 let get_loc lexbuf =
-    let pos = lexbuf.lex_curr_p in
-    let colnum = (pos.pos_cnum - pos.pos_bol + 1) in
-    sprintf "%s:%d:%d" pos.pos_fname pos.pos_lnum
-                       (pos.pos_cnum - pos.pos_bol + 1)
-*)
+    Util.loc_from_lexpos lexbuf.lex_curr_p lexbuf.lex_curr_p
 
 let parse_with_error lexbuf =
     try Some (Parser.program Lexer.token lexbuf) with
     | Lexer.SyntaxError msg ->
-        raise (CompileError (sprintf "%s: %s" (get_position lexbuf) msg))
+        raise (LocError (get_loc lexbuf, msg))
     | Parser.Error ->
-        raise (CompileError (sprintf "%s: syntax error" (get_position lexbuf)))
-        (*raise (LocError ("syntax error" (get_loc lexbuf)))*)
+        raise (LocError (get_loc lexbuf, "syntax error"))
 
 let phase repr =
     let _ = print_endline "- Parse input" in

+ 15 - 1
util.ml

@@ -1,4 +1,5 @@
-include Ast
+open Lexing
+open Ast
 
 let var_counter = ref 0
 
@@ -6,6 +7,19 @@ let fresh_var prefix =
     var_counter := !var_counter + 1;
     prefix ^ "$" ^ string_of_int !var_counter
 
+let loc_from_lexpos pstart pend =
+    let (fname, ystart, yend, xstart, xend) = (
+        pstart.pos_fname,
+        pstart.pos_lnum,
+        pend.pos_lnum,
+        (pstart.pos_cnum - pstart.pos_bol + 1),
+        (pend.pos_cnum - pend.pos_bol)
+    ) in
+    if ystart = yend && xend < xstart then
+        (fname, ystart, yend, xstart, xstart)
+    else
+        (fname, ystart, yend, xstart, xend)
+
 (* Default tree transformation
  * (node -> node) -> node -> node *)
 let transform_children trav node =

+ 3 - 0
util.mli

@@ -1,6 +1,9 @@
 (* Generate a fresh variable from a given prefix, e.g. "counter" -> "counter$1"  *)
 val fresh_var : string -> string
 
+(* Generate an Ast.loc tuple from Lexing data structures *)
+val loc_from_lexpos : Lexing.position -> Lexing.position -> Ast.loc
+
 (* Default transformation traversal for AST nodes *)
 val transform_children : (Ast.node -> Ast.node) -> Ast.node -> Ast.node