Kaynağa Gözat

Transformed parser into lex+yacc parser

Taddeus Kroes 11 yıl önce
ebeveyn
işleme
682819d4a2
8 değiştirilmiş dosya ile 154 ekleme ve 140 silme
  1. 3 0
      .gitignore
  2. 18 5
      Makefile
  3. 37 0
      lexer.mll
  4. 21 106
      parse.ml
  5. 30 0
      parser.mly
  6. 19 8
      pga.ml
  7. 15 10
      stringify.ml
  8. 11 11
      types.ml

+ 3 - 0
.gitignore

@@ -2,4 +2,7 @@
 *.cmi
 *.cmx
 *.o
+lexer.ml
+parser.ml
+parser.mli
 pga

+ 18 - 5
Makefile

@@ -1,21 +1,34 @@
 RESULT := pga
-BASENAMES := types stringify parse pga
+BASENAMES := types stringify parser lexer parse pga
 
 OFILES := $(addsuffix .cmx,$(BASENAMES))
 
 OCAMLCFLAGS := -g
 OCAMLLDFLAGS :=
+OCAMLLDLIBS :=
 
 .PHONY: all clean
-.PRECIOUS: $(addsuffix .cmi,$(BASENAMES))
+.PRECIOUS: $(addprefix .cmi,$(BASENAMES))
 
 all: $(RESULT)
 
+%.ml: %.mll
+	ocamllex -o $@ $<
+
+%.ml: %.mly
+	menhir --infer --explain $<
+
+%.cmi: %.mli
+	ocamlc -c $(OCAMLCFLAGS) -o $@ $<
+
+parser.cmx: parser.cmi
+parser.mli: parser.ml
+
 %.cmx: %.ml
-	ocamlopt -c -o $@ $(<:.cmi=.ml)
+	ocamlfind ocamlopt -package batteries -c $(OCAMLCFLAGS) -o $@ $(<:.cmi=.ml)
 
 $(RESULT): $(OFILES)
-	ocamlopt -o $@ $^
+	ocamlopt -o $@ $(OCAMLLDFLAGS) $(OCAMLLDLIBS) $^
 
 clean:
-	rm -f *.cmi *.cmx *.o $(RESULT)
+	rm -f *.cmi *.cmx *.o lexer.ml parser.ml parser.mli $(RESULT)

+ 37 - 0
lexer.mll

@@ -0,0 +1,37 @@
+{
+open Lexing
+open Parser
+
+exception SyntaxError of string
+
+let next_line lexbuf =
+  let pos = lexbuf.lex_curr_p in
+  lexbuf.lex_curr_p <- {
+    pos with pos_bol = lexbuf.lex_curr_pos;
+             pos_lnum = pos.pos_lnum + 1
+  }
+}
+
+rule token = parse
+  | ';'       { SEMICOL }
+  | '!'       { EXCLAM }
+  | '+'       { PLUS }
+  | '-'       { MINUS }
+  | '#'       { HASH }
+  | '*'       { OMEGA }
+  | '('       { LPAREN }
+  | ')'       { RPAREN }
+(*
+  | '{'       { LBRACE }
+  | '}'       { RBRACE }
+*)
+
+  | ['0'-'9']+ as i { NUMBER (int_of_string i) }
+  | ['A'-'Z'] as c  { UPPER c }
+  | ['a'-'z'] as c  { LOWER c }
+
+  | '\r' | '\n' | "\r\n"  { next_line lexbuf; token lexbuf }
+  | [' ''\t']+            { token lexbuf }
+  | eof | '\000'          { EOF }
+
+  | _ as chr { raise (SyntaxError ("unexpected '" ^ Char.escaped chr ^ "'")) }

+ 21 - 106
parse.ml

@@ -1,108 +1,23 @@
+open Lexing
 open Types
 
-type token = LPAREN | RPAREN | ID of string | HASH | EXCLAM | PLUS | MINUS
-
-let is_alnum c =
-  let i = int_of_char c in
-  i >= 48 & i <= 57 || i >= 65 & i <= 90 || i >= 97 & i <= 122
-
-let tokenize next_char emit =
-  let buf = Buffer.create 32 in
-  let lineno = ref 1 in
-  let colno = ref 1 in
-
-  let unexpected c =
-    ParseError (Printf.sprintf
-      "unexpected '%c' at line %d, character %d"
-      c !lineno !colno
-    )
-  in
-
-  let emit_buf () =
-    if Buffer.length buf = 0 then
-      ()
-    else
-      emit (ID (Buffer.contents buf));
-      Buffer.clear buf
-  in
-  let nobuf c =
-    if Buffer.length buf > 0 then raise (unexpected c)
-  in
-
-  let rec read_all () =
-    match next_char () with
-    | Some c ->
-      begin
-        match c with
-        | '(' -> nobuf c; emit LPAREN
-        | ')' -> emit_buf (); emit RPAREN
-        | ';' -> emit_buf ()
-        | '#' -> nobuf c; emit HASH
-        | '!' -> nobuf c; emit EXCLAM
-        | '+' -> nobuf c; emit PLUS
-        | '-' -> nobuf c; emit MINUS
-        | ' ' | '\t' | '\r' -> emit_buf ()
-        | '\n' -> emit_buf (); incr lineno; colno := 0
-        | c when is_alnum c -> Buffer.add_char buf c
-        | _ -> raise (unexpected c)
-      end;
-      incr colno;
-      read_all ()
-    | None ->
-      emit_buf ()
-  in
-  read_all ()
-
-let program_of_list = function
-  | []  -> Empty
-  | [p] -> p
-  | p   -> Concat p
-
-type exp = E_basic | E_jump  | E_ptest | E_ntest
-
-let parse next_char =
-  let stack = ref [ref []] in
-  let expect = ref E_basic in
-  let append p =
-    let lst = List.hd !stack in
-    lst := p :: !lst
-  in
-  let handler = function
-    | EXCLAM -> append (Primitive Terminate)
-    | HASH   -> expect := E_jump
-    | PLUS   -> expect := E_ptest
-    | MINUS  -> expect := E_ntest
-    | LPAREN -> stack := ref [] :: !stack
-    | RPAREN ->
-      if List.length !stack < 2 then begin
-        raise (ParseError "too many closing parentheses")
-      end;
-      let body = List.rev !(List.hd !stack) in
-      stack := List.tl !stack;
-      append (Repeat (program_of_list body))
-    | ID s ->
-      let p =
-        match !expect with
-        | E_basic -> Basic s
-        | E_jump  -> Jump (int_of_string s)
-        | E_ptest -> Ptest s
-        | E_ntest -> Ntest s
-      in
-      append (Primitive p);
-      expect := E_basic
-  in
-  tokenize next_char handler;
-
-  if List.length !stack > 1 then
-    raise (ParseError "missing closing parenthesis");
-
-  Concat (List.rev !(List.hd !stack))
-
-let parse_string s =
-  let i = ref 0 in
-  let next_char () =
-    if !i = String.length s
-      then None
-      else (incr i; Some (String.get s (!i - 1)))
-  in
-  parse next_char
+let loc_from_lexpos pstart pend =
+  let (fname, ystart, xstart) = begin
+    pstart.pos_fname,
+    pstart.pos_lnum,
+    (pstart.pos_cnum - pstart.pos_bol + 1)
+  end in
+  (fname, ystart, xstart)
+
+let loc_msg lexbuf msg =
+  let p = lexbuf.lex_curr_p in
+  let y = p.pos_lnum in
+  let x = p.pos_cnum - p.pos_bol in
+  Printf.sprintf "%s at line %d, character %d" msg y x
+
+let parse_with_error lexbuf =
+  try Parser.program Lexer.token lexbuf with
+  | Lexer.SyntaxError msg ->
+    raise (FatalError (loc_msg lexbuf msg))
+  | Parser.Error ->
+    raise (FatalError (loc_msg lexbuf "syntax error"))

+ 30 - 0
parser.mly

@@ -0,0 +1,30 @@
+%{
+  open Lexing
+  open Types
+%}
+
+(* tokens *)
+%token SEMICOL EXCLAM PLUS MINUS OMEGA LPAREN RPAREN HASH
+%token <int> NUMBER
+%token <char> UPPER LOWER
+%token EOF
+
+(* start symbol *)
+%type <Types.program> program
+%start program
+
+%%
+
+program:
+  | instrs=separated_list(SEMICOL, instruction) EOF
+  { instrs }
+
+instruction:
+  | c=LOWER              { Basic c }
+  | EXCLAM               { Terminate }
+  | PLUS c=LOWER         { Ptest c }
+  | MINUS c=LOWER        { Ntest c }
+  | HASH n=NUMBER        { Jump n }
+  | i=instruction OMEGA  { Repeat i }
+  | c=UPPER              { Program c }
+  | LPAREN i=separated_list(SEMICOL, instruction) RPAREN { Concat i }

+ 19 - 8
pga.ml

@@ -1,34 +1,45 @@
 open Types
 open Stringify
-open Parse
 
 let main () =
   let usage status =
     prerr_endline ("usage: " ^ Sys.argv.(0) ^ " command [args]");
     prerr_endline "command:";
-    prerr_endline "  help           show this help page";
-    prerr_endline "  echo PROGRAM   pretty-print a program";
+    prerr_endline "  help        show this help page";
+    prerr_endline "  echo TERM   pretty-print a program";
     prerr_endline "input program syntax:";
-    prerr_endline "  - omit omega sign after closing parenthesis";
+    prerr_endline "  - write star (*) instead of omega sign";
     prerr_endline "  - write dollar sign ($) instead of pound sign";
+    prerr_endline "";
+    prerr_endline "A TERM argument may also be omitted and passed on stdin";
+    prerr_endline "instead for convenient use of UNIX pipes";
     exit status
   in
 
   let argc = Array.length Sys.argv in
   if argc = 1 then usage 1;
 
+  let input_term i =
+    let lexbuf =
+      if argc > i
+        then Lexing.from_string Sys.argv.(i)
+        else Lexing.from_channel stdin
+    in
+    Parse.parse_with_error lexbuf
+  in
+
   begin
     try
       match Sys.argv.(1) with
       | "help" ->
         usage 0
-      | "echo" when argc > 2 ->
-        print_endline (string_of_program (parse_string Sys.argv.(2)))
+      | "echo" ->
+        print_endline (string_of_program (input_term 2))
       | _ ->
         usage 1
     with
-    | ParseError msg ->
-      prerr_endline ("parsing error: " ^ msg);
+    | FatalError msg ->
+      prerr_endline msg;
       exit 1
   end;
 

+ 15 - 10
stringify.ml

@@ -1,19 +1,24 @@
 open Types
 
+(*
 let omega = "\xcf\x89"
 let pound = "\xc2\xa3"
+*)
+let omega = "*"
+let pound = "$"
 
-let string_of_basic ins = ins
-
-let string_of_primitive = function
-  | Basic ins -> string_of_basic ins
+let rec string_of_instruction = function
+  | Basic c   -> Char.escaped c
   | Terminate -> "!"
-  | Ptest ins -> "+" ^ string_of_basic ins
-  | Ntest ins -> "-" ^ string_of_basic ins
+  | Ptest c   -> "+" ^ Char.escaped c
+  | Ntest c   -> "-" ^ Char.escaped c
   | Jump len  -> "#" ^ string_of_int len
 
-let rec string_of_program = function
-  | Primitive p -> string_of_primitive p
-  | Concat l    -> String.concat ";" (List.map string_of_program l)
-  | Repeat p    -> "(" ^ string_of_program p ^ ")" ^ omega
+  | Concat l    -> "(" ^ String.concat ";" (List.map string_of_instruction l) ^ ")"
+  | Repeat i    -> string_of_instruction i ^ omega
+
+  | Program c   -> Char.escaped c
   | Empty       -> ""
+
+let rec string_of_program instrs =
+  String.concat ";" (List.map string_of_instruction instrs)

+ 11 - 11
types.ml

@@ -1,16 +1,16 @@
-type basic_instr = string
-
-type primitive =
-  | Basic of basic_instr
+type instruction =
+  | Basic of char
   | Terminate
-  | Ptest of basic_instr
-  | Ntest of basic_instr
+  | Ptest of char
+  | Ntest of char
   | Jump of int
 
-type program =
-  | Primitive of primitive
-  | Concat of program list
-  | Repeat of program
+  | Concat of instruction list
+  | Repeat of instruction
+
+  | Program of char
   | Empty
 
-exception ParseError of string
+type program = instruction list
+
+exception FatalError of string