Taddeus Kroes 11 лет назад
Родитель
Сommit
23418c16e1
4 измененных файлов с 67 добавлено и 39 удалено
  1. 51 31
      parse.ml
  2. 12 7
      pga.ml
  3. 2 1
      stringify.ml
  4. 2 0
      types.ml

+ 51 - 31
parse.ml

@@ -2,8 +2,22 @@ open Types
 
 type token = LPAREN | RPAREN | ID of string | HASH | EXCLAM | PLUS | MINUS
 
+let is_alnum c =
+  let i = int_of_char c in
+  i >= 48 & i <= 57 || i >= 65 & i <= 90 || i >= 97 & i <= 122
+
 let tokenize next_char emit =
   let buf = Buffer.create 32 in
+  let lineno = ref 1 in
+  let colno = ref 1 in
+
+  let unexpected c =
+    ParseError (Printf.sprintf
+      "unexpected '%c' at line %d, character %d"
+      c !lineno !colno
+    )
+  in
+
   let emit_buf () =
     match Buffer.length buf with
     | 0 -> ()
@@ -11,22 +25,33 @@ let tokenize next_char emit =
       emit (ID (Buffer.contents buf));
       Buffer.clear buf
   in
-  let nobuf () =
-    assert (Buffer.length buf = 0)
+  let nobuf c =
+    if Buffer.length buf > 0 then raise (unexpected c)
   in
-  let rec n () =
+
+  let rec read_all () =
     match next_char () with
-    | Some '(' -> nobuf (); emit LPAREN; n ()
-    | Some ')' -> emit_buf (); emit RPAREN; n ()
-    | Some ';' -> emit_buf (); n ()
-    | Some '#' -> nobuf (); emit HASH; n ()
-    | Some '!' -> nobuf (); emit EXCLAM; n ()
-    | Some '+' -> nobuf (); emit PLUS; n ()
-    | Some '-' -> nobuf (); emit MINUS; n ()
-    | Some (' ' | '\t') -> emit_buf (); n ()
-    | Some c   -> Buffer.add_char buf c; n ()
-    | None     -> emit_buf ()
-  in n ()
+    | Some c ->
+      begin
+        match c with
+        | '(' -> nobuf c; emit LPAREN
+        | ')' -> emit_buf (); emit RPAREN
+        | ';' -> emit_buf ()
+        | '#' -> nobuf c; emit HASH
+        | '!' -> nobuf c; emit EXCLAM
+        | '+' -> nobuf c; emit PLUS
+        | '-' -> nobuf c; emit MINUS
+        | ' ' | '\t' | '\r' -> emit_buf ()
+        | '\n' -> emit_buf (); incr lineno; colno := 0
+        | c when is_alnum c -> Buffer.add_char buf c
+        | _ -> raise (unexpected c)
+      end;
+      incr colno;
+      read_all ()
+    | None ->
+      emit_buf ()
+  in
+  read_all ()
 
 let program_of_list = function
   | []  -> Empty
@@ -35,7 +60,7 @@ let program_of_list = function
 
 type exp = E_basic | E_jump  | E_ptest | E_ntest
 
-let parse tokenize =
+let parse next_char =
   let stack = ref [ref []] in
   let expect = ref E_basic in
   let append p =
@@ -43,6 +68,15 @@ let parse tokenize =
     lst := p :: !lst
   in
   let handler = function
+    | EXCLAM -> append (Primitive Terminate)
+    | HASH   -> expect := E_jump
+    | PLUS   -> expect := E_ptest
+    | MINUS  -> expect := E_ntest
+    | LPAREN -> stack := ref [] :: !stack
+    | RPAREN ->
+      let body = List.rev !(List.hd !stack) in
+      stack := List.tl !stack;
+      append (Repeat (program_of_list body))
     | ID s ->
       let p =
         match !expect with
@@ -53,22 +87,8 @@ let parse tokenize =
       in
       append (Primitive p);
       expect := E_basic
-    | EXCLAM ->
-      append (Primitive Terminate)
-    | HASH ->
-      expect := E_jump
-    | PLUS ->
-      expect := E_ptest
-    | MINUS ->
-      expect := E_ntest
-    | LPAREN ->
-      stack := ref [] :: !stack
-    | RPAREN ->
-      let body = List.rev !(List.hd !stack) in
-      stack := List.tl !stack;
-      append (Repeat (program_of_list body))
   in
-  tokenize handler;
+  tokenize next_char handler;
   Concat (List.rev !(List.hd !stack))
 
 let parse_string s =
@@ -78,4 +98,4 @@ let parse_string s =
       then None
       else (incr i; Some (String.get s (!i - 1)))
   in
-  parse (tokenize next_char)
+  parse next_char

+ 12 - 7
pga.ml

@@ -18,13 +18,18 @@ let main () =
   if argc = 1 then usage 1;
 
   begin
-    match Sys.argv.(1) with
-    | "help" ->
-      usage 0
-    | "echo" when argc > 2 ->
-      print_endline (string_of_program (parse_string Sys.argv.(2)))
-    | _ ->
-      usage 1
+    try
+      match Sys.argv.(1) with
+      | "help" ->
+        usage 0
+      | "echo" when argc > 2 ->
+        print_endline (string_of_program (parse_string Sys.argv.(2)))
+      | _ ->
+        usage 1
+    with
+    | ParseError msg ->
+      prerr_endline ("parsing error: " ^ msg);
+      exit 1
   end;
 
   exit 0

+ 2 - 1
stringify.ml

@@ -1,6 +1,7 @@
 open Types
 
-let omega = "\207\137"
+let omega = "\xcf\x89"
+let pound = "\xc2\xa3"
 
 let string_of_basic ins = ins
 

+ 2 - 0
types.ml

@@ -12,3 +12,5 @@ type program =
   | Concat of program list
   | Repeat of program
   | Empty
+
+exception ParseError of string