Commit 682819d4 authored by Taddeüs Kroes's avatar Taddeüs Kroes

Transformed parser into lex+yacc parser

parent 3e754a51
......@@ -2,4 +2,7 @@
*.cmi
*.cmx
*.o
lexer.ml
parser.ml
parser.mli
pga
RESULT := pga
BASENAMES := types stringify parse pga
BASENAMES := types stringify parser lexer parse pga
OFILES := $(addsuffix .cmx,$(BASENAMES))
OCAMLCFLAGS := -g
OCAMLLDFLAGS :=
OCAMLLDLIBS :=
.PHONY: all clean
.PRECIOUS: $(addsuffix .cmi,$(BASENAMES))
.PRECIOUS: $(addprefix .cmi,$(BASENAMES))
all: $(RESULT)
%.ml: %.mll
ocamllex -o $@ $<
%.ml: %.mly
menhir --infer --explain $<
%.cmi: %.mli
ocamlc -c $(OCAMLCFLAGS) -o $@ $<
parser.cmx: parser.cmi
parser.mli: parser.ml
%.cmx: %.ml
ocamlopt -c -o $@ $(<:.cmi=.ml)
ocamlfind ocamlopt -package batteries -c $(OCAMLCFLAGS) -o $@ $(<:.cmi=.ml)
$(RESULT): $(OFILES)
ocamlopt -o $@ $^
ocamlopt -o $@ $(OCAMLLDFLAGS) $(OCAMLLDLIBS) $^
clean:
rm -f *.cmi *.cmx *.o $(RESULT)
rm -f *.cmi *.cmx *.o lexer.ml parser.ml parser.mli $(RESULT)
{
open Lexing
open Parser
exception SyntaxError of string
let next_line lexbuf =
let pos = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <- {
pos with pos_bol = lexbuf.lex_curr_pos;
pos_lnum = pos.pos_lnum + 1
}
}
rule token = parse
| ';' { SEMICOL }
| '!' { EXCLAM }
| '+' { PLUS }
| '-' { MINUS }
| '#' { HASH }
| '*' { OMEGA }
| '(' { LPAREN }
| ')' { RPAREN }
(*
| '{' { LBRACE }
| '}' { RBRACE }
*)
| ['0'-'9']+ as i { NUMBER (int_of_string i) }
| ['A'-'Z'] as c { UPPER c }
| ['a'-'z'] as c { LOWER c }
| '\r' | '\n' | "\r\n" { next_line lexbuf; token lexbuf }
| [' ''\t']+ { token lexbuf }
| eof | '\000' { EOF }
| _ as chr { raise (SyntaxError ("unexpected '" ^ Char.escaped chr ^ "'")) }
open Lexing
open Types
type token = LPAREN | RPAREN | ID of string | HASH | EXCLAM | PLUS | MINUS
let is_alnum c =
let i = int_of_char c in
i >= 48 & i <= 57 || i >= 65 & i <= 90 || i >= 97 & i <= 122
let tokenize next_char emit =
let buf = Buffer.create 32 in
let lineno = ref 1 in
let colno = ref 1 in
let unexpected c =
ParseError (Printf.sprintf
"unexpected '%c' at line %d, character %d"
c !lineno !colno
)
in
let emit_buf () =
if Buffer.length buf = 0 then
()
else
emit (ID (Buffer.contents buf));
Buffer.clear buf
in
let nobuf c =
if Buffer.length buf > 0 then raise (unexpected c)
in
let rec read_all () =
match next_char () with
| Some c ->
begin
match c with
| '(' -> nobuf c; emit LPAREN
| ')' -> emit_buf (); emit RPAREN
| ';' -> emit_buf ()
| '#' -> nobuf c; emit HASH
| '!' -> nobuf c; emit EXCLAM
| '+' -> nobuf c; emit PLUS
| '-' -> nobuf c; emit MINUS
| ' ' | '\t' | '\r' -> emit_buf ()
| '\n' -> emit_buf (); incr lineno; colno := 0
| c when is_alnum c -> Buffer.add_char buf c
| _ -> raise (unexpected c)
end;
incr colno;
read_all ()
| None ->
emit_buf ()
in
read_all ()
let program_of_list = function
| [] -> Empty
| [p] -> p
| p -> Concat p
type exp = E_basic | E_jump | E_ptest | E_ntest
let parse next_char =
let stack = ref [ref []] in
let expect = ref E_basic in
let append p =
let lst = List.hd !stack in
lst := p :: !lst
in
let handler = function
| EXCLAM -> append (Primitive Terminate)
| HASH -> expect := E_jump
| PLUS -> expect := E_ptest
| MINUS -> expect := E_ntest
| LPAREN -> stack := ref [] :: !stack
| RPAREN ->
if List.length !stack < 2 then begin
raise (ParseError "too many closing parentheses")
end;
let body = List.rev !(List.hd !stack) in
stack := List.tl !stack;
append (Repeat (program_of_list body))
| ID s ->
let p =
match !expect with
| E_basic -> Basic s
| E_jump -> Jump (int_of_string s)
| E_ptest -> Ptest s
| E_ntest -> Ntest s
in
append (Primitive p);
expect := E_basic
in
tokenize next_char handler;
if List.length !stack > 1 then
raise (ParseError "missing closing parenthesis");
Concat (List.rev !(List.hd !stack))
let parse_string s =
let i = ref 0 in
let next_char () =
if !i = String.length s
then None
else (incr i; Some (String.get s (!i - 1)))
in
parse next_char
let loc_from_lexpos pstart pend =
let (fname, ystart, xstart) = begin
pstart.pos_fname,
pstart.pos_lnum,
(pstart.pos_cnum - pstart.pos_bol + 1)
end in
(fname, ystart, xstart)
let loc_msg lexbuf msg =
let p = lexbuf.lex_curr_p in
let y = p.pos_lnum in
let x = p.pos_cnum - p.pos_bol in
Printf.sprintf "%s at line %d, character %d" msg y x
let parse_with_error lexbuf =
try Parser.program Lexer.token lexbuf with
| Lexer.SyntaxError msg ->
raise (FatalError (loc_msg lexbuf msg))
| Parser.Error ->
raise (FatalError (loc_msg lexbuf "syntax error"))
%{
open Lexing
open Types
%}
(* tokens *)
%token SEMICOL EXCLAM PLUS MINUS OMEGA LPAREN RPAREN HASH
%token <int> NUMBER
%token <char> UPPER LOWER
%token EOF
(* start symbol *)
%type <Types.program> program
%start program
%%
program:
| instrs=separated_list(SEMICOL, instruction) EOF
{ instrs }
instruction:
| c=LOWER { Basic c }
| EXCLAM { Terminate }
| PLUS c=LOWER { Ptest c }
| MINUS c=LOWER { Ntest c }
| HASH n=NUMBER { Jump n }
| i=instruction OMEGA { Repeat i }
| c=UPPER { Program c }
| LPAREN i=separated_list(SEMICOL, instruction) RPAREN { Concat i }
open Types
open Stringify
open Parse
let main () =
let usage status =
prerr_endline ("usage: " ^ Sys.argv.(0) ^ " command [args]");
prerr_endline "command:";
prerr_endline " help show this help page";
prerr_endline " echo PROGRAM pretty-print a program";
prerr_endline " echo TERM pretty-print a program";
prerr_endline "input program syntax:";
prerr_endline " - omit omega sign after closing parenthesis";
prerr_endline " - write star (*) instead of omega sign";
prerr_endline " - write dollar sign ($) instead of pound sign";
prerr_endline "";
prerr_endline "A TERM argument may also be omitted and passed on stdin";
prerr_endline "instead for convenient use of UNIX pipes";
exit status
in
let argc = Array.length Sys.argv in
if argc = 1 then usage 1;
let input_term i =
let lexbuf =
if argc > i
then Lexing.from_string Sys.argv.(i)
else Lexing.from_channel stdin
in
Parse.parse_with_error lexbuf
in
begin
try
match Sys.argv.(1) with
| "help" ->
usage 0
| "echo" when argc > 2 ->
print_endline (string_of_program (parse_string Sys.argv.(2)))
| "echo" ->
print_endline (string_of_program (input_term 2))
| _ ->
usage 1
with
| ParseError msg ->
prerr_endline ("parsing error: " ^ msg);
| FatalError msg ->
prerr_endline msg;
exit 1
end;
......
open Types
(*
let omega = "\xcf\x89"
let pound = "\xc2\xa3"
*)
let omega = "*"
let pound = "$"
let string_of_basic ins = ins
let string_of_primitive = function
| Basic ins -> string_of_basic ins
let rec string_of_instruction = function
| Basic c -> Char.escaped c
| Terminate -> "!"
| Ptest ins -> "+" ^ string_of_basic ins
| Ntest ins -> "-" ^ string_of_basic ins
| Ptest c -> "+" ^ Char.escaped c
| Ntest c -> "-" ^ Char.escaped c
| Jump len -> "#" ^ string_of_int len
let rec string_of_program = function
| Primitive p -> string_of_primitive p
| Concat l -> String.concat ";" (List.map string_of_program l)
| Repeat p -> "(" ^ string_of_program p ^ ")" ^ omega
| Concat l -> "(" ^ String.concat ";" (List.map string_of_instruction l) ^ ")"
| Repeat i -> string_of_instruction i ^ omega
| Program c -> Char.escaped c
| Empty -> ""
let rec string_of_program instrs =
String.concat ";" (List.map string_of_instruction instrs)
type basic_instr = string
type primitive =
| Basic of basic_instr
type instruction =
| Basic of char
| Terminate
| Ptest of basic_instr
| Ntest of basic_instr
| Ptest of char
| Ntest of char
| Jump of int
type program =
| Primitive of primitive
| Concat of program list
| Repeat of program
| Concat of instruction list
| Repeat of instruction
| Program of char
| Empty
exception ParseError of string
type program = instruction list
exception FatalError of string
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment