Commit 25806a8a authored by Taddeüs Kroes's avatar Taddeüs Kroes

Created basic CSS parser, data stucture and stringifier

parents
._*/
mincss
*.cmo
*.cmi
*.cmx
*.swp
*.o
types.ml
lexer.ml
lexer.mli
parser.ml
parser.mli
parser.conflicts
parser.automaton
RESULT := mincss
SOURCES := types.ml types.mli stringify.ml lexer.mll parser.mly util.ml parse.ml \
main.ml
PRE_TARGETS := types.ml types.cmi stringify.cmi
LIBS := str
# Set debugging flag to enable exception backtraces for OCAMLRUNPARAM=b
OCAMLFLAGS := -g
OCAMLYACC := menhir
YFLAGS := --infer --explain --dump
.PHONY: all myclean
all: native-code
clean:: myclean
# The Types module needs an implementation to stop ocamlc from complaining
types.ml: types.mli
cp $< $@
myclean:
rm -f a.out types.ml parser.conflicts parser.automaton
include OCamlMakefile
This diff is collapsed.
{
open Lexing
open Parser
exception SyntaxError of string
let next_line lexbuf =
let pos = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <- {
pos with pos_bol = lexbuf.lex_curr_pos;
pos_lnum = pos.pos_lnum + 1
}
}
rule token = parse
| '(' { LPAREN }
| ')' { RPAREN }
| '{' { LBRACE }
| '}' { RBRACE }
| ';' { SEMICOL }
| ',' { COMMA }
| ':' { COLON }
| "@media" { MEDIA }
| "@import" { IMPORT }
| "@charset" { CHARSET }
| "@page" { PAGE }
| "@font-face" { FONTFACE }
| "@namespace" { NAMESPACE }
| "!important" { IMPORTANT }
| ['A'-'Z''a'-'z''0'-'9''_''-''#''.']+ as id { ID id }
| ['.''#'':']['A'-'Z''a'-'z''_''-']['A'-'Z''a'-'z''0'-'9''_''-''.''#'':']* as id { SELECTOR id }
| '\r' | '\n' | "\r\n" { next_line lexbuf; token lexbuf }
| [' ''\t']+ { token lexbuf }
| "/*" { comment lexbuf }
| '"' { str (Buffer.create 17) lexbuf }
| eof | '\000' { EOF }
| _ as chr { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }
(* Multi-line comments *)
and comment = parse
| '\r' | '\n' | "\r\n" { next_line lexbuf; comment lexbuf }
| "*/" { token lexbuf }
| _ { comment lexbuf }
(* Strings *)
and str buf = parse
| '"' { STRING (Buffer.contents buf) }
| '\\''/' { Buffer.add_char buf '/'; str buf lexbuf }
| '\\''\\' { Buffer.add_char buf '\\'; str buf lexbuf }
| '\\''b' { Buffer.add_char buf '\b'; str buf lexbuf }
| '\\''f' { Buffer.add_char buf '\012'; str buf lexbuf }
| '\\''n' { Buffer.add_char buf '\n'; str buf lexbuf }
| '\\''r' { Buffer.add_char buf '\r'; str buf lexbuf }
| '\\''t' { Buffer.add_char buf '\t'; str buf lexbuf }
| [^'"''\\']+ as s { Buffer.add_string buf s; str buf lexbuf }
| eof { raise (SyntaxError "unterminated string") }
| _ as c { raise (SyntaxError ("illegal string character: " ^ Char.escaped c)) }
open Lexing
open Types
(* Parse command-line arguments *)
let parse_args () =
let args = {
infiles = [];
outfile = None;
verbose = 1;
} in
let args_spec = [
("<file> ...", Arg.Rest (fun _ -> ()),
" Optional input files (default is to read from stdin)");
("-o", Arg.String (fun s -> args.outfile <- Some s),
"<file> Output file (defaults to stdout)");
("-v", Arg.Int (fun i -> args.verbose <- i),
"<num> Set verbosity (0: nothing, 1: errors (default), \
2: compression rate, 3: debug)");
] in
let usage =
"Usage: " ^ Sys.argv.(0) ^ " [-o <file>] [-v <verbosity>] [<file> ...]"
in
Arg.parse args_spec (fun f -> args.infiles <- args.infiles @ [f]) usage;
args
(* Main function, returns exit status
* Command-line arguments are stored in lobals.args *)
let main () =
let args = parse_args () in
try
let css =
match args.infiles with
| [] ->
let input = Util.input_buffered stdin 512 in
Parse.parse_input "<stdin>" input
| files ->
let rec loop = function
| [] -> []
| filename :: tl ->
let input = Util.input_all (open_in filename) in
let css = Parse.parse_input filename input in
css @ loop tl
in
loop files
in
Util.print_css css;
0
with
| LocError (loc, msg) ->
Util.prerr_loc_msg args loc ("Error: " ^ msg);
1
| Failure err ->
prerr_endline ("Error: " ^ err);
1
let _ = exit (main ())
open Lexing
open Types
let loc_from_lexpos pstart pend =
let (fname, ystart, yend, xstart, xend) = begin
pstart.pos_fname,
pstart.pos_lnum,
pend.pos_lnum,
(pstart.pos_cnum - pstart.pos_bol + 1),
(pend.pos_cnum - pend.pos_bol)
end in
if ystart = yend && xend < xstart then
(fname, ystart, yend, xstart, xstart)
else
(fname, ystart, yend, xstart, xend)
let get_loc lexbuf =
loc_from_lexpos lexbuf.lex_curr_p lexbuf.lex_curr_p
let shift_loc (fname, ystart, yend, xstart, xend) yshift xshift =
(fname, ystart + yshift, yend + yshift, xstart + xshift, xend + xshift)
let shift_back lexbuf =
shift_loc (get_loc lexbuf) 0 (-1)
let parse_input display_name content =
let lexbuf = Lexing.from_string content in
lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = display_name };
try Parser.stylesheet Lexer.token lexbuf with
| Lexer.SyntaxError msg ->
raise (LocError (shift_back lexbuf, msg))
| Parser.Error ->
raise (LocError (shift_back lexbuf, "syntax error"))
%{
open Lexing
open Types
let prop2str (name, value) = name ^ ":" ^ Stringify.value2str value
%}
(* Tokens *)
%token LPAREN RPAREN LBRACE RBRACE SEMICOL COMMA COLON
%token MEDIA IMPORT CHARSET PAGE FONTFACE NAMESPACE
%token IMPORTANT EOF
%token <string> ID STRING SELECTOR
(* Start symbol *)
%type <Types.decl list> stylesheet
%start stylesheet
%%
(* Left-recursive list (use List.rev to obtain correctly ordered list) *)
llist(x):
| { [] }
| tl=llist(x) hd=x { hd :: tl }
separated_llist(sep, x):
| { [] }
| tl=llist(x) sep hd=x { hd :: tl }
stylesheet:
| decls=llist(decl) EOF
{ List.rev decls }
selector:
| id=ID { [id] }
| id=SELECTOR { [id] }
| tl=selector hd=ID { hd :: tl }
| tl=selector hd=SELECTOR { hd :: tl }
value:
| str=STRING { Str str }
| lit=ID { Lit lit }
| name=ID LPAREN arg=value RPAREN { Fn (name, arg) }
| IMPORTANT { Imp }
prop:
| name=ID COLON v=value+
{ (name, match v with [hd] -> hd | _ -> Lst v) }
propline:
| p=prop SEMICOL
{ p }
props:
| LBRACE p=llist(propline) last=prop? RBRACE
{ List.rev p @ (match last with None -> [] | Some p -> [p]) }
group:
| s=separated_nonempty_list(COMMA, selector) p=props
{ Group (List.rev s, p) }
%inline media:
| m=ID
{ m }
| LPAREN p=prop RPAREN
{ "(" ^ prop2str p ^ ")" }
%inline stringopt: f=STRING | f=ID { f }
decl:
| g=group
{ g }
| MEDIA queries=separated_nonempty_list(COMMA, media) LBRACE groups=llist(group) RBRACE
{ Media (queries, List.rev groups) }
| IMPORT f=stringopt q=separated_list(COMMA, ID) SEMICOL
{ Import (f, q) }
| CHARSET c=stringopt SEMICOL
{ Charset c }
| PAGE query=ID? p=props
{ Page (query, p) }
| FONTFACE p=props
{ Fontface p }
| NAMESPACE prefix=ID? uri=STRING SEMICOL
{ Namespace (prefix, uri) }
%%
open Types
let tab = " "
let indent = Str.global_replace (Str.regexp "^\\(.\\)") (tab ^ "\\1")
let rec cat sep fn = function
| [] -> ""
| [hd] -> fn hd
| hd :: tl -> fn hd ^ sep ^ cat sep fn tl
let rec value2str = function
| Lit lit -> lit
| Str str -> "\"" ^ str ^ "\""
| Lst values -> cat " " value2str values
| Dim (x, u) when float_of_int (int_of_float x) = x ->
string_of_int (int_of_float x) ^ u
| Dim (x, u) -> string_of_float x ^ u
| Fn (name, arg) -> name ^ "(" ^ value2str arg ^ ")"
| Imp -> "!important"
let prop2str (name, value) = name ^ ": " ^ value2str value ^ ";"
let block body = " {\n" ^ indent body ^ "\n}"
let rec decl2str = function
| Group (selectors, props) ->
cat ", " (String.concat " ") selectors ^ block (cat "\n" prop2str props)
| Media (queries, groups) ->
"@media " ^ String.concat ", " queries ^ block (cat "\n\n" decl2str groups)
| Import (filename, []) ->
"@import \"" ^ filename ^ "\";"
| Import (filename, queries) ->
"@import \"" ^ filename ^ "\" " ^ String.concat ", " queries ^ ";"
| Charset charset ->
"@charset \"" ^ charset ^ "\";"
| Page (None, props) ->
"@page" ^ block (cat "\n" prop2str props)
| Page (Some query, props) ->
"@page " ^ query ^ block (cat "\n" prop2str props)
| Fontface props ->
"@font-face " ^ block (cat "\n" prop2str props)
| Namespace (None, uri) ->
"@namespace \"" ^ uri ^ "\";"
| Namespace (Some prefix, uri) ->
"@namespace " ^ prefix ^ " \"" ^ uri ^ "\";"
let decls2str = cat "\n\n" decl2str
type value =
| Lit of string
| Str of string
| Lst of value list
| Dim of float * string
| Fn of string * value
| Imp
type prop = string * value
type selector = string list
type decl =
| Group of selector list * prop list (* <selectors> { <props> } *)
| Media of string list * decl list (* @media <queries> { <groups> } *)
| Import of string * string list (* @import "<file>" [<media>]; *)
| Charset of string (* @charset "<charset>"; *)
| Page of string option * prop list (* @page [<query>] { <props> } *)
| Fontface of prop list (* @font-face { <props> } *)
| Namespace of string option * string (* @namespace [<prefix>] "<uri>"; *)
(* TODO: @document, @keyframes, @supports *)
type args = {
mutable infiles : string list;
mutable outfile : string option;
mutable verbose : int;
}
type loc = string * int * int * int * int
exception LocError of loc * string
open Printf
open Str
open Types
let input_all ic =
let n = in_channel_length ic in
let buf = String.create n in
really_input ic buf 0 n;
close_in ic;
buf
let input_buffered ic chunksize =
let rec read_all buf bufsize pos =
match input ic buf pos (bufsize - pos) with
| 0 -> (close_in ic; buf)
| nread when nread = bufsize - pos ->
let bufsize = bufsize + chunksize in
let pos = pos + nread in
read_all (buf ^ String.create chunksize) bufsize pos
| nread ->
read_all buf bufsize (pos + nread)
in
read_all (String.create chunksize) chunksize 0
let output_css oc decls =
output_string oc (Stringify.decls2str decls);
output_char oc '\n'
let print_css = output_css stdout
let noloc = ("", 0, 0, 0, 0)
let tabwidth = 4
let count_tabs str upto =
let rec count n = function
| 0 -> n
| i -> count (if String.get str (i - 1) = '\t' then n + 1 else n) (i - 1)
in count 0 upto
let rec repeat s n = if n < 1 then "" else s ^ (repeat s (n - 1))
let retab str = global_replace (regexp "\t") (repeat " " tabwidth) str
let indent n = repeat (repeat " " (tabwidth - 1)) n
let prerr_loc (fname, ystart, yend, xstart, xend) =
let file = open_in fname in
(* skip lines until the first matched line *)
for i = 1 to ystart - 1 do let _ = input_line file in () done;
(* for each line in `loc`, print the source line with an underline *)
for l = ystart to yend do
let line = input_line file in
let linewidth = String.length line in
let left = if l = ystart then xstart else 1 in
let right = if l = yend then xend else linewidth in
if linewidth > 0 then begin
prerr_endline (retab line);
prerr_string (indent (count_tabs line right));
for i = 1 to left - 1 do prerr_char ' ' done;
for i = left to right do prerr_char '^' done;
prerr_endline "";
end
done;
()
let prerr_loc_msg args loc msg =
if args.verbose >= 1 then begin
let (fname, ystart, yend, xstart, xend) = loc in
if loc != noloc then begin
let line_s = if yend != ystart
then sprintf "lines %d-%d" ystart yend
else sprintf "line %d" ystart
in
let char_s = if xend != xstart || yend != ystart
then sprintf "characters %d-%d" xstart xend
else sprintf "character %d" xstart
in
eprintf "File \"%s\", %s, %s:\n" fname line_s char_s;
end;
eprintf "%s\n" msg;
if args.verbose >= 1 && loc != noloc then
try prerr_loc loc
with Sys_error _ -> ()
end;
()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment