Просмотр исходного кода

First version of parser that compiles (still conflicts to resolve)

Taddeus Kroes 11 лет назад
Родитель
Сommit
5c088307bb
6 измененных файлов с 213 добавлено и 107 удалено
  1. 18 27
      lexer.mll
  2. 1 1
      parse.ml
  3. 127 40
      parser.mly
  4. 35 23
      stringify.ml
  5. 31 15
      types.ml
  6. 1 1
      util.ml

+ 18 - 27
lexer.mll

@@ -3,8 +3,7 @@
    * http://www.w3.org/TR/CSS2/syndata.html#tokenization *)
   open Lexing
   open Parser
-
-  exception SyntaxError of string
+  open Types
 
   let next_line lexbuf =
     let pos = lexbuf.lex_curr_p in
@@ -51,15 +50,15 @@ rule token = parse
 
   | "<!--"              { CDO }
   | "-->"               { CDC }
-  | "~="                { INCLUDES }
-  | "|="                { DASHMATCH }
+  | ['~''|']?'=' as op  { RELATION op }
+  | ['>''~'] as c       { COMBINATOR (Char.escaped c) }
 
-  | mystring            { STRING }
-  | badstring           { BAD_STRING }
+  | mystring as s       { STRING s }
+  | badstring as s      { raise (SyntaxError "bad string") }
 
   | ident as id         { IDENT id }
 
-  | '#' (name as name)  { HASH name }
+  | '#' (name as nm)    { HASH nm }
 
   | "@import"           { IMPORT_SYM }
   | "@page"             { PAGE_SYM }
@@ -68,32 +67,17 @@ rule token = parse
 
   | '!' (w | comment)* "important"  { IMPORTANT_SYM }
 
-  | (num as n) "em"     { EMS (int_of_string n) }
-  | (num as n) "ex"     { EXS (int_of_string n) }
-  | (num as n) "px"     { LENGTH (int_of_string n, "px") }
-  | (num as n) "cm"     { LENGTH (int_of_string n, "cm") }
-  | (num as n) "mm"     { LENGTH (int_of_string n, "mm") }
-  | (num as n) "in"     { LENGTH (int_of_string n, "in") }
-  | (num as n) "pt"     { LENGTH (int_of_string n, "pt") }
-  | (num as n) "pc"     { LENGTH (int_of_string n, "pc") }
-  | (num as n) "deg"    { ANGLE (int_of_string n, "deg") }
-  | (num as n) "rad"    { ANGLE (int_of_string n, "rad") }
-  | (num as n) "grad"   { ANGLE (int_of_string n, "grad") }
-  | (num as n) "ms"     { TIME (int_of_string n, "ms") }
-  | (num as n) "s"      { TIME (int_of_string n, "s") }
-  | (num as n) "hz"     { FREQ (int_of_string n, "hz") }
-  | (num as n) "khz"    { FREQ (int_of_string n, "khz") }
-  | (num as n) "%"      { PERCENTAGE (int_of_string n) }
-  | (num as n) (ident as dim)  { DIMENSION (int_of_string n, dim) }
-  | num as n            { NUMBER (int_of_string n) }
+  | (num as n) ("em"|"ex"|"px"|"cm"|"mm"|"in"|"pt"|"pc"|"deg"|"rad"|"grad"|
+                "ms"|"s"|"hz"|"khz"|"%"|ident as u)
+  { UNIT_VALUE (float_of_string n, u) }
+  | num as n            { NUMBER (float_of_string n) }
 
   | "url(" w (mystring as uri) w ")"  { URI uri }
   | "url(" w (url as uri) w ")"       { URI uri }
-  | baduri as uri                     { BAD_URI uri }
+  | baduri as uri                     { raise (SyntaxError "bad uri") }
 
   | (ident as fn) '('   { FUNCTION fn }
 
-  | '('                 { LPAREN }
   | ')'                 { RPAREN }
   | '{'                 { LBRACE }
   | '}'                 { RBRACE }
@@ -101,6 +85,13 @@ rule token = parse
   | ']'                 { RBRACK }
   | ';'                 { SEMICOL }
   | ':'                 { COLON }
+  | ','                 { COMMA }
+
+  | '.'                 { DOT }
+  | '+'                 { PLUS }
+  | '-'                 { MINUS }
+  | '/'                 { SLASH }
+  | '*'                 { STAR }
 
   (*
   | _ as c { raise (SyntaxError ("illegal string character: " ^ Char.escaped c)) }

+ 1 - 1
parse.ml

@@ -27,7 +27,7 @@ let parse_input display_name content =
   let lexbuf = Lexing.from_string content in
   lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = display_name };
   try Parser.stylesheet Lexer.token lexbuf with
-  | Lexer.SyntaxError msg ->
+  | SyntaxError msg ->
     raise (LocError (shift_back lexbuf, msg))
   | Parser.Error ->
     raise (LocError (shift_back lexbuf, "syntax error"))

+ 127 - 40
parser.mly

@@ -2,61 +2,148 @@
 open Lexing
 open Types
 
-let prop2str (name, value) = name ^ ":" ^ Stringify.value2str value
+let filter_none l =
+  let rec filter l = function
+    | [] -> []
+    | None :: tl -> filter l tl
+    | Some hd :: tl -> filter (hd :: l) tl
+  in
+  List.rev (filter [] l)
 %}
 
 (* Tokens *)
-%token S CDO CDC INCLUDES DASHMATCH STRING BAD_STRING IMPORT_SYM PAGE_SYM
-%token MEDIA_SYM CHARSET_SYM IMPORTANT_SYM
-%token LPAREN RPAREN LBRACE RBRACE LBRACK RBRACK SEMICOL COLON
-%token <int> EMS EXS PERCENTAGE NUMBER
-%token <int * string> LENGTH ANGLE TIME FREQ DIMENSION
-%token <string> IDENT HASH URI BAD_URI FUNCTION
+%token S CDO CDC IMPORT_SYM PAGE_SYM MEDIA_SYM CHARSET_SYM
+%token IMPORTANT_SYM
+%token <float> NUMBER
+%token <float * string> UNIT_VALUE
+%token <string> COMBINATOR RELATION STRING IDENT HASH URI FUNCTION
+%token RPAREN LBRACE RBRACE LBRACK RBRACK SEMICOL COLON COMMA DOT PLUS MINUS
+%token SLASH STAR
 
 (* Start symbol *)
-%type <Types.decl list> stylesheet
+%type <Types.stylesheet> stylesheet
 %start stylesheet
 
 %%
 
-(* Left-recursive list (use List.rev to obtain correctly ordered list) *)
-(*
-llist(x):
-  |            { [] }
-  | tl=llist(x) hd=x { hd :: tl }
-*)
+%inline mylist(sep, x):
+  | l=separated_list(sep, delimited(S*, x, S*))
+  { l }
 
+cd: CDO S* | CDC S* {}
+
+%inline statement: r=ruleset | r=media | r=page { r }
 stylesheet:
-  | ( CDO | CDC | S | statement )*
+  | charset    = charset? S* cd*
+    imports    = terminated(import, cd*)*
+    statements = terminated(statement, cd*)*
+  { let charset = match charset with None -> [] | Some c -> [c] in
+    charset @ imports @ statements }
+
+charset:
+  | CHARSET_SYM set=STRING SEMICOL
+  { Charset set }
+
+%inline string_or_uri: s=STRING | s=URI { s }
+import:
+  | IMPORT_SYM S* tgt=string_or_uri media=mylist(COMMA, IDENT) SEMICOL S*
+  { Import (tgt, media) }
 
-statement:
-  | ruleset
-  | at_rule
+media:
+  | MEDIA_SYM S* queries=mylist(COMMA, IDENT) LBRACE S* rulesets=ruleset* RBRACE S*
+  { Media (queries, rulesets) }
 
-at_rule:
-  | ATKEYWORD S* any* ( block | SEMICOL S* )
+page:
+  | PAGE_SYM S* pseudo=pseudo_page? decls=decls_block
+  { Page (pseudo, decls) }
 
-block:
-  | LBRACE S* ( any | block | ATKEYWORD S* | SEMICOL S* )* RBRACE S*
+pseudo_page:
+  | COLON pseudo=IDENT S*
+  { pseudo }
+
+decls_block:
+  | LBRACE S* decls=mylist(SEMICOL, declaration?) RBRACE S*
+  { filter_none decls }
 
 ruleset:
-  | selectors=any+ LBRACE S* declaration? ( SEMICOL S* declaration? )* RBRACE S*
+  | selectors_hd = selector
+    selectors_tl = separated_list(COMMA, preceded(S*, selector))
+    decls        = decls_block
+  { Ruleset (selectors_hd :: selectors_tl, decls) }
+
+%inline combinator:
+  | S* PLUS S*          { ["+"] }
+  | S* c=COMBINATOR S*  { [c] }
+  | S+                  { [] }
+selector:
+  | hd=simple_selector comb=combinator tl=selector
+  { hd :: comb @ tl }
+  | simple=simple_selector
+  { [simple] }
+
+simple_selector:
+  | elem=element_name addons=element_addon*
+  { elem ^ String.concat "" addons }
+  | addons=element_addon+
+  { String.concat "" addons }
+
+element_addon:
+  | a=HASH | a=cls | a=attrib | a=pseudo
+  { a }
+
+cls:
+  | DOT name=IDENT
+  { "." ^ name }
+
+element_name:
+  | tag=IDENT  { tag }
+  | STAR       { "*" }
+
+%inline rel_value:
+  | S* id=IDENT S*  { id }
+  | S* s=STRING S*  { s }
+attrib:
+  | LBRACK S* left=IDENT S* right=pair(RELATION, rel_value)? RBRACK
+  { left ^ (match right with None -> "" | Some (rel, term) -> rel ^ term) }
+
+pseudo:
+  | COLON id=IDENT
+  { ":" ^ id }
+  | COLON f=FUNCTION S* arg=terminated(IDENT, S*)? RPAREN
+  { let arg = match arg with None -> "" | Some id -> id in
+    ":" ^ f ^ "(" ^ arg ^ ")" }
 
 declaration:
-  | name=IDENT S* COLON S* value=value
-  { Property (name, value) }
-
-value:
-  | ( any | block | ATKEYWORD S* )+
-
-any:
-  | ( IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING | DELIM | URI | HASH |
-  UNICODE-RANGE | INCLUDES | DASHMATCH | COLON | FUNCTION S* (any|unused)*
-  RPAREN | LPAREN S* (any|unused)* RPAREN | LBRACK S* (any|unused)* RBRACK) S*
-
-unused:
-  | block
-  | ATKEYWORD S*
-  | SEMICOL S*
-  | CDO S*
-  | CDC S*
+  | name=IDENT S* COLON S* value=expr IMPORTANT_SYM S*
+  { (name, Prio value) }
+  | name=IDENT S* COLON S* value=expr
+  { (name, value) }
+
+%inline unary_operator:
+  | MINUS  { "-" }
+  | PLUS   { "+" }
+expr:
+  | left=expr right=expr
+  { Concat [left; right] }
+  | left=expr SLASH S* right=expr
+  { Binop (left, "/", right) }
+  | op=unary_operator n=NUMBER S*
+  { Unop (op, Number n) }
+  | op=unary_operator v=UNIT_VALUE S*
+  { let (n, u) = v in Unop (op, Unit (n, u)) }
+  | n=NUMBER S*
+  { Number n }
+  | v=UNIT_VALUE S*
+  { let (n, u) = v in Unit (n, u) }
+  | str=STRING S*
+  { Strlit str }
+  | id=IDENT S*
+  { Ident id }
+  | uri=URI S*
+  { Uri uri }
+  | fn=FUNCTION S* args=separated_list(COMMA, terminated(expr, S*)) RPAREN S*
+  { Function (fn, args) }
+  | hex=HASH S*
+  { if Str.string_match (Str.regexp "\\d{3}\\d{3}?") hex 0
+      then Hexcolor hex
+      else raise (SyntaxError ("invalid color #" ^ hex)) }

+ 35 - 23
stringify.ml

@@ -9,40 +9,52 @@ let rec cat sep fn = function
   | [hd] -> fn hd
   | hd :: tl -> fn hd ^ sep ^ cat sep fn tl
 
-let rec value2str = function
-  | Lit lit -> lit
-  | Str str -> "\"" ^ str ^ "\""
-  | Lst values -> cat " " value2str values
-  | Dim (x, u) when float_of_int (int_of_float x) = x ->
-    string_of_int (int_of_float x) ^ u
-  | Dim (x, u) -> string_of_float x ^ u
-  | Fn (name, arg) -> name ^ "(" ^ value2str arg ^ ")"
-  | Imp -> "!important"
-
-let prop2str (name, value) = name ^ ": " ^ value2str value ^ ";"
+let string_of_num n =
+  if float_of_int (int_of_float n) = n
+    then string_of_int (int_of_float n)
+    else string_of_float n
+
+let rec string_of_value = function
+  | Ident id -> id
+  | Strlit str -> "\"" ^ str ^ "\""
+  | Uri uri when String.contains uri ')' -> "url(\"" ^ uri ^ "\")"
+  | Uri uri -> "url(" ^ uri ^ ")"
+  | Concat values -> cat " " string_of_value values
+  | Number n -> string_of_num n
+  | Unit (n, u) -> string_of_num n ^ u
+  | Function (name, args) -> name ^ "(" ^ cat "," string_of_value args ^ ")"
+  | Hexcolor hex -> "#" ^ hex
+  | Unop (op, opnd) -> op ^ string_of_value opnd
+  | Binop (left, op, right) -> string_of_value left ^ op ^ string_of_value right
+  | Prio value -> string_of_value value ^ " !important"
+
+let string_of_declaration (name, value) =
+  name ^ ": " ^ string_of_value value ^ ";"
 
 let block body = " {\n" ^ indent body ^ "\n}"
 
-let rec decl2str = function
-  | Group (selectors, props) ->
-    cat ", " (String.concat " ") selectors ^ block (cat "\n" prop2str props)
-  | Media (queries, groups) ->
-    "@media " ^ String.concat ", " queries ^ block (cat "\n\n" decl2str groups)
+let rec string_of_statement = function
+  | Ruleset (selectors, decls) ->
+    cat ", " (String.concat " ") selectors ^
+    block (cat "\n" string_of_declaration decls)
+  | Media (queries, rulesets) ->
+    "@media " ^ String.concat ", " queries ^
+    block (cat "\n\n" string_of_statement rulesets)
   | Import (filename, []) ->
     "@import \"" ^ filename ^ "\";"
   | Import (filename, queries) ->
     "@import \"" ^ filename ^ "\" " ^ String.concat ", " queries ^ ";"
   | Charset charset ->
     "@charset \"" ^ charset ^ "\";"
-  | Page (None, props) ->
-    "@page" ^ block (cat "\n" prop2str props)
-  | Page (Some query, props) ->
-    "@page " ^ query ^ block (cat "\n" prop2str props)
-  | Fontface props ->
-    "@font-face " ^ block (cat "\n" prop2str props)
+  | Page (None, decls) ->
+    "@page" ^ block (cat "\n" string_of_declaration decls)
+  | Page (Some pseudo, decls) ->
+    "@page :" ^ pseudo ^ block (cat "\n" string_of_declaration decls)
+  | Fontface decls ->
+    "@font-face " ^ block (cat "\n" string_of_declaration decls)
   | Namespace (None, uri) ->
     "@namespace \"" ^ uri ^ "\";"
   | Namespace (Some prefix, uri) ->
     "@namespace " ^ prefix ^ " \"" ^ uri ^ "\";"
 
-let decls2str = cat "\n\n" decl2str
+let string_of_stylesheet = cat "\n\n" string_of_statement

+ 31 - 15
types.ml

@@ -1,25 +1,39 @@
 type value =
-  | Lit of string
-  | Str of string
-  | Lst of value list
-  | Dim of float * string
-  | Fn of string * value
-  | Imp
+  | Ident of string
+  | Strlit of string
+  | Uri of string
+  | Concat of value list
+  | Number of float
+  | Unit of float * string
+  | Function of string * value list
+  | Hexcolor of string
+  | Unop of string * value
+  | Binop of value * string * value
+  | Prio of value
 
-type prop = string * value
+type declaration = string * value
 
 type selector = string list
 
-type decl =
-  | Group of selector list * prop list  (* <selectors> { <props> } *)
-  | Media of string list * decl list    (* @media <queries> { <groups> } *)
-  | Import of string * string list      (* @import "<file>" [<media>]; *)
-  | Charset of string                   (* @charset "<charset>"; *)
-  | Page of string option * prop list   (* @page [<query>] { <props> } *)
-  | Fontface of prop list               (* @font-face { <props> } *)
-  | Namespace of string option * string (* @namespace [<prefix>] "<uri>"; *)
+type statement =
+  | Ruleset of selector list * declaration list
+  (* <selectors> { <declarations> } *)
+  | Media of string list * statement list
+  (* @media <queries> { <rulesets> } *)
+  | Import of string * string list
+  (* @import "<file>" [<media>]; *)
+  | Charset of string
+  (* @charset "<charset>"; *)
+  | Page of string option * declaration list
+  (* @page [<pseudo_page>] { <declarations> } *)
+  | Fontface of declaration list
+  (* @font-face { <declarations> } *)
+  | Namespace of string option * string
+  (* @namespace [<prefix>] "<uri>"; *)
   (* TODO: @document, @keyframes, @supports *)
 
+type stylesheet = statement list
+
 type args = {
   mutable infiles : string list;
   mutable outfile : string option;
@@ -28,4 +42,6 @@ type args = {
 
 type loc = string * int * int * int * int
 
+exception SyntaxError of string
+
 exception LocError of loc * string

+ 1 - 1
util.ml

@@ -23,7 +23,7 @@ let input_buffered ic chunksize =
   read_all (String.create chunksize) chunksize 0
 
 let output_css oc decls =
-  output_string oc (Stringify.decls2str decls);
+  output_string oc (Stringify.string_of_stylesheet decls);
   output_char oc '\n'
 
 let print_css = output_css stdout