- {
- (* Tokenizer according to definition at
- * http://www.w3.org/TR/CSS2/syndata.html#tokenization *)
- open Lexing
- open Parser
- open Types
- (* The newline pattern, compiled once rather than on every call. *)
- let newline_re = Str.regexp "\r\n\\|\r\\|\n"
- (* Update [lexbuf.lex_curr_p] after a lexeme that may span several lines:
- * count the newlines it contains and record where its last line starts,
- * so that subsequent positions report correct line and column numbers. *)
- let advance_pos lexbuf =
-   let s = Lexing.lexeme lexbuf in
-   let rec search from lines =
-     try
-       ignore (Str.search_forward newline_re s from);
-       search (Str.match_end ()) (lines + 1)
-     with Not_found ->
-       lines, String.length s - from
-   in
-   let lines, cols = search 0 0 in
-   let pos = lexbuf.lex_curr_p in
-   if lines > 0 then
-     lexbuf.lex_curr_p <- {
-       pos with
-       pos_bol = pos.pos_cnum - cols;
-       pos_lnum = pos.pos_lnum + lines
-     }
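- (* Remove the surrounding quote characters from a quoted-string lexeme. *)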
- let strip_quotes s = String.sub s 1 (String.length s - 2)
- }
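- (* Named regular expressions, following the grammar in the CSS2 specification referenced above. *)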
- let h = ['0'-'9' 'a'-'f' 'A'-'F']
- let wc = '\r' '\n' | [' ' '\t' '\r' '\n' '\012']
- let nonascii = ['\160'-'\255']
- let s = [' ' '\t' '\r' '\n' '\012']+
- let w = s?
- let nl = '\n' | '\r' '\n' | '\r' | '\012'
- let unicode = '\\' h(h(h(h(h(h)?)?)?)?)? wc?
- let escape = unicode | '\\'[^'\r' '\n' '\012' '0'-'9' 'a'-'f' 'A'-'F']
- let nmstart = ['_' 'a'-'z' 'A'-'Z'] | nonascii | escape
- let nmchar = ['_' 'a'-'z' 'A'-'Z' '0'-'9' '-'] | nonascii | escape
- let string1 = '"' ([^'\n' '\r' '\012' '"' '\\'] | '\\' nl | escape)* '"'
- let string2 = '\'' ([^'\n' '\r' '\012' '\'' '\\'] | '\\' nl | escape)* '\''
- let mystring = string1 | string2
- let badstring1 = '"' ([^'\n' '\r' '\012' '"' '\\'] | '\\' nl | escape)* '\\'?
- let badstring2 = '\'' ([^'\n' '\r' '\012' '\'' '\\'] | '\\' nl | escape)* '\\'?
- let badstring = badstring1 | badstring2
- let badcomment1 = "/*" [^'*']* '*'+ ([^'/' '*'] [^'*']* '*'+)*
- let badcomment2 = "/*" [^'*']* ('*'+ [^'/' '*'] [^'*']*)*
- let badcomment = badcomment1 | badcomment2
- let baduri1 = "url(" w (['!' '#' '$' '%' '&' '*'-'[' ']'-'~'] | nonascii | escape)* w
- let baduri2 = "url(" w mystring w
- let baduri3 = "url(" w badstring
- let baduri = baduri1 | baduri2 | baduri3
- let comment = "/*" [^'*']* '*'+ ([^'/' '*'] [^'*']* '*'+)* '/'
- let ident = '-'? nmstart nmchar*
- let name = nmchar+
- let num = ['0'-'9']+ | ['0'-'9']* '.' ['0'-'9']+
- let url = (['!' '#' '$' '%' '&' '*'-'~'] | nonascii | escape)*
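- (* Case-insensitive letter classes, used below to spell out CSS keywords and units. *)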
- let A = ['a' 'A']
- let B = ['b' 'B']
- let C = ['c' 'C']
- let D = ['d' 'D']
- let E = ['e' 'E']
- let F = ['f' 'F']
- let G = ['g' 'G']
- let H = ['h' 'H']
- let I = ['i' 'I']
- let J = ['j' 'J']
- let K = ['k' 'K']
- let L = ['l' 'L']
- let M = ['m' 'M']
- let N = ['n' 'N']
- let O = ['o' 'O']
- let P = ['p' 'P']
- let Q = ['q' 'Q']
- let R = ['r' 'R']
- let S = ['s' 'S']
- let T = ['t' 'T']
- let U = ['u' 'U']
- let V = ['v' 'V']
- let W = ['w' 'W']
- let X = ['x' 'X']
- let Y = ['y' 'Y']
- let Z = ['z' 'Z']
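- (* Main entry point: return the next CSS token read from [lexbuf]. *)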
- rule token = parse
- | "\r\n" | '\r' | '\n' { new_line lexbuf; S }
- | [' ' '\t' '\012']+ { S }
- | "/*" { comment lexbuf }
- | "<!--" { CDO }
- | "-->" { CDC }
- | ['~' '^' '$' '*' '|']? '=' as op { RELATION op }
- | ['>' '~'] as c { COMBINATOR (Char.escaped c) }
- | mystring as s { STRING (strip_quotes s) }
- | badstring { raise (Syntax_error "bad string") }
- | '#' (name as nm) { HASH nm }
- | '@' I M P O R T { IMPORT_SYM }
- | '@' P A G E { PAGE_SYM }
- | '@' M E D I A { MEDIA_SYM }
- | "@charset " { CHARSET_SYM }
- | '@' F O N T '-' F A C E { FONT_FACE_SYM }
- | '@' N A M E S P A C E { NAMESPACE_SYM }
- | '@' S U P P O R T S { SUPPORTS_SYM }
- | '@' (('-' ident '-')? as prefix) K E Y F R A M E S
- { KEYFRAMES_SYM (String.lowercase_ascii prefix) }
- | '@' (('-' ident '-')? as prefix) V I E W P O R T
- { VIEWPORT_SYM (String.lowercase_ascii prefix) }
- | (s | comment)* s comment* A N D comment* s (s | comment)*
- { advance_pos lexbuf; WS_AND }
- | (s | comment)* s comment* O R comment* s (s | comment)*
- { advance_pos lexbuf; WS_OR }
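- (* An <an+b> formula, as used in :nth-child()-style selectors, e.g. "2n+1" or "-n+3". *)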
- | (['-' '+'] as a_sign)? (['0'-'9']* as a) N
- (w (['-' '+'] as b_sign) w (['0'-'9']+ as b))?
- {
- let a = if a = "" then 1 else int_of_string a in
- let b = match b with None -> 0 | Some n -> int_of_string n in
- let apply_sign n = function Some '-' -> -n | _ -> n in
- FORMULA (apply_sign a a_sign, apply_sign b b_sign)
- }
- | O N L Y { ONLY }
- | N O T { NOT }
- | A N D { AND }
- (*| O R { OR } removed in favor of WS_OR *)
- | F R O M { FROM }
- | T O { TO }
- | O D D { ODD }
- | E V E N { EVEN }
- | ident as id { IDENT id }
- | '!' (s | comment)* I M P O R T A N T { advance_pos lexbuf; IMPORTANT_SYM }
- | (num as n) '%' { PERCENTAGE (float_of_string n) }
- | (num as n) (E M | E X | P X | C M | M M | I N | P T | P C | D E G |
- G? R A D | M? S | K? H Z | D P (I | C M) | ident as u)
- { UNIT_VALUE (float_of_string n, u) }
- | num as n { NUMBER (float_of_string n) }
- | "url(" w (mystring as uri) w ")" { advance_pos lexbuf; URI (strip_quotes uri) }
- | "url(" w (url as uri) w ")" { advance_pos lexbuf; URI uri }
- | baduri { raise (Syntax_error "bad uri") }
- (*
- | "url(" { url_start lexbuf }
- *)
- | (ident as fn) '(' { FUNCTION fn }
- | '(' { LPAREN }
- | ')' { RPAREN }
- | '{' { LBRACE }
- | '}' { RBRACE }
- | '[' { LBRACK }
- | ']' { RBRACK }
- | ';' { SEMICOL }
- | ':' { COLON }
- | "::" { DOUBLE_COLON }
- | ',' { COMMA }
- | '.' { DOT }
- | '+' { PLUS }
- | '-' { MINUS }
- | '/' { SLASH }
- | '*' { STAR }
- | eof | '\000' { EOF }
- | _ as c { raise (Syntax_error ("unexpected '" ^ Char.escaped c ^ "'")) }
- (* Comments *)
- and comment = parse
- | '\r' | '\n' | "\r\n" { new_line lexbuf; comment lexbuf }
- | "*/" { token lexbuf }
- | eof | '\000' { raise (Syntax_error "unclosed comment") }
- | _ { comment lexbuf }
- (*
- (* URLs *)
- and url_start = parse
- | '\r' | '\n' | "\r\n" { new_line lexbuf; url_start lexbuf }
- | [' ' '\t' '\012']+ { url_start lexbuf }
- | urlc+ as uri { url_end uri lexbuf }
- | ')' { URI "" }
- | mystring as s { url_end (strip_quotes s) lexbuf }
- | badstring { raise (Syntax_error "bad string") }
- | (eof | '\000' | _) as c { raise (Syntax_error ("unexpected '" ^ c ^ "'")) }
- and url_end uri = parse
- | '\r' | '\n' | "\r\n" { new_line lexbuf; url_end uri lexbuf }
- | [' ' '\t' '\012']+ { url_end uri lexbuf }
- | ')' { URI uri }
- | (eof | '\000' | _) as c { raise (Syntax_error ("unexpected '" ^ c ^ "'")) }
- *)
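- (* Usage sketch: the generated [Parser] module consumes [token] above.
- * Assuming its start symbol is named [stylesheet] (an assumption; use the
- * grammar's actual entry point), a stylesheet string could be parsed with:
- *   let parse_string css = Parser.stylesheet token (Lexing.from_string css)
- *)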