(* lexer.mll -- ocamllex tokenizer for CSS *)
  1. {
  2. (* Tokenizer according to definition at
  3. * http://www.w3.org/TR/CSS2/syndata.html#tokenization *)
  4. open Lexing
  5. open Parser
  6. open Types
  (* Record that the lexer has just consumed a line break.

     Increments the line counter of [lexbuf.lex_curr_p] and marks the current
     absolute character offset as the beginning of the new line.

     The beginning-of-line offset must be the absolute [pos_cnum], not the
     buffer-relative [lex_curr_pos]: the two diverge as soon as the lexer
     buffer is refilled, which would corrupt column computations
     ([pos_cnum - pos_bol]).  [Lexing.new_line] does exactly this update. *)
  let next_line lexbuf = Lexing.new_line lexbuf
  (* Drop the first and last character of [s] (the surrounding quote marks of
     a matched string literal) and return what lies between them.
     [String.sub] raises [Invalid_argument] when [s] has fewer than two
     characters. *)
  let strip_quotes s =
    let inner_len = String.length s - 2 in
    String.sub s 1 inner_len
  14. }
  (* ---- Character classes, escapes and name characters -------------------
     Note: OCaml character escapes of the form '\ddd' are decimal, so '\012'
     is form feed and '\160'-'\255' is the byte range 160..255. *)

  (* One hexadecimal digit. *)
  let h = ['0'-'9' 'a'-'f' 'A'-'F']

  (* Whitespace allowed to terminate a unicode escape: a single whitespace
     character or a CR LF pair. *)
  let wc = [' ' '\t' '\r' '\n' '\012'] | "\r\n"

  (* Any byte in the range 160..255. *)
  let nonascii = ['\160'-'\255']

  (* A non-empty run of whitespace, and its optional counterpart. *)
  let s = [' ' '\t' '\r' '\n' '\012']+
  let w = s?

  (* One line break: LF, CR LF, CR or form feed. *)
  let nl = "\r\n" | ['\n' '\r' '\012']

  (* Backslash followed by one to six hex digits and an optional [wc]. *)
  let unicode = '\\' h h? h? h? h? h? wc?

  (* Either a unicode escape, or a backslash followed by any character that
     is neither a line break nor a hex digit. *)
  let escape = unicode | '\\' [^ '\r' '\n' '\012' '0'-'9' 'a'-'f' 'A'-'F']

  (* Characters that may start a name, and characters that may continue one
     (the start characters plus digits and '-'). *)
  let nmstart = ['_' 'a'-'z' 'A'-'Z'] | nonascii | escape
  let nmchar = nmstart | ['0'-'9' '-']
  (* ---- String literals ---------------------------------------------------
     A string body is a sequence of: plain characters, a backslash-escaped
     line break (line continuation), or an escape sequence.

     The plain-character class must exclude the backslash itself, as in the
     CSS 2.1 grammar ([^\n\r\f\\"]).  Otherwise a lone backslash is accepted
     as an ordinary character and an unterminated string that ends in an
     escaped quote (e.g. "a\" followed by a line break) is wrongly matched as
     a complete string instead of a bad string. *)

  (* Double-quoted and single-quoted well-formed strings. *)
  let string1 = '"' ([^ '\n' '\r' '\012' '\\' '"'] | '\\' nl | escape)* '"'
  let string2 = '\'' ([^ '\n' '\r' '\012' '\\' '\''] | '\\' nl | escape)* '\''
  let mystring = string1 | string2

  (* Strings missing their closing quote; a dangling trailing backslash is
     tolerated. *)
  let badstring1 = '"' ([^ '\n' '\r' '\012' '\\' '"'] | '\\' nl | escape)* '\\'?
  let badstring2 = '\'' ([^ '\n' '\r' '\012' '\\' '\''] | '\\' nl | escape)* '\\'?
  let badstring = badstring1 | badstring2
  (* ---- Malformed comments -----------------------------------------------
     Both forms start with the comment opener and lack the closing '/'.
     [badcomment1] ends on a run of '*'; [badcomment2] ends on ordinary
     comment text. *)
  let badcomment1 = "/*" [^ '*']* '*'+ ([^ '/' '*'] [^ '*']* '*'+)*
  let badcomment2 = "/*" [^ '*']* ('*'+ [^ '/' '*'] [^ '*']*)*
  let badcomment = badcomment1 | badcomment2

  (* ---- Malformed url(...) tokens ----------------------------------------
     Each form starts with "url(" and has no closing parenthesis:
     [baduri1] holds unquoted URL characters (no '"', '\'', '(', ')' or
     backslash outside an escape), [baduri2] holds a well-formed string, and
     [baduri3] holds a bad string. *)
  let baduri1 =
    "url(" w (['!' '#' '$' '%' '&' '*'-'[' ']'-'~'] | nonascii | escape)* w
  let baduri2 = "url(" w mystring w
  let baduri3 = "url(" w badstring
  let baduri = baduri1 | baduri2 | baduri3
  (* A complete comment: the opener, any text, and a closing run of '*'
     immediately followed by '/'.

     The inner group (a character that is neither '/' nor '*', more text,
     another run of '*') handles '*' characters occurring inside the comment
     body.  It must be repeatable zero or more times, and the terminator is a
     single '/', because the preceding '*'+ already consumed the star of the
     closing delimiter.  Without the Kleene star and with a "*/" terminator,
     an ordinary comment such as "/* foo */" would never match. *)
  let comment = "/*" [^ '*']* '*'+ ([^ '/' '*'] [^ '*']* '*'+)* '/'
  (* An identifier: optional leading '-', one name-start character, then any
     number of name characters. *)
  let ident = '-'? nmstart nmchar*

  (* A name: one or more name characters (no restriction on the first). *)
  let name = nmchar+

  (* An unsigned number: either digits only, or optional digits followed by
     '.' and at least one digit. *)
  let num = ['0'-'9']* '.' ['0'-'9']+ | ['0'-'9']+

  (* The (possibly empty) body of an unquoted url(...). *)
  let url = (nonascii | escape | ['!' '#' '$' '%' '&' '*'-'~'])*
  (* Main lexer entry point: returns the next token of the input.

     ocamllex picks the longest match and, on a tie, the case listed first,
     so the order of the cases below is significant (e.g. the keyword cases
     precede [ident], and [baduri] precedes the FUNCTION case).

     Raises [SyntaxError] (declared outside this section) on a malformed
     string, a malformed url(...) or an unexpected character.

     NOTE(review): no case in this rule calls [next_line], so the line number
     in [lexbuf.lex_curr_p] is not advanced while lexing -- confirm whether
     position tracking is expected by the parser's error reporting. *)
  rule token = parse
    | s                               { S }

    (* Comments produce no token: skip them and continue with the next token.
       These cases need their own action; without one, ocamllex would treat
       them as further alternatives of the following case and return its
       token for every comment. *)
    | comment                         (* ignore comments *)
    | badcomment                      (* unclosed comment at EOF *)
                                      { token lexbuf }

    | "<!--"                          { CDO }
    | "-->"                           { CDC }

    (* "=", "~=" or "|=" *)
    | ['~' '|']? '=' as op            { RELATION op }
    | ['>' '~'] as c                  { COMBINATOR (Char.escaped c) }

    | mystring as s                   { STRING (strip_quotes s) }
    | badstring                       { raise (SyntaxError "bad string") }

    | '#' (name as nm)                { HASH nm }

    | "@import"                       { IMPORT_SYM }
    | "@page"                         { PAGE_SYM }
    | "@media"                        { MEDIA_SYM }
    | "@charset"                      { CHARSET_SYM }

    (* Keywords: listed before [ident] so they win on equal-length matches. *)
    | "only"                          { ONLY }
    | "not"                           { NOT }
    | "and"                           { AND }

    | ident as id                     { IDENT id }

    | '!' (w | comment)* "important"  { IMPORTANT_SYM }

    (* A number immediately followed by a unit, '%' or any identifier. *)
    | (num as n) ("em"|"ex"|"px"|"cm"|"mm"|"in"|"pt"|"pc"|"deg"|"rad"|"grad"|
                  "ms"|"s"|"hz"|"khz"|"%"|"dpi"|"dpcm"|ident as u)
                                      { UNIT_VALUE (float_of_string n, u) }
    | num as n                        { NUMBER (float_of_string n) }

    (* url(...) with a quoted or an unquoted body. *)
    | "url(" w (mystring as uri) w ")" { URI (strip_quotes uri) }
    | "url(" w (url as uri) w ")"     { URI uri }
    | baduri                          { raise (SyntaxError "bad uri") }

    | (ident as fn) '('               { FUNCTION fn }

    | '('                             { LPAREN }
    | ')'                             { RPAREN }
    | '{'                             { LBRACE }
    | '}'                             { RBRACE }
    | '['                             { LBRACK }
    | ']'                             { RBRACK }
    | ';'                             { SEMICOL }
    | ':'                             { COLON }
    | ','                             { COMMA }

    | '.'                             { DOT }
    | '+'                             { PLUS }
    | '-'                             { MINUS }
    | '/'                             { SLASH }
    | '*'                             { STAR }

    (* End of input, or a NUL byte treated as end of input. *)
    | eof | '\000'                    { EOF }

    | _ as c { raise (SyntaxError ("unexpected '" ^ Char.escaped c ^ "'")) }