lexer.mll 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. {
  2. (* Tokenizer according to definition at
  3. * http://www.w3.org/TR/CSS2/syndata.html#tokenization *)
  4. open Lexing
  5. open Parser
  6. open Types
  (* Record in [lexbuf] that a new line starts at the current position:
   * the line counter is incremented and the beginning-of-line offset is
   * set to the current absolute character offset (pos_cnum).
   *
   * Lexing.new_line is used rather than copying lexbuf.lex_curr_pos into
   * pos_bol: lex_curr_pos is an index into the internal buffer, whereas
   * pos_bol must be an absolute offset comparable with pos_cnum, otherwise
   * column numbers (pos_cnum - pos_bol) are wrong whenever the two differ. *)
  let next_line lexbuf =
    Lexing.new_line lexbuf
  13. }
  (* Character-level macros of the CSS 2.1 tokenizer.
   * The specification defines its tokens case-insensitively, so every
   * letter range below accepts both lower and upper case. *)

  (* h: a single hexadecimal digit *)
  let h = ['0'-'9' 'a'-'f' 'A'-'F']
  (* wc: one whitespace unit that may terminate a unicode escape; CR LF
   * counts as a single unit *)
  let wc = "\r\n" | [' ' '\t' '\r' '\n' '\012']
  (* nonascii: any byte from 160 up to 255 *)
  let nonascii = ['\160'-'\255']
  (* s: a non-empty run of whitespace, w: an optional run of whitespace *)
  let s = [' ' '\t' '\r' '\n' '\012']+
  let w = s?
  (* nl: one line terminator (a form feed is treated as one as well) *)
  let nl = '\n' | "\r\n" | '\r' | '\012'
  (* unicode: backslash, one to six hex digits, optional trailing whitespace *)
  let unicode = '\\' h (h (h (h (h h?)?)?)?)? wc?
  (* escape: a unicode escape, or a backslash followed by any character that
   * is neither a line terminator nor a hex digit *)
  let escape = unicode | '\\' [^ '\r' '\n' '\012' '0'-'9' 'a'-'f' 'A'-'F']
  (* nmstart: first character of an identifier, nmchar: any later character *)
  let nmstart = ['_' 'a'-'z' 'A'-'Z'] | nonascii | escape
  let nmchar = ['_' 'a'-'z' 'A'-'Z' '0'-'9' '-'] | nonascii | escape
  (* Quoted strings, in double-quote (1) and single-quote (2) flavours.
   * The backslash is excluded from the plain-character class, as in the CSS
   * 2.1 grammar, so it can only occur as part of an escape or of a
   * backslash-newline continuation. Without that exclusion the longest-match
   * rule may treat a backslash as an ordinary character and run past the
   * real closing quote up to a later one. *)
  let string1 = '"' ([^ '\n' '\r' '\012' '\\' '"'] | '\\' nl | escape)* '"'
  let string2 = '\'' ([^ '\n' '\r' '\012' '\\' '\''] | '\\' nl | escape)* '\''
  let mystring = string1 | string2

  (* Strings that are opened but never closed before the end of the line;
   * they may end in a dangling backslash. *)
  let badstring1 = '"' ([^ '\n' '\r' '\012' '\\' '"'] | '\\' nl | escape)* '\\'?
  let badstring2 = '\'' ([^ '\n' '\r' '\012' '\\' '\''] | '\\' nl | escape)* '\\'?
  let badstring = badstring1 | badstring2
  (* Comments that are opened but never terminated.
   * badcomment1 stops right after a run of stars,
   * badcomment2 stops anywhere else inside the comment body. *)
  let badcomment1 = "/*" [^ '*']* '*'+ ([^ '/' '*'] [^ '*']* '*'+)*
  let badcomment2 = "/*" [^ '*']* ('*'+ [^ '/' '*'] [^ '*']*)*
  let badcomment = badcomment1 | badcomment2
  (* URIs that lack their closing parenthesis:
   * baduri1 - unquoted URI body,
   * baduri2 - well-formed quoted string as body,
   * baduri3 - unterminated quoted string as body. *)
  let baduri1 =
    "url(" w (['!' '#' '$' '%' '&' '*'-'[' ']'-'~'] | nonascii | escape)* w
  let baduri2 = "url(" w mystring w
  let baduri3 = "url(" w badstring
  let baduri = baduri1 | baduri2 | baduri3
  (* A complete comment: slash-star, any text, star-slash.
   * The parenthesised group handles stars inside the body and has to be
   * repeatable (Kleene star) before the final slash. It used to be followed
   * by a literal star character instead, which required the group to occur
   * exactly once and made ordinary single-star comments fail to match. *)
  let comment = "/*" [^ '*']* '*'+ ([^ '/' '*'] [^ '*']* '*'+)* '/'
  (* ident: optional leading dash, one start character, then name characters *)
  let ident = '-'? nmstart nmchar*
  (* name: one or more name characters (used after a hash sign) *)
  let name = nmchar+
  (* num: an integer, or a decimal with an optional integer part *)
  let num = ['0'-'9']* '.' ['0'-'9']+ | ['0'-'9']+
  (* Body of an unquoted URI.
   * The character class must contain the RANGES star..left-bracket and
   * right-bracket..tilde, exactly like baduri1. It used to list the star,
   * the dash and the tilde as three separate characters, so letters, digits,
   * dots and slashes were rejected and every ordinary unquoted URI was
   * reported as a bad uri. The backslash is left out of the ranges because
   * it is covered by the escape macro. *)
  let url = (['!' '#' '$' '%' '&' '*'-'[' ']'-'~'] | nonascii | escape)*
  (* Main entry point: returns the next CSS token read from lexbuf.
   * ocamllex picks the longest match, and the earliest case on a tie.
   * Raises SyntaxError on an unterminated string or URI.
   * NOTE(review): no eof case is visible in this part of the file; confirm
   * that end of input is handled elsewhere. *)
  rule token = parse
    | s                     { S }

    (* Comments produce no token: skip them and read the next token.
     * Each case needs its own action; a case without one is merged by
     * ocamllex into the following case as a regexp alternative, which
     * made every comment come out as a CDO token. *)
    | comment               { token lexbuf }
    (* unclosed comment at EOF *)
    | badcomment            { token lexbuf }

    | "<!--"                { CDO }
    | "-->"                 { CDC }
    | ['~''|']?'=' as op    { RELATION op }
    | ['>''~'] as c         { COMBINATOR (Char.escaped c) }

    | mystring as s         { STRING s }
    | badstring             { raise (SyntaxError "bad string") }

    | ident as id           { IDENT id }
    | '#' (name as nm)      { HASH nm }

    | "@import"             { IMPORT_SYM }
    | "@page"               { PAGE_SYM }
    | "@media"              { MEDIA_SYM }
    | "@charset"            { CHARSET_SYM }

    | '!' (w | comment)* "important"  { IMPORTANT_SYM }

    (* A number directly followed by a unit, a percent sign or any other
     * identifier; u is bound to the whole suffix. *)
    | (num as n) ("em"|"ex"|"px"|"cm"|"mm"|"in"|"pt"|"pc"|"deg"|"rad"|"grad"|
                  "ms"|"s"|"hz"|"khz"|"%"|ident as u)
                            { UNIT_VALUE (float_of_string n, u) }
    | num as n              { NUMBER (float_of_string n) }

    | "url(" w (mystring as uri) w ")"  { URI uri }
    | "url(" w (url as uri) w ")"       { URI uri }
    | baduri                { raise (SyntaxError "bad uri") }

    | (ident as fn) '('     { FUNCTION fn }

    | ')'                   { RPAREN }
    | '{'                   { LBRACE }
    | '}'                   { RBRACE }
    | '['                   { LBRACK }
    | ']'                   { RBRACK }
    | ';'                   { SEMICOL }
    | ':'                   { COLON }
    | ','                   { COMMA }

    | '.'                   { DOT }
    | '+'                   { PLUS }
    | '-'                   { MINUS }
    | '/'                   { SLASH }
    | '*'                   { STAR }

    (*
    | _ as c { raise (SyntaxError ("illegal string character: " ^ Char.escaped c)) }
    *)