parser.mly 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. %{
  2. (* CSS grammar based on:
  3. * - http://www.w3.org/TR/CSS2/grammar.html
  4. * - http://www.w3.org/TR/css3-mediaqueries/
  5. * - http://www.w3.org/TR/css3-fonts/
  6. * - http://www.w3.org/TR/css3-namespace/
  7. * - http://www.w3.org/TR/css3-animations/
  8. * - http://www.w3.org/TR/css3-conditional/
  9. *)
  10. open Lexing
  11. open Types
  12. open Util
  13. type term = Term of expr | Operator of string
  14. let concat_terms terms =
  15. let rec transform_ops = function
  16. | [] -> []
  17. | Term left :: Operator op :: Term right :: tl ->
  18. transform_ops (Term (Nary (op, [left; right])) :: tl)
  19. | Term hd :: tl -> hd :: transform_ops tl
  20. | Operator op :: _ -> raise (Syntax_error ("unexpected operator \"" ^ op ^ "\""))
  21. in
  22. let rec flatten_nary = function
  23. | [] -> []
  24. | Nary (op1, Nary (op2, left) :: right) :: tl when op1 = op2 ->
  25. flatten_nary (Nary (op1, flatten_nary left @ flatten_nary right) :: tl)
  26. | hd :: tl -> hd :: flatten_nary tl
  27. in
  28. match terms |> transform_ops |> flatten_nary with
  29. | [hd] -> hd
  30. | l -> Concat l
  31. (* TODO: move this to a normalization stage, because the syntax should be
  32. * preserved during parsing (e.g. for -echo command) *)
  33. let unary_number = function
  34. | Unary ("-", Number (n, u)) -> Number (-.n, u)
  35. | Unary ("+", (Number _ as n)) -> n
  36. | value -> value
  37. let rec append_addons base = function
  38. | [] ->
  39. base
  40. | `Id id :: tl ->
  41. append_addons (Id (base, id)) tl
  42. | `Class cls :: tl ->
  43. append_addons (Class (base, cls)) tl
  44. | `Attribute (attr, value) :: tl ->
  45. append_addons (Attribute (base, attr, value)) tl
  46. | `Pseudo_class (f, args) :: tl ->
  47. append_addons (Pseudo_class (base, f, args)) tl
  48. | `Pseudo_element elem :: tl ->
  49. append_addons (Pseudo_element (base, elem)) tl
  50. %}
(* Tokens *)

(* Tokens without a payload: the whitespace token S, the comment delimiters
 * CDO and CDC, and the at-rule keywords *)
%token S CDO CDC IMPORT_SYM PAGE_SYM MEDIA_SYM CHARSET_SYM FONT_FACE_SYM
%token NAMESPACE_SYM SUPPORTS_SYM IMPORTANT_SYM
(* Numeric literals carrying their value as a float *)
%token <float> PERCENTAGE NUMBER
(* Number with a unit: the grammar destructures the pair as (number, unit) *)
%token <float * string> UNIT_VALUE
(* Tokens carrying text. The strings of KEYFRAMES_SYM and VIEWPORT_SYM are
 * stored in the resulting Keyframes and Viewport nodes; presumably this is the
 * vendor prefix of the at-rule - TODO confirm against the lexer *)
%token <string> KEYFRAMES_SYM VIEWPORT_SYM COMBINATOR RELATION STRING IDENT HASH
%token <string> URI FUNCTION
(* Punctuation and keywords without a payload; the OR token is disabled *)
%token LPAREN RPAREN LBRACE RBRACE LBRACK RBRACK SEMICOL COLON DOUBLE_COLON
%token COMMA DOT PLUS MINUS SLASH STAR ONLY AND (*OR*) NOT FROM TO EOF
(* Used by the supports conjunction and disjunction rules; WS_AND is also
 * accepted wherever all_and is used *)
%token WS_AND WS_OR

(* Start symbol *)
(* Parsing starts at the stylesheet rule, which yields a Types.stylesheet *)
%type <Types.stylesheet> stylesheet
%start stylesheet
  64. %%
  65. (* list with arbitrary whitespace between elements and separators *)
  66. %inline ig2(a, b): a b {}
  67. %inline ig3(a, b, c): a b c {}
  68. %inline wslist(sep, x): S* l=separated_list(sep, terminated(x, S*)) { l }
  69. %inline wspreceded(prefix, x): p=preceded(ig2(prefix, S*), x) { p }
  70. %inline all_and: AND | WS_AND {}
  71. cd: CDO S* | CDC S* {}
  72. stylesheet:
  73. | charset = charset? S* cd*
  74. imports = terminated(import, cd*)*
  75. namespaces = terminated(namespace, cd*)*
  76. statements = terminated(nested_statement, cd*)*
  77. EOF
  78. { let charset = match charset with None -> [] | Some c -> [c] in
  79. charset @ imports @ namespaces @ statements }
  80. nested_statement:
  81. | s=ruleset | s=media | s=page | s=font_face_rule | s=keyframes_rule
  82. | s=supports_rule | s=viewport_rule
  83. { s }
  84. group_rule_body:
  85. | LBRACE S* statements=nested_statement* RBRACE S*
  86. { statements }
  87. charset:
  88. | CHARSET_SYM name=STRING S* SEMICOL
  89. { Charset name }
  90. import:
  91. | IMPORT_SYM S* tgt=string_or_uri media=media_query_list SEMICOL S*
  92. { Import (tgt, media) }
  93. %inline string_or_uri:
  94. | str=STRING { Strlit str }
  95. | uri=URI { Uri uri }
  96. namespace:
  97. | NAMESPACE_SYM S* prefix=terminated(namespace_prefix, S*)? ns=string_or_uri S* SEMICOL S*
  98. { Namespace (prefix, ns) }
  99. %inline namespace_prefix:
  100. | prefix=IDENT
  101. { prefix }
  102. media:
  103. | MEDIA_SYM queries=media_query_list rulesets=group_rule_body
  104. { Media (queries, rulesets) }
  105. media_query_list:
  106. | S*
  107. { [] }
  108. | S* hd=media_query tl=wspreceded(COMMA, media_query)*
  109. { hd :: tl }
  110. media_query:
  111. | prefix=only_or_not? typ=media_type S* feat=wspreceded(all_and, media_expr)*
  112. { (prefix, Some typ, feat) }
  113. | hd=media_expr tl=wspreceded(all_and, media_expr)*
  114. { (None, None, (hd :: tl)) }
  115. %inline only_or_not:
  116. | ONLY S* { "only" }
  117. | NOT S* { "not" }
  118. %inline media_type:
  119. | id=IDENT { id }
  120. media_expr:
  121. | LPAREN S* feature=media_feature S* value=wspreceded(COLON, expr)? RPAREN S*
  122. { (feature, value) }
  123. %inline media_feature:
  124. | id=IDENT { id }
  125. page:
  126. | PAGE_SYM S* pseudo=pseudo_page? decls=decls_block
  127. { Page (pseudo, decls) }
  128. pseudo_page:
  129. | COLON pseudo=IDENT S*
  130. { pseudo }
  131. font_face_rule:
  132. | FONT_FACE_SYM S* LBRACE S* hd=descriptor_declaration?
  133. tl=wspreceded(SEMICOL, descriptor_declaration?)* RBRACE S*
  134. { Font_face (filter_none (hd :: tl)) }
  135. descriptor_declaration:
  136. | name=property COLON S* value=expr
  137. { (name, value) }
  138. keyframes_rule:
  139. | pre=KEYFRAMES_SYM S* id=IDENT S* LBRACE S* rules=keyframe_ruleset* RBRACE S*
  140. { Keyframes (pre, id, rules) }
  141. keyframe_ruleset:
  142. | selector=keyframe_selector S* decls=decls_block
  143. { (selector, decls) }
  144. keyframe_selector:
  145. | FROM { Ident "from" }
  146. | TO { Ident "to" }
  147. | n=PERCENTAGE { Number (n, Some "%") }
  148. supports_rule:
  149. | SUPPORTS_SYM S* cond=supports_condition S* body=group_rule_body
  150. { Supports (cond, body) }
  151. supports_condition:
  152. | c=supports_negation
  153. | c=supports_conjunction
  154. | c=supports_disjunction
  155. | c=supports_condition_in_parens
  156. { c }
  157. supports_condition_in_parens:
  158. | LPAREN S* c=supports_condition S* RPAREN
  159. | c=supports_declaration_condition
  160. (*XXX: | c=general_enclosed*)
  161. { c }
  162. supports_negation:
  163. | NOT S+ c=supports_condition_in_parens
  164. { Not c }
  165. supports_conjunction:
  166. | hd=supports_condition_in_parens tl=preceded(WS_AND, supports_condition_in_parens)+
  167. { And (hd :: tl) }
  168. supports_disjunction:
  169. | hd=supports_condition_in_parens tl=preceded(WS_OR, supports_condition_in_parens)+
  170. { Or (hd :: tl) }
  171. supports_declaration_condition:
  172. | LPAREN S* decl=supports_declaration RPAREN
  173. { Decl decl }
  174. supports_declaration:
  175. | name=property S* COLON S* value=expr
  176. { (name, value) }
  177. (*XXX:
  178. general_enclosed:
  179. | ( FUNCTION | LPAREN ) ( any | unused )* RPAREN
  180. { Enclosed expr }
  181. any:
  182. [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
  183. | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
  184. | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
  185. | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
  186. ]
  187. S*;
  188. unused : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;
  189. *)
  190. viewport_rule:
  191. | pre=VIEWPORT_SYM S* decls=decls_block
  192. { Viewport (pre, decls) }
  193. %inline decls_block:
  194. | LBRACE S* hd=declaration? tl=wspreceded(SEMICOL, declaration?)* RBRACE S*
  195. { filter_none (hd :: tl) }
  196. ruleset:
  197. | selectors_hd = selector
  198. selectors_tl = wspreceded(COMMA, selector)*
  199. decls = decls_block
  200. { Ruleset (selectors_hd :: selectors_tl, decls) }
  201. selector:
  202. | simple=simple_selector S*
  203. { simple }
  204. | left=simple_selector S+ right=selector
  205. { Combinator (left, " ", right) }
  206. | left=simple_selector S* com=combinator right=selector
  207. { Combinator (left, com, right) }
  208. %inline combinator:
  209. | PLUS S* { "+" }
  210. | c=COMBINATOR S* { c }
  211. simple_selector:
  212. | elem=element_name addons=element_addon*
  213. { append_addons elem addons }
  214. | addons=element_addon+
  215. { append_addons No_element addons }
  216. %inline element_addon:
  217. | id=HASH { `Id id }
  218. | addon=cls
  219. | addon=attrib
  220. | addon=pseudo { addon }
  221. element_name:
  222. | tag=IDENT { Element (String.lowercase tag) }
  223. | STAR { All_elements }
  224. cls:
  225. | DOT name=IDENT
  226. { `Class name }
  227. attrib:
  228. | LBRACK S* left=IDENT S* RBRACK
  229. { `Attribute (String.lowercase left, None) }
  230. | LBRACK S* left=IDENT S* op=RELATION right=rel_value RBRACK
  231. { `Attribute (String.lowercase left, Some (op, right)) }
  232. %inline rel_value:
  233. | S* id=IDENT S* { Ident id }
  234. | S* s=STRING S* { Strlit s }
  235. pseudo:
  236. | COLON id=IDENT
  237. { `Pseudo_class (String.lowercase id, None) }
  238. | COLON f=FUNCTION args=wslist(COMMA, simple_selector) RPAREN
  239. { `Pseudo_class (String.lowercase f, Some args) }
  240. | DOUBLE_COLON id=IDENT
  241. { `Pseudo_element (String.lowercase id) }
  242. declaration:
  243. | name=property S* COLON S* value=expr important=boption(ig2(IMPORTANT_SYM, S*))
  244. { (String.lowercase name, value, important) }
  245. %inline property:
  246. | name=IDENT { name }
  247. | STAR name=IDENT { "*" ^ name } (* IE7 property name hack *)
  248. expr:
  249. | l=exprl { concat_terms l }
  250. %inline exprl:
  251. | hd=term tl=opterm* { Term hd :: List.concat tl }
  252. %inline opterm:
  253. | t=term { [Term t] }
  254. | op=operator t=term { [Operator op; Term t] }
  255. %inline operator:
  256. | SLASH S* { "/" }
  257. | COMMA S* { "," }
  258. term:
  259. | op=unary_operator v=numval S* { unary_number (Unary (op, v)) }
  260. | v=numval S* { v }
  261. | str=STRING S* { Strlit str }
  262. | id=IDENT S* { Ident (String.lowercase id) }
  263. | ONLY S* { Ident "only" }
  264. | NOT S* { Ident "not" }
  265. | AND S* { Ident "and" }
  266. | FROM S* { Ident "from" }
  267. | TO S* { Ident "to" }
  268. | uri=URI S* { Uri uri }
  269. | fn=FUNCTION arg=expr RPAREN S* { Function (String.lowercase fn, arg) }
  270. | key=IDENT S* COLON S* value=term
  271. { Key_value (key, ":", value) }
  272. | key=IDENT S* DOT S* value=term
  273. { Key_value (key, ".", value) }
  274. | key=IDENT S* rel=RELATION S* value=term
  275. {
  276. if rel = "="
  277. then Key_value (key, "=", value)
  278. else raise (Syntax_error ("unexpected '" ^ rel ^ "'"))
  279. }
  280. | hex=HASH S*
  281. {
  282. let h = "[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
  283. if Str.string_match (Str.regexp ("^" ^ h ^ "\\(" ^ h ^ "\\)?$")) hex 0
  284. then Hexcolor (String.lowercase hex)
  285. else raise (Syntax_error ("invalid color #" ^ hex))
  286. }
  287. unary_operator:
  288. | MINUS { "-" }
  289. | PLUS { "+" }
  290. %inline numval:
  291. | n=NUMBER { Number (n, None) }
  292. | v=UNIT_VALUE { let n, u = v in Number (n, Some (String.lowercase u)) }
  293. | n=PERCENTAGE { Number (n, Some "%") }