parser.mly 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. %{
  2. (* CSS grammar based on:
  3. * - http://www.w3.org/TR/CSS2/grammar.html
  4. * - http://www.w3.org/TR/css3-mediaqueries/
  5. *)
  6. open Lexing
  7. open Types
  8. let ( |> ) a b = b a
  9. (* TODO: move this to utils *)
  10. let rec filter_none = function
  11. | [] -> []
  12. | None :: tl -> filter_none tl
  13. | Some hd :: tl -> hd :: filter_none tl
  14. type term_t = Term of expr | Operator of string
  15. let rec transform_value f = function
  16. | Concat terms -> Concat (List.map (transform_value f) terms)
  17. | Function (name, arg) -> Function (name, transform_value f arg)
  18. | Unary (op, term) -> Unary (op, transform_value f term)
  19. | Nary (op, terms) -> Nary (op, List.map (transform_value f) terms)
  20. | value -> f value
  21. let concat_terms terms =
  22. let rec transform_ops = function
  23. | [] -> []
  24. | Term left :: Operator op :: Term right :: tl ->
  25. transform_ops (Term (Nary (op, [left; right])) :: tl)
  26. | Term hd :: tl -> hd :: transform_ops tl
  27. | Operator op :: _ -> raise (SyntaxError ("unexpected operator \"" ^ op ^ "\""))
  28. in
  29. let rec flatten_nary = function
  30. | [] -> []
  31. | Nary (op, Nary (op2, left) :: right) :: tl when op2 = op ->
  32. Nary (op, flatten_nary left @ flatten_nary right) :: flatten_nary tl
  33. | hd :: tl -> hd :: flatten_nary tl
  34. in
  35. match terms |> transform_ops |> flatten_nary with
  36. | [hd] -> hd
  37. | l -> Concat l
  38. %}
  39. (* Tokens *)
  40. %token S CDO CDC IMPORT_SYM PAGE_SYM MEDIA_SYM CHARSET_SYM FONT_FACE_SYM
  41. %token NAMESPACE_SYM KEYFRAMES_SYM SUPPORTS_SYM IMPORTANT_SYM
  42. %token <float> PERCENTAGE NUMBER
  43. %token <float * string> UNIT_VALUE
  44. %token <string> COMBINATOR RELATION STRING IDENT HASH URI FUNCTION
  45. %token LPAREN RPAREN LBRACE RBRACE LBRACK RBRACK SEMICOL COLON COMMA DOT PLUS
  46. %token MINUS SLASH STAR ONLY AND OR NOT FROM TO EOF
  47. (* Start symbol *)
  48. %type <Types.stylesheet> stylesheet
  49. %start stylesheet
  50. %%
  51. (* list with arbitrary whitespace between elements and separators *)
  52. %inline wslist(sep, x): S? l=separated_list(sep, terminated(x, S?)) { l }
  53. %inline wspreceded(prefix, x): p=preceded(pair(prefix, S?), x) { p }
  54. cd: CDO S? | CDC S? {}
  55. stylesheet:
  56. | charset = charset? S? cd*
  57. imports = terminated(import, cd*)*
  58. namespaces = terminated(namespace, cd*)*
  59. statements = terminated(nested_statement, cd*)*
  60. EOF
  61. { let charset = match charset with None -> [] | Some c -> [c] in
  62. charset @ imports @ namespaces @ statements }
  63. nested_statement:
  64. | s=ruleset | s=media | s=page | s=font_face_rule | s=keyframes_rule
  65. | s=supports_rule
  66. { s }
  67. group_rule_body:
  68. | LBRACE S? statements=nested_statement* RBRACE S?
  69. { statements }
  70. charset:
  71. | CHARSET_SYM name=STRING S? SEMICOL
  72. { Charset name }
  73. import:
  74. | IMPORT_SYM S? tgt=string_or_uri media=media_query_list SEMICOL S?
  75. { Import (tgt, media) }
  76. %inline string_or_uri:
  77. | str=STRING { Strlit str }
  78. | uri=URI { Uri uri }
  79. namespace:
  80. | NAMESPACE_SYM S? prefix=terminated(namespace_prefix, S?)? ns=string_or_uri S? SEMICOL S?
  81. { Namespace (prefix, ns) }
  82. %inline namespace_prefix:
  83. | prefix=IDENT
  84. { prefix }
  85. media:
  86. | MEDIA_SYM queries=media_query_list rulesets=group_rule_body
  87. { Media (queries, rulesets) }
  88. media_query_list:
  89. | S?
  90. { [] }
  91. | S? hd=media_query tl=wspreceded(COMMA, media_query)*
  92. { hd :: tl }
  93. media_query:
  94. | prefix=only_or_not? typ=media_type S? feat=wspreceded(AND, media_expr)*
  95. { (prefix, Some typ, feat) }
  96. | hd=media_expr tl=wspreceded(AND, media_expr)*
  97. { (None, None, (hd :: tl)) }
  98. %inline only_or_not:
  99. | ONLY S? { "only" }
  100. | NOT S? { "not" }
  101. %inline media_type:
  102. | id=IDENT { id }
  103. media_expr:
  104. | LPAREN S? feature=media_feature S? value=wspreceded(COLON, expr)? RPAREN S?
  105. { (feature, value) }
  106. %inline media_feature:
  107. | id=IDENT { id }
  108. page:
  109. | PAGE_SYM S? pseudo=pseudo_page? decls=decls_block
  110. { Page (pseudo, decls) }
  111. pseudo_page:
  112. | COLON pseudo=IDENT S?
  113. { pseudo }
  114. font_face_rule:
  115. | FONT_FACE_SYM S? LBRACE S? hd=descriptor_declaration?
  116. tl=wspreceded(SEMICOL, descriptor_declaration?)* RBRACE S?
  117. { Font_face (filter_none (hd :: tl)) }
  118. descriptor_declaration:
  119. | name=property COLON S? value=expr
  120. { (name, value) }
  121. keyframes_rule:
  122. | KEYFRAMES_SYM S? id=IDENT S? LBRACE S? rules=keyframe_ruleset* RBRACE S?
  123. { Keyframes (id, rules) }
  124. keyframe_ruleset:
  125. | selector=keyframe_selector S? decls=decls_block
  126. { (selector, decls) }
  127. keyframe_selector:
  128. | FROM { Ident "from" }
  129. | TO { Ident "to" }
  130. | n=PERCENTAGE { Number (n, Some "%") }
  131. supports_rule:
  132. | SUPPORTS_SYM S? cond=supports_condition S? body=group_rule_body
  133. { Supports (cond, body) }
  134. supports_condition:
  135. | c=supports_negation
  136. | c=supports_conjunction
  137. | c=supports_disjunction
  138. | c=supports_condition_in_parens
  139. { c }
  140. supports_condition_in_parens:
  141. | LPAREN S? c=supports_condition S? RPAREN
  142. | c=supports_declaration_condition
  143. (*XXX: | c=general_enclosed*)
  144. { c }
  145. supports_negation:
  146. | NOT S c=supports_condition_in_parens
  147. { Not c }
  148. supports_conjunction:
  149. | hd=supports_condition_in_parens tl=preceded(delimited(S, AND, S), supports_condition_in_parens)+
  150. { And (hd :: tl) }
  151. supports_disjunction:
  152. | hd=supports_condition_in_parens tl=preceded(delimited(S, OR, S), supports_condition_in_parens)+
  153. { Or (hd :: tl) }
  154. supports_declaration_condition:
  155. | LPAREN S? decl=declaration RPAREN
  156. { Decl decl }
  157. (*XXX:
  158. general_enclosed:
  159. | ( FUNCTION | LPAREN ) ( any | unused )* RPAREN
  160. { }
  161. any:
  162. [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
  163. | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
  164. | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
  165. | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
  166. ]
  167. S*;
  168. unused : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;
  169. *)
  170. %inline decls_block:
  171. | LBRACE S? hd=declaration? tl=wspreceded(SEMICOL, declaration?)* RBRACE S?
  172. { filter_none (hd :: tl) }
  173. ruleset:
  174. | selectors_hd = selector
  175. selectors_tl = wspreceded(COMMA, selector)*
  176. decls = decls_block
  177. { Ruleset (selectors_hd :: selectors_tl, decls) }
  178. selector:
  179. | simple=simple_selector S?
  180. { Simple simple }
  181. | left=simple_selector S right=selector
  182. { Combinator (Simple left, " ", right) }
  183. | left=simple_selector S? com=combinator right=selector
  184. { Combinator (Simple left, com, right) }
  185. %inline combinator:
  186. | PLUS S? { "+" }
  187. | c=COMBINATOR S? { c }
  188. simple_selector:
  189. | elem=element_name addons=element_addon*
  190. { elem ^ String.concat "" addons }
  191. | addons=element_addon+
  192. { String.concat "" addons }
  193. %inline element_addon:
  194. | a=HASH | a=cls | a=attrib | a=pseudo
  195. { a }
  196. element_name:
  197. | tag=IDENT { tag }
  198. | STAR { "*" }
  199. cls:
  200. | DOT name=IDENT
  201. { "." ^ name }
  202. attrib:
  203. | LBRACK S? left=IDENT S? right=pair(RELATION, rel_value)? RBRACK
  204. { let right = match right with None -> "" | Some (op, term) -> op ^ term in
  205. "[" ^ left ^ right ^ "]" }
  206. %inline rel_value:
  207. | S? id=IDENT S? { id }
  208. | S? s=STRING S? { "\"" ^ s ^ "\"" }
  209. pseudo:
  210. | COLON id=IDENT
  211. { ":" ^ id }
  212. | COLON f=FUNCTION S? arg=terminated(IDENT, S?)? RPAREN
  213. { let arg = match arg with None -> "" | Some id -> id in
  214. ":" ^ f ^ "(" ^ arg ^ ")" }
  215. declaration:
  216. | name=property S? COLON S? value=expr important=boption(pair(IMPORTANT_SYM, S?))
  217. { (String.lowercase name, value, important) }
  218. %inline property: name=IDENT { name }
  219. expr:
  220. | l=exprl { concat_terms l }
  221. %inline exprl:
  222. | hd=term tl=opterm* { Term hd :: List.concat tl }
  223. %inline opterm:
  224. | t=term { [Term t] }
  225. | op=operator t=term { [Operator op; Term t] }
  226. %inline operator:
  227. | SLASH S? { "/" }
  228. | COMMA S? { "," }
  229. term:
  230. | op=unary_operator v=numval S? { Unary (op, v) }
  231. | v=numval S? { v }
  232. | str=STRING S? { Strlit str }
  233. | id=IDENT S? { Ident id }
  234. | uri=URI S? { Uri uri }
  235. | fn=FUNCTION arg=expr RPAREN S? { Function (fn, arg) }
  236. | hex=HASH S?
  237. { let h = "[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
  238. if Str.string_match (Str.regexp ("^" ^ h ^ "\\(" ^ h ^ "\\)?$")) hex 0
  239. then Hexcolor (String.lowercase hex)
  240. else raise (SyntaxError ("invalid color #" ^ hex)) }
  241. unary_operator:
  242. | MINUS { "-" }
  243. | PLUS { "+" }
  244. %inline numval:
  245. | n=NUMBER { Number (n, None) }
  246. | v=UNIT_VALUE { let n, u = v in Number (n, Some u) }
  247. | n=PERCENTAGE { Number (n, Some "%") }