From 70f032a31d397140fd46ddce8de362d4f55eeb61 Mon Sep 17 00:00:00 2001
From: Taddeus Kroes <taddeuskroes@gmail.com>
Date: Mon, 21 Jul 2014 15:06:55 +0200
Subject: [PATCH] Lexer now correctly tracks line numbers + some general
cleanup
---
lexer.mll | 66 +++++++++++++++++++++++++++++++++++++++++-------------
main.ml | 2 +-
parse.ml | 6 ++---
parser.mly | 4 ++--
types.ml | 4 ++--
5 files changed, 59 insertions(+), 23 deletions(-)
diff --git a/lexer.mll b/lexer.mll
index ddcb346..3b2a870 100644
--- a/lexer.mll
+++ b/lexer.mll
@@ -5,11 +5,21 @@
open Parser
open Types
- let next_line lexbuf =
+ let advance_pos lexbuf =
+ let s = Lexing.lexeme lexbuf in
+ let rec search from lines =
+ try
+ ignore (Str.search_forward (Str.regexp "\r\n\\|\r\\|\n") s from);
+ search (Str.match_end ()) (lines + 1)
+ with Not_found ->
+ lines, String.length s - from
+ in
+ let lines, cols = search 0 0 in
+
let pos = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <- {
- pos with pos_bol = lexbuf.lex_curr_pos;
- pos_lnum = pos.pos_lnum + 1
+ pos with pos_bol = lexbuf.lex_curr_pos - cols;
+ pos_lnum = pos.pos_lnum + lines
}
let strip_quotes s = String.sub s 1 (String.length s - 2)
@@ -75,10 +85,9 @@ let uagent = ('-' ("webkit" | "moz" | "ms" | "o") '-')?
rule token = parse
- | s { S }
-
- | comment (* ignore comments *)
- | badcomment { token lexbuf } (* unclosed comment at EOF *)
+ | "\r\n" | '\r' | '\n' { new_line lexbuf; S }
+ | [' ' '\t' '\012']+ { S }
+ | "/*" { comment lexbuf }
| "<!--" { CDO }
| "-->" { CDC }
@@ -86,7 +95,7 @@ rule token = parse
| ['>' '~'] as c { COMBINATOR (Char.escaped c) }
| mystring as s { STRING (strip_quotes s) }
- | badstring { raise (SyntaxError "bad string") }
+ | badstring { raise (Syntax_error "bad string") }
| '#' (name as nm) { HASH nm }
@@ -99,8 +108,8 @@ rule token = parse
| '@' uagent K E Y F R A M E S { KEYFRAMES_SYM }
| '@' S U P P O R T S { SUPPORTS_SYM }
- | (w | comment)* w A N D w (w | comment)* { WS_AND }
- | (w | comment)* w O R w (w | comment)* { WS_OR }
+ | (s | comment)* s A N D s (s | comment)* { advance_pos lexbuf; WS_AND }
+ | (s | comment)* s O R s (s | comment)* { advance_pos lexbuf; WS_OR }
| O N L Y { ONLY }
| N O T { NOT }
@@ -111,7 +120,7 @@ rule token = parse
| ident as id { IDENT id }
- | '!' (w | comment)* I M P O R T A N T { IMPORTANT_SYM }
+ | '!' (s | comment)* I M P O R T A N T { IMPORTANT_SYM }
| (num as n) '%' { PERCENTAGE (float_of_string n) }
| (num as n) (E M | E X | P X | C M | M M | I N | P T | P C | D E G |
@@ -119,9 +128,12 @@ rule token = parse
{ UNIT_VALUE (float_of_string n, u) }
| num as n { NUMBER (float_of_string n) }
- | "url(" w (mystring as uri) w ")" { URI (strip_quotes uri) }
- | "url(" w (url as uri) w ")" { URI uri }
- | baduri { raise (SyntaxError "bad uri") }
+ | "url(" w (mystring as uri) w ")" { advance_pos lexbuf; URI (strip_quotes uri) }
+ | "url(" w (url as uri) w ")" { advance_pos lexbuf; URI uri }
+ | baduri { raise (Syntax_error "bad uri") }
+ (*
+ | "url(" { url_start lexbuf }
+ *)
| (ident as fn) '(' { FUNCTION fn }
@@ -143,4 +155,28 @@ rule token = parse
| eof | '\000' { EOF }
- | _ as c { raise (SyntaxError ("unexpected '" ^ Char.escaped c ^ "'")) }
+ | _ as c { raise (Syntax_error ("unexpected '" ^ Char.escaped c ^ "'")) }
+
+(* Comments *)
+and comment = parse
+ | '\r' | '\n' | "\r\n" { new_line lexbuf; comment lexbuf }
+ | "*/" { token lexbuf }
+ | eof | '\000' { raise (Syntax_error "unclosed comment") }
+ | _ { comment lexbuf }
+
+(*
+(* URLs *)
+and url_start = parse
+ | '\r' | '\n' | "\r\n" { new_line lexbuf; url_start lexbuf }
+ | [' ' '\t' '\012']+ { url_start lexbuf }
+ | urlc+ as uri { url_end uri lexbuf }
+ | ')' { URI "" }
+ | mystring as s { url_end (strip_quotes s) lexbuf }
+ | badstring { raise (Syntax_error "bad string") }
+ | (eof | '\000' | _) as c { raise (Syntax_error ("unexpected '" ^ c ^ "'")) }
+and url_end uri = parse
+ | '\r' | '\n' | "\r\n" { new_line lexbuf; url_end uri lexbuf }
+ | [' ' '\t' '\012']+ { url_end uri lexbuf }
+ | ')' { URI uri }
+ | (eof | '\000' | _) as c { raise (Syntax_error ("unexpected '" ^ c ^ "'")) }
+*)
diff --git a/main.ml b/main.ml
index 79350fd..eb32bcc 100644
--- a/main.ml
+++ b/main.ml
@@ -89,7 +89,7 @@ let main () =
handle_args args;
exit 0
with
- | LocError (loc, msg) ->
+ | Loc_error (loc, msg) ->
Util.prerr_loc_msg (args.verbose >= 1) loc ("Error: " ^ msg);
| Failure err ->
prerr_endline ("Error: " ^ err);
diff --git a/parse.ml b/parse.ml
index 0e997bc..60049f1 100644
--- a/parse.ml
+++ b/parse.ml
@@ -27,7 +27,7 @@ let parse_input display_name content =
let lexbuf = Lexing.from_string content in
lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = display_name };
try Parser.stylesheet Lexer.token lexbuf with
- | SyntaxError msg ->
- raise (LocError (shift_back lexbuf, msg))
+ | Syntax_error msg ->
+ raise (Loc_error (shift_back lexbuf, msg))
| Parser.Error ->
- raise (LocError (shift_back lexbuf, "syntax error"))
+ raise (Loc_error (shift_back lexbuf, "syntax error"))
diff --git a/parser.mly b/parser.mly
index e6b3d1d..f984538 100644
--- a/parser.mly
+++ b/parser.mly
@@ -33,7 +33,7 @@
| Term left :: Operator op :: Term right :: tl ->
transform_ops (Term (Nary (op, [left; right])) :: tl)
| Term hd :: tl -> hd :: transform_ops tl
- | Operator op :: _ -> raise (SyntaxError ("unexpected operator \"" ^ op ^ "\""))
+ | Operator op :: _ -> raise (Syntax_error ("unexpected operator \"" ^ op ^ "\""))
in
let rec flatten_nary = function
| [] -> []
@@ -277,7 +277,7 @@ term:
{ let h = "[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]" in
if Str.string_match (Str.regexp ("^" ^ h ^ "\\(" ^ h ^ "\\)?$")) hex 0
then Hexcolor (String.lowercase hex)
- else raise (SyntaxError ("invalid color #" ^ hex)) }
+ else raise (Syntax_error ("invalid color #" ^ hex)) }
unary_operator:
| MINUS { "-" }
| PLUS { "+" }
diff --git a/types.ml b/types.ml
index dada6de..d5c7a5c 100644
--- a/types.ml
+++ b/types.ml
@@ -57,6 +57,6 @@ type stylesheet = statement list
type loc = string * int * int * int * int
-exception SyntaxError of string
+exception Syntax_error of string
-exception LocError of loc * string
+exception Loc_error of loc * string
--
GitLab