lexer.mll 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. {
  2. open Lexing
  3. open Parser
  4. exception SyntaxError of string
  (* Record that the lexer has just consumed a line terminator.
   *
   * Bumps the line counter of the current position and marks the current
   * absolute offset as the beginning of the new line.
   *
   * pos_bol is taken from pos.pos_cnum (an absolute offset in the input) and
   * not from lexbuf.lex_curr_pos: the latter is an index into the internal
   * buffer and stops matching the absolute offset once the buffer has been
   * refilled, which would corrupt column numbers (pos_cnum - pos_bol).
   * This is the same update that the standard Lexing.new_line performs and
   * matches how the line marker action of the token rule sets pos_bol.
   *)
  let next_line lexbuf =
    let pos = lexbuf.lex_curr_p in
    lexbuf.lex_curr_p <- {
      pos with
      pos_bol = pos.pos_cnum;
      pos_lnum = pos.pos_lnum + 1
    }
  11. }
  (* Main entry point: returns the next parser token from lexbuf.
   *
   * Whitespace, line terminators, single-line comments, multi-line comments
   * and preprocessor line markers produce no token; the rule recurses until
   * a real token or the end of input is found.
   *
   * Raises SyntaxError on a character that starts no token and on an integer
   * constant that does not fit in an OCaml int.
   *)
  rule token = parse
    | '#' ' '+ ['0'-'9']+ ' '+ '"' [^'"']+ '"' (' '+ ['1'-'4'])* '\n' as marker {
        (*
         * The C preprocessor inserts so called line markers into the output.
         * These markers have the following form:
         *
         *   # <linenum> <filename> <flags>
         *
         * This marker indicates that all lines after this marker up to the
         * next marker come from the file <filename> starting at line
         * <linenum>. After the file name can be zero or more flags, these
         * flags are 1, 2, 3, 4. These flags can be ignored.
         *)
        let relocate lnum filename =
          let pos = lexbuf.lex_curr_p in
          lexbuf.lex_curr_p <- { pos with
            pos_fname = filename;
            pos_lnum = lnum;
            (* The marker includes its newline, so the next line starts here. *)
            pos_bol = pos.pos_cnum }
        in
        (* Read the file name as everything up to the closing quote. A plain
         * string conversion would stop at the first blank and truncate file
         * names that contain spaces. The flags after the name are ignored. *)
        Scanf.sscanf marker "# %d \"%[^\"]\"" relocate;
        token lexbuf
      }
    (*| ['('')''['']''{''}'';'','] as literal { literal }*)

    (* Punctuation *)
    | '('  { LPAREN }
    | ')'  { RPAREN }
    | '['  { LBRACK }
    | ']'  { RBRACK }
    | '{'  { LBRACE }
    | '}'  { RBRACE }
    | ';'  { SEMICOL }
    | ','  { COMMA }

    (* Operators; two-character operators win over their one-character
     * prefixes because ocamllex always takes the longest match. *)
    | '='  { ASSIGN }
    | '!'  { NOT }
    | '+'  { ADD }
    | '-'  { SUB }
    | '*'  { MUL }
    | '/'  { DIV }
    | '%'  { MOD }
    | "<=" { LE }
    | "<"  { LT }
    | ">=" { GE }
    | ">"  { GT }
    | "==" { EQ }
    | "!=" { NE }
    | "&&" { AND }
    | "||" { OR }

    (* Keywords; listed before the identifier case so that they win when
     * both match the same number of characters. *)
    | "if"     { IF }
    | "else"   { ELSE }
    | "do"     { DO }
    | "while"  { WHILE }
    | "for"    { FOR }
    | "return" { RETURN }
    | "extern" { EXTERN }
    | "export" { EXPORT }
    | "int"    { INT }
    | "bool"   { BOOL }
    | "float"  { FLOAT }
    | "void"   { VOID }

    (* Constants *)
    | "true"   { BOOL_CONST true }
    | "false"  { BOOL_CONST false }
    | ['0'-'9']+ as i {
        (* int_of_string fails on digit runs that exceed the int range;
         * report that as a lexical error instead of a bare Failure. *)
        try INT_CONST (int_of_string i)
        with Failure _ ->
          raise (SyntaxError ("integer constant out of range: " ^ i))
      }
    | ['0'-'9']+ '.' ['0'-'9']+ as f { FLOAT_CONST (float_of_string f) }

    (* Identifiers: a letter followed by letters, digits or underscores *)
    | ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '0'-'9' '_']* as id { ID id }

    (* Layout and comments: skipped, with line bookkeeping on terminators *)
    | '\r' | '\n' | "\r\n" { next_line lexbuf; token lexbuf }
    | [' ' '\t']+          { token lexbuf }
    | "//" [^'\n']*        { token lexbuf }
    | "/*"                 { comment lexbuf }

    (* A NUL character is treated like the end of input *)
    | eof | '\000' { EOF }

    (* Anything else cannot start a token *)
    | _ as chr { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }
  (* Multi-line comments.
   *
   * Entered after the opening delimiter has been consumed. Skips everything
   * up to and including the closing delimiter, keeps the line counter up to
   * date on the way, and then resumes normal lexing by returning the next
   * token.
   *
   * Raises SyntaxError when the input ends before the comment is closed;
   * without the eof case the generated lexer would fail with its generic
   * empty-token error instead.
   *)
  and comment = parse
    (* Longest match makes the two-character terminator win over the
     * single-character catch-all below. *)
    | "*/" { token lexbuf }
    | '\r' | '\n' | "\r\n" { next_line lexbuf; comment lexbuf }
    | eof { raise (SyntaxError "unterminated comment") }
    | _ { comment lexbuf }