lexer.mll 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  {
  open Lexing
  open Parser

  (* Raised by the lexer when the input cannot be tokenised; the payload is
   * a human-readable description of the problem. *)
  exception SyntaxError of string

  (* Record that a newline has just been consumed: increment the line number
   * of the current position and mark the current offset as the beginning of
   * the new line.
   *
   * This delegates to Lexing.new_line, which takes the beginning-of-line
   * offset from pos_cnum (the absolute character offset). The buffer field
   * lex_curr_pos must not be used for this purpose: it is an index into the
   * internal buffer and stops matching pos_cnum once the buffer has been
   * refilled, which would corrupt column numbers on larger inputs. *)
  let next_line lexbuf =
    Lexing.new_line lexbuf
  }
  rule token = parse
  | '#' ' '+ ['0'-'9']+ ' '+ '"' [^'"']+ '"' (' '+ ['1'-'4'])* '\n' as marker {
      (*
       * The C preprocessor inserts so-called line markers into its output.
       * These markers have the following form:
       *
       *   # <linenum> <filename> <flags>
       *
       * A marker indicates that all lines after it, up to the next marker,
       * come from the file <filename>, starting at line <linenum>. The file
       * name may be followed by zero or more flags (1, 2, 3 or 4), which
       * are ignored here.
       *
       * The pattern above consumes the newline that ends the marker, so
       * once it has matched, the current position is already at the start
       * of line <linenum>. The position is therefore set to <linenum>
       * itself, and the beginning-of-line offset is reset so that column
       * numbers on that line are computed correctly.
       *)
      let relocate line filename =
        let pos = lexbuf.lex_curr_p in
        lexbuf.lex_curr_p <- {
          pos with
          pos_fname = filename;
          pos_lnum = line;
          pos_bol = pos.pos_cnum
        }
      in
      (* The file name is read with a negated character set rather than
       * with the plain string conversion: the latter stops only at
       * whitespace, so it would swallow the closing quote (making the scan
       * fail) and would cut file names containing spaces short. *)
      Scanf.sscanf marker "# %d \"%[^\"]\"" relocate;
      token lexbuf
    }

  (* Punctuation *)
  | '(' { LPAREN }
  | ')' { RPAREN }
  | '[' { LBRACK }
  | ']' { RBRACK }
  | '{' { LBRACE }
  | '}' { RBRACE }
  | ';' { SEMICOL }
  | ',' { COMMA }

  (* Operators *)
  | '=' { ASSIGN }
  | '!' { NOT }
  | '+' { ADD }
  | '-' { SUB }
  | '*' { MUL }
  | '/' { DIV }
  | '%' { MOD }
  | "<=" { LE }
  | "<" { LT }
  | ">=" { GE }
  | ">" { GT }
  | "==" { EQ }
  | "!=" { NE }
  | "&&" { AND }
  | "||" { OR }

  (* Keywords. They are listed before the identifier rule so that they win
   * when both match the same text; a longer identifier that merely starts
   * with a keyword still lexes as an identifier (longest match). *)
  | "if" { IF }
  | "else" { ELSE }
  | "do" { DO }
  | "while" { WHILE }
  | "for" { FOR }
  | "return" { RETURN }
  | "extern" { EXTERN }
  | "export" { EXPORT }
  | "int" { INT }
  | "bool" { BOOL }
  | "float" { FLOAT }
  | "void" { VOID }

  (* Literals and identifiers *)
  | "true" { BOOL_CONST true }
  | "false" { BOOL_CONST false }
  | ['0'-'9']+ as i {
      (* int_of_string raises Failure when the literal does not fit in an
       * int; report that as a lexical error like any other. *)
      try INT_CONST (int_of_string i)
      with Failure _ ->
        raise (SyntaxError ("integer constant out of range: " ^ i))
    }
  | ['0'-'9']+ '.' ['0'-'9']+ as f { FLOAT_CONST (float_of_string f) }
  | ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '0'-'9' '_']* as id { ID id }

  (* Whitespace and comments produce no token *)
  | '\r' | '\n' | "\r\n" { next_line lexbuf; token lexbuf }
  | [' ' '\t']+ { token lexbuf }
  | "//" [^'\n']* { token lexbuf }
  | "/*" {
      (* Block comments are skipped by a dedicated rule. A single greedy
       * pattern would run from the first comment opener to the last
       * comment terminator in the input, discarding any code in between,
       * and would not count the newlines inside the comment. *)
      comment lexbuf;
      token lexbuf
    }

  | eof { EOF }
  | _ as chr { raise (SyntaxError ("unexpected char: " ^ Char.escaped chr)) }

  (* Skip the remainder of a block comment whose opener has already been
   * consumed. Returns unit once the first terminator has been read, keeps
   * line numbers up to date, and raises SyntaxError if the input ends
   * before the comment is closed. *)
  and comment = parse
  | "*/" { () }
  | '\r' | '\n' | "\r\n" { next_line lexbuf; comment lexbuf }
  | eof { raise (SyntaxError "unterminated comment") }
  | _ { comment lexbuf }