c.l 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  /* Named pattern fragments used by the token rules:
   *   D   decimal digit
   *   L   letter or underscore (identifier character)
   *   H   hexadecimal digit
   *   E   exponent part of a floating constant
   *   FS  floating-constant suffix
   *   IS  integer-constant suffix characters (zero or more)
   */
D       [0-9]
L       [a-zA-Z_]
H       [a-fA-F0-9]
E       [Ee][+-]?{D}+
FS      (f|F|l|L)
IS      (u|U|l|L)*

%{
/* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */

/* Helpers defined in the user-code section at the end of this file.  They are
 * declared here because the rule actions call them before their definitions
 * are seen. */
void count();
int comment();
int check_type();

int yylineno = 0;

#include <stdio.h>
#include <string.h>
#include "Python.h"

/* Semantic values are opaque pointers; returntoken() stores a Python string. */
#define YYSTYPE void *
#include "tokens.h"

/* Supplied by the embedding code: the parser object and the callback that
 * YY_INPUT uses to obtain more input. */
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size);

/* Store the matched text in yylval as a Python string and return token `tok`
 * from the scanner.  PyString_FromString() copies its argument, so yytext is
 * passed directly; duplicating it first would leak the duplicate. */
#define returntoken(tok) \
    do { \
        yylval = PyString_FromString(yytext); \
        return (tok); \
    } while (0)

/* Route every scanner read through the py_input callback. */
#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); }
%}
  21. %%
  /* Token rules.  The scanner picks the longest match and, when two patterns
   * match the same length, the rule listed first.  The keyword rules must
   * therefore stay ahead of the generic identifier rule.  Every token rule
   * calls count() to advance the column and then returntoken() to hand the
   * matched text and token code to the parser. */

  /* Block comment opener: comment() consumes the rest of the comment. */
  22. "/*" { comment(); }

  /* Keywords. */
  23. "auto" { count(); returntoken(AUTO); }
  24. "break" { count(); returntoken(BREAK); }
  25. "case" { count(); returntoken(CASE); }
  26. "char" { count(); returntoken(CHAR); }
  27. "const" { count(); returntoken(CONST); }
  28. "continue" { count(); returntoken(CONTINUE); }
  29. "default" { count(); returntoken(DEFAULT); }
  30. "do" { count(); returntoken(DO); }
  31. "double" { count(); returntoken(DOUBLE); }
  32. "else" { count(); returntoken(ELSE); }
  33. "enum" { count(); returntoken(ENUM); }
  34. "extern" { count(); returntoken(EXTERN); }
  35. "float" { count(); returntoken(FLOAT); }
  36. "for" { count(); returntoken(FOR); }
  37. "goto" { count(); returntoken(GOTO); }
  38. "if" { count(); returntoken(IF); }
  39. "int" { count(); returntoken(INT); }
  40. "long" { count(); returntoken(LONG); }
  41. "register" { count(); returntoken(REGISTER); }
  42. "return" { count(); returntoken(RETURN); }
  43. "short" { count(); returntoken(SHORT); }
  44. "signed" { count(); returntoken(SIGNED); }
  45. "sizeof" { count(); returntoken(SIZEOF); }
  46. "static" { count(); returntoken(STATIC); }
  47. "struct" { count(); returntoken(STRUCT); }
  48. "switch" { count(); returntoken(SWITCH); }
  49. "typedef" { count(); returntoken(TYPEDEF); }
  50. "union" { count(); returntoken(UNION); }
  51. "unsigned" { count(); returntoken(UNSIGNED); }
  52. "void" { count(); returntoken(VOID); }
  53. "volatile" { count(); returntoken(VOLATILE); }
  54. "while" { count(); returntoken(WHILE); }

  /* Identifiers: check_type() chooses the token code to return. */
  55. {L}({L}|{D})* { count(); returntoken(check_type()); }

  /* Constants: hexadecimal, leading-zero and decimal integers, character
   * constants, and floating constants all map to CONSTANT. */
  56. 0[xX]{H}+{IS}? { count(); returntoken(CONSTANT); }
  57. 0{D}+{IS}? { count(); returntoken(CONSTANT); }
  58. {D}+{IS}? { count(); returntoken(CONSTANT); }
  59. L?'(\\.|[^\\'])+' { count(); returntoken(CONSTANT); }
  60. {D}+{E}{FS}? { count(); returntoken(CONSTANT); }
  61. {D}*"."{D}+({E})?{FS}? { count(); returntoken(CONSTANT); }
  62. {D}+"."{D}*({E})?{FS}? { count(); returntoken(CONSTANT); }

  /* String literals, with an optional L prefix. */
  63. L?\"(\\.|[^\\"])*\" { count(); returntoken(STRING_LITERAL); }

  /* Multi-character operators. */
  64. "..." { count(); returntoken(ELLIPSIS); }
  65. ">>=" { count(); returntoken(RIGHT_ASSIGN); }
  66. "<<=" { count(); returntoken(LEFT_ASSIGN); }
  67. "+=" { count(); returntoken(ADD_ASSIGN); }
  68. "-=" { count(); returntoken(SUB_ASSIGN); }
  69. "*=" { count(); returntoken(MUL_ASSIGN); }
  70. "/=" { count(); returntoken(DIV_ASSIGN); }
  71. "%=" { count(); returntoken(MOD_ASSIGN); }
  72. "&=" { count(); returntoken(AND_ASSIGN); }
  73. "^=" { count(); returntoken(XOR_ASSIGN); }
  74. "|=" { count(); returntoken(OR_ASSIGN); }
  75. ">>" { count(); returntoken(RIGHT_OP); }
  76. "<<" { count(); returntoken(LEFT_OP); }
  77. "++" { count(); returntoken(INC_OP); }
  78. "--" { count(); returntoken(DEC_OP); }
  79. "->" { count(); returntoken(PTR_OP); }
  80. "&&" { count(); returntoken(BOOL_AND_OP); }
  81. "||" { count(); returntoken(BOOL_OR_OP); }
  82. "<=" { count(); returntoken(LE_OP); }
  83. ">=" { count(); returntoken(GE_OP); }
  84. "==" { count(); returntoken(EQ_OP); }
  85. "!=" { count(); returntoken(NE_OP); }

  /* Punctuators and single-character operators.  The two-character spellings
   * <% %> <: :> yield the same tokens as { } [ ]. */
  86. ";" { count(); returntoken(SEMICOLON); }
  87. ("{"|"<%") { count(); returntoken(LBRACE); }
  88. ("}"|"%>") { count(); returntoken(RBRACE); }
  89. "," { count(); returntoken(COMMA); }
  90. ":" { count(); returntoken(COLON); }
  91. "=" { count(); returntoken(ASSIGN); }
  92. "(" { count(); returntoken(LPAREN); }
  93. ")" { count(); returntoken(RPAREN); }
  94. ("["|"<:") { count(); returntoken(LBRACKET); }
  95. ("]"|":>") { count(); returntoken(RBRACKET); }
  96. "." { count(); returntoken(PERIOD); }
  97. "&" { count(); returntoken(AND_OP); }
  98. "!" { count(); returntoken(BANG); }
  99. "~" { count(); returntoken(TILDE); }
  100. "-" { count(); returntoken(MINUS); }
  101. "+" { count(); returntoken(PLUS); }
  102. "*" { count(); returntoken(STAR); }
  103. "/" { count(); returntoken(SLASH); }
  104. "%" { count(); returntoken(PERCENT); }
  105. "<" { count(); returntoken(LT_OP); }
  106. ">" { count(); returntoken(GT_OP); }
  107. "^" { count(); returntoken(CIRCUMFLEX); }
  108. "|" { count(); returntoken(OR_OP); }
  109. "?" { count(); returntoken(QUESTIONMARK); }

  /* Whitespace only advances the column; no token is returned. */
  110. [ \t\v\n\f] { count(); }

  /* Any other character is dropped.  count() is not called here, so such a
   * character does not advance the column. */
  111. . { /* ignore bad characters */ }
  112. %%
  /*
   * yywrap - end-of-input hook invoked by the generated scanner.
   *
   * Always returns 1, which tells the scanner there is no further input to
   * switch to, so scanning stops.
   */
  int yywrap(void)
  {
      return 1;
  }
  /*
   * comment - skip the body of a block comment.
   *
   * Called after the opening slash-star has been matched.  Consumes input up
   * to and including the closing star-slash.  An unterminated comment ends
   * quietly at end of input.  The `column` counter is not updated for the
   * skipped text.
   *
   * Characters are held in an int and end of input is recognised as either 0
   * or EOF, because scanner generators differ in which one input() reports.
   * Storing the result in a char and testing only for 0 can loop forever on an
   * unterminated comment.
   *
   * Returns 0; the rule action ignores the value.
   */
  int comment(void)
  {
      int prev = 0;

      for (;;) {
          int c = input();

          if (c == 0 || c == EOF) {
              /* Unterminated comment: stop without reading past the end. */
              return 0;
          }
          if (prev == '*' && c == '/') {
              return 0;
          }
          prev = c;
      }
  }
  131. int column = 0;
  132. void count()
  133. {
  134. int i;
  135. for (i = 0; yytext[i] != '\0'; i++)
  136. if (yytext[i] == '\n')
  137. column = 0;
  138. else if (yytext[i] == '\t')
  139. column += 8 - (column % 8);
  140. else
  141. column++;
  142. /*ECHO*/;
  143. }
  144. int check_type()
  145. {
  146. /*
  147. * pseudo code --- this is what it should check
  148. *
  149. * if (yytext == type_name)
  150. * return(TYPE_NAME);
  151. *
  152. * return(IDENTIFIER);
  153. */
  154. /*
  155. * it actually will only return IDENTIFIER
  156. */
  157. return(IDENTIFIER);
  158. }