parser.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. import ply.lex as lex
  2. import ply.yacc as yacc
  3. from statement import Statement as S, Block
  4. # Global statements administration
  5. statements = []
  6. tokens = ('NEWLINE', 'WORD', 'COMMENT', 'DIRECTIVE', 'COMMA', 'COLON')
  7. # Tokens
  8. def t_NEWLINE(t):
  9. r'\n+'
  10. t.lexer.lineno += t.value.count('\n')
  11. return t
  12. def t_COLON(t):
  13. r':'
  14. return t
  15. def t_COMMA(t):
  16. r','
  17. return t
  18. def t_COMMENT(t):
  19. r'\#.*'
  20. t.value = t.value[1:]
  21. return t
  22. def t_DIRECTIVE(t):
  23. r'\..*'
  24. return t
  25. def t_hex_word(t):
  26. r'0x([0-9a-fA-F]{8}|[0-9a-fA-F]{4})'
  27. t.type = 'WORD'
  28. return t
  29. def t_offset_address(t):
  30. r'[0-9]+\([a-zA-Z0-9$_.]+\)'
  31. t.type = 'WORD'
  32. return t
  33. def t_int(t):
  34. r'-?[0-9]+'
  35. t.type = 'WORD'
  36. return t
  37. def t_WORD(t):
  38. r'[a-zA-Z0-9$_.+()-]+'
  39. return t
  40. # Ignore whitespaces
  41. t_ignore = ' \t'
  42. def t_error(t):
  43. print('Illegal character "%s"' % t.value[0])
  44. t.lexer.skip(1)
  45. # Build the lexer
  46. lexer = lex.lex()
  47. # Parsing rules
  48. start = 'input'
  49. def p_input(p):
  50. '''input :
  51. | input line'''
  52. pass
  53. def p_line_instruction(p):
  54. 'line : instruction NEWLINE'
  55. pass
  56. def p_line_comment(p):
  57. 'line : COMMENT NEWLINE'
  58. statements.append(S('comment', p[1], inline=False))
  59. def p_line_inline_comment(p):
  60. 'line : instruction COMMENT NEWLINE'
  61. statements.append(S('comment', p[2], inline=True))
  62. def p_instruction_command(p):
  63. 'instruction : command'
  64. pass
  65. def p_instruction_directive(p):
  66. 'instruction : DIRECTIVE'
  67. statements.append(S('directive', p[1]))
  68. def p_instruction_label(p):
  69. 'instruction : WORD COLON'
  70. statements.append(S('label', p[1]))
  71. def p_command(p):
  72. '''command : WORD WORD COMMA WORD COMMA WORD
  73. | WORD WORD COMMA WORD
  74. | WORD WORD
  75. | WORD'''
  76. statements.append(S('command', p[1], *list(p)[2::2]))
  77. def p_error(p):
  78. print 'Syntax error at "%s" on line %d' % (p.value, lexer.lineno)
  79. # Build YACC
  80. yacc.yacc()
  81. def parse_file(filename):
  82. """Parse a given Assembly file, return a Block with Statement objects
  83. containing the parsed instructions."""
  84. global statements
  85. statements = []
  86. try:
  87. content = open(filename).read()
  88. yacc.parse(content)
  89. except IOError:
  90. raise Exception('File "%s" could not be opened' % filename)
  91. return Block(statements)