# parser.py
import ply.lex as lex
import ply.yacc as yacc

from program import Program
from statement import Statement as S
  5. # Global statements administration
  6. statements = []
  7. tokens = ('NEWLINE', 'WORD', 'COMMENT', 'DIRECTIVE', 'COMMA', 'COLON')
  8. # Tokens
  9. def t_NEWLINE(t):
  10. r'\n+'
  11. t.lexer.lineno += t.value.count('\n')
  12. return t
  13. def t_COLON(t):
  14. r':'
  15. return t
  16. def t_COMMA(t):
  17. r','
  18. return t
  19. def t_COMMENT(t):
  20. r'\#.*'
  21. t.value = t.value[1:]
  22. return t
  23. def t_DIRECTIVE(t):
  24. r'\..*'
  25. return t
  26. def t_hex_word(t):
  27. r'0x([0-9a-fA-F]{8}|[0-9a-fA-F]{4})'
  28. t.type = 'WORD'
  29. return t
  30. def t_offset_address(t):
  31. r'[0-9]+\([a-zA-Z0-9$_.]+\)'
  32. t.type = 'WORD'
  33. return t
  34. def t_int(t):
  35. r'-?[0-9]+'
  36. t.type = 'WORD'
  37. t.value = int(t.value)
  38. return t
  39. def t_WORD(t):
  40. r'[a-zA-Z0-9$_.+()-]+'
  41. return t
  42. # Ignore whitespaces
  43. t_ignore = ' \t'
  44. def t_error(t):
  45. print('Illegal character "%s"' % t.value[0])
  46. t.lexer.skip(1)
  47. # Build the lexer
  48. lexer = lex.lex()
  49. # Parsing rules
  50. start = 'input'
  51. def p_input(p):
  52. '''input :
  53. | input line'''
  54. pass
  55. def p_line_instruction(p):
  56. 'line : instruction NEWLINE'
  57. pass
  58. def p_line_comment(p):
  59. 'line : COMMENT NEWLINE'
  60. statements.append(S('comment', p[1]))
  61. def p_line_inline_comment(p):
  62. 'line : instruction COMMENT NEWLINE'
  63. # Add the inline comment to the last parsed statement
  64. statements[-1].options['comment'] = p[2]
  65. def p_instruction_command(p):
  66. 'instruction : command'
  67. pass
  68. def p_instruction_directive(p):
  69. 'instruction : DIRECTIVE'
  70. statements.append(S('directive', p[1]))
  71. def p_instruction_label(p):
  72. 'instruction : WORD COLON'
  73. statements.append(S('label', p[1]))
  74. def p_command(p):
  75. '''command : WORD WORD COMMA WORD COMMA WORD
  76. | WORD WORD COMMA WORD
  77. | WORD WORD
  78. | WORD'''
  79. statements.append(S('command', p[1], *list(p)[2::2]))
  80. def p_error(p):
  81. print 'Syntax error at "%s" on line %d' % (p.value, lexer.lineno)
  82. # Build YACC
  83. yacc.yacc()
  84. def parse_file(filename):
  85. """Parse a given Assembly file, return a Block with Statement objects
  86. containing the parsed instructions."""
  87. global statements
  88. statements = []
  89. try:
  90. content = open(filename).read()
  91. yacc.parse(content)
  92. except IOError:
  93. raise Exception('File "%s" could not be opened' % filename)
  94. return Program(statements)