Code cleanup in pybison's python file.

parent 858c8461
......@@ -19,17 +19,21 @@ If you wish to use this software in a commercial application,
and wish to depart from the GPL licensing requirements,
please contact the author and apply for a commercial license.
"""
#@+others
#@+node:imports
import sys, os, sha, re, imp, traceback, xml.dom, xml.dom.minidom, types
import sys
import os
import sha
import re
import imp
import traceback
import xml.dom
import xml.dom.minidom
import types
import distutils.sysconfig
import distutils.ccompiler
from bison_ import ParserEngine, unquoted
#@-node:imports
#@+node:globals
reSpaces = re.compile("\\s+")
reSpaces = re.compile('\\s+')
class ParserSyntaxError(Exception):
......@@ -48,7 +52,7 @@ class BisonError:
"""
_pyBisonError = 1
def __init__(self, value="syntax error"):
def __init__(self, value='syntax error'):
self.value = value
......@@ -81,7 +85,7 @@ class BisonNode:
self.kw = kw
def __str__(self):
return "<BisonNode:%s>" % self.target
return '<BisonNode:%s>' % self.target
def __repr__(self):
return str(self)
......@@ -105,7 +109,8 @@ class BisonNode:
return self
return self.values[item[0]][item[1:]]
else:
raise TypeError("Can only index %s objects with an int or a list/tuple" % self.__class.__name__)
raise TypeError('Can only index %s objects with an int or a'
' list/tuple' % self.__class.__name__)
def __len__(self):
......@@ -122,17 +127,18 @@ class BisonNode:
For debugging - prints a recursive dump of a parse tree node and its children
"""
specialAttribs = ['option', 'target', 'names', 'values']
indents = " " * indent * 2
indents = ' ' * indent * 2
#print "%s%s: %s %s" % (indents, self.target, self.option, self.names)
print "%s%s:" % (indents, self.target)
print '%s%s:' % (indents, self.target)
for name, val in self.kw.items() + zip(self.names, self.values):
if name in specialAttribs or name.startswith("_"):
if name in specialAttribs or name.startswith('_'):
continue
if isinstance(val, BisonNode):
val.dump(indent+1)
else:
print indents + " %s=%s" % (name, val)
print indents + ' %s=%s' % (name, val)
def toxml(self):
"""
......@@ -145,7 +151,8 @@ class BisonNode:
def toprettyxml(self, indent=' ', newl='\n', encoding=None):
"""
returns a human-readable xml serialisation of this node and its children
Returns a human-readable xml serialisation of this node and its
children.
"""
return self.toxmldoc().toprettyxml(indent=indent,
newl=newl,
......@@ -153,7 +160,8 @@ class BisonNode:
def toxmldoc(self):
"""
Returns the node and its children as an xml.dom.minidom.Document object
Returns the node and its children as an xml.dom.minidom.Document
object.
"""
d = xml.dom.minidom.Document()
d.appendChild(self.toxmlelem(d))
......@@ -161,7 +169,7 @@ class BisonNode:
def toxmlelem(self, docobj):
"""
Returns a DOM Element object of this node and its children
Returns a DOM Element object of this node and its children.
"""
specialAttribs = ['option', 'target', 'names', 'values']
......@@ -170,16 +178,18 @@ class BisonNode:
# set attribs
for name, val in self.kw.items():
if name in ['names', 'values'] or name.startswith("_"):
if name in ['names', 'values'] or name.startswith('_'):
continue
x.setAttribute(name, str(val))
#x.setAttribute('target', self.target)
#x.setAttribute('option', self.option)
# and add the children
for name, val in zip(self.names, self.values):
if name in specialAttribs or name.startswith("_"):
if name in specialAttribs or name.startswith('_'):
continue
if isinstance(val, BisonNode):
x.appendChild(val.toxmlelem(docobj))
else:
......@@ -205,20 +215,20 @@ class BisonParser(object):
# override these if you need to
# command and options for running yacc/bison, except for filename arg
bisonCmd = ["bison", "-d", "-v", '-t']
bisonCmd = ['bison', '-d', '-v', '-t']
bisonFile = "tmp.y"
bisonCFile = "tmp.tab.c"
bisonHFile = "tmp.tab.h" # name of header file generated by bison cmd
bisonFile = 'tmp.y'
bisonCFile = 'tmp.tab.c'
bisonHFile = 'tmp.tab.h' # name of header file generated by bison cmd
bisonCFile1 = "tmp.bison.c" # c output file from bison gets renamed to this
bisonHFile1 = "tokens.h" # bison-generated header file gets renamed to this
bisonCFile1 = 'tmp.bison.c' # c output file from bison gets renamed to this
bisonHFile1 = 'tokens.h' # bison-generated header file gets renamed to this
flexCmd = ["flex", ] # command and options for running [f]lex, except for filename arg
flexFile = "tmp.l"
flexCFile = "lex.yy.c"
flexCmd = ['flex', ] # command and options for running [f]lex, except for filename arg
flexFile = 'tmp.l'
flexCFile = 'lex.yy.c'
flexCFile1 = "tmp.lex.c" # c output file from lex gets renamed to this
flexCFile1 = 'tmp.lex.c' # c output file from lex gets renamed to this
cflags_pre = ['-fPIC'] # = CFLAGS added before all arguments.
cflags_post = ['-O3','-g'] # = CFLAGS added after all arguments.
......@@ -266,11 +276,11 @@ class BisonParser(object):
fileobj = kw.get('file', None)
if fileobj:
if type(fileobj) == type(""):
if isinstance(fileobj, str):
try:
fileobj = open(fileobj, "rb")
fileobj = open(fileobj, 'rb')
except:
raise Exception("Cannot open input file %s" % fileobj)
raise Exception('Cannot open input file %s' % fileobj)
self.file = fileobj
else:
self.file = sys.stdin
......@@ -286,7 +296,7 @@ class BisonParser(object):
# if engine lib name not declared, invent one
if not self.bisonEngineLibName:
self.bisonEngineLibName = self.__class__.__module__ + "-parser"
self.bisonEngineLibName = self.__class__.__module__ + '-parser'
# get an engine
self.engine = ParserEngine(self)
......@@ -320,7 +330,7 @@ class BisonParser(object):
# % (targetname, repr(self.last))
else:
if self.verbose:
print "no handler for %s, using default" % targetname
print 'no handler for %s, using default' % targetname
self.last = BisonNode(targetname, option=option, names=names, values=values)
# reset any resulting errors (assume they've been handled)
......@@ -342,16 +352,16 @@ class BisonParser(object):
- debug - enables garrulous parser debugging output, default 0
"""
if self.verbose:
print "Parser.run: calling engine"
print 'Parser.run: calling engine'
# grab keywords
fileobj = kw.get('file', self.file)
if type(fileobj) == type(""):
if isinstance(fileobj, str):
filename = fileobj
try:
fileobj = open(fileobj, "rb")
fileobj = open(fileobj, 'rb')
except:
raise Exception("Cannot open input file %s" % fileobj)
raise Exception('Cannot open input file "%s"' % fileobj)
else:
filename = None
fileobj = None
......@@ -375,18 +385,20 @@ class BisonParser(object):
self.engine.runEngine(debug)
if self.lasterror:
#print "Got error: %s" % repr(self.error)
if filename != None:
raise ParserSyntaxError("%s:%d: '%s' near '%s'" % ((filename,) + self.lasterror))
raise ParserSyntaxError('%s:%d: "%s" near "%s"'
% ((filename,) + self.lasterror))
else:
raise ParserSyntaxError("Line %d: '%s' near '%s'" % self.lasterror)
raise ParserSyntaxError('Line %d: "%s" near "%s"'
% self.lasterror)
# restore old values
self.file = oldfile
self.read = oldread
if self.verbose:
print "Parser.run: back from engine"
print 'Parser.run: back from engine'
return self.last
def read(self, nbytes):
......@@ -400,15 +412,19 @@ class BisonParser(object):
"""
# default to stdin
if self.verbose:
print "Parser.read: want %s bytes" % nbytes
print 'Parser.read: want %s bytes' % nbytes
bytes = self.file.readline(nbytes)
if self.verbose:
print "Parser.read: got %s bytes" % len(bytes)
print 'Parser.read: got %s bytes' % len(bytes)
return bytes
def _error(self, linenum, msg, tok):
print "Parser: line %s: syntax error '%s' before '%s'" % (linenum, msg, tok)
print 'Parser: line %s: syntax error "%s" before "%s"' \
% (linenum, msg, tok)
def error(self, value):
"""
......@@ -484,15 +500,15 @@ class BisonParser(object):
namespace = globals()
objname = xmlobj.tagName
classname = objname + "_Node"
classname = objname + '_Node'
classobj = namespace.get(classname, None)
namespacekeys = namespace.keys()
# barf if node is not a known parse node or token
if (not classobj) and objname not in self.tokens:
raise Exception("Cannot reconstitute %s: can't find required node class or token %s" % (
objname, classname))
raise Exception('Cannot reconstitute %s: can\'t find required'
' node class or token %s' % (objname, classname))
if classobj:
nodeobj = classobj()
......@@ -500,28 +516,27 @@ class BisonParser(object):
# add the attribs
for k,v in xmlobj.attributes.items():
setattr(nodeobj, k, v)
else:
nodeobj = None
#print "----------------"
#print "objname=%s" % repr(objname)
#print "classname=%s" % repr(classname)
#print "classobj=%s" % repr(classobj)
#print "nodeobj=%s" % repr(nodeobj)
#print '----------------'
#print 'objname=%s' % repr(objname)
#print 'classname=%s' % repr(classname)
#print 'classobj=%s' % repr(classobj)
#print 'nodeobj=%s' % repr(nodeobj)
# now add the children
for child in xmlobj.childNodes:
#print "%s attributes=%s" % (child, child.attributes.items())
#print '%s attributes=%s' % (child, child.attributes.items())
childname = child.attributes['target'].value
#print "childname=%s" % childname
if childname + "_Node" in namespacekeys:
#print "we have a node for class %s" % classname
#print 'childname=%s' % childname
if childname + '_Node' in namespacekeys:
#print 'we have a node for class %s' % classname
childobj = self.loadxmlobj(child, namespace)
else:
# it's a token
childobj = child.childNodes[0].nodeValue
#print "got token %s=%s" % (childname, childobj)
#print 'got token %s=%s' % (childname, childobj)
nodeobj.names.append(childname)
nodeobj.values.append(childobj)
......@@ -533,10 +548,9 @@ class BisonParser(object):
def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
"""
Rips the rules, tokens and precedences from a bison file, and the
verbatim text from a lex file and generates
a boilerplate python file containing a Parser class with handler
methods and grammar attributes
Rips the rules, tokens and precedences from a bison file, and the verbatim
text from a lex file and generates a boilerplate python file containing a
Parser class with handler methods and grammar attributes.
Arguments:
- bisonfileName - name of input bison script
......@@ -544,56 +558,60 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
- pyfileName - name of output python file
- generateClasses - flag - default 0 - if 1, causes a unique class to
be defined for each parse target, and for the corresponding target
handler method in the main Parser class to use this class when creating
the node.
handler method in the main Parser class to use this class when
creating the node.
"""
# try to create output file
try:
pyfile = file(pyfileName, "w")
pyfile = file(pyfileName, 'w')
except:
raise Exception("Cannot create output file '%s'" % pyfileName)
raise Exception('Cannot create output file "%s"' % pyfileName)
# try to open/read the bison file
try:
rawBison = file(bisonfileName).read()
except:
raise Exception("Cannot open bison file %s" % bisonfileName)
raise Exception('Cannot open bison file "%s"' % bisonfileName)
# try to open/read the lex file
try:
rawLex = file(lexfileName).read()
except:
raise Exception("Cannot open lex file %s" % lexfileName)
raise Exception('Cannot open lex file %s' % lexfileName)
# break up into the three '%%'-separated sections
try:
prologue, rulesRaw, epilogue = rawBison.split("\n%%\n")
prologue, rulesRaw, epilogue = rawBison.split('\n%%\n')
except:
raise Exception(
"File %s is not a properly formatted bison file"
" (needs 3 sections separated by %%%%" % (bisonfileName)
'File %s is not a properly formatted bison file'
' (needs 3 sections separated by %%%%' % (bisonfileName)
)
# --------------------------------------
# process prologue
prologue = prologue.split("%}")[-1].strip() # ditch the C code
prologue = re.sub("\\n([\t ]+)", " ", prologue) # join broken lines
prologue = prologue.split('%}')[-1].strip() # ditch the C code
prologue = re.sub('\\n([\t ]+)', ' ', prologue) # join broken lines
#prologueLines = [line.strip() for line in prologue.split("\n")]
lines = prologue.split("\n")
#prologueLines = [line.strip() for line in prologue.split('\n')]
lines = prologue.split('\n')
tmp = []
for line in lines:
tmp.append(line.strip())
prologueLines = tmp
prologueLines = filter(None, prologueLines)
tokens = []
precRules = []
for line in prologueLines:
words = reSpaces.split(line)
kwd = words[0]
args = words[1:]
if kwd == '%token':
tokens.extend(args)
elif kwd in ['%left', '%right', '%nonassoc']:
......@@ -603,37 +621,29 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
# -------------------------------------------------------------
# process rules
rulesRaw = re.sub("\\n([\t ]+)", " ", rulesRaw) # join broken lines
#rulesLines = filter(None, [r.strip() for r in rulesRaw.split(";")])
rulesLines = []
#for r in rulesRaw.split(";"):
for r in re.split(unquoted % ";", rulesRaw):
r = r.strip()
if r:
rulesLines.append(r)
rulesRaw = re.sub('\\n([\t ]+)', ' ', rulesRaw) # join broken lines
rulesLines = filter('', map(str.strip, re.split(unquoted % ';', rulesRaw)))
rules = []
for rule in rulesLines:
#print "--"
#print '--'
#print repr(rule)
#tgt, terms = rule.split(":")
#tgt, terms = rule.split(':')
try:
tgt, terms = re.split(unquoted % ":", rule)
tgt, terms = re.split(unquoted % ':', rule)
except ValueError:
print "Error in rule: %s" % rule
print 'Error in rule: %s' % rule
raise
tgt, terms = tgt.strip(), terms.strip()
#terms = [t.strip() for t in terms.split("|")]
#terms = [t.strip() for t in terms.split('|')]
#terms = [reSpaces.split(t) for t in terms]
tmp = []
#for t in terms.split("|"):
for t in re.split(unquoted % r"\|", terms):
#for t in terms.split('|'):
for t in re.split(unquoted % r'\|', terms):
t = t.strip()
tmp.append(reSpaces.split(t))
......@@ -642,7 +652,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
rules.append((tgt, terms))
# now we have our rulebase, we can churn out our skeleton Python file
pyfile.write("\n".join([
pyfile.write('\n'.join([
'#!/usr/bin/env python',
'',
'"""',
......@@ -654,8 +664,8 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
'',
'from bison import BisonParser, BisonNode, BisonError',
'',
'bisonFile = "%s" # original bison file' % bisonfileName,
'lexFile = "%s" # original flex file' % lexfileName,
'bisonFile = \'%s\' # original bison file' % bisonfileName,
'lexFile = \'%s\' # original flex file' % lexfileName,
'\n',
]))
......@@ -674,7 +684,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
'',
' def __str__(self):',
' """Customise as needed"""',
' return "<%s instance at 0x%x>" % (self.__class__.__name__, hash(self))',
' return \'<%s instance at 0x%x>\' % (self.__class__.__name__, hash(self))',
'',
' def __repr__(self):',
' """Customise as needed"""',
......@@ -695,9 +705,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
# now spit out class decs for every parse target
for target, options in rules:
tmp = []
for t in options:
tmp.append(" ".join(t))
tmp = map(' '.join, options)
# totally self-indulgent grammatical pedantry
if target[0].lower() in ['a','e','i','o','u']:
......@@ -720,7 +728,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
# start churning out the class dec
pyfile.write("\n".join([
pyfile.write('\n'.join([
'class Parser(BisonParser):',
' """',
' bison Parser class generated automatically by bison2py from the',
......@@ -740,7 +748,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
# add the default node class
if not generateClasses:
pyfile.write("\n".join([
pyfile.write('\n'.join([
' # -------------------------------------------------',
' # Default class to use for creating new parse nodes',
' # -------------------------------------------------',
......@@ -749,22 +757,21 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
]))
# add the name of the dynamic library we need
libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] + "-engine"
pyfile.write("\n".join([
libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] \
+ '-engine'
pyfile.write('\n'.join([
' # --------------------------------------------',
' # basename of binary parser engine dynamic lib',
' # --------------------------------------------',
' bisonEngineLibName = "%s"' % (parser.buildDirectory + libfileName),
' bisonEngineLibName = \'%s\'' % (parser.buildDirectory + libfileName),
'\n',
]))
# add the tokens
#pyfile.write(' tokens = (%s,)\n\n' % ", ".join(['"%s"' % t for t in tokens]))
tmp = []
for t in tokens:
#tmp.append('"'+t+'"')
tmp.append(t)
toks = ", ".join(tmp)
#pyfile.write(' tokens = (%s,)\n\n' % ', '.join([''%s'' % t for t in tokens]))
toks = ', '.join(tokens)
pyfile.write(' # ----------------------------------------------------------------\n')
pyfile.write(' # lexer tokens - these must match those in your lex script (below)\n')
pyfile.write(' # ----------------------------------------------------------------\n')
......@@ -776,59 +783,52 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
pyfile.write(' # ------------------------------\n')
pyfile.write(' precedences = (\n')
for prec in precRules:
tmp = []
for p in prec[1]:
#tmp.append('"'+p+'"')
tmp.append(p)
precline = ", ".join(tmp)
#pyfile.write(' ("%s", (%s,)),\n' % (
pyfile.write(' ("%s", %s,),\n' % (
precline = ', '.join(prec[1])
pyfile.write(' (\'%s\', %s,),\n' % (
prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s'
tmp, # quote-wrapped targets
)
)
pyfile.write(' )\n\n'),
pyfile.write("\n".join([
pyfile.write('\n'.join([
' # ---------------------------------------------------------------',
' # Declare the start target here (by name)',
' # ---------------------------------------------------------------',
' start = "%s"' % startTarget,
' start = \'%s\'' % startTarget,
'\n',
]))
# now the interesting bit - write the rule handler methods
pyfile.write("\n".join([
pyfile.write('\n'.join([
' # ---------------------------------------------------------------',
' # These methods are the python handlers for the bison targets.',
' # (which get called by the bison code each time the corresponding',
' # parse target is unambiguously reached)',
' #',
" # WARNING - don't touch the method docstrings unless you know what",
" # you are doing - they are in bison rule syntax, and are passed",
" # verbatim to bison to build the parser engine library.",
' # WARNING - don\'t touch the method docstrings unless you know what',
' # you are doing - they are in bison rule syntax, and are passed',
' # verbatim to bison to build the parser engine library.',
' # ---------------------------------------------------------------',
'\n',
]))
for target, options in rules:
tmp = []
for t in options:
tmp.append(" ".join(t))
tmp = map(' '.join, options)
if generateClasses:
nodeClassName = target + "_Node"
nodeClassName = target + '_Node'
else:
nodeClassName = 'self.defaultNodeClass'
pyfile.write("\n".join([
pyfile.write('\n'.join([
' def on_%s(self, target, option, names, values):' % target,
' """',
' %s' % target,
' : ' + "\n | ".join(tmp),
' : ' + '\n | '.join(tmp),
' """',
' return %s(' % nodeClassName,
' target="%s",' % target,
' target=\'%s\',' % target,
' option=option,',
' names=names,',
' values=values)',
......@@ -836,7 +836,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
]))
# now the ugly bit - add the raw lex script
pyfile.write("\n".join([
pyfile.write('\n'.join([
' # -----------------------------------------',
' # raw lex script, verbatim here',
' # -----------------------------------------',
......@@ -851,14 +851,14 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
]))
# and now, create a main for testing which either reads stdin, or a filename arg
pyfile.write("\n".join([
pyfile.write('\n'.join([
'def usage():',
' print "%s: PyBison parser derived from %s and %s" % (sys.argv[0], bisonFile, lexFile)',
' print "Usage: %s [-k] [-v] [-d] [filename]" % sys.argv[0]',
' print " -k Keep temporary files used in building parse engine lib"',
' print " -v Enable verbose messages while parser is running"',
' print " -d Enable garrulous debug messages from parser engine"',
' print " filename path of a file to parse, defaults to stdin"',
' print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)',
' print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]',
' print \' -k Keep temporary files used in building parse engine lib\'',
' print \' -v Enable verbose messages while parser is running\'',
' print \' -d Enable garrulous debug messages from parser engine\'',
' print \' filename path of a file to parse, defaults to stdin\'',
'',
'def main(*args):',
' """',
......@@ -870,21 +870,21 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
' debug = 0',
' filename = None',
'',
' for s in ["-h", "-help", "--h", "--help", "-?"]:',
' for s in [\'-h\', \'-help\', \'--h\', \'--help\', \'-?\']:',
' if s in args:',
' usage()',
' sys.exit(0)',
'',
' if len(args) > 0:',
' if "-k" in args:',
' if \'-k\' in args:',
' keepfiles = 1',
' args.remove("-k")',
' if "-v" in args:',
' args.remove(\'-k\')',
' if \'-v\' in args:',
' verbose = 1',
' args.remove("-v")',
' if "-d" in args:',
' args.remove(\'-v\')',
' if \'-d\' in args:',
' debug = 1',
' args.remove("-d")',
' args.remove(\'-d\')',
' if len(args) > 0:',
' filename = args[0]',
'',
......@@ -892,7 +892,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
' tree = p.run(file=filename, debug=debug)',
' return tree',
'',
'if __name__ == "__main__":',
'if __name__ == \'__main__\':',
' main(*(sys.argv[1:]))',
'',
'',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment