Improved exception handling and moved bisonToPython to its own file.

parent 2331ecff
...@@ -82,26 +82,24 @@ cdef public object py_callback(object parser, char *target, int option, \ ...@@ -82,26 +82,24 @@ cdef public object py_callback(object parser, char *target, int option, \
cdef void *val cdef void *val
cdef char *termname cdef char *termname
#if parser.verbose:
# print 'py_callback: called with nargs=%d' % nargs
names = PyList_New(nargs) names = PyList_New(nargs)
values = PyList_New(nargs) values = PyList_New(nargs)
Py_INCREF(names) Py_INCREF(names)
Py_INCREF(values) Py_INCREF(values)
#for i in range(nargs):
# print 'i=%d' % i , <char*>va_arg(ap, str_type), \
# hex(<int>va_arg(ap, str_type))
for i in range(nargs): for i in range(nargs):
termname = <char*>va_arg(ap, str_type) termname = <char*>va_arg(ap, str_type)
PyList_SetItem(names, i, termname) PyList_SetItem(names, i, termname)
Py_INCREF(termname) Py_INCREF(termname)
val = <void *>va_arg(ap, void_type) val = <void *>va_arg(ap, void_type)
if val:
valobj = <object>val valobj = <object>val
else:
valobj = None
PyList_SetItem(values, i, valobj) PyList_SetItem(values, i, valobj)
Py_INCREF(valobj) Py_INCREF(valobj)
...@@ -114,6 +112,8 @@ cdef public object py_callback(object parser, char *target, int option, \ ...@@ -114,6 +112,8 @@ cdef public object py_callback(object parser, char *target, int option, \
#signal.signal(signal.SIGALRM, parser.handle_timeout) #signal.signal(signal.SIGALRM, parser.handle_timeout)
#signal.alarm(parser.timeout) #signal.alarm(parser.timeout)
va_end(ap)
res = parser._handle(target, option, names, values) res = parser._handle(target, option, names, values)
#signal.alarm(0) #signal.alarm(0)
...@@ -121,8 +121,6 @@ cdef public object py_callback(object parser, char *target, int option, \ ...@@ -121,8 +121,6 @@ cdef public object py_callback(object parser, char *target, int option, \
#if parser.verbose: #if parser.verbose:
# print 'py_callback: handler returned:', res # print 'py_callback: handler returned:', res
va_end(ap)
return res return res
# callback routine for reading input # callback routine for reading input
...@@ -140,6 +138,12 @@ cdef public void py_input(object parser, char *buf, int *result, int max_size): ...@@ -140,6 +138,12 @@ cdef public void py_input(object parser, char *buf, int *result, int max_size):
if parser.verbose: if parser.verbose:
print '\npy_input: got %s bytes' % buflen print '\npy_input: got %s bytes' % buflen
if buflen == 0 and parser.file:
# Marks the Python file object as being closed from Python's point of
# view. This does not close the associated C stream (which is not
# necessary here, otherwise use "os.close(0)").
parser.file.close()
import sys, os, sha, re, imp, traceback import sys, os, sha, re, imp, traceback
import shutil import shutil
...@@ -269,6 +273,42 @@ cdef class ParserEngine: ...@@ -269,6 +273,42 @@ cdef class ParserEngine:
if parser.verbose: if parser.verbose:
print 'Successfully loaded library' print 'Successfully loaded library'
def generate_exception_handler(self):
    """
    Build the C fragment that aborts a grammar action on a flagged result.

    The emitted code inspects the action result ($$): when it is non-NULL,
    is not Py_None and carries a "_pyBisonError" attribute, the fragment
    takes a reference on Py_None and invokes YYERROR.  The yyerror()
    reporting of the parser's "lasterror" is emitted only as C comments,
    i.e. it is disabled in the generated code.

    Returns:
        - the fragment as a single string, one C source line per '\n'
    """
    fragment = [
        ' if ($$ && $$ != Py_None)\n',
        ' {\n',
        ' if (PyObject_HasAttrString($$, "_pyBisonError"))\n',
        ' {\n',
        # the lasterror reporting below is written out as C comments only
        ' //PyObject* lasterror = PyObject_GetAttrString(py_parser, "lasterror");\n',
        ' //if (lasterror && PyString_Check(lasterror))\n',
        ' // yyerror(PyString_AsString(lasterror));\n',
        ' //else\n',
        ' // yyerror("No \\"lasterror\\" attribute set in BisonError or not a string");\n',
        ' Py_INCREF(Py_None);\n',
        ' YYERROR;\n',
        ' }\n',
        ' }\n',
    ]
    return ''.join(fragment)
def buildLib(self): def buildLib(self):
""" """
Creates the parser engine lib Creates the parser engine lib
...@@ -322,7 +362,6 @@ cdef class ParserEngine: ...@@ -322,7 +362,6 @@ cdef class ParserEngine:
"%{", "%{",
'', '',
'#include "Python.h"', '#include "Python.h"',
"#include <stdio.h>",
"extern FILE *yyin;", "extern FILE *yyin;",
"extern int yylineno;" "extern int yylineno;"
"extern char *yytext;", "extern char *yytext;",
...@@ -415,7 +454,7 @@ cdef class ParserEngine: ...@@ -415,7 +454,7 @@ cdef class ParserEngine:
# now, we have the correct terms count # now, we have the correct terms count
action = action % (i + 1) action = action % (i + 1)
# assemble the full rule + action, ad to list # assemble the full rule + action, add to list
action = action + ",\n " action = action + ",\n "
action = action + ",\n ".join(args) + "\n );\n" action = action + ",\n ".join(args) + "\n );\n"
...@@ -424,13 +463,10 @@ cdef class ParserEngine: ...@@ -424,13 +463,10 @@ cdef class ParserEngine:
action = action + " Py_INCREF(Py_None);\n" action = action + " Py_INCREF(Py_None);\n"
action = action + " yyclearin;\n" action = action + " yyclearin;\n"
action = action + " if ($$ && $$ != Py_None && PyObject_HasAttrString($$, \"_pyBisonError\"))\n" action = action + self.generate_exception_handler()
action = action + " {\n"
action = action + " yyerror(PyString_AsString(PyObject_GetAttrString(py_parser, \"lasterror\")));\n" action = action + ' }\n'
action = action + " Py_INCREF(Py_None);\n"
action = action + " YYERROR;\n"
action = action + " }\n"
action = action + " }\n"
options.append(" ".join(option) + action) options.append(" ".join(option) + action)
idx = idx + 1 idx = idx + 1
write(" | ".join(options) + " ;\n\n") write(" | ".join(options) + " ;\n\n")
......
#@+leo-ver=4
#@+node:@file src/python/bison.py
""" """
Wrapper module for interfacing with Bison (yacc) Wrapper module for interfacing with Bison (yacc)
Written April 2004 by David McNab <david@freenet.org.nz> Written April 2004 by David McNab <david@freenet.org.nz>
Copyright (c) 2004 by David McNab, all rights reserved. Copyright (c) 2004 by David McNab, all rights reserved.
Released under the GNU General Public License, a copy Released under the GNU General Public License, a copy of which should appear in
of which should appear in this distribution in the file this distribution in the file called 'COPYING'. If this file is missing, then
called 'COPYING'. If this file is missing, then you can you can obtain a copy of the GPL license document from the GNU website at
obtain a copy of the GPL license document from the GNU http://www.gnu.org.
website at http://www.gnu.org.
This software is released with no warranty whatsoever. This software is released with no warranty whatsoever. Use it at your own
Use it at your own risk. risk.
If you wish to use this software in a commercial application, If you wish to use this software in a commercial application, and wish to
and wish to depart from the GPL licensing requirements, depart from the GPL licensing requirements, please contact the author and apply
please contact the author and apply for a commercial license. for a commercial license.
""" """
import sys import sys
import os #import imp
import sha
import re
import imp
import traceback import traceback
import xml.dom import xml.dom
import xml.dom.minidom import xml.dom.minidom
import types import types
import distutils.sysconfig #import distutils.sysconfig
import distutils.ccompiler #import distutils.ccompiler
from bison_ import ParserEngine, unquoted
reSpaces = re.compile('\\s+') from bison_ import ParserEngine
class ParserSyntaxError(Exception): class ParserSyntaxError(Exception):
...@@ -44,7 +37,7 @@ class TimeoutError(Exception): ...@@ -44,7 +37,7 @@ class TimeoutError(Exception):
pass pass
class BisonError: class BisonError(object):
""" """
Flags an error to yyparse() Flags an error to yyparse()
...@@ -56,6 +49,13 @@ class BisonError: ...@@ -56,6 +49,13 @@ class BisonError:
self.value = value self.value = value
class BisonException(Exception):
    """
    Exception flavour of a parser error flag.

    Attributes:
        - value - the message or object describing the error
    """
    # Marker attribute: the generated C action code tests for an attribute
    # named "_pyBisonError" on a handler's result to decide whether to
    # trigger YYERROR.
    _pyBisonError = 1

    def __init__(self, value='syntax error'):
        # Hand the value to Exception so that str(exc) and exc.args carry
        # the message even when the default value is used.
        Exception.__init__(self, value)
        self.value = value
class BisonNode: class BisonNode:
""" """
Generic class for wrapping parse targets. Generic class for wrapping parse targets.
...@@ -323,16 +323,15 @@ class BisonParser(object): ...@@ -323,16 +323,15 @@ class BisonParser(object):
print 'BisonParser._handle: call handler at line %s with: %s' \ print 'BisonParser._handle: call handler at line %s with: %s' \
% (hdlrline, str((targetname, option, names, values))) % (hdlrline, str((targetname, option, names, values)))
#self.last = handler(target=targetname, option=option, names=names,
# values=values)
try: try:
self.last = handler(target=targetname, option=option, names=names, self.last = handler(target=targetname, option=option,
values=values) names=names, values=values)
except Exception as e: except:
self.lasterror = e #traceback.print_exception(*sys.exc_info())
print type(e), str(e) return self.error(sys.exc_info())
#traceback.print_last() # raise
#traceback.print_stack()
traceback.print_stack()
raise
#if self.verbose: #if self.verbose:
# print 'handler for %s returned %s' \ # print 'handler for %s returned %s' \
...@@ -343,8 +342,6 @@ class BisonParser(object): ...@@ -343,8 +342,6 @@ class BisonParser(object):
self.last = BisonNode(targetname, option=option, names=names, values=values) self.last = BisonNode(targetname, option=option, names=names, values=values)
# reset any resulting errors (assume they've been handled) # reset any resulting errors (assume they've been handled)
if self.lasterror:
print 'lasterror:', self.lasterror
#self.lasterror = None #self.lasterror = None
# assumedly the last thing parsed is at the top of the tree # assumedly the last thing parsed is at the top of the tree
...@@ -391,25 +388,37 @@ class BisonParser(object): ...@@ -391,25 +388,37 @@ class BisonParser(object):
if read: if read:
self.read = read self.read = read
if self.verbose and self.file.closed:
print 'Parser.run(): self.file', self.file, 'is closed'
# TODO: add option to fail on first error.
while not self.file.closed:
# do the parsing job, spew if error # do the parsing job, spew if error
self.last = None
self.lasterror = None self.lasterror = None
self.engine.runEngine(debug) self.engine.runEngine(debug)
if self.lasterror: if self.lasterror:
if filename != None: self.report_last_error(filename, self.lasterror)
raise ParserSyntaxError('%s:%d: "%s" near "%s"'
% ((filename,) + self.lasterror)) if self.verbose:
else: print 'Parser.run: back from engine'
raise ParserSyntaxError('Line %d: "%s" near "%s"'
% self.lasterror) if self.verbose and not self.file.closed:
print 'last:', self.last
if self.verbose:
print 'last:', self.last
# restore old values # restore old values
self.file = oldfile self.file = oldfile
self.read = oldread self.read = oldread
if self.verbose: if self.verbose:
print 'Parser.run: back from engine' print '------------------ result=', self.last
# TODO: return last result (see while loop):
# return self.last[:-1]
return self.last return self.last
def read(self, nbytes): def read(self, nbytes):
...@@ -433,7 +442,7 @@ class BisonParser(object): ...@@ -433,7 +442,7 @@ class BisonParser(object):
return bytes return bytes
def _error(self, linenum, msg, tok): def _error(self, linenum, msg, tok):
# TODO: should this function be removed?
print 'Parser: line %s: syntax error "%s" before "%s"' \ print 'Parser: line %s: syntax error "%s" before "%s"' \
% (linenum, msg, tok) % (linenum, msg, tok)
...@@ -441,25 +450,53 @@ class BisonParser(object): ...@@ -441,25 +450,53 @@ class BisonParser(object):
""" """
Return the result of this method from a handler to notify a syntax error Return the result of this method from a handler to notify a syntax error
""" """
# TODO: should this function be removed?
self.lasterror = value self.lasterror = value
return BisonError(value) return BisonError(value)
def exception(self, exception):
    """
    Remember an exception on the parser and wrap it in a BisonException.

    Arguments:
        - exception - the object to record as self.lastexception

    Returns:
        - a BisonException constructed from that object
    """
    # TODO: should this function be removed?
    wrapped = BisonException(exception)
    self.lastexception = exception
    return wrapped
def report_last_error(self, filename, error):
    """
    Report the error recorded by the last parse run.

    Arguments:
        - filename - name of the parsed file, or None; when given it is
          prefixed to the syntax error message
        - error - either a (linenum, message, token) tuple describing a
          syntax error, or an exception triple as returned by
          sys.exc_info()

    Syntax errors raise ParserSyntaxError unless self.interactive is set,
    in which case the message is written to stderr instead.  Exception
    triples are always printed as a traceback on stderr.
    """
    # An exception triple starts with the exception type rather than a line
    # number.  Check this before any formatting: the '%d' conversion below
    # cannot format a triple, which previously failed with a TypeError
    # whenever a filename was supplied.
    if not isinstance(error[0], int):
        traceback.print_exception(*error)
        return

    if filename is not None:
        msg = '%s:%d: "%s" near "%s"' % ((filename,) + error)
    else:
        msg = 'Line %d: "%s" near "%s"' % error

    if not self.interactive:
        raise ParserSyntaxError(msg)

    sys.stderr.write(msg + '\n')
def toxml(self): def toxml(self):
""" """
Serialises the parse tree and returns it as a raw xml string Serialises the parse tree and returns it as a raw xml string
""" """
# TODO: should this function be moved to another file?
return self.last.toxml() return self.last.toxml()
def toxmldoc(self): def toxmldoc(self):
""" """
Returns an xml.dom.minidom.Document object containing the parse tree Returns an xml.dom.minidom.Document object containing the parse tree
""" """
# TODO: should this function be moved to another file?
return self.last.toxmldoc() return self.last.toxmldoc()
def toprettyxml(self): def toprettyxml(self):
""" """
Returns a human-readable xml representation of the parse tree Returns a human-readable xml representation of the parse tree
""" """
# TODO: should this function be moved to another file?
return self.last.toprettyxml() return self.last.toprettyxml()
def loadxml(self, raw, namespace=None): def loadxml(self, raw, namespace=None):
...@@ -477,6 +514,7 @@ class BisonParser(object): ...@@ -477,6 +514,7 @@ class BisonParser(object):
Returns: Returns:
- root node object of reconstituted parse tree - root node object of reconstituted parse tree
""" """
# TODO: should this function be moved to another file?
doc = xml.dom.minidom.parseString(raw) doc = xml.dom.minidom.parseString(raw)
tree = self.loadxmldoc(doc, namespace) tree = self.loadxmldoc(doc, namespace)
self.last = tree self.last = tree
...@@ -492,6 +530,7 @@ class BisonParser(object): ...@@ -492,6 +530,7 @@ class BisonParser(object):
- namespace - a dict from which to find the classes needed - namespace - a dict from which to find the classes needed
to translate the document into a tree of parse nodes to translate the document into a tree of parse nodes
""" """
# TODO: should this function be moved to another file?
return self.loadxmlobj(xmldoc.childNodes[0], namespace) return self.loadxmlobj(xmldoc.childNodes[0], namespace)
def loadxmlobj(self, xmlobj, namespace=None): def loadxmlobj(self, xmlobj, namespace=None):
...@@ -504,6 +543,7 @@ class BisonParser(object): ...@@ -504,6 +543,7 @@ class BisonParser(object):
- namespace - a namespace from which the node classes - namespace - a namespace from which the node classes
needed for reconstituting the tree, can be found needed for reconstituting the tree, can be found
""" """
# TODO: should this function be moved to another file?
# check on namespace # check on namespace
if type(namespace) is types.ModuleType: if type(namespace) is types.ModuleType:
namespace = namespace.__dict__ namespace = namespace.__dict__
...@@ -556,355 +596,3 @@ class BisonParser(object): ...@@ -556,355 +596,3 @@ class BisonParser(object):
def _globals(self): def _globals(self):
return globals().keys() return globals().keys()
def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
    """
    Rips the rules, tokens and precedences from a bison file, and the verbatim
    text from a lex file and generates a boilerplate python file containing a
    Parser class with handler methods and grammar attributes.

    Arguments:
        - bisonfileName - name of input bison script
        - lexfileName - name of input flex script
        - pyfileName - name of output python file
        - generateClasses - flag - default 0 - if 1, causes a unique class to
          be defined for each parse target, and for the corresponding target
          handler method in the main Parser class to use this class when
          creating the node.

    Raises:
        - Exception - if an input file cannot be read, the output file cannot
          be created, the bison file does not consist of exactly three
          sections separated by '%%' lines, or no start target can be
          determined
        - ValueError - if a rule cannot be split into 'target : terms'
    """
    # NOTE(review): every template literal below carries a single leading
    # space regardless of nesting level, exactly as it appears in the text
    # under review; the generated module needs real indentation - confirm
    # these literals against the repository copy.

    # Read both inputs before touching the output file, so a missing or
    # malformed input no longer leaves a truncated output file behind.
    try:
        with open(bisonfileName) as bisonfile:
            rawBison = bisonfile.read()
    except (IOError, OSError):
        raise Exception('Cannot open bison file "%s"' % bisonfileName)

    try:
        with open(lexfileName) as lexfile:
            rawLex = lexfile.read()
    except (IOError, OSError):
        raise Exception('Cannot open lex file %s' % lexfileName)

    # break up into the three '%%'-separated sections (the epilogue is
    # not used)
    sections = rawBison.split('\n%%\n')
    if len(sections) != 3:
        raise Exception(
            'File %s is not a properly formatted bison file'
            ' (needs 3 sections separated by %%%%)' % (bisonfileName)
            )
    prologue, rulesRaw = sections[0], sections[1]

    # --------------------------------------
    # process prologue

    prologue = prologue.split('%}')[-1].strip() # ditch the C code
    prologue = re.sub('\\n([\t ]+)', ' ', prologue) # join broken lines

    prologueLines = [
        stripped
        for stripped in (line.strip() for line in prologue.split('\n'))
        if stripped
        ]

    tokens = []
    precRules = []
    startTarget = None
    for line in prologueLines:
        words = reSpaces.split(line)
        kwd = words[0]
        args = words[1:]

        if kwd == '%token':
            tokens.extend(args)
        elif kwd in ['%left', '%right', '%nonassoc']:
            precRules.append((kwd, args))
        elif kwd == '%start':
            startTarget = args[0]

    # -------------------------------------------------------------
    # process rules

    rulesRaw = re.sub('\\n([\t ]+)', ' ', rulesRaw) # join broken lines

    # split on unquoted ';' and drop empty chunks (the former
    # filter('', ...) tried to call the empty string)
    rulesLines = [
        stripped
        for stripped in (chunk.strip()
                         for chunk in re.split(unquoted % ';', rulesRaw))
        if stripped
        ]

    rules = []
    for rule in rulesLines:
        parts = re.split(unquoted % ':', rule)
        if len(parts) != 2:
            raise ValueError('Error in rule: %s' % rule)
        tgt, terms = parts[0].strip(), parts[1].strip()

        # one list of words per '|'-separated alternative
        alternatives = [
            reSpaces.split(term.strip())
            for term in re.split(unquoted % r'\|', terms)
            ]
        rules.append((tgt, alternatives))

    # Without a %start declaration fall back to the first rule's target,
    # which is bison's own default start symbol.
    if startTarget is None:
        if not rules:
            raise Exception(
                'File %s declares no %%start target and no rules'
                % bisonfileName)
        startTarget = rules[0][0]

    # now we have our rulebase, we can churn out our skeleton Python file
    try:
        pyfile = open(pyfileName, 'w')
    except (IOError, OSError):
        raise Exception('Cannot create output file "%s"' % pyfileName)

    try:
        write = pyfile.write

        write('\n'.join([
            '#!/usr/bin/env python',
            '',
            '"""',
            'PyBison file automatically generated from grammar file %s' % bisonfileName,
            'You can edit this module, or import it and subclass the Parser class',
            '"""',
            '',
            'import sys',
            '',
            'from bison import BisonParser, BisonNode, BisonError',
            '',
            'bisonFile = \'%s\' # original bison file' % bisonfileName,
            'lexFile = \'%s\' # original flex file' % lexfileName,
            '\n',
            ]))

        # if generating target classes
        if generateClasses:
            # create a base class for all nodes
            write("\n".join([
                'class ParseNode(BisonNode):',
                ' """',
                ' This is the base class from which all your',
                ' parse nodes are derived.',
                ' Add methods to this class as you need them',
                ' """',
                ' def __init__(self, **kw):',
                ' BisonNode.__init__(self, **kw)',
                '',
                ' def __str__(self):',
                ' """Customise as needed"""',
                ' return \'<%s instance at 0x%x>\' % (self.__class__.__name__, hash(self))',
                '',
                ' def __repr__(self):',
                ' """Customise as needed"""',
                ' return str(self)',
                '',
                ' def dump(self, indent=0):',
                ' """',
                ' Dump out human-readable, indented parse tree',
                ' Customise as needed - here, or in the node-specific subclasses',
                ' """',
                ' BisonNode.dump(self, indent) # alter as needed',
                '\n',
                '# ------------------------------------------------------',
                '# Define a node class for each grammar target',
                '# ------------------------------------------------------',
                '\n',
                ]))

            # now spit out class decs for every parse target
            for target, options in rules:
                # totally self-indulgent grammatical pedantry
                if target[:1].lower() in ('a', 'e', 'i', 'o', 'u'):
                    plural = 'n'
                else:
                    plural = ''

                write("\n".join([
                    'class %s_Node(ParseNode):' % target,
                    ' """',
                    ' Holds a%s "%s" parse target and its components.' % (plural, target),
                    ' """',
                    ' def __init__(self, **kw):',
                    ' ParseNode.__init__(self, **kw)',
                    '',
                    ' def dump(self, indent=0):',
                    ' ParseNode.dump(self, indent)',
                    '\n',
                    ]))

        # start churning out the class dec
        write('\n'.join([
            'class Parser(BisonParser):',
            ' """',
            ' bison Parser class generated automatically by bison2py from the',
            ' grammar file "%s" and lex file "%s"' % (bisonfileName, lexfileName),
            '',
            ' You may (and probably should) edit the methods in this class.',
            ' You can freely edit the rules (in the method docstrings), the',
            ' tokens list, the start symbol, and the precedences.',
            '',
            ' Each time this class is instantiated, a hashing technique in the',
            ' base class detects if you have altered any of the rules. If any',
            ' changes are detected, a new dynamic lib for the parser engine',
            ' will be generated automatically.',
            ' """',
            '\n',
            ]))

        # add the default node class
        if not generateClasses:
            write('\n'.join([
                ' # -------------------------------------------------',
                ' # Default class to use for creating new parse nodes',
                ' # -------------------------------------------------',
                ' defaultNodeClass = BisonNode',
                '\n',
                ]))

        # add the name of the dynamic library we need; the former
        # 'parser.buildDirectory' prefix referenced a name that is not
        # defined in this function
        libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] \
            + '-engine'

        write('\n'.join([
            ' # --------------------------------------------',
            ' # basename of binary parser engine dynamic lib',
            ' # --------------------------------------------',
            ' bisonEngineLibName = \'%s\'' % libfileName,
            '\n',
            ]))

        # add the tokens, as the list parsed from the %token declarations
        # (previously a stale temporary from the rules loop was written)
        write(' # ----------------------------------------------------------------\n')
        write(' # lexer tokens - these must match those in your lex script (below)\n')
        write(' # ----------------------------------------------------------------\n')
        write(' tokens = %s\n\n' % repr(tokens))

        # add the precedences, each with its own quote-wrapped targets
        write(' # ------------------------------\n')
        write(' # precedences\n')
        write(' # ------------------------------\n')
        write(' precedences = (\n')
        for kwd, args in precRules:
            write(' (\'%s\', %s,),\n' % (
                kwd[1:], # left/right/nonassoc, without the leading '%'
                repr(args), # quote-wrapped targets
                )
            )
        write(' )\n\n')

        write('\n'.join([
            ' # ---------------------------------------------------------------',
            ' # Declare the start target here (by name)',
            ' # ---------------------------------------------------------------',
            ' start = \'%s\'' % startTarget,
            '\n',
            ]))

        # now the interesting bit - write the rule handler methods
        write('\n'.join([
            ' # ---------------------------------------------------------------',
            ' # These methods are the python handlers for the bison targets.',
            ' # (which get called by the bison code each time the corresponding',
            ' # parse target is unambiguously reached)',
            ' #',
            ' # WARNING - don\'t touch the method docstrings unless you know what',
            ' # you are doing - they are in bison rule syntax, and are passed',
            ' # verbatim to bison to build the parser engine library.',
            ' # ---------------------------------------------------------------',
            '\n',
            ]))

        for target, options in rules:
            joined = [' '.join(words) for words in options]

            if generateClasses:
                nodeClassName = target + '_Node'
            else:
                nodeClassName = 'self.defaultNodeClass'

            write('\n'.join([
                ' def on_%s(self, target, option, names, values):' % target,
                ' """',
                ' %s' % target,
                ' : ' + '\n | '.join(joined),
                ' """',
                ' return %s(' % nodeClassName,
                ' target=\'%s\',' % target,
                ' option=option,',
                ' names=names,',
                ' values=values)',
                '\n',
                ]))

        # now the ugly bit - add the raw lex script
        write('\n'.join([
            ' # -----------------------------------------',
            ' # raw lex script, verbatim here',
            ' # -----------------------------------------',
            ' lexscript = r"""',
            rawLex,
            ' """',
            ' # -----------------------------------------',
            ' # end raw lex script',
            ' # -----------------------------------------',
            '',
            '',
            ]))

        # and now, create a main for testing which either reads stdin, or a
        # filename arg
        # NOTE(review): the generated main(*args) calls args.remove() on its
        # argument tuple - confirm whether the template should copy args
        # into a list first.
        write('\n'.join([
            'def usage():',
            ' print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)',
            ' print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]',
            ' print \' -k Keep temporary files used in building parse engine lib\'',
            ' print \' -v Enable verbose messages while parser is running\'',
            ' print \' -d Enable garrulous debug messages from parser engine\'',
            ' print \' filename path of a file to parse, defaults to stdin\'',
            '',
            'def main(*args):',
            ' """',
            ' Unit-testing func',
            ' """',
            '',
            ' keepfiles = 0',
            ' verbose = 0',
            ' debug = 0',
            ' filename = None',
            '',
            ' for s in [\'-h\', \'-help\', \'--h\', \'--help\', \'-?\']:',
            ' if s in args:',
            ' usage()',
            ' sys.exit(0)',
            '',
            ' if len(args) > 0:',
            ' if \'-k\' in args:',
            ' keepfiles = 1',
            ' args.remove(\'-k\')',
            ' if \'-v\' in args:',
            ' verbose = 1',
            ' args.remove(\'-v\')',
            ' if \'-d\' in args:',
            ' debug = 1',
            ' args.remove(\'-d\')',
            ' if len(args) > 0:',
            ' filename = args[0]',
            '',
            ' p = Parser(verbose=verbose, keepfiles=keepfiles)',
            ' tree = p.run(file=filename, debug=debug)',
            ' return tree',
            '',
            'if __name__ == \'__main__\':',
            ' main(*(sys.argv[1:]))',
            '',
            '',
            ]))
    finally:
        # always release the output handle, even if a write fails
        pyfile.close()
"""
Module for converting a bison file to a PyBison-python file.
Written April 2004 by David McNab <david@freenet.org.nz>
Copyright (c) 2004 by David McNab, all rights reserved.
Released under the GNU General Public License, a copy of which should appear in
this distribution in the file called 'COPYING'. If this file is missing, then
you can obtain a copy of the GPL license document from the GNU website at
http://www.gnu.org.
This software is released with no warranty whatsoever. Use it at your own
risk.
If you wish to use this software in a commercial application, and wish to
depart from the GPL licensing requirements, please contact the author and apply
for a commercial license.
"""
import re
import os
from bison_ import unquoted
reSpaces = re.compile('\\s+')
def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
"""
Rips the rules, tokens and precedences from a bison file, and the verbatim
text from a lex file and generates a boilerplate python file containing a
Parser class with handler methods and grammar attributes.
Arguments:
* bisonfileName - name of input bison script
* lexfileName - name of input flex script
* pyfileName - name of output python file
* generateClasses - flag - default 0 - if 1, causes a unique class to
be defined for each parse target, and for the corresponding target
handler method in the main Parser class to use this class when creating
the node.
"""
# try to create output file
try:
pyfile = file(pyfileName, 'w')
except:
raise Exception('Cannot create output file "%s"' % pyfileName)
# try to open/read the bison file
try:
rawBison = file(bisonfileName).read()
except:
raise Exception('Cannot open bison file "%s"' % bisonfileName)
# try to open/read the lex file
try:
rawLex = file(lexfileName).read()
except:
raise Exception('Cannot open lex file %s' % lexfileName)
# break up into the three '%%'-separated sections
try:
prologue, rulesRaw, epilogue = rawBison.split('\n%%\n')
except:
raise Exception(
'File %s is not a properly formatted bison file'
' (needs 3 sections separated by %%%%' % (bisonfileName)
)
# --------------------------------------
# process prologue
prologue = prologue.split('%}')[-1].strip() # ditch the C code
prologue = re.sub('\\n([\t ]+)', ' ', prologue) # join broken lines
#prologueLines = [line.strip() for line in prologue.split('\n')]
lines = prologue.split('\n')
tmp = []
for line in lines:
tmp.append(line.strip())
prologueLines = tmp
prologueLines = filter(None, prologueLines)
tokens = []
precRules = []
for line in prologueLines:
words = reSpaces.split(line)
kwd = words[0]
args = words[1:]
if kwd == '%token':
tokens.extend(args)
elif kwd in ['%left', '%right', '%nonassoc']:
precRules.append((kwd, args))
elif kwd == '%start':
startTarget = args[0]
# -------------------------------------------------------------
# process rules
rulesRaw = re.sub('\\n([\t ]+)', ' ', rulesRaw) # join broken lines
rulesLines = filter('', map(str.strip, re.split(unquoted % ';', rulesRaw)))
rules = []
for rule in rulesLines:
#print '--'
#print repr(rule)
#tgt, terms = rule.split(':')
try:
tgt, terms = re.split(unquoted % ':', rule)
except ValueError:
print 'Error in rule: %s' % rule
raise
tgt, terms = tgt.strip(), terms.strip()
#terms = [t.strip() for t in terms.split('|')]
#terms = [reSpaces.split(t) for t in terms]
tmp = []
#for t in terms.split('|'):
for t in re.split(unquoted % r'\|', terms):
t = t.strip()
tmp.append(reSpaces.split(t))
terms = tmp
rules.append((tgt, terms))
# now we have our rulebase, we can churn out our skeleton Python file
pyfile.write('\n'.join([
'#!/usr/bin/env python',
'',
'"""',
'PyBison file automatically generated from grammar file %s' % bisonfileName,
'You can edit this module, or import it and subclass the Parser class',
'"""',
'',
'import sys',
'',
'from bison import BisonParser, BisonNode, BisonError',
'',
'bisonFile = \'%s\' # original bison file' % bisonfileName,
'lexFile = \'%s\' # original flex file' % lexfileName,
'\n',
]))
# if generating target classes
if generateClasses:
# create a base class for all nodes
pyfile.write("\n".join([
'class ParseNode(BisonNode):',
' """',
' This is the base class from which all your',
' parse nodes are derived.',
' Add methods to this class as you need them',
' """',
' def __init__(self, **kw):',
' BisonNode.__init__(self, **kw)',
'',
' def __str__(self):',
' """Customise as needed"""',
' return \'<%s instance at 0x%x>\' % (self.__class__.__name__, hash(self))',
'',
' def __repr__(self):',
' """Customise as needed"""',
' return str(self)',
'',
' def dump(self, indent=0):',
' """',
' Dump out human-readable, indented parse tree',
' Customise as needed - here, or in the node-specific subclasses',
' """',
' BisonNode.dump(self, indent) # alter as needed',
'\n',
'# ------------------------------------------------------',
'# Define a node class for each grammar target',
'# ------------------------------------------------------',
'\n',
]))
# now spit out class decs for every parse target
for target, options in rules:
tmp = map(' '.join, options)
# totally self-indulgent grammatical pedantry
if target[0].lower() in ['a','e','i','o','u']:
plural = 'n'
else:
plural = ''
pyfile.write("\n".join([
'class %s_Node(ParseNode):' % target,
' """',
' Holds a%s "%s" parse target and its components.' % (plural, target),
' """',
' def __init__(self, **kw):',
' ParseNode.__init__(self, **kw)',
'',
' def dump(self, indent=0):',
' ParseNode.dump(self, indent)',
'\n',
]))
# start churning out the class dec
pyfile.write('\n'.join([
'class Parser(BisonParser):',
' """',
' bison Parser class generated automatically by bison2py from the',
' grammar file "%s" and lex file "%s"' % (bisonfileName, lexfileName),
'',
' You may (and probably should) edit the methods in this class.',
' You can freely edit the rules (in the method docstrings), the',
' tokens list, the start symbol, and the precedences.',
'',
' Each time this class is instantiated, a hashing technique in the',
' base class detects if you have altered any of the rules. If any',
' changes are detected, a new dynamic lib for the parser engine',
' will be generated automatically.',
' """',
'\n',
]))
# add the default node class
if not generateClasses:
pyfile.write('\n'.join([
' # -------------------------------------------------',
' # Default class to use for creating new parse nodes',
' # -------------------------------------------------',
' defaultNodeClass = BisonNode',
'\n',
]))
# add the name of the dynamic library we need
libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] \
+ '-engine'
pyfile.write('\n'.join([
' # --------------------------------------------',
' # basename of binary parser engine dynamic lib',
' # --------------------------------------------',
' bisonEngineLibName = \'%s\'' % libfileName,
'\n',
]))
# add the tokens
#pyfile.write(' tokens = (%s,)\n\n' % ', '.join([''%s'' % t for t in tokens]))
#toks = ', '.join(tokens)
pyfile.write(' # ----------------------------------------------------------------\n')
pyfile.write(' # lexer tokens - these must match those in your lex script (below)\n')
pyfile.write(' # ----------------------------------------------------------------\n')
pyfile.write(' tokens = %s\n\n' % tmp)
# add the precedences
pyfile.write(' # ------------------------------\n')
pyfile.write(' # precedences\n')
pyfile.write(' # ------------------------------\n')
pyfile.write(' precedences = (\n')
for prec in precRules:
#precline = ', '.join(prec[1])
pyfile.write(' (\'%s\', %s,),\n' % (
prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s'
tmp, # quote-wrapped targets
)
)
pyfile.write(' )\n\n'),
pyfile.write('\n'.join([
' # ---------------------------------------------------------------',
' # Declare the start target here (by name)',
' # ---------------------------------------------------------------',
' start = \'%s\'' % startTarget,
'\n',
]))
# now the interesting bit - write the rule handler methods
pyfile.write('\n'.join([
' # ---------------------------------------------------------------',
' # These methods are the python handlers for the bison targets.',
' # (which get called by the bison code each time the corresponding',
' # parse target is unambiguously reached)',
' #',
' # WARNING - don\'t touch the method docstrings unless you know what',
' # you are doing - they are in bison rule syntax, and are passed',
' # verbatim to bison to build the parser engine library.',
' # ---------------------------------------------------------------',
'\n',
]))
for target, options in rules:
tmp = map(' '.join, options)
if generateClasses:
nodeClassName = target + '_Node'
else:
nodeClassName = 'self.defaultNodeClass'
pyfile.write('\n'.join([
' def on_%s(self, target, option, names, values):' % target,
' """',
' %s' % target,
' : ' + '\n | '.join(tmp),
' """',
' return %s(' % nodeClassName,
' target=\'%s\',' % target,
' option=option,',
' names=names,',
' values=values)',
'\n',
]))
# now the ugly bit - add the raw lex script
pyfile.write('\n'.join([
' # -----------------------------------------',
' # raw lex script, verbatim here',
' # -----------------------------------------',
' lexscript = r"""',
rawLex,
' """',
' # -----------------------------------------',
' # end raw lex script',
' # -----------------------------------------',
'',
'',
]))
# and now, create a main for testing which either reads stdin, or a filename arg
pyfile.write('\n'.join([
'def usage():',
' print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)',
' print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]',
' print \' -k Keep temporary files used in building parse engine lib\'',
' print \' -v Enable verbose messages while parser is running\'',
' print \' -d Enable garrulous debug messages from parser engine\'',
' print \' filename path of a file to parse, defaults to stdin\'',
'',
'def main(*args):',
' """',
' Unit-testing func',
' """',
'',
' keepfiles = 0',
' verbose = 0',
' debug = 0',
' filename = None',
'',
' for s in [\'-h\', \'-help\', \'--h\', \'--help\', \'-?\']:',
' if s in args:',
' usage()',
' sys.exit(0)',
'',
' if len(args) > 0:',
' if \'-k\' in args:',
' keepfiles = 1',
' args.remove(\'-k\')',
' if \'-v\' in args:',
' verbose = 1',
' args.remove(\'-v\')',
' if \'-d\' in args:',
' debug = 1',
' args.remove(\'-d\')',
' if len(args) > 0:',
' filename = args[0]',
'',
' p = Parser(verbose=verbose, keepfiles=keepfiles)',
' tree = p.run(file=filename, debug=debug)',
' return tree',
'',
'if __name__ == \'__main__\':',
' main(*(sys.argv[1:]))',
'',
'',
]))
#@+leo-ver=4 #!/usr/bin/env python
#@+node:@file utils/bison2py.py
#@@language python
""" """
Utility which creates a boilerplate pybison-compatible Utility which creates a boilerplate pybison-compatible
python file from a yacc file and lex file python file from a yacc file and lex file
...@@ -9,24 +6,17 @@ python file from a yacc file and lex file ...@@ -9,24 +6,17 @@ python file from a yacc file and lex file
Run it with 2 arguments - filename.y and filename.l Run it with 2 arguments - filename.y and filename.l
Output is filename.py Output is filename.py
""" """
#@+others import sys
#@+node:imports
import sys, os, re, getopt
from bison import bisonToPython from bison import bisonToPython
#@-node:imports
#@+node:globals
argv = sys.argv
argc = len(argv)
progname = argv[0]
reSpaces = re.compile("\\s+")
#@-node:globals
#@+node:usage
def usage(s=None): def usage(s=None):
""" """
Display usage info and exit Display usage info and exit
""" """
progname = sys.argv[0]
if s: if s:
print progname+": "+s print progname+": "+s
...@@ -45,13 +35,13 @@ def usage(s=None): ...@@ -45,13 +35,13 @@ def usage(s=None):
sys.exit(1) sys.exit(1)
#@-node:usage
#@+node:main
def main(): def main():
""" """
Command-line interface for bison2py Command-line interface for bison2py
""" """
global argc, argv argv = sys.argv
argc = len(argv)
if '-c' in argv: if '-c' in argv:
generateClasses = 1 generateClasses = 1
...@@ -72,11 +62,6 @@ def main(): ...@@ -72,11 +62,6 @@ def main():
bisonToPython(bisonfile, lexfile, pyfile, generateClasses) bisonToPython(bisonfile, lexfile, pyfile, generateClasses)
#@-node:main
#@+node:MAINLINE
if __name__ == '__main__': if __name__ == '__main__':
main() main()
#@-node:MAINLINE
#@-others
#@-node:@file utils/bison2py.py
#@-leo
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment