Improved exception handling and moved bisonToParser to own file.

260359e3 · Sander Mathijs van Veen · 2331ecff · 260359e3 · 260359e3 · 260359e3
Commit 260359e3 authored 13 years ago by Sander Mathijs van Veen
--- a/src/pyrex/bison_.pyx
+++ b/src/pyrex/bison_.pyx
@@ -82,26 +82,24 @@ cdef public object py_callback(object parser, char *target, int option, \
    cdef void *val
    cdef char *termname
-    #if parser.verbose:
-    #    print 'py_callback: called with nargs=%d' % nargs
    names = PyList_New(nargs)
    values = PyList_New(nargs)
    Py_INCREF(names)
    Py_INCREF(values)
-    #for i in range(nargs):
-    #    print 'i=%d' % i , <char*>va_arg(ap, str_type), \
-    #                       hex(<int>va_arg(ap, str_type))
    for i in range(nargs):
        termname = <char*>va_arg(ap, str_type)
        PyList_SetItem(names, i, termname)
        Py_INCREF(termname)
        val = <void *>va_arg(ap, void_type)
-        valobj = <object>val
+        if val:
+            valobj = <object>val
+        else:
+            valobj = None
        PyList_SetItem(values, i, valobj)
        Py_INCREF(valobj)
@@ -114,6 +112,8 @@ cdef public object py_callback(object parser, char *target, int option, \
    #signal.signal(signal.SIGALRM, parser.handle_timeout)
    #signal.alarm(parser.timeout)
+    va_end(ap)
    res = parser._handle(target, option, names, values)
    #signal.alarm(0)
@@ -121,8 +121,6 @@ cdef public object py_callback(object parser, char *target, int option, \
    #if parser.verbose:
    #    print 'py_callback: handler returned:', res
-    va_end(ap)
    return res
 # callback routine for reading input
@@ -140,6 +138,12 @@ cdef public void py_input(object parser, char *buf, int *result, int max_size):
    if parser.verbose:
        print '\npy_input: got %s bytes' % buflen
+    if buflen == 0 and parser.file:
+        # Marks the Python file object as being closed from Python's point of
+        # view. This does not close the associated C stream (which is not
+        # necessary here, otherwise use "os.close(0)").
+        parser.file.close()
 import sys, os, sha, re, imp, traceback
 import shutil
@@ -269,6 +273,42 @@ cdef class ParserEngine:
        if parser.verbose:
            print 'Successfully loaded library'
+    def generate_exception_handler(self):
+        s = ''
+        #s = s + '          if ($$ && $$ != Py_None && PyObject_HasAttrString($$, "_pyBisonError"))\n'
+        #s = s + '          {\n'
+        #s = s + '              yyerror(PyString_AsString(PyObject_GetAttrString(py_parser, "lasterror")));\n'
+        #s = s + '             Py_INCREF(Py_None);\n'
+        #s = s + '             YYERROR;\n'
+        #s = s + '          }\n'
+        s += '          if ($$ && $$ != Py_None)\n'
+        s += '          {\n'
+        s += '            if (PyObject_HasAttrString($$, "_pyBisonError"))\n'
+        s += '            {\n'
+        s += '              //PyObject* lasterror = PyObject_GetAttrString(py_parser, "lasterror");\n'
+        s += '              //if (lasterror && PyString_Check(lasterror))\n'
+        s += '              //  yyerror(PyString_AsString(lasterror));\n'
+        s += '              //else\n'
+        s += '              //  yyerror("No \\"lasterror\\" attribute set in BisonError or not a string");\n'
+        s += '              Py_INCREF(Py_None);\n'
+        s += '              YYERROR;\n'
+        s += '            }\n'
+        s += '          }\n'
+        #s += '          else\n'
+        #s += '          {\n'
+        #s += '            PyObject* obj = PyErr_Occurred();\n'
+        #s += '            if (obj)\n'
+        #s += '            {\n'
+        #s += '              fprintf(stderr, "exception caught in bison_:\\n");\n'
+        #s += '              PyErr_Print();\n'
+        #s += '              YYERROR;\n'
+        #s += '            }\n'
+        #s += '          }\n'
+        return s
    def buildLib(self):
        """
        Creates the parser engine lib
@@ -322,7 +362,6 @@ cdef class ParserEngine:
            "%{",
            '',
            '#include "Python.h"',
-            "#include <stdio.h>",
            "extern FILE *yyin;",
            "extern int yylineno;"
            "extern char *yytext;",
@@ -415,7 +454,7 @@ cdef class ParserEngine:
                    # now, we have the correct terms count
                    action = action % (i + 1)
-                    # assemble the full rule + action, ad to list
+                    # assemble the full rule + action, add to list
                    action = action + ",\n            "
                    action = action + ",\n            ".join(args) + "\n            );\n"
@@ -424,13 +463,10 @@ cdef class ParserEngine:
                        action = action + "             Py_INCREF(Py_None);\n"
                        action = action + "             yyclearin;\n"
-                    action = action + "          if ($$ && $$ != Py_None && PyObject_HasAttrString($$, \"_pyBisonError\"))\n"
+                    action = action + self.generate_exception_handler()
-                    action = action + "          {\n"
-                    action = action + "             yyerror(PyString_AsString(PyObject_GetAttrString(py_parser, \"lasterror\")));\n"
+                    action = action + '        }\n'
-                    action = action + "             Py_INCREF(Py_None);\n"
-                    action = action + "             YYERROR;\n"
-                    action = action + "          }\n"
-                    action = action + "        }\n"
                    options.append(" ".join(option) + action)
                    idx = idx + 1
                write("    | ".join(options) + "    ;\n\n")

--- a/src/python/bison.py
+++ b/src/python/bison.py
-#@+leo-ver=4
-#@+node:@file src/python/bison.py
 """
 Wrapper module for interfacing with Bison (yacc)
 Written April 2004 by David McNab <david@freenet.org.nz>
 Copyright (c) 2004 by David McNab, all rights reserved.
-Released under the GNU General Public License, a copy
+Released under the GNU General Public License, a copy of which should appear in
-of which should appear in this distribution in the file
+this distribution in the file called 'COPYING'. If this file is missing, then
-called 'COPYING'. If this file is missing, then you can
+you can obtain a copy of the GPL license document from the GNU website at
-obtain a copy of the GPL license document from the GNU
+http://www.gnu.org.
-website at http://www.gnu.org.
-This software is released with no warranty whatsoever.
+This software is released with no warranty whatsoever. Use it at your own
-Use it at your own risk.
+risk.
-If you wish to use this software in a commercial application,
+If you wish to use this software in a commercial application, and wish to
-and wish to depart from the GPL licensing requirements,
+depart from the GPL licensing requirements, please contact the author and apply
-please contact the author and apply for a commercial license.
+for a commercial license.
 """
 import sys
-import os
+#import imp
-import sha
-import re
-import imp
 import traceback
 import xml.dom
 import xml.dom.minidom
 import types
-import distutils.sysconfig
+#import distutils.sysconfig
-import distutils.ccompiler
+#import distutils.ccompiler
-from bison_ import ParserEngine, unquoted
-reSpaces = re.compile('\\s+')
+from bison_ import ParserEngine
 class ParserSyntaxError(Exception):
@@ -44,7 +37,7 @@ class TimeoutError(Exception):
    pass
-class BisonError:
+class BisonError(object):
    """
    Flags an error to yyparse()
@@ -56,6 +49,13 @@ class BisonError:
        self.value = value
+class BisonException(Exception):
+    _pyBisonError = 1
+    def __init__(self, value='syntax error'):
+        self.value = value
 class BisonNode:
    """
    Generic class for wrapping parse targets.
@@ -323,16 +323,15 @@ class BisonParser(object):
                print 'BisonParser._handle: call handler at line %s with: %s' \
                      % (hdlrline, str((targetname, option, names, values)))
+            #self.last = handler(target=targetname, option=option, names=names,
+            #                    values=values)
            try:
-                self.last = handler(target=targetname, option=option, names=names,
+                self.last = handler(target=targetname, option=option,
-                                    values=values)
+                                    names=names, values=values)
-            except Exception as e:
+            except:
-                self.lasterror = e
+                #traceback.print_exception(*sys.exc_info())
-                print type(e), str(e)
+                return self.error(sys.exc_info())
-                #traceback.print_last()
+            #    raise
-                #traceback.print_stack()
-                traceback.print_stack()
-                raise
            #if self.verbose:
            #    print 'handler for %s returned %s' \
@@ -343,8 +342,6 @@ class BisonParser(object):
            self.last = BisonNode(targetname, option=option, names=names, values=values)
        # reset any resulting errors (assume they've been handled)
-        if self.lasterror:
-            print 'lasterror:', self.lasterror
        #self.lasterror = None
        # assumedly the last thing parsed is at the top of the tree
@@ -391,25 +388,37 @@ class BisonParser(object):
        if read:
            self.read = read
-        # do the parsing job, spew if error
+        if self.verbose and self.file.closed:
-        self.lasterror = None
+            print 'Parser.run(): self.file', self.file, 'is closed'
-        self.engine.runEngine(debug)
-        if self.lasterror:
+        # TODO: add option to fail on first error.
-            if filename != None:
+        while not self.file.closed:
-                raise ParserSyntaxError('%s:%d: "%s" near "%s"' 
+            # do the parsing job, spew if error
-                                        % ((filename,) + self.lasterror))
+            self.last = None
-            else:
+            self.lasterror = None
-                raise ParserSyntaxError('Line %d: "%s" near "%s"' 
+            self.engine.runEngine(debug)
-                                        % self.lasterror)
+            if self.lasterror:
+                self.report_last_error(filename, self.lasterror)
+            if self.verbose:
+                print 'Parser.run: back from engine'
+            if self.verbose and not self.file.closed:
+                print 'last:', self.last
+        if self.verbose:
+            print 'last:', self.last
        # restore old values
        self.file = oldfile
        self.read = oldread
        if self.verbose:
-            print 'Parser.run: back from engine'
+            print '------------------ result=', self.last
+        # TODO: return last result (see while loop):
+        # return self.last[:-1]
        return self.last
    def read(self, nbytes):
@@ -433,7 +442,7 @@ class BisonParser(object):
        return bytes
    def _error(self, linenum, msg, tok):
+        # TODO: should this function be removed?
        print 'Parser: line %s: syntax error "%s" before "%s"' \
              % (linenum, msg, tok)
@@ -441,25 +450,53 @@ class BisonParser(object):
        """
        Return the result of this method from a handler to notify a syntax error
        """
+        # TODO: should this function be removed?
        self.lasterror = value
        return BisonError(value)
+    def exception(self, exception):
+        # TODO: should this function be removed?
+        self.lastexception = exception
+        return BisonException(exception)
+    def report_last_error(self, filename, error):
+        if filename != None:
+            msg = '%s:%d: "%s" near "%s"' \
+                    % ((filename,) + error)
+            if not self.interactive:
+                raise ParserSyntaxError(msg)
+            print >>sys.stderr, msg
+        elif isinstance(error[0], int):
+            msg = 'Line %d: "%s" near "%s"' % error
+            if not self.interactive:
+                raise ParserSyntaxError(msg)
+            print >>sys.stderr, msg
+        else:
+            traceback.print_exception(*error)
    def toxml(self):
        """
        Serialises the parse tree and returns it as a raw xml string
        """
+        # TODO: should this function be moved to another file?
        return self.last.toxml()
    def toxmldoc(self):
        """
        Returns an xml.dom.minidom.Document object containing the parse tree
        """
+        # TODO: should this function be moved to another file?
        return self.last.toxmldoc()
    def toprettyxml(self):
        """
        Returns a human-readable xml representation of the parse tree
        """
+        # TODO: should this function be moved to another file?
        return self.last.toprettyxml()
    def loadxml(self, raw, namespace=None):
@@ -477,6 +514,7 @@ class BisonParser(object):
        Returns:
            - root node object of reconstituted parse tree
        """
+        # TODO: should this function be moved to another file?
        doc = xml.dom.minidom.parseString(raw)
        tree = self.loadxmldoc(doc, namespace)
        self.last = tree
@@ -492,6 +530,7 @@ class BisonParser(object):
            - namespace - a dict from which to find the classes needed
              to translate the document into a tree of parse nodes
        """
+        # TODO: should this function be moved to another file?
        return self.loadxmlobj(xmldoc.childNodes[0], namespace)
    def loadxmlobj(self, xmlobj, namespace=None):
@@ -504,6 +543,7 @@ class BisonParser(object):
            - namespace - a namespace from which the node classes
              needed for reconstituting the tree, can be found
        """
+        # TODO: should this function be moved to another file?
        # check on namespace
        if type(namespace) is types.ModuleType:
            namespace = namespace.__dict__
@@ -556,355 +596,3 @@ class BisonParser(object):
    def _globals(self):
        return globals().keys()
-def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
-    """
-    Rips the rules, tokens and precedences from a bison file, and the verbatim
-    text from a lex file and generates a boilerplate python file containing a
-    Parser class with handler methods and grammar attributes.
-    Arguments:
-        - bisonfileName - name of input bison script
-        - lexfileName - name of input flex script
-        - pyfileName - name of output python file
-        - generateClasses - flag - default 0 - if 1, causes a unique class to
-          be defined for each parse target, and for the corresponding target
-          handler method in the main Parser class to use this class when
-          creating the node.
-    """
-    # try to create output file
-    try:
-        pyfile = file(pyfileName, 'w')
-    except:
-        raise Exception('Cannot create output file "%s"' % pyfileName)
-    # try to open/read the bison file
-    try:
-        rawBison = file(bisonfileName).read()
-    except:
-        raise Exception('Cannot open bison file "%s"' % bisonfileName)
-    # try to open/read the lex file
-    try:
-        rawLex = file(lexfileName).read()
-    except:
-        raise Exception('Cannot open lex file %s' % lexfileName)
-    # break up into the three '%%'-separated sections
-    try:
-        prologue, rulesRaw, epilogue = rawBison.split('\n%%\n')
-    except:
-        raise Exception(
-            'File %s is not a properly formatted bison file'
-            ' (needs 3 sections separated by %%%%' % (bisonfileName)
-            )
-    # --------------------------------------
-    # process prologue
-    prologue = prologue.split('%}')[-1].strip() # ditch the C code
-    prologue = re.sub('\\n([\t ]+)', ' ', prologue) # join broken lines
-    #prologueLines = [line.strip() for line in prologue.split('\n')]
-    lines = prologue.split('\n')
-    tmp = []
-    for line in lines:
-        tmp.append(line.strip())
-    prologueLines = tmp
-    prologueLines = filter(None, prologueLines)
-    tokens = []
-    precRules = []
-    for line in prologueLines:
-        words = reSpaces.split(line)
-        kwd = words[0]
-        args = words[1:]
-        if kwd == '%token':
-            tokens.extend(args)
-        elif kwd in ['%left', '%right', '%nonassoc']:
-            precRules.append((kwd, args))
-        elif kwd == '%start':
-            startTarget = args[0]
-    # -------------------------------------------------------------
-    # process rules
-    rulesRaw = re.sub('\\n([\t ]+)', ' ', rulesRaw) # join broken lines
-    rulesLines = filter('', map(str.strip, re.split(unquoted % ';', rulesRaw)))
-    rules = []
-    for rule in rulesLines:
-        #print '--'
-        #print repr(rule)
-        #tgt, terms = rule.split(':')
-        try:
-            tgt, terms = re.split(unquoted % ':', rule)
-        except ValueError:
-            print 'Error in rule: %s' % rule
-            raise
-        tgt, terms = tgt.strip(), terms.strip()
-        #terms = [t.strip() for t in terms.split('|')]
-        #terms = [reSpaces.split(t) for t in terms]
-        tmp = []
-        #for t in terms.split('|'):
-        for t in re.split(unquoted % r'\|', terms):
-            t = t.strip()
-            tmp.append(reSpaces.split(t))
-        terms = tmp
-        rules.append((tgt, terms))
-    # now we have our rulebase, we can churn out our skeleton Python file
-    pyfile.write('\n'.join([
-        '#!/usr/bin/env python',
-        '',
-        '"""',
-        'PyBison file automatically generated from grammar file %s' % bisonfileName,
-        'You can edit this module, or import it and subclass the Parser class',
-        '"""',
-        '',
-        'import sys',
-        '',
-        'from bison import BisonParser, BisonNode, BisonError',
-        '',
-        'bisonFile = \'%s\'  # original bison file' % bisonfileName,
-        'lexFile = \'%s\'    # original flex file' % lexfileName,
-        '\n',
-        ]))
-    # if generating target classes
-    if generateClasses:
-        # create a base class for all nodes
-        pyfile.write("\n".join([
-            'class ParseNode(BisonNode):',
-            '    """',
-            '    This is the base class from which all your',
-            '    parse nodes are derived.',
-            '    Add methods to this class as you need them',
-            '    """',
-            '    def __init__(self, **kw):',
-            '        BisonNode.__init__(self, **kw)',
-            '',
-            '    def __str__(self):',
-            '        """Customise as needed"""',
-            '        return \'<%s instance at 0x%x>\' % (self.__class__.__name__, hash(self))',
-            '',
-            '    def __repr__(self):',
-            '        """Customise as needed"""',
-            '        return str(self)',
-            '',
-            '    def dump(self, indent=0):',
-            '        """',
-            '        Dump out human-readable, indented parse tree',
-            '        Customise as needed - here, or in the node-specific subclasses',
-            '        """',
-            '        BisonNode.dump(self, indent) # alter as needed',
-            '\n',
-            '# ------------------------------------------------------',
-            '# Define a node class for each grammar target',
-            '# ------------------------------------------------------',
-            '\n',
-            ]))
-        # now spit out class decs for every parse target
-        for target, options in rules:
-            tmp = map(' '.join, options)
-            # totally self-indulgent grammatical pedantry
-            if target[0].lower() in ['a','e','i','o','u']:
-                plural = 'n'
-            else:
-                plural = ''
-            pyfile.write("\n".join([
-                'class %s_Node(ParseNode):' % target,
-                '    """',
-                '    Holds a%s "%s" parse target and its components.' % (plural, target),
-                '    """',
-                '    def __init__(self, **kw):',
-                '        ParseNode.__init__(self, **kw)',
-                '',
-                '    def dump(self, indent=0):',
-                '        ParseNode.dump(self, indent)',
-                '\n',
-                ]))
-    # start churning out the class dec
-    pyfile.write('\n'.join([
-        'class Parser(BisonParser):',
-        '    """',
-        '    bison Parser class generated automatically by bison2py from the',
-        '    grammar file "%s" and lex file "%s"' % (bisonfileName, lexfileName),
-        '',
-        '    You may (and probably should) edit the methods in this class.',
-        '    You can freely edit the rules (in the method docstrings), the',
-        '    tokens list, the start symbol, and the precedences.',
-        '',
-        '    Each time this class is instantiated, a hashing technique in the',
-        '    base class detects if you have altered any of the rules. If any',
-        '    changes are detected, a new dynamic lib for the parser engine',
-        '    will be generated automatically.',
-        '    """',
-        '\n',
-        ]))
-    # add the default node class
-    if not generateClasses:
-        pyfile.write('\n'.join([
-            '    # -------------------------------------------------',
-            '    # Default class to use for creating new parse nodes',
-            '    # -------------------------------------------------',
-            '    defaultNodeClass = BisonNode',
-            '\n',
-            ]))
-    # add the name of the dynamic library we need
-    libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] \
-                  + '-engine'
-    pyfile.write('\n'.join([
-        '    # --------------------------------------------',
-        '    # basename of binary parser engine dynamic lib',
-        '    # --------------------------------------------',
-        '    bisonEngineLibName = \'%s\'' % (parser.buildDirectory + libfileName),
-        '\n',
-        ]))
-    # add the tokens
-    #pyfile.write('    tokens = (%s,)\n\n' % ', '.join([''%s'' % t for t in tokens]))
-    toks = ', '.join(tokens)
-    pyfile.write('    # ----------------------------------------------------------------\n')
-    pyfile.write('    # lexer tokens - these must match those in your lex script (below)\n')
-    pyfile.write('    # ----------------------------------------------------------------\n')
-    pyfile.write('    tokens = %s\n\n' % tmp)
-    # add the precedences
-    pyfile.write('    # ------------------------------\n')
-    pyfile.write('    # precedences\n')
-    pyfile.write('    # ------------------------------\n')
-    pyfile.write('    precedences = (\n')
-    for prec in precRules:
-        precline = ', '.join(prec[1])
-        pyfile.write('        (\'%s\', %s,),\n' % (
-                prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s'
-                tmp,  # quote-wrapped targets
-                )
-            )
-    pyfile.write('        )\n\n'),
-    pyfile.write('\n'.join([
-        '    # ---------------------------------------------------------------',
-        '    # Declare the start target here (by name)',
-        '    # ---------------------------------------------------------------',
-        '    start = \'%s\'' % startTarget,
-        '\n',
-        ]))
-    # now the interesting bit - write the rule handler methods
-    pyfile.write('\n'.join([
-        '    # ---------------------------------------------------------------',
-        '    # These methods are the python handlers for the bison targets.',
-        '    # (which get called by the bison code each time the corresponding',
-        '    # parse target is unambiguously reached)',
-        '    #',
-        '    # WARNING - don\'t touch the method docstrings unless you know what',
-        '    # you are doing - they are in bison rule syntax, and are passed',
-        '    # verbatim to bison to build the parser engine library.',
-        '    # ---------------------------------------------------------------',
-        '\n',
-        ]))
-    for target, options in rules:
-        tmp = map(' '.join, options)
-        if generateClasses:
-            nodeClassName = target + '_Node'
-        else:
-            nodeClassName = 'self.defaultNodeClass'
-        pyfile.write('\n'.join([
-            '    def on_%s(self, target, option, names, values):' % target,
-            '        """',
-            '        %s' % target,
-            '            : ' + '\n            | '.join(tmp),
-            '        """',
-            '        return %s(' % nodeClassName,
-            '            target=\'%s\','  % target,
-            '            option=option,',
-            '            names=names,',
-            '            values=values)',
-            '\n',
-            ]))
-    # now the ugly bit - add the raw lex script
-    pyfile.write('\n'.join([
-        '    # -----------------------------------------',
-        '    # raw lex script, verbatim here',
-        '    # -----------------------------------------',
-        '    lexscript = r"""',
-        rawLex,
-        '    """',
-        '    # -----------------------------------------',
-        '    # end raw lex script',
-        '    # -----------------------------------------',
-        '',
-        '',
-        ]))
-    # and now, create a main for testing which either reads stdin, or a filename arg
-    pyfile.write('\n'.join([
-        'def usage():',
-        '    print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)',
-        '    print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]',
-        '    print \'  -k       Keep temporary files used in building parse engine lib\'',
-        '    print \'  -v       Enable verbose messages while parser is running\'',
-        '    print \'  -d       Enable garrulous debug messages from parser engine\'',
-        '    print \'  filename path of a file to parse, defaults to stdin\'',
-        '',
-        'def main(*args):',
-        '    """',
-        '    Unit-testing func',
-        '    """',
-        '',
-        '    keepfiles = 0',
-        '    verbose = 0',
-        '    debug = 0',
-        '    filename = None',
-        '',
-        '    for s in [\'-h\', \'-help\', \'--h\', \'--help\', \'-?\']:',
-        '        if s in args:',
-        '            usage()',
-        '            sys.exit(0)',
-        '',
-        '    if len(args) > 0:',
-        '        if \'-k\' in args:',
-        '            keepfiles = 1',
-        '            args.remove(\'-k\')',
-        '        if \'-v\' in args:',
-        '            verbose = 1',
-        '            args.remove(\'-v\')',
-        '        if \'-d\' in args:',
-        '            debug = 1',
-        '            args.remove(\'-d\')',
-        '    if len(args) > 0:',
-        '        filename = args[0]',
-        '',
-        '    p = Parser(verbose=verbose, keepfiles=keepfiles)',
-        '    tree = p.run(file=filename, debug=debug)',
-        '    return tree',
-        '',
-        'if __name__ == \'__main__\':',
-        '    main(*(sys.argv[1:]))',
-        '',
-        '',
-        ]))
--- a/src/python/convert.py
+++ b/src/python/convert.py
+"""
+Module for converting a bison file to a PyBison-python file.
+Written April 2004 by David McNab <david@freenet.org.nz>
+Copyright (c) 2004 by David McNab, all rights reserved.
+Released under the GNU General Public License, a copy of which should appear in
+this distribution in the file called 'COPYING'. If this file is missing, then
+you can obtain a copy of the GPL license document from the GNU website at
+http://www.gnu.org.
+This software is released with no warranty whatsoever. Use it at your own
+risk.
+If you wish to use this software in a commercial application, and wish to
+depart from the GPL licensing requirements, please contact the author and apply
+for a commercial license.
+"""
+import re
+import os
+from bison_ import unquoted
+reSpaces = re.compile('\\s+')
+def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
+    """
+    Rips the rules, tokens and precedences from a bison file, and the verbatim
+    text from a lex file and generates a boilerplate python file containing a
+    Parser class with handler methods and grammar attributes.
+    Arguments:
+     * bisonfileName - name of input bison script
+     * lexfileName - name of input flex script
+     * pyfileName - name of output python file
+     * generateClasses - flag - default 0 - if 1, causes a unique class to
+       be defined for each parse target, and for the corresponding target
+       handler method in the main Parser class to use this class when creating
+       the node.
+    """
+    # try to create output file
+    try:
+        pyfile = file(pyfileName, 'w')
+    except:
+        raise Exception('Cannot create output file "%s"' % pyfileName)
+    # try to open/read the bison file
+    try:
+        rawBison = file(bisonfileName).read()
+    except:
+        raise Exception('Cannot open bison file "%s"' % bisonfileName)
+    # try to open/read the lex file
+    try:
+        rawLex = file(lexfileName).read()
+    except:
+        raise Exception('Cannot open lex file %s' % lexfileName)
+    # break up into the three '%%'-separated sections
+    try:
+        prologue, rulesRaw, epilogue = rawBison.split('\n%%\n')
+    except:
+        raise Exception(
+            'File %s is not a properly formatted bison file'
+            ' (needs 3 sections separated by %%%%' % (bisonfileName)
+            )
+    # --------------------------------------
+    # process prologue
+    prologue = prologue.split('%}')[-1].strip() # ditch the C code
+    prologue = re.sub('\\n([\t ]+)', ' ', prologue) # join broken lines
+    #prologueLines = [line.strip() for line in prologue.split('\n')]
+    lines = prologue.split('\n')
+    tmp = []
+    for line in lines:
+        tmp.append(line.strip())
+    prologueLines = tmp
+    prologueLines = filter(None, prologueLines)
+    tokens = []
+    precRules = []
+    for line in prologueLines:
+        words = reSpaces.split(line)
+        kwd = words[0]
+        args = words[1:]
+        if kwd == '%token':
+            tokens.extend(args)
+        elif kwd in ['%left', '%right', '%nonassoc']:
+            precRules.append((kwd, args))
+        elif kwd == '%start':
+            startTarget = args[0]
+    # -------------------------------------------------------------
+    # process rules
+    rulesRaw = re.sub('\\n([\t ]+)', ' ', rulesRaw) # join broken lines
+    rulesLines = filter('', map(str.strip, re.split(unquoted % ';', rulesRaw)))
+    rules = []
+    for rule in rulesLines:
+        #print '--'
+        #print repr(rule)
+        #tgt, terms = rule.split(':')
+        try:
+            tgt, terms = re.split(unquoted % ':', rule)
+        except ValueError:
+            print 'Error in rule: %s' % rule
+            raise
+        tgt, terms = tgt.strip(), terms.strip()
+        #terms = [t.strip() for t in terms.split('|')]
+        #terms = [reSpaces.split(t) for t in terms]
+        tmp = []
+        #for t in terms.split('|'):
+        for t in re.split(unquoted % r'\|', terms):
+            t = t.strip()
+            tmp.append(reSpaces.split(t))
+        terms = tmp
+        rules.append((tgt, terms))
+    # now we have our rulebase, we can churn out our skeleton Python file
+    pyfile.write('\n'.join([
+        '#!/usr/bin/env python',
+        '',
+        '"""',
+        'PyBison file automatically generated from grammar file %s' % bisonfileName,
+        'You can edit this module, or import it and subclass the Parser class',
+        '"""',
+        '',
+        'import sys',
+        '',
+        'from bison import BisonParser, BisonNode, BisonError',
+        '',
+        'bisonFile = \'%s\'  # original bison file' % bisonfileName,
+        'lexFile = \'%s\'    # original flex file' % lexfileName,
+        '\n',
+        ]))
+    # if generating target classes
+    if generateClasses:
+        # create a base class for all nodes
+        pyfile.write("\n".join([
+            'class ParseNode(BisonNode):',
+            '    """',
+            '    This is the base class from which all your',
+            '    parse nodes are derived.',
+            '    Add methods to this class as you need them',
+            '    """',
+            '    def __init__(self, **kw):',
+            '        BisonNode.__init__(self, **kw)',
+            '',
+            '    def __str__(self):',
+            '        """Customise as needed"""',
+            '        return \'<%s instance at 0x%x>\' % (self.__class__.__name__, hash(self))',
+            '',
+            '    def __repr__(self):',
+            '        """Customise as needed"""',
+            '        return str(self)',
+            '',
+            '    def dump(self, indent=0):',
+            '        """',
+            '        Dump out human-readable, indented parse tree',
+            '        Customise as needed - here, or in the node-specific subclasses',
+            '        """',
+            '        BisonNode.dump(self, indent) # alter as needed',
+            '\n',
+            '# ------------------------------------------------------',
+            '# Define a node class for each grammar target',
+            '# ------------------------------------------------------',
+            '\n',
+            ]))
+        # now spit out class decs for every parse target
+        for target, options in rules:
+            tmp = map(' '.join, options)
+            # totally self-indulgent grammatical pedantry
+            if target[0].lower() in ['a','e','i','o','u']:
+                plural = 'n'
+            else:
+                plural = ''
+            pyfile.write("\n".join([
+                'class %s_Node(ParseNode):' % target,
+                '    """',
+                '    Holds a%s "%s" parse target and its components.' % (plural, target),
+                '    """',
+                '    def __init__(self, **kw):',
+                '        ParseNode.__init__(self, **kw)',
+                '',
+                '    def dump(self, indent=0):',
+                '        ParseNode.dump(self, indent)',
+                '\n',
+                ]))
+    # start churning out the class dec
+    pyfile.write('\n'.join([
+        'class Parser(BisonParser):',
+        '    """',
+        '    bison Parser class generated automatically by bison2py from the',
+        '    grammar file "%s" and lex file "%s"' % (bisonfileName, lexfileName),
+        '',
+        '    You may (and probably should) edit the methods in this class.',
+        '    You can freely edit the rules (in the method docstrings), the',
+        '    tokens list, the start symbol, and the precedences.',
+        '',
+        '    Each time this class is instantiated, a hashing technique in the',
+        '    base class detects if you have altered any of the rules. If any',
+        '    changes are detected, a new dynamic lib for the parser engine',
+        '    will be generated automatically.',
+        '    """',
+        '\n',
+        ]))
+    # add the default node class
+    if not generateClasses:
+        pyfile.write('\n'.join([
+            '    # -------------------------------------------------',
+            '    # Default class to use for creating new parse nodes',
+            '    # -------------------------------------------------',
+            '    defaultNodeClass = BisonNode',
+            '\n',
+            ]))
+    # add the name of the dynamic library we need
+    libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] \
+                  + '-engine'
+    pyfile.write('\n'.join([
+        '    # --------------------------------------------',
+        '    # basename of binary parser engine dynamic lib',
+        '    # --------------------------------------------',
+        '    bisonEngineLibName = \'%s\'' % libfileName,
+        '\n',
+        ]))
+    # add the tokens
+    #pyfile.write('    tokens = (%s,)\n\n' % ', '.join([''%s'' % t for t in tokens]))
+    #toks = ', '.join(tokens)
+    pyfile.write('    # ----------------------------------------------------------------\n')
+    pyfile.write('    # lexer tokens - these must match those in your lex script (below)\n')
+    pyfile.write('    # ----------------------------------------------------------------\n')
+    pyfile.write('    tokens = %s\n\n' % tmp)
+    # add the precedences
+    pyfile.write('    # ------------------------------\n')
+    pyfile.write('    # precedences\n')
+    pyfile.write('    # ------------------------------\n')
+    pyfile.write('    precedences = (\n')
+    for prec in precRules:
+        #precline = ', '.join(prec[1])
+        pyfile.write('        (\'%s\', %s,),\n' % (
+                prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s'
+                tmp,  # quote-wrapped targets
+                )
+            )
+    pyfile.write('        )\n\n'),
+    pyfile.write('\n'.join([
+        '    # ---------------------------------------------------------------',
+        '    # Declare the start target here (by name)',
+        '    # ---------------------------------------------------------------',
+        '    start = \'%s\'' % startTarget,
+        '\n',
+        ]))
+    # now the interesting bit - write the rule handler methods
+    pyfile.write('\n'.join([
+        '    # ---------------------------------------------------------------',
+        '    # These methods are the python handlers for the bison targets.',
+        '    # (which get called by the bison code each time the corresponding',
+        '    # parse target is unambiguously reached)',
+        '    #',
+        '    # WARNING - don\'t touch the method docstrings unless you know what',
+        '    # you are doing - they are in bison rule syntax, and are passed',
+        '    # verbatim to bison to build the parser engine library.',
+        '    # ---------------------------------------------------------------',
+        '\n',
+        ]))
+    for target, options in rules:
+        tmp = map(' '.join, options)
+        if generateClasses:
+            nodeClassName = target + '_Node'
+        else:
+            nodeClassName = 'self.defaultNodeClass'
+        pyfile.write('\n'.join([
+            '    def on_%s(self, target, option, names, values):' % target,
+            '        """',
+            '        %s' % target,
+            '            : ' + '\n            | '.join(tmp),
+            '        """',
+            '        return %s(' % nodeClassName,
+            '            target=\'%s\','  % target,
+            '            option=option,',
+            '            names=names,',
+            '            values=values)',
+            '\n',
+            ]))
+    # now the ugly bit - add the raw lex script
+    pyfile.write('\n'.join([
+        '    # -----------------------------------------',
+        '    # raw lex script, verbatim here',
+        '    # -----------------------------------------',
+        '    lexscript = r"""',
+        rawLex,
+        '    """',
+        '    # -----------------------------------------',
+        '    # end raw lex script',
+        '    # -----------------------------------------',
+        '',
+        '',
+        ]))
+    # and now, create a main for testing which either reads stdin, or a filename arg
+    pyfile.write('\n'.join([
+        'def usage():',
+        '    print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)',
+        '    print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]',
+        '    print \'  -k       Keep temporary files used in building parse engine lib\'',
+        '    print \'  -v       Enable verbose messages while parser is running\'',
+        '    print \'  -d       Enable garrulous debug messages from parser engine\'',
+        '    print \'  filename path of a file to parse, defaults to stdin\'',
+        '',
+        'def main(*args):',
+        '    """',
+        '    Unit-testing func',
+        '    """',
+        '',
+        '    keepfiles = 0',
+        '    verbose = 0',
+        '    debug = 0',
+        '    filename = None',
+        '',
+        '    for s in [\'-h\', \'-help\', \'--h\', \'--help\', \'-?\']:',
+        '        if s in args:',
+        '            usage()',
+        '            sys.exit(0)',
+        '',
+        '    if len(args) > 0:',
+        '        if \'-k\' in args:',
+        '            keepfiles = 1',
+        '            args.remove(\'-k\')',
+        '        if \'-v\' in args:',
+        '            verbose = 1',
+        '            args.remove(\'-v\')',
+        '        if \'-d\' in args:',
+        '            debug = 1',
+        '            args.remove(\'-d\')',
+        '    if len(args) > 0:',
+        '        filename = args[0]',
+        '',
+        '    p = Parser(verbose=verbose, keepfiles=keepfiles)',
+        '    tree = p.run(file=filename, debug=debug)',
+        '    return tree',
+        '',
+        'if __name__ == \'__main__\':',
+        '    main(*(sys.argv[1:]))',
+        '',
+        '',
+        ]))
--- a/utils/bison2py.py
+++ b/utils/bison2py.py
-#@+leo-ver=4
+#!/usr/bin/env python
-#@+node:@file utils/bison2py.py
-#@@language python
 """
 Utility which creates a boilerplate pybison-compatible
 python file from a yacc file and lex file
@@ -9,27 +6,20 @@ python file from a yacc file and lex file
 Run it with 2 arguments - filename.y and filename.l
 Output is filename.py
 """
-#@+others
+import sys
-#@+node:imports
-import sys, os, re, getopt
 from bison import bisonToPython
-#@-node:imports
-#@+node:globals
-argv = sys.argv
-argc = len(argv)
-progname = argv[0]
-reSpaces = re.compile("\\s+")
-#@-node:globals
-#@+node:usage
 def usage(s=None):
    """
    Display usage info and exit
    """
+    progname = sys.argv[0]
    if s:
        print progname+": "+s
    print "\n".join([
        "Usage: %s [-c] basefilename" % progname,
        "   or: %s [-c] grammarfile.y lexfile.l pyfile.py" % progname,
@@ -42,16 +32,16 @@ def usage(s=None):
        'The "-c" argument causes the creation of a unique node class',
        'for each parse target - highly recommended for complex grammars',
        ])
    sys.exit(1)
-#@-node:usage
-#@+node:main
 def main():
    """
    Command-line interface for bison2py
    """
-    global argc, argv
+    argv = sys.argv
+    argc = len(argv)
    if '-c' in argv:
        generateClasses = 1
@@ -69,14 +59,9 @@ def main():
        bisonfile, lexfile, pyfile = argv[1:4]
    else:
        usage("Bad argument count")
    bisonToPython(bisonfile, lexfile, pyfile, generateClasses)
-#@-node:main
-#@+node:MAINLINE
 if __name__ == '__main__':
    main()
-#@-node:MAINLINE
-#@-others
-#@-node:@file utils/bison2py.py
-#@-leo