bison.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918
  1. #@+leo-ver=4
  2. #@+node:@file src/python/bison.py
  3. """
  4. Wrapper module for interfacing with Bison (yacc)
  5. Written April 2004 by David McNab <david@freenet.org.nz>
  6. Copyright (c) 2004 by David McNab, all rights reserved.
  7. Released under the GNU General Public License, a copy
  8. of which should appear in this distribution in the file
  9. called 'COPYING'. If this file is missing, then you can
  10. obtain a copy of the GPL license document from the GNU
  11. website at http://www.gnu.org.
  12. This software is released with no warranty whatsoever.
  13. Use it at your own risk.
  14. If you wish to use this software in a commercial application,
  15. and wish to depart from the GPL licensing requirements,
  16. please contact the author and apply for a commercial license.
  17. """
  18. #@+others
  19. #@+node:imports
  20. import sys, os, sha, re, imp, traceback, xml.dom, xml.dom.minidom, types
  21. import distutils.sysconfig
  22. import distutils.ccompiler
  23. from bison_ import ParserEngine, unquoted
  24. #@-node:imports
  25. #@+node:globals
  26. reSpaces = re.compile("\\s+")
  27. #@-node:globals
  28. #@+node:exceptions
  29. class ParserSyntaxError(Exception):
  30. pass
  31. #@-node:exceptions
  32. #@+node:class BisonError
  33. class BisonError:
  34. """
  35. Flags an error to yyparse()
  36. You should return this in your actions to notify a syntax error
  37. """
  38. _pyBisonError = 1
  39. def __init__(self, value="syntax error"):
  40. self.value = value
  41. #@-node:class BisonError
  42. #@+node:class BisonNode
  43. class BisonNode:
  44. """
  45. Generic class for wrapping parse targets.
  46. Arguments:
  47. - targetname - the name of the parse target being wrapped.
  48. - items - optional - a list of items comprising a clause
  49. in the target rule - typically this will only be used
  50. by the PyBison callback mechanism.
  51. Keywords:
  52. - any keywords you want (except 'items'), with any type of value.
  53. keywords will be stored as attributes in the constructed object.
  54. """
  55. #@ @+others
  56. #@+node:__init__
  57. def __init__(self, **kw):
  58. self.__dict__.update(kw)
  59. # ensure some default attribs
  60. self.target = kw.get('target', 'UnnamedTarget')
  61. self.names = kw.get('names', [])
  62. self.values = kw.get('values', [])
  63. self.option = kw.get('option', 0)
  64. # mirror this dict to simplify dumping
  65. self.kw = kw
  66. #@-node:__init__
  67. #@+node:__str__
  68. def __str__(self):
  69. return "<BisonNode:%s>" % self.target
  70. #@-node:__str__
  71. #@+node:__repr__
  72. def __repr__(self):
  73. return str(self)
  74. #@-node:__repr__
  75. #@+node:__getitem__
  76. def __getitem__(self, item):
  77. """
  78. Retrieves the ith value from this node, or child nodes
  79. If the subscript is a single number, it will be used as an
  80. index into this node's children list.
  81. If the subscript is a list or tuple, we recursively fetch
  82. the item by using the first element as an index into this
  83. node's children, the second element as an index into that
  84. child node's children, and so on
  85. """
  86. if type(item) in [type(0), type(0L)]:
  87. return self.values[item]
  88. elif type(item) in [type(()), type([])]:
  89. if len(item) == 0:
  90. return self
  91. return self.values[item[0]][item[1:]]
  92. else:
  93. raise TypeError("Can only index %s objects with an int or a list/tuple" % self.__class.__name__)
  94. #@-node:__getitem__
  95. #@+node:__len__
  96. def __len__(self):
  97. return len(self.values)
  98. #@-node:__len__
  99. #@+node:__getslice__
  100. def __getslice__(self, fromidx, toidx):
  101. return self.values[fromidx:toidx]
  102. #@-node:__getslice__
  103. #@+node:__iter__
  104. def __iter__(self):
  105. return iter(self.values)
  106. #@-node:__iter__
  107. #@+node:dump
  108. def dump(self, indent=0):
  109. """
  110. For debugging - prints a recursive dump of a parse tree node and its children
  111. """
  112. specialAttribs = ['option', 'target', 'names', 'values']
  113. indents = " " * indent * 2
  114. #print "%s%s: %s %s" % (indents, self.target, self.option, self.names)
  115. print "%s%s:" % (indents, self.target)
  116. for name, val in self.kw.items() + zip(self.names, self.values):
  117. if name in specialAttribs or name.startswith("_"):
  118. continue
  119. if isinstance(val, BisonNode):
  120. val.dump(indent+1)
  121. else:
  122. print indents + " %s=%s" % (name, val)
  123. #@-node:dump
  124. #@+node:toxml
  125. def toxml(self):
  126. """
  127. Returns an xml serialisation of this node and its children, as a raw string
  128. Called on the toplevel node, the xml is a representation of the
  129. entire parse tree.
  130. """
  131. return self.toxmldoc().toxml()
  132. #@-node:toxml
  133. #@+node:toprettyxml
  134. def toprettyxml(self, indent=' ', newl='\n', encoding=None):
  135. """
  136. returns a human-readable xml serialisation of this node and its children
  137. """
  138. return self.toxmldoc().toprettyxml(indent=indent,
  139. newl=newl,
  140. encoding=encoding)
  141. #@-node:toprettyxml
  142. #@+node:toxmldoc
  143. def toxmldoc(self):
  144. """
  145. Returns the node and its children as an xml.dom.minidom.Document object
  146. """
  147. d = xml.dom.minidom.Document()
  148. d.appendChild(self.toxmlelem(d))
  149. return d
  150. #@-node:toxmldoc
  151. #@+node:toxmlelem
  152. def toxmlelem(self, docobj):
  153. """
  154. Returns a DOM Element object of this node and its children
  155. """
  156. specialAttribs = ['option', 'target', 'names', 'values']
  157. # generate an xml element obj for this node
  158. x = docobj.createElement(self.target)
  159. # set attribs
  160. for name, val in self.kw.items():
  161. if name in ['names', 'values'] or name.startswith("_"):
  162. continue
  163. x.setAttribute(name, str(val))
  164. #x.setAttribute('target', self.target)
  165. #x.setAttribute('option', self.option)
  166. # and add the children
  167. for name, val in zip(self.names, self.values):
  168. if name in specialAttribs or name.startswith("_"):
  169. continue
  170. if isinstance(val, BisonNode):
  171. x.appendChild(val.toxmlelem(docobj))
  172. else:
  173. sn = docobj.createElement(name)
  174. sn.setAttribute('target', name)
  175. tn = docobj.createTextNode(val)
  176. sn.appendChild(tn)
  177. x.appendChild(sn)
  178. # done
  179. return x
  180. #@-node:toxmlelem
  181. #@-others
  182. #@-node:class BisonNode
  183. #@+node:class BisonParser
  184. class BisonParser(object):
  185. """
  186. Base parser class
  187. You should subclass this, and provide a bunch of methods called
  188. 'on_TargetName', where 'TargetName' is the name of each target in
  189. your grammar (.y) file.
  190. """
  191. #@ @+others
  192. #@+node:attributes
  193. # ---------------------------------------
  194. # override these if you need to
  195. # command and options for running yacc/bison, except for filename arg
  196. bisonCmd = ["bison", "-d", "-v", '-t']
  197. bisonFile = "tmp.y"
  198. bisonCFile = "tmp.tab.c"
  199. bisonHFile = "tmp.tab.h" # name of header file generated by bison cmd
  200. bisonCFile1 = "tmp.bison.c" # c output file from bison gets renamed to this
  201. bisonHFile1 = "tokens.h" # bison-generated header file gets renamed to this
  202. flexCmd = ["flex", ] # command and options for running [f]lex, except for filename arg
  203. flexFile = "tmp.l"
  204. flexCFile = "lex.yy.c"
  205. flexCFile1 = "tmp.lex.c" # c output file from lex gets renamed to this
  206. cflags_pre = ['-fPIC'] # = CFLAGS added before all arguments.
  207. cflags_post = ['-O3','-g'] # = CFLAGS added after all arguments.
  208. buildDirectory = './' # Directory used to store the generated / compiled files.
  209. debugSymbols = 1 # Add debugging symbols to the binary files.
  210. verbose = 0
  211. file = None # default to sys.stdin
  212. last = None # last parsed target, top of parse tree
  213. lasterror = None # gets set if there was an error
  214. keepfiles = 0 # set to 1 to keep temporary engine build files
  215. bisonEngineLibName = None # defaults to 'modulename-engine'
  216. defaultNodeClass = BisonNode # class to use by default for creating new parse nodes
  217. #@-node:attributes
  218. #@+node:__init__
  219. def __init__(self, **kw):
  220. """
  221. Abstract representation of parser
  222. Keyword arguments:
  223. - read - a callable accepting an int arg (nbytes) and returning a string,
  224. default is this class' read() method
  225. - file - a file object, or string of a pathname to open as a file, defaults
  226. to sys.stdin. Note that you can leave this blank, and pass a file keyword
  227. argument to the .run() method.
  228. - verbose - set to 1 to enable verbose output messages, default 0
  229. - keepfiles - if non-zero, keeps any files generated in the
  230. course of building the parser engine; by default, all these
  231. files get deleted upon a successful engine build
  232. - defaultNodeClass - the class to use for creating parse nodes, default
  233. is self.defaultNodeClass (in this base class, BisonNode)
  234. """
  235. # setup
  236. read = kw.get('read', None)
  237. if read:
  238. self.read = read
  239. fileobj = kw.get('file', None)
  240. if fileobj:
  241. if type(fileobj) == type(""):
  242. try:
  243. fileobj = open(fileobj, "rb")
  244. except:
  245. raise Exception("Cannot open input file %s" % fileobj)
  246. self.file = fileobj
  247. else:
  248. self.file = sys.stdin
  249. nodeClass = kw.get('defaultNodeClass', None)
  250. if nodeClass:
  251. self.defaultNodeClass = nodeClass
  252. self.verbose = kw.get('verbose', 0)
  253. if kw.has_key('keepfiles'):
  254. self.keepfiles = kw['keepfiles']
  255. # if engine lib name not declared, invent ont
  256. if not self.bisonEngineLibName:
  257. self.bisonEngineLibName = self.__class__.__module__ + "-parser"
  258. # get an engine
  259. self.engine = ParserEngine(self)
  260. def __getitem__(self, idx):
  261. return self.last[idx]
  262. def _handle(self, targetname, option, names, values):
  263. """
  264. Callback which receives a target from parser, as a targetname
  265. and list of term names and values.
  266. Tries to dispatch to on_TargetName() methods if they exist,
  267. otherwise wraps the target in a BisonNode object
  268. """
  269. handler = getattr(self, 'on_'+targetname, None)
  270. if handler:
  271. if self.verbose:
  272. try:
  273. hdlrline = handler.func_code.co_firstlineno
  274. except:
  275. hdlrline = handler.__init__.func_code.co_firstlineno
  276. print '_handle: invoking handler at line %s for "%s"' \
  277. % (hdlrline, targetname)
  278. self.last = handler(target=targetname, option=option, names=names,
  279. values=values)
  280. #if self.verbose:
  281. # print 'handler for %s returned %s' \
  282. # % (targetname, repr(self.last))
  283. else:
  284. if self.verbose:
  285. print "no handler for %s, using default" % targetname
  286. self.last = BisonNode(targetname, option=option, names=names, values=values)
  287. # reset any resulting errors (assume they've been handled)
  288. #self.lasterror = None
  289. # assumedly the last thing parsed is at the top of the tree
  290. return self.last
  291. def run(self, **kw):
  292. """
  293. Runs the parser, and returns the top-most parse target.
  294. Keywords:
  295. - file - either a string, comprising a file to open and read input from, or
  296. a Python file object
  297. - debug - enables garrulous parser debugging output, default 0
  298. """
  299. if self.verbose:
  300. print "Parser.run: calling engine"
  301. # grab keywords
  302. fileobj = kw.get('file', self.file)
  303. if type(fileobj) == type(""):
  304. filename = fileobj
  305. try:
  306. fileobj = open(fileobj, "rb")
  307. except:
  308. raise Exception("Cannot open input file %s" % fileobj)
  309. else:
  310. filename = None
  311. fileobj = None
  312. read = kw.get('read', self.read)
  313. debug = kw.get('debug', 0)
  314. # back up existing attribs
  315. oldfile = self.file
  316. oldread = self.read
  317. # plug in new ones, if given
  318. if fileobj:
  319. self.file = fileobj
  320. if read:
  321. self.read = read
  322. # do the parsing job, spew if error
  323. self.lasterror = None
  324. self.engine.runEngine(debug)
  325. if self.lasterror:
  326. #print "Got error: %s" % repr(self.error)
  327. if filename != None:
  328. raise ParserSyntaxError("%s:%d: '%s' near '%s'" % ((filename,) + self.lasterror))
  329. else:
  330. raise ParserSyntaxError("Line %d: '%s' near '%s'" % self.lasterror)
  331. # restore old values
  332. self.file = oldfile
  333. self.read = oldread
  334. if self.verbose:
  335. print "Parser.run: back from engine"
  336. return self.last
  337. def read(self, nbytes):
  338. """
  339. Override this in your subclass, if you desire.
  340. Arguments:
  341. - nbytes - the maximum length of the string which you may return.
  342. DO NOT return a string longer than this, or else Bad Things will
  343. happen.
  344. """
  345. # default to stdin
  346. if self.verbose:
  347. print "Parser.read: want %s bytes" % nbytes
  348. bytes = self.file.readline(nbytes)
  349. if self.verbose:
  350. print "Parser.read: got %s bytes" % len(bytes)
  351. return bytes
  352. def _error(self, linenum, msg, tok):
  353. print "Parser: line %s: syntax error '%s' before '%s'" % (linenum, msg, tok)
  354. def error(self, value):
  355. """
  356. Return the result of this method from a handler to notify a syntax error
  357. """
  358. self.lasterror = value
  359. return BisonError(value)
  360. def toxml(self):
  361. """
  362. Serialises the parse tree and returns it as a raw xml string
  363. """
  364. return self.last.toxml()
  365. def toxmldoc(self):
  366. """
  367. Returns an xml.dom.minidom.Document object containing the parse tree
  368. """
  369. return self.last.toxmldoc()
  370. def toprettyxml(self):
  371. """
  372. Returns a human-readable xml representation of the parse tree
  373. """
  374. return self.last.toprettyxml()
  375. def loadxml(self, raw, namespace=None):
  376. """
  377. Loads a parse tree from raw xml text
  378. Stores it in the '.last' attribute, which is where the root node
  379. of parsed text gets stored
  380. Arguments:
  381. - raw - string containing the raw xml
  382. - namespace - a dict or module object, where the node classes required for
  383. reconstituting the parse tree, can be found
  384. Returns:
  385. - root node object of reconstituted parse tree
  386. """
  387. doc = xml.dom.minidom.parseString(raw)
  388. tree = self.loadxmldoc(doc, namespace)
  389. self.last = tree
  390. return tree
  391. def loadxmldoc(self, xmldoc, namespace=None):
  392. """
  393. Returns a reconstituted parse tree, loaded from an
  394. xml.dom.minidom.Document instance
  395. Arguments:
  396. - xmldoc - an xml.dom.minidom.Document instance
  397. - namespace - a dict from which to find the classes needed
  398. to translate the document into a tree of parse nodes
  399. """
  400. return self.loadxmlobj(xmldoc.childNodes[0], namespace)
  401. def loadxmlobj(self, xmlobj, namespace=None):
  402. """
  403. Returns a node object, being a parse tree, reconstituted from an
  404. xml.dom.minidom.Element object
  405. Arguments:
  406. - xmlobj - an xml.dom.minidom.Element instance
  407. - namespace - a namespace from which the node classes
  408. needed for reconstituting the tree, can be found
  409. """
  410. # check on namespace
  411. if type(namespace) is types.ModuleType:
  412. namespace = namespace.__dict__
  413. elif namespace == None:
  414. namespace = globals()
  415. objname = xmlobj.tagName
  416. classname = objname + "_Node"
  417. classobj = namespace.get(classname, None)
  418. namespacekeys = namespace.keys()
  419. # barf if node is not a known parse node or token
  420. if (not classobj) and objname not in self.tokens:
  421. raise Exception("Cannot reconstitute %s: can't find required node class or token %s" % (
  422. objname, classname))
  423. if classobj:
  424. nodeobj = classobj()
  425. # add the attribs
  426. for k,v in xmlobj.attributes.items():
  427. setattr(nodeobj, k, v)
  428. else:
  429. nodeobj = None
  430. #print "----------------"
  431. #print "objname=%s" % repr(objname)
  432. #print "classname=%s" % repr(classname)
  433. #print "classobj=%s" % repr(classobj)
  434. #print "nodeobj=%s" % repr(nodeobj)
  435. # now add the children
  436. for child in xmlobj.childNodes:
  437. #print "%s attributes=%s" % (child, child.attributes.items())
  438. childname = child.attributes['target'].value
  439. #print "childname=%s" % childname
  440. if childname + "_Node" in namespacekeys:
  441. #print "we have a node for class %s" % classname
  442. childobj = self.loadxmlobj(child, namespace)
  443. else:
  444. # it's a token
  445. childobj = child.childNodes[0].nodeValue
  446. #print "got token %s=%s" % (childname, childobj)
  447. nodeobj.names.append(childname)
  448. nodeobj.values.append(childobj)
  449. return nodeobj
  450. def _globals(self):
  451. return globals().keys()
  452. def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0):
  453. """
  454. Rips the rules, tokens and precedences from a bison file, and the
  455. verbatim text from a lex file and generates
  456. a boilerplate python file containing a Parser class with handler
  457. methods and grammar attributes
  458. Arguments:
  459. - bisonfileName - name of input bison script
  460. - lexfileName - name of input flex script
  461. - pyfileName - name of output python file
  462. - generateClasses - flag - default 0 - if 1, causes a unique class to
  463. be defined for each parse target, and for the corresponding target
  464. handler method in the main Parser class to use this class when creating
  465. the node.
  466. """
  467. # try to create output file
  468. try:
  469. pyfile = file(pyfileName, "w")
  470. except:
  471. raise Exception("Cannot create output file '%s'" % pyfileName)
  472. # try to open/read the bison file
  473. try:
  474. rawBison = file(bisonfileName).read()
  475. except:
  476. raise Exception("Cannot open bison file %s" % bisonfileName)
  477. # try to open/read the lex file
  478. try:
  479. rawLex = file(lexfileName).read()
  480. except:
  481. raise Exception("Cannot open lex file %s" % lexfileName)
  482. # break up into the three '%%'-separated sections
  483. try:
  484. prologue, rulesRaw, epilogue = rawBison.split("\n%%\n")
  485. except:
  486. raise Exception(
  487. "File %s is not a properly formatted bison file"
  488. " (needs 3 sections separated by %%%%" % (bisonfileName)
  489. )
  490. # --------------------------------------
  491. # process prologue
  492. prologue = prologue.split("%}")[-1].strip() # ditch the C code
  493. prologue = re.sub("\\n([\t ]+)", " ", prologue) # join broken lines
  494. #prologueLines = [line.strip() for line in prologue.split("\n")]
  495. lines = prologue.split("\n")
  496. tmp = []
  497. for line in lines:
  498. tmp.append(line.strip())
  499. prologueLines = tmp
  500. prologueLines = filter(None, prologueLines)
  501. tokens = []
  502. precRules = []
  503. for line in prologueLines:
  504. words = reSpaces.split(line)
  505. kwd = words[0]
  506. args = words[1:]
  507. if kwd == '%token':
  508. tokens.extend(args)
  509. elif kwd in ['%left', '%right', '%nonassoc']:
  510. precRules.append((kwd, args))
  511. elif kwd == '%start':
  512. startTarget = args[0]
  513. # -------------------------------------------------------------
  514. # process rules
  515. rulesRaw = re.sub("\\n([\t ]+)", " ", rulesRaw) # join broken lines
  516. #rulesLines = filter(None, [r.strip() for r in rulesRaw.split(";")])
  517. rulesLines = []
  518. #for r in rulesRaw.split(";"):
  519. for r in re.split(unquoted % ";", rulesRaw):
  520. r = r.strip()
  521. if r:
  522. rulesLines.append(r)
  523. rules = []
  524. for rule in rulesLines:
  525. #print "--"
  526. #print repr(rule)
  527. #tgt, terms = rule.split(":")
  528. try:
  529. tgt, terms = re.split(unquoted % ":", rule)
  530. except ValueError:
  531. print "Error in rule: %s" % rule
  532. raise
  533. tgt, terms = tgt.strip(), terms.strip()
  534. #terms = [t.strip() for t in terms.split("|")]
  535. #terms = [reSpaces.split(t) for t in terms]
  536. tmp = []
  537. #for t in terms.split("|"):
  538. for t in re.split(unquoted % r"\|", terms):
  539. t = t.strip()
  540. tmp.append(reSpaces.split(t))
  541. terms = tmp
  542. rules.append((tgt, terms))
  543. # now we have our rulebase, we can churn out our skeleton Python file
  544. pyfile.write("\n".join([
  545. '#!/usr/bin/env python',
  546. '',
  547. '"""',
  548. 'PyBison file automatically generated from grammar file %s' % bisonfileName,
  549. 'You can edit this module, or import it and subclass the Parser class',
  550. '"""',
  551. '',
  552. 'import sys',
  553. '',
  554. 'from bison import BisonParser, BisonNode, BisonError',
  555. '',
  556. 'bisonFile = "%s" # original bison file' % bisonfileName,
  557. 'lexFile = "%s" # original flex file' % lexfileName,
  558. '\n',
  559. ]))
  560. # if generating target classes
  561. if generateClasses:
  562. # create a base class for all nodes
  563. pyfile.write("\n".join([
  564. 'class ParseNode(BisonNode):',
  565. ' """',
  566. ' This is the base class from which all your',
  567. ' parse nodes are derived.',
  568. ' Add methods to this class as you need them',
  569. ' """',
  570. ' def __init__(self, **kw):',
  571. ' BisonNode.__init__(self, **kw)',
  572. '',
  573. ' def __str__(self):',
  574. ' """Customise as needed"""',
  575. ' return "<%s instance at 0x%x>" % (self.__class__.__name__, hash(self))',
  576. '',
  577. ' def __repr__(self):',
  578. ' """Customise as needed"""',
  579. ' return str(self)',
  580. '',
  581. ' def dump(self, indent=0):',
  582. ' """',
  583. ' Dump out human-readable, indented parse tree',
  584. ' Customise as needed - here, or in the node-specific subclasses',
  585. ' """',
  586. ' BisonNode.dump(self, indent) # alter as needed',
  587. '\n',
  588. '# ------------------------------------------------------',
  589. '# Define a node class for each grammar target',
  590. '# ------------------------------------------------------',
  591. '\n',
  592. ]))
  593. # now spit out class decs for every parse target
  594. for target, options in rules:
  595. tmp = []
  596. for t in options:
  597. tmp.append(" ".join(t))
  598. # totally self-indulgent grammatical pedantry
  599. if target[0].lower() in ['a','e','i','o','u']:
  600. plural = 'n'
  601. else:
  602. plural = ''
  603. pyfile.write("\n".join([
  604. 'class %s_Node(ParseNode):' % target,
  605. ' """',
  606. ' Holds a%s "%s" parse target and its components.' % (plural, target),
  607. ' """',
  608. ' def __init__(self, **kw):',
  609. ' ParseNode.__init__(self, **kw)',
  610. '',
  611. ' def dump(self, indent=0):',
  612. ' ParseNode.dump(self, indent)',
  613. '\n',
  614. ]))
  615. # start churning out the class dec
  616. pyfile.write("\n".join([
  617. 'class Parser(BisonParser):',
  618. ' """',
  619. ' bison Parser class generated automatically by bison2py from the',
  620. ' grammar file "%s" and lex file "%s"' % (bisonfileName, lexfileName),
  621. '',
  622. ' You may (and probably should) edit the methods in this class.',
  623. ' You can freely edit the rules (in the method docstrings), the',
  624. ' tokens list, the start symbol, and the precedences.',
  625. '',
  626. ' Each time this class is instantiated, a hashing technique in the',
  627. ' base class detects if you have altered any of the rules. If any',
  628. ' changes are detected, a new dynamic lib for the parser engine',
  629. ' will be generated automatically.',
  630. ' """',
  631. '\n',
  632. ]))
  633. # add the default node class
  634. if not generateClasses:
  635. pyfile.write("\n".join([
  636. ' # -------------------------------------------------',
  637. ' # Default class to use for creating new parse nodes',
  638. ' # -------------------------------------------------',
  639. ' defaultNodeClass = BisonNode',
  640. '\n',
  641. ]))
  642. # add the name of the dynamic library we need
  643. libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] + "-engine"
  644. pyfile.write("\n".join([
  645. ' # --------------------------------------------',
  646. ' # basename of binary parser engine dynamic lib',
  647. ' # --------------------------------------------',
  648. ' bisonEngineLibName = "%s"' % (parser.buildDirectory + libfileName),
  649. '\n',
  650. ]))
  651. # add the tokens
  652. #pyfile.write(' tokens = (%s,)\n\n' % ", ".join(['"%s"' % t for t in tokens]))
  653. tmp = []
  654. for t in tokens:
  655. #tmp.append('"'+t+'"')
  656. tmp.append(t)
  657. toks = ", ".join(tmp)
  658. pyfile.write(' # ----------------------------------------------------------------\n')
  659. pyfile.write(' # lexer tokens - these must match those in your lex script (below)\n')
  660. pyfile.write(' # ----------------------------------------------------------------\n')
  661. pyfile.write(' tokens = %s\n\n' % tmp)
  662. # add the precedences
  663. pyfile.write(' # ------------------------------\n')
  664. pyfile.write(' # precedences\n')
  665. pyfile.write(' # ------------------------------\n')
  666. pyfile.write(' precedences = (\n')
  667. for prec in precRules:
  668. tmp = []
  669. for p in prec[1]:
  670. #tmp.append('"'+p+'"')
  671. tmp.append(p)
  672. precline = ", ".join(tmp)
  673. #pyfile.write(' ("%s", (%s,)),\n' % (
  674. pyfile.write(' ("%s", %s,),\n' % (
  675. prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s'
  676. tmp, # quote-wrapped targets
  677. )
  678. )
  679. pyfile.write(' )\n\n'),
  680. pyfile.write("\n".join([
  681. ' # ---------------------------------------------------------------',
  682. ' # Declare the start target here (by name)',
  683. ' # ---------------------------------------------------------------',
  684. ' start = "%s"' % startTarget,
  685. '\n',
  686. ]))
  687. # now the interesting bit - write the rule handler methods
  688. pyfile.write("\n".join([
  689. ' # ---------------------------------------------------------------',
  690. ' # These methods are the python handlers for the bison targets.',
  691. ' # (which get called by the bison code each time the corresponding',
  692. ' # parse target is unambiguously reached)',
  693. ' #',
  694. " # WARNING - don't touch the method docstrings unless you know what",
  695. " # you are doing - they are in bison rule syntax, and are passed",
  696. " # verbatim to bison to build the parser engine library.",
  697. ' # ---------------------------------------------------------------',
  698. '\n',
  699. ]))
  700. for target, options in rules:
  701. tmp = []
  702. for t in options:
  703. tmp.append(" ".join(t))
  704. if generateClasses:
  705. nodeClassName = target + "_Node"
  706. else:
  707. nodeClassName = 'self.defaultNodeClass'
  708. pyfile.write("\n".join([
  709. ' def on_%s(self, target, option, names, values):' % target,
  710. ' """',
  711. ' %s' % target,
  712. ' : ' + "\n | ".join(tmp),
  713. ' """',
  714. ' return %s(' % nodeClassName,
  715. ' target="%s",' % target,
  716. ' option=option,',
  717. ' names=names,',
  718. ' values=values)',
  719. '\n',
  720. ]))
  721. # now the ugly bit - add the raw lex script
  722. pyfile.write("\n".join([
  723. ' # -----------------------------------------',
  724. ' # raw lex script, verbatim here',
  725. ' # -----------------------------------------',
  726. ' lexscript = r"""',
  727. rawLex,
  728. ' """',
  729. ' # -----------------------------------------',
  730. ' # end raw lex script',
  731. ' # -----------------------------------------',
  732. '',
  733. '',
  734. ]))
  735. # and now, create a main for testing which either reads stdin, or a filename arg
  736. pyfile.write("\n".join([
  737. 'def usage():',
  738. ' print "%s: PyBison parser derived from %s and %s" % (sys.argv[0], bisonFile, lexFile)',
  739. ' print "Usage: %s [-k] [-v] [-d] [filename]" % sys.argv[0]',
  740. ' print " -k Keep temporary files used in building parse engine lib"',
  741. ' print " -v Enable verbose messages while parser is running"',
  742. ' print " -d Enable garrulous debug messages from parser engine"',
  743. ' print " filename path of a file to parse, defaults to stdin"',
  744. '',
  745. 'def main(*args):',
  746. ' """',
  747. ' Unit-testing func',
  748. ' """',
  749. '',
  750. ' keepfiles = 0',
  751. ' verbose = 0',
  752. ' debug = 0',
  753. ' filename = None',
  754. '',
  755. ' for s in ["-h", "-help", "--h", "--help", "-?"]:',
  756. ' if s in args:',
  757. ' usage()',
  758. ' sys.exit(0)',
  759. '',
  760. ' if len(args) > 0:',
  761. ' if "-k" in args:',
  762. ' keepfiles = 1',
  763. ' args.remove("-k")',
  764. ' if "-v" in args:',
  765. ' verbose = 1',
  766. ' args.remove("-v")',
  767. ' if "-d" in args:',
  768. ' debug = 1',
  769. ' args.remove("-d")',
  770. ' if len(args) > 0:',
  771. ' filename = args[0]',
  772. '',
  773. ' p = Parser(verbose=verbose, keepfiles=keepfiles)',
  774. ' tree = p.run(file=filename, debug=debug)',
  775. ' return tree',
  776. '',
  777. 'if __name__ == "__main__":',
  778. ' main(*(sys.argv[1:]))',
  779. '',
  780. '',
  781. ]))