You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

419 lines
12 KiB

"""ANTLR3 runtime package"""
# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2008 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]
from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE
############################################################################
#
# basic token interface
#
############################################################################
class Token(object):
"""@brief Abstract token baseclass."""
def getText(self):
"""@brief Get the text of the token.
Using setter/getter methods is deprecated. Use o.text instead.
"""
raise NotImplementedError
def setText(self, text):
"""@brief Set the text of the token.
Using setter/getter methods is deprecated. Use o.text instead.
"""
raise NotImplementedError
def getType(self):
"""@brief Get the type of the token.
Using setter/getter methods is deprecated. Use o.type instead."""
raise NotImplementedError
def setType(self, ttype):
"""@brief Get the type of the token.
Using setter/getter methods is deprecated. Use o.type instead."""
raise NotImplementedError
def getLine(self):
"""@brief Get the line number on which this token was matched
Lines are numbered 1..n
Using setter/getter methods is deprecated. Use o.line instead."""
raise NotImplementedError
def setLine(self, line):
"""@brief Set the line number on which this token was matched
Using setter/getter methods is deprecated. Use o.line instead."""
raise NotImplementedError
def getCharPositionInLine(self):
"""@brief Get the column of the tokens first character,
Columns are numbered 0..n-1
Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""
raise NotImplementedError
def setCharPositionInLine(self, pos):
"""@brief Set the column of the tokens first character,
Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""
raise NotImplementedError
def getChannel(self):
"""@brief Get the channel of the token
Using setter/getter methods is deprecated. Use o.channel instead."""
raise NotImplementedError
def setChannel(self, channel):
"""@brief Set the channel of the token
Using setter/getter methods is deprecated. Use o.channel instead."""
raise NotImplementedError
def getTokenIndex(self):
"""@brief Get the index in the input stream.
An index from 0..n-1 of the token object in the input stream.
This must be valid in order to use the ANTLRWorks debugger.
Using setter/getter methods is deprecated. Use o.index instead."""
raise NotImplementedError
def setTokenIndex(self, index):
"""@brief Set the index in the input stream.
Using setter/getter methods is deprecated. Use o.index instead."""
raise NotImplementedError
def getInputStream(self):
"""@brief From what character stream was this token created.
You don't have to implement but it's nice to know where a Token
comes from if you have include files etc... on the input."""
raise NotImplementedError
def setInputStream(self, input):
"""@brief From what character stream was this token created.
You don't have to implement but it's nice to know where a Token
comes from if you have include files etc... on the input."""
raise NotImplementedError
############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################
class CommonToken(Token):
"""@brief Basic token implementation.
This implementation does not copy the text from the input stream upon
creation, but keeps start/stop pointers into the stream to avoid
unnecessary copy operations.
"""
def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
input=None, start=None, stop=None, oldToken=None):
Token.__init__(self)
if oldToken is not None:
self.type = oldToken.type
self.line = oldToken.line
self.charPositionInLine = oldToken.charPositionInLine
self.channel = oldToken.channel
self.index = oldToken.index
self._text = oldToken._text
self.input = oldToken.input
if isinstance(oldToken, CommonToken):
self.start = oldToken.start
self.stop = oldToken.stop
else:
self.type = type
self.input = input
self.charPositionInLine = -1 # set to invalid position
self.line = 0
self.channel = channel
#What token number is this from 0..n-1 tokens; < 0 implies invalid index
self.index = -1
# We need to be able to change the text once in a while. If
# this is non-null, then getText should return this. Note that
# start/stop are not affected by changing this.
self._text = text
# The char position into the input buffer where this token starts
self.start = start
# The char position into the input buffer where this token stops
# This is the index of the last char, *not* the index after it!
self.stop = stop
def getText(self):
if self._text is not None:
return self._text
if self.input is None:
return None
if self.start < self.input.size() and self.stop < self.input.size():
return self.input.substring(self.start, self.stop)
return '<EOF>'
def setText(self, text):
"""
Override the text for this token. getText() will return this text
rather than pulling from the buffer. Note that this does not mean
that start/stop indexes are not valid. It means that that input
was converted to a new string in the token object.
"""
self._text = text
text = property(getText, setText)
def getType(self):
return self.type
def setType(self, ttype):
self.type = ttype
def getTypeName(self):
return str(self.type)
typeName = property(lambda s: s.getTypeName())
def getLine(self):
return self.line
def setLine(self, line):
self.line = line
def getCharPositionInLine(self):
return self.charPositionInLine
def setCharPositionInLine(self, pos):
self.charPositionInLine = pos
def getChannel(self):
return self.channel
def setChannel(self, channel):
self.channel = channel
def getTokenIndex(self):
return self.index
def setTokenIndex(self, index):
self.index = index
def getInputStream(self):
return self.input
def setInputStream(self, input):
self.input = input
def __str__(self):
if self.type == EOF:
return "<EOF>"
channelStr = ""
if self.channel > 0:
channelStr = ",channel=" + str(self.channel)
txt = self.text
if txt is not None:
txt = txt.replace("\n","\\\\n")
txt = txt.replace("\r","\\\\r")
txt = txt.replace("\t","\\\\t")
else:
txt = "<no text>"
return "[@%d,%d:%d=%r,<%s>%s,%d:%d]" % (
self.index,
self.start, self.stop,
txt,
self.typeName, channelStr,
self.line, self.charPositionInLine
)
class ClassicToken(Token):
"""@brief Alternative token implementation.
A Token object like we'd use in ANTLR 2.x; has an actual string created
and associated with this object. These objects are needed for imaginary
tree nodes that have payload objects. We need to create a Token object
that has a string; the tree node will point at this token. CommonToken
has indexes into a char stream and hence cannot be used to introduce
new strings.
"""
def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
oldToken=None
):
Token.__init__(self)
if oldToken is not None:
self.text = oldToken.text
self.type = oldToken.type
self.line = oldToken.line
self.charPositionInLine = oldToken.charPositionInLine
self.channel = oldToken.channel
self.text = text
self.type = type
self.line = None
self.charPositionInLine = None
self.channel = channel
self.index = None
def getText(self):
return self.text
def setText(self, text):
self.text = text
def getType(self):
return self.type
def setType(self, ttype):
self.type = ttype
def getLine(self):
return self.line
def setLine(self, line):
self.line = line
def getCharPositionInLine(self):
return self.charPositionInLine
def setCharPositionInLine(self, pos):
self.charPositionInLine = pos
def getChannel(self):
return self.channel
def setChannel(self, channel):
self.channel = channel
def getTokenIndex(self):
return self.index
def setTokenIndex(self, index):
self.index = index
def getInputStream(self):
return None
def setInputStream(self, input):
pass
def toString(self):
channelStr = ""
if self.channel > 0:
channelStr = ",channel=" + str(self.channel)
txt = self.text
if txt is None:
txt = "<no text>"
return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
txt,
self.type,
channelStr,
self.line,
self.charPositionInLine
)
__str__ = toString
__repr__ = toString
INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)