196 lines
6.6 KiB
196 lines
6.6 KiB
"""ANTLR3 runtime package"""
|
|
|
|
# begin[licence]
|
|
#
|
|
# [The "BSD licence"]
|
|
# Copyright (c) 2005-2012 Terence Parr
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# 3. The name of the author may not be used to endorse or promote products
|
|
# derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# end[licence]
|
|
|
|
from .constants import EOF
|
|
from .exceptions import NoViableAltException, BacktrackingFailed
|
|
|
|
|
|
class DFA(object):
|
|
"""@brief A DFA implemented as a set of transition tables.
|
|
|
|
Any state that has a semantic predicate edge is special; those states
|
|
are generated with if-then-else structures in a specialStateTransition()
|
|
which is generated by cyclicDFA template.
|
|
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
recognizer, decisionNumber,
|
|
eot, eof, min, max, accept, special, transition
|
|
):
|
|
## Which recognizer encloses this DFA? Needed to check backtracking
|
|
self.recognizer = recognizer
|
|
|
|
self.decisionNumber = decisionNumber
|
|
self.eot = eot
|
|
self.eof = eof
|
|
self.min = min
|
|
self.max = max
|
|
self.accept = accept
|
|
self.special = special
|
|
self.transition = transition
|
|
|
|
|
|
def predict(self, input):
|
|
"""
|
|
From the input stream, predict what alternative will succeed
|
|
using this DFA (representing the covering regular approximation
|
|
to the underlying CFL). Return an alternative number 1..n. Throw
|
|
an exception upon error.
|
|
"""
|
|
mark = input.mark()
|
|
s = 0 # we always start at s0
|
|
try:
|
|
for _ in range(50000):
|
|
specialState = self.special[s]
|
|
if specialState >= 0:
|
|
s = self.specialStateTransition(specialState, input)
|
|
if s == -1:
|
|
self.noViableAlt(s, input)
|
|
return 0
|
|
input.consume()
|
|
continue
|
|
|
|
if self.accept[s] >= 1:
|
|
return self.accept[s]
|
|
|
|
# look for a normal char transition
|
|
c = input.LA(1)
|
|
|
|
if c >= self.min[s] and c <= self.max[s]:
|
|
# move to next state
|
|
snext = self.transition[s][c-self.min[s]]
|
|
|
|
if snext < 0:
|
|
# was in range but not a normal transition
|
|
# must check EOT, which is like the else clause.
|
|
# eot[s]>=0 indicates that an EOT edge goes to another
|
|
# state.
|
|
if self.eot[s] >= 0: # EOT Transition to accept state?
|
|
s = self.eot[s]
|
|
input.consume()
|
|
# TODO: I had this as return accept[eot[s]]
|
|
# which assumed here that the EOT edge always
|
|
# went to an accept...faster to do this, but
|
|
# what about predicated edges coming from EOT
|
|
# target?
|
|
continue
|
|
|
|
self.noViableAlt(s, input)
|
|
return 0
|
|
|
|
s = snext
|
|
input.consume()
|
|
continue
|
|
|
|
if self.eot[s] >= 0:
|
|
s = self.eot[s]
|
|
input.consume()
|
|
continue
|
|
|
|
# EOF Transition to accept state?
|
|
if c == EOF and self.eof[s] >= 0:
|
|
return self.accept[self.eof[s]]
|
|
|
|
# not in range and not EOF/EOT, must be invalid symbol
|
|
self.noViableAlt(s, input)
|
|
return 0
|
|
|
|
else:
|
|
raise RuntimeError("DFA bang!")
|
|
|
|
finally:
|
|
input.rewind(mark)
|
|
|
|
|
|
def noViableAlt(self, s, input):
|
|
if self.recognizer._state.backtracking > 0:
|
|
raise BacktrackingFailed
|
|
|
|
nvae = NoViableAltException(
|
|
self.getDescription(),
|
|
self.decisionNumber,
|
|
s,
|
|
input
|
|
)
|
|
|
|
self.error(nvae)
|
|
raise nvae
|
|
|
|
|
|
def error(self, nvae):
|
|
"""A hook for debugging interface"""
|
|
pass
|
|
|
|
|
|
def specialStateTransition(self, s, input):
|
|
return -1
|
|
|
|
|
|
def getDescription(self):
|
|
return "n/a"
|
|
|
|
|
|
## def specialTransition(self, state, symbol):
|
|
## return 0
|
|
|
|
|
|
@classmethod
|
|
def unpack(cls, string):
|
|
"""@brief Unpack the runlength encoded table data.
|
|
|
|
Terence implemented packed table initializers, because Java has a
|
|
size restriction on .class files and the lookup tables can grow
|
|
pretty large. The generated JavaLexer.java of the Java.g example
|
|
would be about 15MB with uncompressed array initializers.
|
|
|
|
Python does not have any size restrictions, but the compilation of
|
|
such large source files seems to be pretty memory hungry. The memory
|
|
consumption of the python process grew to >1.5GB when importing a
|
|
15MB lexer, eating all my swap space and I was to impacient to see,
|
|
if it could finish at all. With packed initializers that are unpacked
|
|
at import time of the lexer module, everything works like a charm.
|
|
|
|
"""
|
|
|
|
ret = []
|
|
for i in range(0, len(string) - 1, 2):
|
|
(n, v) = ord(string[i]), ord(string[i + 1])
|
|
|
|
if v == 0xFFFF:
|
|
v = -1
|
|
|
|
ret += [v] * n
|
|
|
|
return ret
|