You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
190 lines
4.9 KiB
190 lines
4.9 KiB
import antlr3
|
|
import testbase
|
|
import unittest
|
|
import os
|
|
import sys
|
|
from cStringIO import StringIO
|
|
import difflib
|
|
import textwrap
|
|
|
|
class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests for the grammar ``t012lexerXMLLexer.g``.

    ``testValid`` lexes a reference document and compares what the lexer
    wrote to its ``outbuf`` against a stored expected output.  The
    ``testMalformedInput*`` cases feed broken XML and check which
    recognition exception is raised and where.

    ``compileGrammar`` and ``getLexer`` are not defined in this class;
    presumably they are inherited from ``testbase.ANTLRTest`` -- verify
    there.
    """

    def setUp(self):
        """Compile the grammar under test before each test case."""
        self.compileGrammar('t012lexerXMLLexer.g')

    def lexerClass(self, base):
        """Return a subclass of *base* that raises on the first lexer error.

        NOTE(review): presumably called by the test base class when it
        builds the lexer returned by ``getLexer`` -- confirm in testbase.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def _drainLexer(self, lexer, eofType):
        """Pull tokens from *lexer* until one of type *eofType* is seen.

        Any exception raised by ``lexer.nextToken()`` propagates to the
        caller.
        """
        while True:
            token = lexer.nextToken()
            if token.type == eofType:
                break

    def _readUtf8(self, path):
        """Return the content of the file *path* decoded as UTF-8.

        The file is closed explicitly instead of relying on garbage
        collection to release the handle.
        """
        fp = open(path)
        try:
            return unicode(fp.read(), 'utf-8')
        finally:
            fp.close()

    def _lexExpectingError(self, text, excClass):
        """Lex *text* and return the *excClass* instance that gets raised.

        The test fails if the whole input is consumed without the lexer
        raising *excClass*; other exception types propagate unchanged.
        """
        stream = antlr3.StringStream(text)
        lexer = self.getLexer(stream)

        try:
            self._drainLexer(lexer, antlr3.EOF)
        except excClass:
            # sys.exc_info() instead of an "except ..., exc" / "except ... as
            # exc" binding: this spelling is valid on every Python version.
            return sys.exc_info()[1]

        self.fail(
            "lexer accepted malformed input without raising %s"
            % excClass.__name__
            )

    def _checkErrorLocation(self, exc, unexpectedType, line, charPositionInLine):
        """Compare the offending symbol and position recorded on *exc*.

        Uses ``assertEqual`` rather than ``assert`` statements so that the
        checks are still performed when Python runs with ``-O``.
        """
        self.assertEqual(exc.unexpectedType, unexpectedType)
        self.assertEqual(exc.charPositionInLine, charPositionInLine)
        self.assertEqual(exc.line, line)

    def testValid(self):
        """Lex the ``.input`` file next to this module and diff the result
        against the ``.output`` file."""
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(self._readUtf8(basePath + '.input'))
        lexer = self.getLexer(stream)
        self._drainLexer(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = self._readUtf8(basePath + '.output')

        if output != testOutput:
            # Report a line-by-line diff; escape non-ASCII characters so the
            # failure message is safe to print on any terminal.
            diff = difflib.Differ().compare(
                output.splitlines(1), testOutput.splitlines(1)
                )
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in diff])
                )

    def testMalformedInput1(self):
        """A stray attribute name without a value must be rejected at '>'."""
        text = textwrap.dedent("""\
            <?xml version='1.0'?>
            <document d>
            </document>
            """)

        exc = self._lexExpectingError(text, antlr3.NoViableAltException)
        self._checkErrorLocation(exc, '>', 2, 11)

    def testMalformedInput2(self):
        """A misspelled ``<?xml`` declaration must be rejected at the 't'."""
        text = textwrap.dedent("""\
            <?tml version='1.0'?>
            <document>
            </document>
            """)

        exc = self._lexExpectingError(text, antlr3.MismatchedSetException)
        self._checkErrorLocation(exc, 't', 1, 2)

    def testMalformedInput3(self):
        """A tag name split by a space must be rejected at the 'a' of
        ``attr``."""
        text = textwrap.dedent("""\
            <?xml version='1.0'?>
            <docu ment attr="foo">
            </document>
            """)

        exc = self._lexExpectingError(text, antlr3.NoViableAltException)
        self._checkErrorLocation(exc, 'a', 2, 11)
|
|
|
|
|
|
|
|
# Run the test cases in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
|
|
|
|
|
## # run an infinite loop with randomly mangled input
|
|
## while True:
|
|
## print "ping"
|
|
|
|
## input = """\
|
|
## <?xml version='1.0'?>
|
|
## <!DOCTYPE component [
|
|
## <!ELEMENT component (PCDATA|sub)*>
|
|
## <!ATTLIST component
|
|
## attr CDATA #IMPLIED
|
|
## attr2 CDATA #IMPLIED
|
|
## >
|
|
## <!ELMENT sub EMPTY>
|
|
|
|
## ]>
|
|
## <component attr="val'ue" attr2='val"ue'>
|
|
## <!-- This is a comment -->
|
|
## Text
|
|
## <![CDATA[huhu]]>
|
|
## &
|
|
## <
|
|
## <?xtal cursor='11'?>
|
|
## <sub/>
|
|
## <sub></sub>
|
|
## </component>
|
|
## """
|
|
|
|
## import random
|
|
## input = list(input) # make it mutable
|
|
## for _ in range(3):
|
|
## p1 = random.randrange(len(input))
|
|
## p2 = random.randrange(len(input))
|
|
|
|
## c1 = input[p1]
|
|
## input[p1] = input[p2]
|
|
## input[p2] = c1
|
|
## input = ''.join(input) # back to string
|
|
|
|
## stream = antlr3.StringStream(input)
|
|
## lexer = Lexer(stream)
|
|
|
|
## try:
|
|
## while True:
|
|
## token = lexer.nextToken()
|
|
## if token.type == EOF:
|
|
## break
|
|
|
|
## except antlr3.RecognitionException, exc:
|
|
## print exc
|
|
## for l in input.splitlines()[0:exc.line]:
|
|
## print l
|
|
## print ' '*exc.charPositionInLine + '^'
|
|
|
|
## except BaseException, exc:
|
|
## print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
|
|
## print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
|
|
## print
|
|
|
|
## raise
|
|
|