You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

190 lines
4.9 KiB

import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap
class t012lexerXML(testbase.ANTLRTest):
    """Tests for the lexer generated from ``t012lexerXMLLexer.g``.

    ``testValid`` lexes the ``.input`` file that sits next to this module and
    compares the lexer's ``outbuf`` contents with the matching ``.output``
    file.  The ``testMalformedInput*`` tests feed broken XML snippets to the
    lexer and check which exception is raised and where.

    Uses ``with`` and ``except ... as``, so Python 2.6 or later is required.
    """

    def setUp(self):
        """Compile the lexer grammar before each test."""
        self.compileGrammar('t012lexerXMLLexer.g')

    def lexerClass(self, base):
        """Return a subclass of *base* that fails hard on lexing errors.

        The subclass discards error messages and re-raises the exception
        passed to ``reportError`` instead of attempting recovery.
        NOTE(review): presumably the test base class calls this hook when it
        builds the lexer in ``getLexer`` -- confirm against ``testbase``.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def _readUtf8(self, path):
        """Return the contents of the file at *path* decoded from UTF-8."""
        # Close the handle deterministically instead of leaving it to the
        # garbage collector.
        with open(path) as fp:
            return unicode(fp.read(), 'utf-8')

    def _consumeTokens(self, lexer, eofType):
        """Pull tokens from *lexer* until one of type *eofType* is returned."""
        while lexer.nextToken().type != eofType:
            pass

    def _assertLexError(self, text, excClass, unexpectedType,
                        charPositionInLine, line):
        """Lex *text* and require that it fails with *excClass*.

        The raised exception must report the given *unexpectedType*,
        *charPositionInLine* and *line*.  The test fails if the lexer reaches
        EOF without raising; any other exception type propagates unchanged.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        try:
            self._consumeTokens(lexer, antlr3.EOF)
        except excClass as exc:
            # unittest assertions are not stripped under ``python -O`` and
            # report both the actual and the expected value on failure.
            self.assertEqual(exc.unexpectedType, unexpectedType)
            self.assertEqual(exc.charPositionInLine, charPositionInLine)
            self.assertEqual(exc.line, line)
        else:
            self.fail(
                'lexer reached EOF without raising %s' % excClass.__name__
                )

    def testValid(self):
        """Lex the reference input and compare against the expected output."""
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(self._readUtf8(basePath + '.input'))
        lexer = self.getLexer(stream)
        self._consumeTokens(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = self._readUtf8(basePath + '.output')

        if output != testOutput:
            # Report a line-by-line diff, escaped so that it can be printed
            # on an ASCII-only terminal.
            differ = difflib.Differ()
            delta = differ.compare(
                output.splitlines(True), testOutput.splitlines(True)
                )
            self.fail(
                ''.join(
                    [chunk.encode('ascii', 'backslashreplace')
                     for chunk in delta]
                    )
                )

    def testMalformedInput1(self):
        """A bare attribute name without a value is rejected at the '>'."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexError(
            text, antlr3.NoViableAltException,
            unexpectedType='>', charPositionInLine=11, line=2
            )

    def testMalformedInput2(self):
        """A processing instruction starting with '<?t' is rejected."""
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexError(
            text, antlr3.MismatchedSetException,
            unexpectedType='t', charPositionInLine=2, line=1
            )

    def testMalformedInput3(self):
        """A bare word before an attribute is rejected at the attribute."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexError(
            text, antlr3.NoViableAltException,
            unexpectedType='a', charPositionInLine=11, line=2
            )
# Run the test cases in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
## # run an infinite loop with randomly mangled input
## while True:
## print "ping"
## input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
## attr CDATA #IMPLIED
## attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>
## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &amp;
## &lt;
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """
## import random
## input = list(input) # make it mutable
## for _ in range(3):
## p1 = random.randrange(len(input))
## p2 = random.randrange(len(input))
## c1 = input[p1]
## input[p1] = input[p2]
## input[p2] = c1
## input = ''.join(input) # back to string
## stream = antlr3.StringStream(input)
## lexer = Lexer(stream)
## try:
## while True:
## token = lexer.nextToken()
## if token.type == EOF:
## break
## except antlr3.RecognitionException, exc:
## print exc
## for l in input.splitlines()[0:exc.line]:
## print l
## print ' '*exc.charPositionInLine + '^'
## except BaseException, exc:
## print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
## print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
## print
## raise