#!/usr/bin/env python3
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""This module contains the unit tests to check the Lexer class."""
import sys
import unittest
from blueprint import Lexer, LexerError, Token
#------------------------------------------------------------------------------
# Python 2 compatibility
#------------------------------------------------------------------------------
if sys.version_info < (3,):
    def py3_str(string):
        """Convert a string into a utf-8 encoded string."""
        return unicode(string).encode('utf-8')
else:
    # On Python 3 every str is already unicode text, so no conversion needed.
    py3_str = str  # pylint: disable=invalid-name
#------------------------------------------------------------------------------
# LexerError
#------------------------------------------------------------------------------
class LexerErrorTest(unittest.TestCase):
    """Unit tests for LexerError class."""

    def test_lexer_error(self):
        """Test LexerError __init__(), __str__(), line, column, and message."""
        # (buffer, offset, expected line, expected column)
        cases = [
            ('a %', 2, 1, 3),
            ('a\nb\ncde %', 8, 3, 5),
        ]
        for buf, offset, exp_line, exp_column in cases:
            exc = LexerError(buf, offset, 'unexpected character')
            self.assertEqual(exc.line, exp_line)
            self.assertEqual(exc.column, exp_column)
            self.assertEqual(exc.message, 'unexpected character')
            self.assertEqual(
                str(exc),
                'LexerError: {}:{}: unexpected character'.format(
                    exp_line, exp_column))

    def test_hierarchy(self):
        """Test the hierarchy of LexerError.

        A LexerError must be catchable as a ValueError.
        """
        with self.assertRaises(ValueError):
            raise LexerError('a', 0, 'error')
class LexComputeLineColumn(unittest.TestCase):
    """Unit tests for Lexer.compute_line_column() method."""

    def test_compute_line_column(self):
        """Test the line and column computation."""
        buf = 'ab\ncde\nfg\n'
        # (offset, expected line, expected column) for every offset in buf,
        # including offset 10 which lands on the empty fourth line after
        # the trailing newline.
        expected = [
            (0, 1, 1), (1, 1, 2), (2, 1, 3),            # line 1: 'ab\n'
            (3, 2, 1), (4, 2, 2), (5, 2, 3), (6, 2, 4),  # line 2: 'cde\n'
            (7, 3, 1), (8, 3, 2), (9, 3, 3),            # line 3: 'fg\n'
            (10, 4, 1),                                  # line 4: empty
        ]
        for offset, exp_line, exp_column in expected:
            line, column = Lexer.compute_line_column(buf, offset)
            self.assertEqual(line, exp_line)
            self.assertEqual(column, exp_column)
#------------------------------------------------------------------------------
# Lex.lex_string()
#------------------------------------------------------------------------------
class LexStringTest(unittest.TestCase):
    """Unit tests for the Lexer.lex_string() method."""

    def _assert_bad_literals(self, tests):
        """Assert that each input raises LexerError at line 1, column 2.

        Column 2 is where the backslash of the malformed escape sequence
        starts in every input used by these tests.
        """
        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)

    def test_raw_string_lit(self):
        """Test whether Lexer.lex_string() can tokenize raw string literal."""
        end, lit = Lexer.lex_string('`a`', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'a')

        # Raw string literals may span multiple lines.
        end, lit = Lexer.lex_string('`a\nb`', 0)
        self.assertEqual(end, 5)
        self.assertEqual(lit, 'a\nb')

        # The offset argument selects the second literal in the buffer.
        end, lit = Lexer.lex_string('"a""b"', 3)
        self.assertEqual(end, 6)
        self.assertEqual(lit, 'b')

        # Unterminated raw string literal.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('`a', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 3)

        # Interpreted string literals must not contain a bare newline.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"a\nb"', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 3)

    def test_interpreted_string_literal(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal."""
        end, lit = Lexer.lex_string('"a"', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'a')

        # A plain 'n' (not an escape) stays a plain 'n'.
        end, lit = Lexer.lex_string('"n"', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'n')

        # Backslash at end of input is a malformed escape sequence.
        self._assert_bad_literals(['"\\'])

    def test_literal_escape_char(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a escaped character."""
        # Map each single-character escape to the character it denotes.
        escapes = [
            ('a', '\a'), ('b', '\b'), ('f', '\f'), ('n', '\n'),
            ('r', '\r'), ('t', '\t'), ('v', '\v'), ('\\', '\\'),
            ('\'', '\''), ('"', '\"'),
        ]
        for escaped, expected in escapes:
            end, lit = Lexer.lex_string('"\\' + escaped + '"', 0)
            self.assertEqual(end, 4)
            self.assertEqual(lit, expected)

        # '\?' is not a recognized escape sequence.
        self._assert_bad_literals(['"\\?"'])

    def test_literal_escape_octal(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with an octal escape sequence."""
        end, lit = Lexer.lex_string('"\\000"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\0')
        end, lit = Lexer.lex_string('"\\377"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\377')

        # Octal escapes require exactly three octal digits.
        self._assert_bad_literals([
            '"\\0',
            '"\\0" ',
            '"\\09" ',
            '"\\009"',
        ])

    def test_literal_escape_hex(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a hexadecimal escape sequence."""
        end, lit = Lexer.lex_string('"\\x00"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\0')
        end, lit = Lexer.lex_string('"\\xff"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\xff')

        # Hex escapes require exactly two hexadecimal digits.
        self._assert_bad_literals([
            '"\\x',
            '"\\x" ',
            '"\\x0" ',
            '"\\xg" ',
            '"\\x0g"',
        ])

    def test_literal_escape_little_u(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a little u escape sequence."""
        end, lit = Lexer.lex_string('"\\u0000"', 0)
        self.assertEqual(end, 8)
        self.assertEqual(lit, '\0')
        end, lit = Lexer.lex_string('"\\uffff"', 0)
        self.assertEqual(end, 8)
        self.assertEqual(lit, py3_str(u'\uffff'))

        # \u escapes require exactly four hexadecimal digits.
        self._assert_bad_literals([
            '"\\u',
            '"\\u" ',
            '"\\u0" ',
            '"\\ug" ',
            '"\\u0g" ',
            '"\\u00g" ',
            '"\\u000g"',
        ])

    def test_literal_escape_big_u(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a big u escape sequence."""
        end, lit = Lexer.lex_string('"\\U00000000"', 0)
        self.assertEqual(end, 12)
        self.assertEqual(lit, '\0')
        end, lit = Lexer.lex_string('"\\U0001ffff"', 0)
        self.assertEqual(end, 12)
        self.assertEqual(lit, py3_str(u'\U0001ffff'))

        # \U escapes require exactly eight hexadecimal digits.  The bad
        # digit is placed at every position; the original list repeated
        # '"\\U000g" ' twice, which added no coverage, so the duplicate
        # has been removed.
        self._assert_bad_literals([
            '"\\U',
            '"\\U" ',
            '"\\U0" ',
            '"\\Ug" ',
            '"\\U0g" ',
            '"\\U00g" ',
            '"\\U000g" ',
            '"\\U0000g" ',
            '"\\U00000g" ',
            '"\\U000000g" ',
            '"\\U0000000g"',
        ])
#------------------------------------------------------------------------------
# Lexer.lex()
#------------------------------------------------------------------------------
class LexTest(unittest.TestCase):
    """Unit tests for the Lexer.lex() method."""

    def _assert_lex(self, buf, offset, exp_token, exp_end, exp_lit):
        """Lex one token and check the resulting (token, end, lit) triple."""
        token, end, lit = Lexer.lex(buf, offset)
        self.assertEqual(token, exp_token)
        self.assertEqual(end, exp_end)
        self.assertEqual(lit, exp_lit)

    def test_lex_char(self):
        """Test whether Lexer.lex() can lex a character."""
        punctuation = [
            ('(', Token.LPAREN),
            (')', Token.RPAREN),
            ('[', Token.LBRACKET),
            (']', Token.RBRACKET),
            ('{', Token.LBRACE),
            ('}', Token.RBRACE),
            (':', Token.COLON),
            ('=', Token.ASSIGN),
            ('+', Token.PLUS),
            (',', Token.COMMA),
        ]
        for char, exp_token in punctuation:
            self._assert_lex(char, 0, exp_token, 1, None)

    def test_lex_assign_plus(self):
        """Test whether Lexer.lex() can lex `+=` without problems."""
        self._assert_lex('+=', 0, Token.ASSIGNPLUS, 2, None)

    def test_lex_space(self):
        """Test whether Lexer.lex() can lex whitespaces."""
        # A run of consecutive whitespace characters is one SPACE token.
        cases = [(' ', 1), ('\t', 1), ('\r', 1), ('\n', 1), ('\n \r\t\n', 5)]
        for buf, exp_end in cases:
            self._assert_lex(buf, 0, Token.SPACE, exp_end, None)

    def test_lex_comment(self):
        """Test whether Lexer.lex() can lex comments."""
        # (input, expected end offset).  Line comments stop before the
        # newline; block comments include various '*' edge cases.
        cases = [
            ('// abcd', 7),
            ('// abcd\nnext', 7),
            ('/*a\nb*/', 7),
            ('/*a\n *b*/', 9),
            ('/*a**b*/', 8),
            ('/*a***b*/', 9),
            ('/**/', 4),
            ('/***/', 5),
            ('/**a*/', 6),
            ('/*a**/', 6),
            ('/***a*/', 7),
            ('/*a***/', 7),
        ]
        for buf, exp_end in cases:
            self._assert_lex(buf, 0, Token.COMMENT, exp_end, None)

    def test_lex_string(self):
        """Test whether Lexer.lex() can lex a string."""
        self._assert_lex('"a"', 0, Token.STRING, 3, 'a')
        self._assert_lex('`a\nb`', 0, Token.STRING, 5, 'a\nb')

    def test_lex_ident(self):
        """Test whether Lexer.lex() can lex an identifier."""
        self._assert_lex('ident', 0, Token.IDENT, 5, 'ident')

    def test_lex_offset(self):
        """Test the offset argument of Lexer.lex()."""
        buf = 'a "b"'
        # Each returned end offset feeds the next call.
        token, end, lit = Lexer.lex(buf, 0)
        self.assertEqual((token, end, lit), (Token.IDENT, 1, 'a'))
        token, end, lit = Lexer.lex(buf, end)
        self.assertEqual((token, end, lit), (Token.SPACE, 2, None))
        token, end, lit = Lexer.lex(buf, end)
        self.assertEqual((token, end, lit), (Token.STRING, 5, 'b'))
#------------------------------------------------------------------------------
# Lexer class test
#------------------------------------------------------------------------------
class LexerTest(unittest.TestCase):
    """Unit tests for the Lexer class."""

    def _assert_state(self, lexer, start, end, token, literal):
        """Check the lexer's current position, token, and literal."""
        self.assertEqual(lexer.start, start)
        self.assertEqual(lexer.end, end)
        self.assertEqual(lexer.token, token)
        self.assertEqual(lexer.literal, literal)

    def test_lexer(self):
        """Test token, start, end, literal, and consume()."""
        lexer = Lexer('a b //a\n "c"', 0)
        self._assert_state(lexer, 0, 1, Token.IDENT, 'a')
        lexer.consume(Token.IDENT)
        self._assert_state(lexer, 2, 3, Token.IDENT, 'b')
        lexer.consume(Token.IDENT)
        # The comment and surrounding spaces are skipped over.
        self._assert_state(lexer, 9, 12, Token.STRING, 'c')
        lexer.consume(Token.STRING)
        self._assert_state(lexer, 12, 12, Token.EOF, None)

    def test_lexer_offset(self):
        """Test the offset argument of Lexer.__init__()."""
        lexer = Lexer('a b', 2)
        self._assert_state(lexer, 2, 3, Token.IDENT, 'b')
        lexer.consume(Token.IDENT)
        self._assert_state(lexer, 3, 3, Token.EOF, None)
        lexer.consume(Token.EOF)

    def test_lexer_path(self):
        """Test the path attribute of the Lexer object."""
        lexer = Lexer('content', path='test_path')
        self.assertEqual(lexer.path, 'test_path')
# Run all unit tests in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()