You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
278 lines
9.3 KiB
278 lines
9.3 KiB
// Copyright 2020 The Pigweed Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy of
|
|
// the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations under
|
|
// the License.
|
|
|
|
#include "pw_tokenizer/token_database.h"
|
|
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <string_view>
|
|
|
|
#include "gtest/gtest.h"
|
|
|
|
namespace pw::tokenizer {
|
|
namespace {
|
|
|
|
using namespace std::literals::string_view_literals;
|
|
|
|
// Use alignas to ensure that the data is properly aligned for database entries.
|
|
// This avoids unaligned memory reads.
|
|
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
|
|
"TOKENS\0\0\x03\x00\x00\x00\0\0\0\0"
|
|
"\x01\0\0\0\0\0\0\0"
|
|
"\x02\0\0\0\0\0\0\0"
|
|
"\xFF\0\0\0\0\0\0\0"
|
|
"hi!\0"
|
|
"goodbye\0"
|
|
":)";
|
|
|
|
// A database with an entry count of zero: nothing but the header. Only 15
// bytes are spelled out; the literal's implicit null terminator is the 16th.
alignas(TokenDatabase::RawEntry) constexpr char kEmptyData[] =
    "TOKENS\0\0\x00\x00\x00\x00\0\0\0";
|
|
|
|
// Same shape as a real database, but the leading "TOKENS" text ends in a
// lowercase 's'. Used to check that such data is rejected.
alignas(TokenDatabase::RawEntry) constexpr char kBadMagic[] =
    // Header (corrupted leading text)
    "TOKENs\0\0\x03\x00\x00\x00\0\0\0\0"
    // Entry
    "\x01\0\0\0\0\0\0\0"
    // String
    "hi!\0";
|
|
|
|
// Header whose two bytes following "TOKENS" are 0x00 0x01 instead of the
// 0x00 0x00 used by the valid fixtures. Used to check that it is rejected.
alignas(TokenDatabase::RawEntry) constexpr char kBadVersion[] =
    "TOKENS\0\1\x00\0\0\0\0\0\0\0";
|
|
|
|
// Header that declares 0xff entries even though no entry data follows it.
// Used to check that the mismatch is rejected.
alignas(TokenDatabase::RawEntry) constexpr char kBadEntryCount[] =
    "TOKENS\0\0\xff\x00\x00\x00\0\0\0\0";
|
|
|
|
// Use signed data and a size with the top bit set to test that the entry count
|
|
// is read correctly, without per-byte sign extension.
|
|
alignas(TokenDatabase::RawEntry) constexpr signed char kSignedWithTopBit[] =
|
|
"TOKENS\0\0\x80\x00\x00\x00\0\0\0\0"
|
|
// Entries
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 32
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 64
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 96
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
|
|
"TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 128
|
|
// Strings (empty)
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 32
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 64
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 96
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; // 128
|
|
|
|
constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>();
|
|
static_assert(kBasicDatabase.size() == 3u);
|
|
|
|
// Checks size() for databases built through both Create overloads.
TEST(TokenDatabase, EntryCount) {
  // Compile-time checks: template-argument form and function-argument form.
  static_assert(TokenDatabase::Create<kBasicData>().size() == 3u);
  static_assert(TokenDatabase::Create(kEmptyData).size() == 0u);

  // Entry count with the top bit set, read from signed char data.
  const auto signed_data_count =
      TokenDatabase::Create<kSignedWithTopBit>().size();
  EXPECT_EQ(signed_data_count, 0x80u);
}
|
|
|
|
// Exercises TokenDatabase::IsValid on well-formed and malformed byte sequences.
TEST(TokenDatabase, ValidCheck) {
  // Runtime check on a non-constexpr copy of the basic database bytes.
  char runtime_copy[sizeof(kBasicData)];
  std::memcpy(runtime_copy, kBasicData, sizeof(kBasicData));
  EXPECT_TRUE(TokenDatabase::IsValid(runtime_copy));

  // Fixtures that must be accepted.
  static_assert(TokenDatabase::IsValid(kBasicData));
  static_assert(TokenDatabase::IsValid(kEmptyData));
  static_assert(TokenDatabase::IsValid(kSignedWithTopBit));

  // Fixtures that must be rejected.
  static_assert(!TokenDatabase::IsValid(kBadMagic));
  static_assert(!TokenDatabase::IsValid(kBadVersion));
  static_assert(!TokenDatabase::IsValid(kBadEntryCount));

  // Inline literals that must be rejected. The first is too short; the others
  // alter the bytes at or right after the leading "TOKENS" text.
  static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0"));

  // One entry and no string table at all: one byte too short.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv));

  // The same data plus a single string-table byte is accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv));

  // Two entries with only a one-byte string table: rejected.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                              "WXYZdate"
                              "WXYZdate"
                              "\0"sv));

  // Two entries with string tables "hi\0\0" and "hi\0hello\0": accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0\0"sv));
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0hello\0"sv));
}
|
|
|
|
// Walks the basic database from begin() to end(), checking each entry's token
// and string as well as the iterator's distance from the start.
TEST(TokenDatabase, Iterator) {
  const auto first = kBasicDatabase.begin();
  auto cursor = first;

  // Entry 0.
  EXPECT_EQ(cursor->token, 1u);
  EXPECT_STREQ(cursor.entry().string, "hi!");

  // Entry 1.
  ++cursor;
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
  EXPECT_EQ(cursor - first, 1);

  // Entry 2.
  ++cursor;
  EXPECT_EQ(cursor->token, 0xFFu);
  EXPECT_STREQ(cursor.entry().string, ":)");
  EXPECT_EQ(cursor - first, 2);

  // One past the last entry.
  ++cursor;
  EXPECT_EQ(cursor, kBasicDatabase.end());
  EXPECT_EQ(static_cast<size_t>(cursor - first), kBasicDatabase.size());
}
|
|
|
|
// Pre-increment must yield the already-advanced iterator.
TEST(TokenDatabase, Iterator_PreIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The result of ++ refers to the second entry...
  EXPECT_EQ((++cursor)->token, 2u);
  // ...and so does the iterator itself afterwards.
  EXPECT_STREQ(cursor.entry().string, "goodbye");
}
|
|
|
|
// Post-increment must yield the pre-advance position while moving the
// iterator itself forward.
TEST(TokenDatabase, Iterator_PostIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The value returned by ++ still refers to the first entry.
  auto before_advance = cursor++;
  EXPECT_EQ(before_advance->token, 1u);

  // The iterator has moved on to the second entry.
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
}
|
|
|
|
// Looking up the first entry's token yields exactly that one entry.
TEST(TokenDatabase, SingleEntryLookup_FirstEntry) {
  auto found = kBasicDatabase.Find(1);

  // Fatal on mismatch: the indexing below relies on there being one result.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "hi!");

  // Range-based iteration over the result sees the same single entry.
  for (const auto& hit : found) {
    EXPECT_EQ(hit.token, 1u);
    EXPECT_STREQ(hit.string, "hi!");
  }
}
|
|
|
|
// Looking up the middle entry's token yields exactly that one entry.
TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) {
  auto found = kBasicDatabase.Find(2);

  // Fatal on mismatch: the indexing below relies on there being one result.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "goodbye");
}
|
|
|
|
// Looking up the last entry's token yields exactly that one entry.
TEST(TokenDatabase, SingleEntryLookup_LastEntry) {
  auto found = kBasicDatabase.Find(0xff);

  // Fatal on mismatch: the indexing below relies on there being one result.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_STREQ(found[0].string, ":)");
  EXPECT_FALSE(found.empty());
}
|
|
|
|
// Tokens that are absent from kBasicData (which holds 1, 2 and 0xFF) must
// produce an empty result.
TEST(TokenDatabase, SingleEntryLookup_NonPresent) {
  // Below the smallest stored token.
  EXPECT_TRUE(kBasicDatabase.Find(0).empty());
  // Between two stored tokens.
  EXPECT_TRUE(kBasicDatabase.Find(3).empty());
  // Above the largest stored token.
  EXPECT_TRUE(kBasicDatabase.Find(10239).empty());
  EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty());
}
|
|
|
|
// A lookup with no matches yields an empty range that iterates zero times.
TEST(TokenDatabase, SingleEntryLookup_NoMatches) {
  // Build the database at runtime here rather than as a constexpr object.
  TokenDatabase runtime_db = TokenDatabase::Create(kBasicData);
  const auto found = runtime_db.Find(42);

  ASSERT_EQ(found.size(), 0u);
  EXPECT_TRUE(found.empty());

  for (const auto& hit : found) {
    static_cast<void>(hit);  // Silences the unused-variable warning.
    FAIL();  // Nothing matched, so the loop body must never run.
  }
}
|
|
|
|
// Database in which token 0x01 appears three times, followed by one entry
// each for tokens 0x02 and 0xFF. The first three strings are "hi!", "goodbye"
// and ":)"; the remaining two are empty (the last is the literal's implicit
// null terminator).
alignas(TokenDatabase::RawEntry) constexpr char kCollisionsData[] =
    // Header: 5 entries
    "TOKENS\0\0\x05\0\0\0\0\0\0\0"
    // Entries
    "\x01\0\0\0date"
    "\x01\0\0\0date"
    "\x01\0\0\0date"
    "\x02\0\0\0date"
    "\xFF\0\0\0date"
    // Strings
    "hi!\0goodbye\0:)\0\0";
|
|
|
|
constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>();
|
|
static_assert(kCollisions.size() == 5u);
|
|
|
|
// Looking up a token shared by several entries returns all of them, in order.
TEST(TokenDatabase, MultipleEntriesWithSameToken) {
  TokenDatabase::Entries match = kCollisions.Find(1);

  // Fatal check first: every statement below dereferences or indexes into the
  // range, so stop here if it does not hold the three expected entries.
  ASSERT_EQ(match.size(), 3u);

  EXPECT_EQ(match.begin()->token, 1u);
  // end() of the match is dereferenced on purpose: in kCollisionsData the
  // entry right after the three token-1 entries is the token-2 entry, so the
  // read stays inside the database and confirms where the match stops.
  EXPECT_EQ(match.end()->token, 2u);

  EXPECT_STREQ(match[0].string, "hi!");
  EXPECT_STREQ(match[1].string, "goodbye");
  EXPECT_STREQ(match[2].string, ":)");

  // Every entry in the range carries the token that was looked up.
  for (const auto& entry : match) {
    EXPECT_EQ(entry.token, 1u);
  }
}
|
|
|
|
// A database with a header but zero entries is ok(), finds nothing, and
// iterates zero times.
TEST(TokenDatabase, Empty) {
  constexpr TokenDatabase no_entries = TokenDatabase::Create<kEmptyData>();
  static_assert(no_entries.ok());
  static_assert(no_entries.size() == 0u);

  EXPECT_TRUE(no_entries.Find(0).empty());
  EXPECT_TRUE(no_entries.Find(123).empty());

  for (const auto& item : no_entries) {
    static_cast<void>(item);  // Silences the unused-variable warning.
    FAIL();  // There are no entries, so the loop body must never run.
  }
}
|
|
|
|
// A default-constructed database has no entries, is not ok(), and can still
// be searched.
TEST(TokenDatabase, NullDatabase) {
  constexpr TokenDatabase null_db;

  static_assert(!null_db.ok());
  static_assert(null_db.size() == 0u);

  EXPECT_TRUE(null_db.Find(0).empty());
}
|
|
|
|
// Creating a database from data shorter than a full header produces a
// database that is not ok() and finds nothing.
TEST(TokenDatabase, InvalidData) {
  constexpr TokenDatabase truncated_db = TokenDatabase::Create("TOKENS\0\0");

  static_assert(!truncated_db.ok());

  EXPECT_TRUE(truncated_db.Find(0).empty());
}
|
|
|
|
// Create also accepts a std::string; content that is not a database yields a
// database that is not ok().
TEST(TokenDatabase, FromString) {
  const TokenDatabase not_a_database =
      TokenDatabase::Create(std::string("wow!"));

  EXPECT_FALSE(not_a_database.ok());
}
|
|
|
|
} // namespace
|
|
} // namespace pw::tokenizer
|