You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
290 lines
10 KiB
290 lines
10 KiB
// Copyright 2020 The Pigweed Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy of
|
|
// the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations under
|
|
// the License.
|
|
|
|
#include "pw_tokenizer/detokenize.h"

#include <array>
#include <string_view>
#include <utility>

#include "gtest/gtest.h"
|
|
|
|
namespace pw::tokenizer {
|
|
namespace {
|
|
|
|
// Enables the ""sv literal suffix. The tests below use it for encoded messages
// that contain embedded NUL bytes.
using namespace std::literals::string_view_literals;
|
|
|
|
// Short local alias for the argument decoding error string macro. It keeps the
// expected strings in the tests below readable.
#define ERR PW_TOKENIZER_ARG_DECODING_ERROR
|
|
|
|
// Use alignas to ensure that the data is properly aligned to be read from a
|
|
// token database entry struct. This avoids unaligned memory reads.
|
|
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
|
|
"TOKENS\0\0"
|
|
"\x04\x00\x00\x00"
|
|
"\0\0\0\0"
|
|
"\x01\x00\x00\x00----"
|
|
"\x05\x00\x00\x00----"
|
|
"\xFF\x00\x00\x00----"
|
|
"\xFF\xEE\xEE\xDD----"
|
|
"One\0"
|
|
"TWO\0"
|
|
"333\0"
|
|
"FOUR";
|
|
|
|
class Detokenize : public ::testing::Test {
|
|
protected:
|
|
Detokenize() : detok_(TokenDatabase::Create<kBasicData>()) {}
|
|
Detokenizer detok_;
|
|
};
|
|
|
|
// Every token in kBasicData detokenizes to its plain string.
TEST_F(Detokenize, NoFormatting) {
  struct Expectation {
    std::string_view encoded;
    std::string_view text;
  };

  const Expectation kExpectations[] = {
      {"\1\0\0\0"sv, "One"},
      {"\5\0\0\0"sv, "TWO"},
      {"\xff\x00\x00\x00"sv, "333"},
      {"\xff\xee\xee\xdd"sv, "FOUR"},
  };

  for (const Expectation& expectation : kExpectations) {
    EXPECT_EQ(detok_.Detokenize(expectation.encoded).BestString(),
              expectation.text);
  }
}
|
|
|
|
// Inputs shorter than the 4-byte tokens used elsewhere in this file produce a
// failed result whose BestString() is empty.
TEST_F(Detokenize, BestString_MissingToken_IsEmpty) {
  // An empty message is not a successful decode.
  EXPECT_FALSE(detok_.Detokenize("").ok());

  // Two-argument (data, size) form with zero bytes and with a single byte.
  EXPECT_TRUE(detok_.Detokenize("", 0u).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\1", 1u).BestString().empty());

  // string_view form with two- and three-byte inputs.
  const std::string_view kTruncated[] = {
      "\1\0"sv,
      "\1\0\0"sv,
      "\0\0\0"sv,
  };
  for (const std::string_view encoded : kTruncated) {
    EXPECT_TRUE(detok_.Detokenize(encoded).BestString().empty());
  }
}
|
|
|
|
// Tokens that are absent from kBasicData give a failed result with an empty
// BestString().
TEST_F(Detokenize, BestString_UnknownToken_IsEmpty) {
  const std::string_view kUnknown[] = {
      "\0\0\0\0"sv,
      "\2\0\0\0"sv,
      "\x10\x32\x54\x76\x99"sv,  // One extra byte after the token.
      "\x98\xba\xdc\xfe"sv,
  };

  EXPECT_FALSE(detok_.Detokenize(kUnknown[0]).ok());

  for (const std::string_view encoded : kUnknown) {
    EXPECT_TRUE(detok_.Detokenize(encoded).BestString().empty());
  }
}
|
|
|
|
// Inputs too short to hold a token report "missing token" through
// BestStringWithErrors().
TEST_F(Detokenize, BestStringWithErrors_MissingToken_ErrorMessage) {
  const char* const kMissingToken = ERR("missing token");

  EXPECT_FALSE(detok_.Detokenize("").ok());

  // Two-argument (data, size) form.
  EXPECT_EQ(detok_.Detokenize("", 0u).BestStringWithErrors(), kMissingToken);
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestStringWithErrors(), kMissingToken);

  // string_view form with two- and three-byte inputs.
  const std::string_view kTruncated[] = {
      "\1\0"sv,
      "\1\0\0"sv,
      "\0\0\0"sv,
  };
  for (const std::string_view encoded : kTruncated) {
    EXPECT_EQ(detok_.Detokenize(encoded).BestStringWithErrors(),
              kMissingToken);
  }
}
|
|
|
|
// Unknown tokens are reported with the token value printed as eight hex
// digits, most significant byte first.
TEST_F(Detokenize, BestStringWithErrors_UnknownToken_ErrorMessage) {
  struct Expectation {
    std::string_view encoded;
    const char* message;
  };

  const Expectation kExpectations[] = {
      {"\0\0\0\0"sv, ERR("unknown token 00000000")},
      {"\2\0\0\0"sv, ERR("unknown token 00000002")},
      // The fifth byte does not show up in the reported token.
      {"\x10\x32\x54\x76\x99"sv, ERR("unknown token 76543210")},
      {"\x98\xba\xdc\xfe"sv, ERR("unknown token fedcba98")},
  };

  EXPECT_FALSE(detok_.Detokenize(kExpectations[0].encoded).ok());

  for (const Expectation& expectation : kExpectations) {
    EXPECT_EQ(detok_.Detokenize(expectation.encoded).BestStringWithErrors(),
              expectation.message);
  }
}
|
|
|
|
// Binary token database whose strings contain printf-style format arguments.
alignas(TokenDatabase::RawEntry) constexpr char kDataWithArguments[] =
    // Header: magic string, a 4-byte value matching the nine entries below,
    // then four zero bytes.
    "TOKENS\0\0"
    "\x09\x00\x00\x00"
    "\0\0\0\0"
    // Entries: four token bytes followed by four "----" bytes.
    "\x00\x00\x00\x00" "----"
    "\x0A\x0B\x0C\x0D" "----"
    "\x0E\x0F\x00\x01" "----"
    "\xAA\xAA\xAA\xAA" "----"
    "\xBB\xBB\xBB\xBB" "----"
    "\xCC\xCC\xCC\xCC" "----"
    "\xDD\xDD\xDD\xDD" "----"
    "\xEE\xEE\xEE\xEE" "----"
    "\xFF\xFF\xFF\xFF" "----"
    // String table, in the same order as the entries above. The first string
    // is empty.
    "\0"
    "Use the %s, %s.\0"
    "Now there are %d of %s!\0"
    "%c!\0"    // Token AA AA AA AA
    "%hhu!\0"  // Token BB BB BB BB
    "%hu!\0"   // Token CC CC CC CC
    "%u!\0"    // Token DD DD DD DD
    "%lu!\0"   // Token EE EE EE EE
    "%llu!";   // Token FF FF FF FF
|
|
|
|
// Token database created from kDataWithArguments; the DetokenizeWithArgs
// fixture constructs its Detokenizer from it.
constexpr TokenDatabase kWithArgs = TokenDatabase::Create<kDataWithArguments>();
|
|
|
|
// A single test case: the encoded message (first) paired with the text it is
// expected to detokenize to (second).
using Case = std::pair<std::string_view, std::string_view>;

// Packs the given cases into a std::array so that a test can iterate over them
// with a range-based for loop. The array size is deduced from the number of
// arguments; calling with no arguments yields an empty array.
//
// The function is constexpr so that case tables can also be built at compile
// time; existing runtime callers are unaffected.
template <typename... Args>
constexpr auto TestCases(Args... args) {
  return std::array<Case, sizeof...(Args)>{args...};
}
|
|
|
|
class DetokenizeWithArgs : public ::testing::Test {
|
|
protected:
|
|
DetokenizeWithArgs() : detok_(kWithArgs) {}
|
|
|
|
Detokenizer detok_;
|
|
};
|
|
|
|
// A token that is not in the database yields an empty list of matches.
TEST_F(DetokenizeWithArgs, NoMatches) {
  auto result = detok_.Detokenize("\x23\xab\xc9\x87"sv);
  EXPECT_TRUE(result.matches().empty());
}
|
|
|
|
// A token that appears once in the database yields exactly one match.
TEST_F(DetokenizeWithArgs, SingleMatch) {
  auto result = detok_.Detokenize("\x00\x00\x00\x00"sv);
  EXPECT_EQ(result.matches().size(), 1u);
}
|
|
|
|
// Token 00000000 maps to the empty string in kDataWithArguments.
TEST_F(DetokenizeWithArgs, Empty) {
  auto result = detok_.Detokenize("\x00\x00\x00\x00"sv);
  EXPECT_EQ(result.BestString(), "");
}
|
|
|
|
// Messages whose arguments are all present and valid decode to fully
// formatted text.
TEST_F(DetokenizeWithArgs, Successful) {
  // Cases that use %hhu or %llu are left out because these are not currently
  // supported in arm-none-eabi-gcc.
  const auto cases = TestCases(
      Case{"\x0A\x0B\x0C\x0D\5force\4Luke"sv, "Use the force, Luke."},
      Case{"\x0E\x0F\x00\x01\4\4them"sv, "Now there are 2 of them!"},
      Case{"\xAA\xAA\xAA\xAA\xfc\x01"sv, "~!"},
      Case{"\xCC\xCC\xCC\xCC\xfe\xff\x07"sv, "65535!"},
      Case{"\xDD\xDD\xDD\xDD\xfe\xff\x07"sv, "65535!"},
      Case{"\xDD\xDD\xDD\xDD\xfe\xff\xff\xff\x1f"sv, "4294967295!"},
      Case{"\xEE\xEE\xEE\xEE\xfe\xff\x07"sv, "65535!"},
      Case{"\xEE\xEE\xEE\xEE\xfe\xff\xff\xff\x1f"sv, "4294967295!"});

  for (const auto& [encoded, text] : cases) {
    EXPECT_EQ(detok_.Detokenize(encoded).BestString(), text);
  }
}
|
|
|
|
// Bytes left over after the token of an argument-free string make the result
// a failure; BestString() is still the (empty) database string.
TEST_F(DetokenizeWithArgs, ExtraDataError) {
  auto error = detok_.Detokenize("\x00\x00\x00\x00MORE data"sv);
  EXPECT_FALSE(error.ok());
  // Actual value first, expected value second, as in every other assertion in
  // this file.
  EXPECT_EQ(error.BestString(), "");
}
|
|
|
|
// When the message ends before the second %s argument, BestString() leaves
// the specifier in place and BestStringWithErrors() marks it as MISSING.
TEST_F(DetokenizeWithArgs, MissingArgumentError) {
  auto result = detok_.Detokenize("\x0A\x0B\x0C\x0D\5force"sv);

  EXPECT_FALSE(result.ok());

  EXPECT_EQ(result.BestString(), "Use the force, %s.");
  EXPECT_EQ(result.BestStringWithErrors(),
            "Use the force, " ERR("%s MISSING") ".");
}
|
|
|
|
// A lone 0xFF byte after the token fails to decode as the %d argument. That
// argument is reported as ERROR and the following %s as SKIPPED.
TEST_F(DetokenizeWithArgs, DecodingError) {
  auto result = detok_.Detokenize("\x0E\x0F\x00\x01\xFF"sv);

  EXPECT_FALSE(result.ok());

  EXPECT_EQ(result.BestString(), "Now there are %d of %s!");
  EXPECT_EQ(result.BestStringWithErrors(),
            "Now there are " ERR("%d ERROR") " of " ERR("%s SKIPPED") "!");
}
|
|
|
|
// Binary token database in which several strings share the same token.
//
// Each entry is four token bytes followed by a second 4-byte field. Entries 2
// and 8, whose strings are labelled "removed", are the only ones where that
// field is not ff ff ff ff.
// NOTE(review): the second field is presumably the entry's removal date;
// confirm against the TokenDatabase format.
alignas(TokenDatabase::RawEntry) constexpr char kDataWithCollisions[] =
    // Header: magic string, a 4-byte value matching the fifteen entries
    // below, then four zero bytes.
    "TOKENS\0\0"
    "\x0F\x00\x00\x00"
    "\0\0\0\0"
    // Entries
    "\x00\x00\x00\x00" "\xff\xff\xff\xff"  // 1
    "\x00\x00\x00\x00" "\x01\x02\x03\x04"  // 2
    "\x00\x00\x00\x00" "\xff\xff\xff\xff"  // 3
    "\x00\x00\x00\x00" "\xff\xff\xff\xff"  // 4
    "\x00\x00\x00\x00" "\xff\xff\xff\xff"  // 5
    "\x00\x00\x00\x00" "\xff\xff\xff\xff"  // 6
    "\x00\x00\x00\x00" "\xff\xff\xff\xff"  // 7
    "\xAA\xAA\xAA\xAA" "\x00\x00\x00\x00"  // 8
    "\xAA\xAA\xAA\xAA" "\xff\xff\xff\xff"  // 9
    "\xBB\xBB\xBB\xBB" "\xff\xff\xff\xff"  // A
    "\xBB\xBB\xBB\xBB" "\xff\xff\xff\xff"  // B
    "\xCC\xCC\xCC\xCC" "\xff\xff\xff\xff"  // C
    "\xCC\xCC\xCC\xCC" "\xff\xff\xff\xff"  // D
    "\xDD\xDD\xDD\xDD" "\xff\xff\xff\xff"  // E
    "\xDD\xDD\xDD\xDD" "\xff\xff\xff\xff"  // F
    // String table; the labels match the entry labels above.
    "This string is present\0"   // 1
    "This string is removed\0"   // 2
    "One arg %d\0"               // 3
    "One arg %s\0"               // 4
    "Two args %s %u\0"           // 5
    "Two args %s %s %% %% %%\0"  // 6
    "Four args %d %d %d %d\0"    // 7
    "This one is removed\0"      // 8
    "This one is present\0"      // 9
    "Two ints %d %d\0"           // A
    "Three ints %d %d %d\0"      // B
    "Three strings %s %s %s\0"   // C
    "Two strings %s %s\0"        // D
    "Three %s %s %s\0"           // E
    "Five %d %d %d %d %s\0";     // F
|
|
|
|
// Token database created from kDataWithCollisions; the
// DetokenizeWithCollisions fixture constructs its Detokenizer from it.
constexpr TokenDatabase kWithCollisions =
    TokenDatabase::Create<kDataWithCollisions>();
|
|
|
|
class DetokenizeWithCollisions : public ::testing::Test {
|
|
protected:
|
|
DetokenizeWithCollisions() : detok_(kWithCollisions) {}
|
|
|
|
Detokenizer detok_;
|
|
};
|
|
|
|
// Among the strings that share token 00000000, the best string is the one
// that decodes the given arguments successfully.
TEST_F(DetokenizeWithCollisions, Collision_AlwaysPreferSuccessfulDecode) {
  const auto cases =
      TestCases(Case{"\0\0\0\0"sv, "This string is present"},
                Case{"\0\0\0\0\x01"sv, "One arg -1"},
                Case{"\0\0\0\0\x80"sv, "One arg [...]"},
                Case{"\0\0\0\0\4Hey!\x04"sv, "Two args Hey! 2"});

  for (const auto& [encoded, text] : cases) {
    EXPECT_EQ(detok_.Detokenize(encoded).BestString(), text);
  }
}
|
|
|
|
// Expected best strings for token 00000000 when several colliding strings are
// candidates; per the test name, decoding all bytes is preferred.
TEST_F(DetokenizeWithCollisions, Collision_PreferDecodingAllBytes) {
  const auto cases =
      TestCases(Case{"\0\0\0\0\x80\x80\x80\x80\x00"sv, "Two args [...] 0"},
                Case{"\0\0\0\0\x08?"sv, "One arg %s"},
                Case{"\0\0\0\0\x01!\x01\x80"sv, "Two args ! \x80 % % %"});

  for (const auto& [encoded, text] : cases) {
    EXPECT_EQ(detok_.Detokenize(encoded).BestString(), text);
  }
}
|
|
|
|
// When no colliding string decodes cleanly, the best string is the one that
// leaves the fewest arguments undecoded.
TEST_F(DetokenizeWithCollisions, Collision_PreferFewestDecodingErrors) {
  const auto cases =
      TestCases(Case{"\xBB\xBB\xBB\xBB\x00"sv, "Two ints 0 %d"},
                Case{"\xCC\xCC\xCC\xCC\2Yo\5?"sv, "Two strings Yo %s"});

  for (const auto& [encoded, text] : cases) {
    EXPECT_EQ(detok_.Detokenize(encoded).BestString(), text);
  }
}
|
|
|
|
// Both strings for token DDDDDDDD fail on their last argument. The match that
// decoded more arguments is expected first.
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs) {
  auto result = detok_.Detokenize("\xDD\xDD\xDD\xDD\x01\x02\x01\x04\x05"sv);

  // Stop here if fewer than two matches were produced, so the indexing below
  // cannot read out of bounds.
  ASSERT_GE(result.matches().size(), 2u);

  EXPECT_EQ(static_cast<std::string_view>(result.matches()[0].value()),
            "Five -1 1 -1 2 %s");
  EXPECT_EQ(static_cast<std::string_view>(result.matches()[1].value()),
            "Three \2 \4 %s"sv);
}
|
|
|
|
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs_NoPercent) {
  // The "Two args %s %s ..." string also decodes this message successfully and
  // has more "arguments" because of its %% sequences, but %% doesn't count as
  // a decoded argument.
  auto result = detok_.Detokenize("\0\0\0\0\x01\x00\x01\x02"sv);
  EXPECT_EQ(result.BestString(), "Four args -1 0 -1 1");
}
|
|
|
|
// For argument-free collisions, the string labelled "present" in
// kDataWithCollisions is chosen over the one labelled "removed".
TEST_F(DetokenizeWithCollisions, Collision_PreferStillPresentString) {
  const auto cases =
      TestCases(Case{"\x00\x00\x00\x00"sv, "This string is present"},
                Case{"\xAA\xAA\xAA\xAA"sv, "This one is present"});

  for (const auto& [encoded, text] : cases) {
    EXPECT_EQ(detok_.Detokenize(encoded).BestString(), text);
  }
}
|
|
|
|
// All seven database entries with token 00000000 appear in matches().
TEST_F(DetokenizeWithCollisions, Collision_TracksAllMatches) {
  auto result = detok_.Detokenize("\0\0\0\0"sv);
  const auto match_count = result.matches().size();
  EXPECT_EQ(match_count, 7u);
}
|
|
|
|
} // namespace
|
|
} // namespace pw::tokenizer
|