You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
252 lines
10 KiB
252 lines
10 KiB
// Copyright 2020 The Pigweed Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy of
|
|
// the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations under
|
|
// the License.
|
|
|
|
// This tests the C hashing macros and C++ hashing functions.
|
|
|
|
#include "pw_tokenizer/hash.h"
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "pw_preprocessor/util.h"
|
|
#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_128_hash_macro.h"
|
|
#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_256_hash_macro.h"
|
|
#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h"
|
|
#include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h"
|
|
#include "pw_tokenizer_private/generated_hash_test_cases.h"
|
|
|
|
namespace pw::tokenizer {
|
|
namespace {
|
|
|
|
// Make sure the array test cases file isn't empty.
|
|
static_assert(PW_ARRAY_SIZE(kHashTests) > 10,
|
|
"The test cases array is suspiciously small");
|
|
|
|
constexpr bool CheckGeneratedCases() {
|
|
for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
|
|
const uint32_t calculated_hash =
|
|
PwTokenizer65599FixedLengthHash(string, hash_length);
|
|
if (calculated_hash != python_hash || calculated_hash != macro_hash) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static_assert(CheckGeneratedCases(),
|
|
"Hashes in the generated test cases must match");
|
|
TEST(Hashing, GeneratedCasesAtRuntime) {
|
|
for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
|
|
const uint32_t calculated_hash =
|
|
PwTokenizer65599FixedLengthHash(string, hash_length);
|
|
EXPECT_EQ(calculated_hash, python_hash);
|
|
EXPECT_EQ(calculated_hash, macro_hash);
|
|
}
|
|
}
|
|
|
|
// Gets the size of the string, excluding the null terminator. A uint32_t is
|
|
// used instead of a size_t since the hash calculation requires a uint32_t.
|
|
template <uint32_t kSizeWithNull>
|
|
constexpr uint32_t StringLength(const char (&)[kSizeWithNull]) {
|
|
static_assert(kSizeWithNull > 0u);
|
|
return kSizeWithNull - 1; // subtract the null terminator
|
|
}
|
|
|
|
TEST(Hashing, Runtime) PW_NO_SANITIZE("unsigned-integer-overflow") {
|
|
// Coefficients for the hash terms; k1 is 1.
|
|
static constexpr uint32_t k2 = k65599HashConstant;
|
|
static constexpr uint32_t k3 = k65599HashConstant * k2;
|
|
static constexpr uint32_t k4 = k65599HashConstant * k3;
|
|
static constexpr uint32_t k5 = k65599HashConstant * k4;
|
|
|
|
// Hash a few things at hash length 4
|
|
// The coefficient calculation is done modulo 0x100000000, so the unsigned
|
|
// integer overflows of the hash terms are intentional.
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("", 4), StringLength(""));
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("1", 4),
|
|
StringLength("1") + k2 * '1');
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("12", 4),
|
|
StringLength("12") + k2 * '1' + k3 * '2');
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("123", 4),
|
|
StringLength("123") + k2 * '1' + k3 * '2' + k4 * '3');
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("1234", 4),
|
|
StringLength("1234") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("12345", 4),
|
|
StringLength("12345") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash("123456", 4),
|
|
StringLength("123456") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
|
|
}
|
|
|
|
#define _CHECK_HASH_LENGTH(string, length) \
|
|
static_assert(PwTokenizer65599FixedLengthHash( \
|
|
std::string_view(string, sizeof(string) - 1), length) == \
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_##length##_HASH(string), \
|
|
#length "-byte hash mismatch!")
|
|
|
|
// Use std::string_view so that \0 can appear in strings.
|
|
#define TEST_SUPPORTED_HASHES(string_literal) \
|
|
_CHECK_HASH_LENGTH(string_literal, 80); \
|
|
_CHECK_HASH_LENGTH(string_literal, 96); \
|
|
_CHECK_HASH_LENGTH(string_literal, 128); \
|
|
_CHECK_HASH_LENGTH(string_literal, 256); \
|
|
static_assert( \
|
|
PwTokenizer65599FixedLengthHash( \
|
|
std::string_view(string_literal, sizeof(string_literal) - 1), \
|
|
sizeof(string_literal) - 1) == Hash(string_literal), \
|
|
"Hash function mismatch!"); \
|
|
EXPECT_EQ(PwTokenizer65599FixedLengthHash( \
|
|
std::string_view(string_literal, sizeof(string_literal) - 1), \
|
|
sizeof(string_literal) - 1), \
|
|
pw_tokenizer_65599FixedLengthHash(string_literal, \
|
|
sizeof(string_literal) - 1, \
|
|
sizeof(string_literal) - 1))
|
|
|
|
TEST(HashMacro, Empty) { TEST_SUPPORTED_HASHES(""); }
|
|
|
|
TEST(HashMacro, NullTerminatorsAreIncludedInHash) {
|
|
using namespace std::literals; // use an sv literal to include the \0
|
|
|
|
TEST_SUPPORTED_HASHES("hello\0there");
|
|
TEST_SUPPORTED_HASHES("\0");
|
|
TEST_SUPPORTED_HASHES("\0\0");
|
|
TEST_SUPPORTED_HASHES("\0\0\0");
|
|
|
|
static_assert(
|
|
PwTokenizer65599FixedLengthHash(""sv, 80) !=
|
|
PwTokenizer65599FixedLengthHash("\0"sv, 80),
|
|
"Hashes of \\0 strings should differ since the lengths are included");
|
|
static_assert(
|
|
PwTokenizer65599FixedLengthHash("\0"sv, 80) !=
|
|
PwTokenizer65599FixedLengthHash("\0\0"sv, 80),
|
|
"Hashes of \\0 strings should differ since the lengths are included");
|
|
static_assert(
|
|
PwTokenizer65599FixedLengthHash("\0\0"sv, 80) !=
|
|
PwTokenizer65599FixedLengthHash("\0\0\0"sv, 80),
|
|
"Hashes of \\0 strings should differ since the lengths are included");
|
|
}
|
|
|
|
TEST(HashMacro, OneChar) {
|
|
TEST_SUPPORTED_HASHES("a");
|
|
TEST_SUPPORTED_HASHES("c");
|
|
TEST_SUPPORTED_HASHES("A");
|
|
TEST_SUPPORTED_HASHES("B");
|
|
TEST_SUPPORTED_HASHES("C");
|
|
TEST_SUPPORTED_HASHES("$");
|
|
TEST_SUPPORTED_HASHES("%");
|
|
TEST_SUPPORTED_HASHES("^");
|
|
TEST_SUPPORTED_HASHES("\xa1");
|
|
TEST_SUPPORTED_HASHES("\xff");
|
|
}
|
|
|
|
TEST(HashMacro, Phrases) {
|
|
TEST_SUPPORTED_HASHES("WASD");
|
|
TEST_SUPPORTED_HASHES("hello, world");
|
|
TEST_SUPPORTED_HASHES("Let the wookie win.");
|
|
TEST_SUPPORTED_HASHES("\x01 more test, just for \xffun");
|
|
}
|
|
|
|
TEST(HashMacro, HashesChange) {
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("") !=
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("\0"));
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a") !=
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("b"));
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("z") !=
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("aa"));
|
|
static_assert(
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("make sure hashes change") !=
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("MAKE SURE HASHES CHANGE"));
|
|
}
|
|
|
|
// Define string literals to test boundary conditions.
|
|
#define LITERAL_79 \
|
|
"0123456789ABCDEF" \
|
|
"0123456789ABCDEF" \
|
|
"0123456789ABCDEF" \
|
|
"0123456789ABCDEF" \
|
|
"0123456789ABCDE"
|
|
#define LITERAL_80 LITERAL_79 "F"
|
|
|
|
#define LITERAL_96 LITERAL_80 "0123456789ABCDEF"
|
|
|
|
#define LITERAL_128 \
|
|
LITERAL_96 \
|
|
"0123456789ABCDEF" \
|
|
"0123456789ABCDEF"
|
|
|
|
static_assert(sizeof(LITERAL_79) - 1 == 79);
|
|
static_assert(sizeof(LITERAL_80) - 1 == 80);
|
|
static_assert(sizeof(LITERAL_96) - 1 == 96);
|
|
static_assert(sizeof(LITERAL_128) - 1 == 128);
|
|
|
|
TEST(HashMacro, HashNearFixedHashLength_80) {
|
|
// 79, 80, 81, 82 bytes
|
|
TEST_SUPPORTED_HASHES(LITERAL_79);
|
|
TEST_SUPPORTED_HASHES(LITERAL_80);
|
|
TEST_SUPPORTED_HASHES(LITERAL_80 "!");
|
|
TEST_SUPPORTED_HASHES(LITERAL_80 "!!");
|
|
}
|
|
|
|
TEST(HashMacro, HashNearFixedHashLength_96) {
|
|
// 95, 96, 97, 98 bytes
|
|
TEST_SUPPORTED_HASHES(LITERAL_80 "0123456789ABCDE");
|
|
TEST_SUPPORTED_HASHES(LITERAL_96);
|
|
TEST_SUPPORTED_HASHES(LITERAL_96 "!");
|
|
TEST_SUPPORTED_HASHES(LITERAL_96 "XY");
|
|
}
|
|
|
|
TEST(HashMacro, HashNearFixedHashLength_128) {
|
|
// 127, 128, 129, 130 bytes
|
|
TEST_SUPPORTED_HASHES(LITERAL_96
|
|
"0123456789ABCDEF"
|
|
"0123456789ABCDE");
|
|
TEST_SUPPORTED_HASHES(LITERAL_128);
|
|
TEST_SUPPORTED_HASHES(LITERAL_128 "!");
|
|
TEST_SUPPORTED_HASHES(LITERAL_128 "AB");
|
|
}
|
|
|
|
TEST(HashMacro, HashVeryLongStrings) {
|
|
TEST_SUPPORTED_HASHES(
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
|
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
|
|
TEST_SUPPORTED_HASHES(LITERAL_128 LITERAL_128 LITERAL_128);
|
|
}
|
|
|
|
TEST(HashMacro, ExtraCharactersAreIgnored) {
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "?") ==
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "."));
|
|
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "ABCD") ==
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "zyxw"));
|
|
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "?") ==
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "."));
|
|
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "ABCD") ==
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "zyxw"));
|
|
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "?") ==
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "."));
|
|
|
|
static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "ABCD") ==
|
|
PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "zyxw"));
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace pw::tokenizer
|