You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

492 lines
15 KiB

/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "hash_set.h"
#include <forward_list>
#include <map>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>
#include <gtest/gtest.h>
#include "hash_map.h"
namespace art {
// Functor passed to HashSet<std::string, ...> in these tests; it treats the
// zero-length string as the "empty" value.
struct IsEmptyFnString {
  // Turns `item` into the "empty" value by removing all of its characters.
  void MakeEmpty(std::string& item) const {
    item.erase(item.begin(), item.end());
  }

  // Returns true when `item` holds no characters.
  bool IsEmpty(const std::string& item) const {
    return item.size() == 0u;
  }
};
// Test fixture shared by every test in this file. It carries a small
// pseudo-random generator and a counter used to build unique strings.
class HashSetTest : public testing::Test {
 public:
  HashSetTest() : seed_(97421), unique_number_(0) {}

  // Builds a string of `len` pseudo-random characters followed by a space and
  // the current value of a counter that is incremented on every call.
  std::string RandomString(size_t len) {
    static_assert(' ' < 'A', "space must be less than a");
    std::ostringstream builder;
    size_t remaining = len;
    while (remaining != 0) {
      builder << static_cast<char>('A' + PRand() % 64);
      --remaining;
    }
    builder << " " << unique_number_++;  // Relies on ' ' < 'A'
    return builder.str();
  }

  // Replaces the generator state with `seed`.
  void SetSeed(size_t seed) {
    seed_ = seed;
  }

  // Pseudo random: advances the generator state by one multiply-add step and
  // returns the new state.
  size_t PRand() {
    const size_t next = seed_ * 1103515245 + 12345;
    seed_ = next;
    return next;
  }

 private:
  size_t seed_;           // Current generator state.
  size_t unique_number_;  // Suffix counter used by RandomString().
};
// Smoke test: insert a single string, look it up, erase it and confirm the set
// is empty again.
TEST_F(HashSetTest, TestSmoke) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  const std::string test_string = "hello world 1234";
  ASSERT_TRUE(hash_set.empty());
  ASSERT_EQ(hash_set.size(), 0U);
  hash_set.insert(test_string);
  ASSERT_FALSE(hash_set.empty());
  ASSERT_EQ(hash_set.size(), 1U);
  auto it = hash_set.find(test_string);
  // Guard the dereference below: a failed lookup must be reported as a test
  // failure rather than dereferencing end().
  ASSERT_TRUE(it != hash_set.end());
  ASSERT_EQ(*it, test_string);
  // Erasing the only element is expected to hand back end().
  auto after_it = hash_set.erase(it);
  ASSERT_TRUE(after_it == hash_set.end());
  ASSERT_TRUE(hash_set.empty());
  ASSERT_EQ(hash_set.size(), 0U);
  it = hash_set.find(test_string);
  ASSERT_TRUE(it == hash_set.end());
}
// Inserts 1000 unique strings, erases the ones at odd indices and checks that
// exactly the even-indexed strings remain findable.
TEST_F(HashSetTest, TestInsertAndErase) {
  static constexpr size_t count = 1000;
  HashSet<std::string, IsEmptyFnString> hash_set;
  std::vector<std::string> strings;
  // Every freshly inserted string must be findable right away.
  while (strings.size() < count) {
    strings.push_back(RandomString(10));
    const std::string& added = strings.back();
    hash_set.insert(added);
    auto pos = hash_set.find(added);
    ASSERT_TRUE(pos != hash_set.end());
    ASSERT_EQ(*pos, added);
  }
  ASSERT_EQ(strings.size(), hash_set.size());
  // Erase the strings stored at odd indices.
  for (size_t odd = 1; odd < count; odd += 2) {
    const std::string& victim = strings[odd];
    auto pos = hash_set.find(victim);
    ASSERT_TRUE(pos != hash_set.end());
    ASSERT_EQ(*pos, victim);
    hash_set.erase(pos);
  }
  // The erased strings must be gone...
  for (size_t odd = 1; odd < count; odd += 2) {
    auto pos = hash_set.find(strings[odd]);
    ASSERT_TRUE(pos == hash_set.end());
  }
  // ...while the strings at even indices must still be present.
  for (size_t even = 0; even < count; even += 2) {
    const std::string& survivor = strings[even];
    auto pos = hash_set.find(survivor);
    ASSERT_TRUE(pos != hash_set.end());
    ASSERT_EQ(*pos, survivor);
  }
}
// Checks that iteration visits every inserted string exactly once, both when
// simply walking the set and when draining it through iterator erase.
TEST_F(HashSetTest, TestIterator) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  ASSERT_TRUE(hash_set.begin() == hash_set.end());
  static constexpr size_t count = 1000;
  std::vector<std::string> strings;
  // Populate the set with unique strings.
  while (strings.size() < count) {
    strings.push_back(RandomString(10));
    hash_set.insert(strings.back());
  }
  // Walk the set and tally how often each string is seen.
  std::map<std::string, size_t> found_count;
  for (auto walker = hash_set.begin(); walker != hash_set.end(); ++walker) {
    found_count[*walker] += 1;
  }
  for (const std::string& expected : strings) {
    ASSERT_EQ(found_count[expected], 1U);
  }
  found_count.clear();
  // Drain the set through iterator erase, verifying the table after each step.
  auto cursor = hash_set.begin();
  while (cursor != hash_set.end()) {
    found_count[*cursor] += 1;
    cursor = hash_set.erase(cursor);
    ASSERT_EQ(hash_set.Verify(), 0U);
  }
  for (const std::string& expected : strings) {
    ASSERT_EQ(found_count[expected], 1U);
  }
}
// Swaps a populated set with an empty one and checks that the contents were
// exchanged and that both sets remain usable afterwards.
TEST_F(HashSetTest, TestSwap) {
  HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
  std::vector<std::string> strings;
  static constexpr size_t count = 1000;
  for (size_t i = 0; i < count; ++i) {
    strings.push_back(RandomString(10));
    hash_seta.insert(strings[i]);
  }
  // RandomString() yields unique strings, so every insertion added an element.
  ASSERT_EQ(hash_seta.size(), strings.size());
  ASSERT_TRUE(hash_setb.empty());
  std::swap(hash_seta, hash_setb);
  // The contents must have traded places.
  ASSERT_TRUE(hash_seta.empty());
  ASSERT_EQ(hash_setb.size(), strings.size());
  for (const std::string& s : strings) {
    ASSERT_TRUE(hash_setb.find(s) != hash_setb.end()) << "expected to find " << s;
  }
  // Both sets must still accept insertions after the swap.
  hash_seta.insert("TEST");
  hash_setb.insert("TEST2");
  ASSERT_EQ(hash_seta.size(), 1U);
  ASSERT_EQ(hash_setb.size(), count + 1U);
  for (size_t i = 0; i < count; ++i) {
    strings.push_back(RandomString(10));
    // Insert the string that was just generated (previously strings[i], an
    // element from the first batch, was inserted and the new string was unused).
    hash_seta.insert(strings.back());
  }
  ASSERT_EQ(hash_seta.size(), count + 1U);
  ASSERT_EQ(hash_setb.size(), count + 1U);
}
// Grows the table with many temporary elements, erases them again and checks
// that ShrinkToMaximumLoad() restores the original load factor without losing
// the initial elements.
TEST_F(HashSetTest, TestShrink) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
  // Insert some strings into the beginning of our hash set to establish an initial size
  for (const std::string& seed_string : strings) {
    hash_set.insert(seed_string);
  }
  hash_set.ShrinkToMaximumLoad();
  const double initial_load = hash_set.CalculateLoadFactor();

  // Insert a bunch of random strings to guarantee that we grow the capacity.
  static constexpr size_t count = 1000;
  std::vector<std::string> random_strings;
  while (random_strings.size() < count) {
    random_strings.push_back(RandomString(10));
    hash_set.insert(random_strings.back());
  }

  // Erase all the extra strings which guarantees that our load factor will be really bad.
  for (const std::string& extra : random_strings) {
    hash_set.erase(hash_set.find(extra));
  }
  const double bad_load = hash_set.CalculateLoadFactor();
  EXPECT_GT(initial_load, bad_load);

  // Shrink again, the load factor should be good again.
  hash_set.ShrinkToMaximumLoad();
  EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());

  // Make sure all the initial elements we had are still there
  for (size_t index = 0; index < strings.size(); ++index) {
    const std::string& initial_string = strings[index];
    EXPECT_NE(hash_set.end(), hash_set.find(initial_string))
        << "expected to find " << initial_string;
  }
}
// Fills a set and checks that the measured load factor stays within the
// configured bounds (with a small tolerance), before and after the bounds are
// changed through SetLoadFactor().
TEST_F(HashSetTest, TestLoadFactor) {
  static constexpr double kEpsilon = 0.01;
  static constexpr size_t kStringCount = 1000;
  HashSet<std::string, IsEmptyFnString> hash_set;
  size_t inserted = 0;
  while (inserted < kStringCount) {
    // String lengths cycle through 1..10.
    hash_set.insert(RandomString(inserted % 10 + 1));
    ++inserted;
  }
  // Check that changing the load factor resizes the table to be within the target range.
  EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());

  // Lower the bounds; the getters must echo them and the load must fit below the new maximum.
  const double low_min = 0.1;
  const double low_max = 0.3;
  hash_set.SetLoadFactor(low_min, low_max);
  EXPECT_DOUBLE_EQ(low_min, hash_set.GetMinLoadFactor());
  EXPECT_DOUBLE_EQ(low_max, hash_set.GetMaxLoadFactor());
  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());

  // Raise the bounds again; the load must still fit below the maximum.
  const double high_min = 0.6;
  const double high_max = 0.8;
  hash_set.SetLoadFactor(high_min, high_max);
  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
}
// Randomized differential test: applies the same sequence of clear / insert /
// erase operations to a HashSet and to a std::unordered_set and asserts after
// every step that the two containers agree.
TEST_F(HashSetTest, TestStress) {
HashSet<std::string, IsEmptyFnString> hash_set;
std::unordered_set<std::string> std_set;
std::vector<std::string> strings;
static constexpr size_t string_count = 2000;
static constexpr size_t operations = 100000;
static constexpr size_t target_size = 5000;
// Build the pool of candidate strings; their random parts are 1..10 characters long.
for (size_t i = 0; i < string_count; ++i) {
strings.push_back(RandomString(i % 10 + 1));
}
// Seed the generator from the wall clock and log the seed so that a failing
// run can be identified.
const size_t seed = time(nullptr);
SetSeed(seed);
LOG(INFO) << "Starting stress test with seed " << seed;
for (size_t i = 0; i < operations; ++i) {
ASSERT_EQ(hash_set.size(), std_set.size());
// Distance between the current size and target_size.
// NOTE(review): the pool holds only string_count (2000) strings while
// target_size is 5000, so the set can never reach the target and delta
// stays at or above 3000 - confirm this is intended.
size_t delta = std::abs(static_cast<ssize_t>(target_size) -
static_cast<ssize_t>(hash_set.size()));
size_t n = PRand();
if (n % target_size == 0) {
// Occasionally wipe both containers.
hash_set.clear();
std_set.clear();
ASSERT_TRUE(hash_set.empty());
ASSERT_TRUE(std_set.empty());
} else if (n % target_size < delta) {
// Skew towards adding elements until we are at the desired size.
const std::string& s = strings[PRand() % string_count];
hash_set.insert(s);
std_set.insert(s);
ASSERT_EQ(*hash_set.find(s), *std_set.find(s));
} else {
// Otherwise pick a string and erase it from both containers if present;
// both must agree on whether it is present.
const std::string& s = strings[PRand() % string_count];
auto it1 = hash_set.find(s);
auto it2 = std_set.find(s);
ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
if (it1 != hash_set.end()) {
ASSERT_EQ(*it1, *it2);
hash_set.erase(it1);
std_set.erase(it2);
}
}
}
}
// Functor passed to HashMap<std::string, int, ...> in these tests; an entry
// counts as "empty" when its string key has zero length. The int value is
// neither inspected nor modified.
struct IsEmptyStringPair {
  // Turns `pair` into the "empty" value by clearing only its key.
  void MakeEmpty(std::pair<std::string, int>& pair) const {
    std::string& key = pair.first;
    key.erase(key.begin(), key.end());
  }

  // Returns true when the key of `pair` holds no characters.
  bool IsEmpty(const std::pair<std::string, int>& pair) const {
    const std::string& key = pair.first;
    return key.size() == 0u;
  }
};
// Basic HashMap behaviour: a second insert with an existing key must not
// replace the stored value, and erasing one key must leave other keys intact.
TEST_F(HashSetTest, TestHashMap) {
  HashMap<std::string, int, IsEmptyStringPair> hash_map;
  hash_map.insert(std::make_pair(std::string("abcd"), 123));
  hash_map.insert(std::make_pair(std::string("abcd"), 124));
  hash_map.insert(std::make_pair(std::string("bags"), 444));
  auto it = hash_map.find(std::string("abcd"));
  // Guard the dereference below: a failed lookup must be reported as a test
  // failure rather than dereferencing end().
  ASSERT_TRUE(it != hash_map.end());
  // The value from the first insertion is the one expected to be kept.
  ASSERT_EQ(it->second, 123);
  hash_map.erase(it);
  it = hash_map.find(std::string("abcd"));
  ASSERT_EQ(it, hash_map.end());
  // The unrelated entry must survive the erase.
  auto other_it = hash_map.find(std::string("bags"));
  ASSERT_TRUE(other_it != hash_map.end());
  ASSERT_EQ(other_it->second, 444);
}
// Functor passed to HashSet<std::vector<int>, ...> in these tests; a vector
// with no elements is treated as the "empty" value.
struct IsEmptyFnVectorInt {
  // Turns `item` into the "empty" value by removing all of its elements.
  void MakeEmpty(std::vector<int>& item) const {
    item.erase(item.begin(), item.end());
  }

  // Returns true when `item` has no elements.
  bool IsEmpty(const std::vector<int>& item) const {
    return item.begin() == item.end();
  }
};
// Folds the values in [begin, end) into one hash: starting from zero, the
// running value is doubled and the next element added, in iteration order.
// An empty range hashes to zero.
template <typename T>
size_t HashIntSequence(T begin, T end) {
  size_t accumulated = 0;
  // `begin` is a by-value copy, so it can serve directly as the cursor.
  for (; begin != end; ++begin) {
    accumulated = accumulated * 2 + *begin;
  }
  return accumulated;
}
// Combined hash and equality functor for std::vector<int> keys that also
// accepts std::forward_list<int> as an alternate lookup type. Both containers
// are hashed through HashIntSequence(), so equal sequences hash alike.
struct VectorIntHashEquals {
  // Hash of a vector key.
  std::size_t operator()(const std::vector<int>& item) const {
    return HashIntSequence(item.cbegin(), item.cend());
  }

  // Hash of a forward_list lookup key.
  std::size_t operator()(const std::forward_list<int>& item) const {
    return HashIntSequence(item.cbegin(), item.cend());
  }

  // Vector-to-vector equality.
  bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
    return !(a != b);
  }

  // Element-wise comparison of a vector against a forward_list; true only
  // when both hold the same values in the same order and have equal length.
  bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
    auto list_pos = b.begin();
    for (const int value : a) {
      // The list ran out first, or the elements differ.
      if (list_pos == b.end() || value != *list_pos) {
        return false;
      }
      ++list_pos;
    }
    // The vector is exhausted; the list must be exhausted as well.
    return list_pos == b.end();
  }
};
// A set keyed by std::vector<int> must be searchable both with a vector and
// with a std::forward_list<int> holding the same sequence.
TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
  using IntVectorSet =
      HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals>;
  IntVectorSet hash_set;
  hash_set.insert(std::vector<int>{1, 2, 3, 4});
  hash_set.insert(std::vector<int>{4, 2});
  // Lookups with the native key type.
  ASSERT_EQ(hash_set.end(), hash_set.find(std::vector<int>{1, 1, 1, 1}));
  ASSERT_NE(hash_set.end(), hash_set.find(std::vector<int>{1, 2, 3, 4}));
  // Lookups with the alternate key type.
  ASSERT_EQ(hash_set.end(), hash_set.find(std::forward_list<int>{1, 1, 1, 1}));
  ASSERT_NE(hash_set.end(), hash_set.find(std::forward_list<int>{1, 2, 3, 4}));
}
// For a series of growing sizes, reserve() must leave room for at least that
// many elements, and filling the set up to the reserved size must not change
// the bucket count.
TEST_F(HashSetTest, TestReserve) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
  for (size_t size : sizes) {
    hash_set.reserve(size);
    const size_t buckets_before = hash_set.NumBuckets();
    // Check that we expanded enough.
    // gtest assertions are used instead of CHECK_* so that a failure is
    // reported as a test failure rather than aborting the test process.
    ASSERT_GE(hash_set.ElementsUntilExpand(), size);
    // Try inserting elements until we are at our reserve size and ensure the hash set did not
    // expand.
    while (hash_set.size() < size) {
      hash_set.insert(std::to_string(hash_set.size()));
    }
    ASSERT_EQ(hash_set.NumBuckets(), buckets_before);
  }
  // Check the behaviour for shrinking, it does not necessarily resize down.
  constexpr size_t size = 100;
  hash_set.reserve(size);
  ASSERT_GE(hash_set.ElementsUntilExpand(), size);
}
// A mutable iterator must convert implicitly to a const_iterator that compares
// equal to it and refers to the same element.
TEST_F(HashSetTest, IteratorConversion) {
  using StringSet = HashSet<std::string>;
  StringSet hash_set;
  const char* test_string = "test string";
  auto insert_result = hash_set.insert(test_string);
  StringSet::iterator it = insert_result.first;
  // Copy-initialization on purpose: this exercises the implicit conversion.
  StringSet::const_iterator cit = it;
  ASSERT_TRUE(it == cit);
  ASSERT_EQ(*it, *cit);
}
// A set of std::string must be searchable with a std::string_view key, and the
// lookup must land on the element that was inserted.
TEST_F(HashSetTest, StringSearchStringView) {
  using StringSet = HashSet<std::string>;
  StringSet hash_set;
  const char* test_string = "test string";
  auto insert_result = hash_set.insert(test_string);
  StringSet::iterator insert_pos = insert_result.first;
  const std::string_view lookup_key(test_string);
  StringSet::iterator it = hash_set.find(lookup_key);
  ASSERT_TRUE(it == insert_pos);
}
// Inserting the same string twice must leave exactly one element in the set.
TEST_F(HashSetTest, DoubleInsert) {
  HashSet<std::string> hash_set;
  const char* test_string = "test string";
  for (int attempt = 0; attempt < 2; ++attempt) {
    hash_set.insert(test_string);
  }
  ASSERT_EQ(1u, hash_set.size());
}
// Constructs a set over a caller-provided buffer. The test expects owns_data_
// to stay false while the element count is within
// kBufferSize * GetMaxLoadFactor(), and to become true after one more insert.
TEST_F(HashSetTest, Preallocated) {
  static const size_t kBufferSize = 64;
  uint32_t buffer[kBufferSize];
  HashSet<uint32_t> hash_set(buffer, kBufferSize);
  // The product is a floating-point value; converting to size_t truncates it.
  size_t max_without_resize = kBufferSize * hash_set.GetMaxLoadFactor();
  size_t next_value = 0;
  while (next_value != max_without_resize) {
    hash_set.insert(next_value);
    ++next_value;
  }
  ASSERT_FALSE(hash_set.owns_data_);
  // One element beyond the limit.
  hash_set.insert(max_without_resize);
  ASSERT_TRUE(hash_set.owns_data_);
}
// Functor for uint16_t elements in which the largest representable uint16_t
// value serves as the "empty" value.
class SmallIndexEmptyFn {
 public:
  // Overwrites `item` with the "empty" value.
  void MakeEmpty(uint16_t& item) const {
    item = kEmptyValue;
  }

  // Returns true when `item` equals the "empty" value.
  bool IsEmpty(const uint16_t& item) const {
    return !(item != kEmptyValue);
  }

 private:
  // Largest uint16_t value.
  static constexpr uint16_t kEmptyValue = std::numeric_limits<uint16_t>::max();
};
// Hash functor that carries a pointer to a string table. A uint16_t key is
// hashed as the string stored at that index of the table, so an index and its
// string yield the same hash value.
class StatefulHashFn {
 public:
  explicit StatefulHashFn(const std::vector<std::string>* strings)
      : strings_(strings) {}

  // Hashes the table entry at `index`; the index must be within the table.
  size_t operator() (const uint16_t& index) const {
    CHECK_LT(index, strings_->size());
    const std::string& text = (*strings_)[index];
    return (*this)(text);
  }

  // Hashes the characters of `s` with DataHash.
  size_t operator() (std::string_view s) const {
    const size_t hash = DataHash()(s);
    return hash;
  }

 private:
  // String table consulted for index keys; stored as a raw pointer.
  const std::vector<std::string>* strings_;
};
// Equality functor that carries a pointer to a string table. Index keys are
// compared through the strings stored at those indices, so two different
// indices that hold equal strings compare equal.
class StatefulPred {
 public:
  explicit StatefulPred(const std::vector<std::string>* strings)
      : strings_(strings) {}

  // Compares the table entries at `lhs` and `rhs`. `rhs` is range-checked
  // here, `lhs` in the overload this call forwards to.
  bool operator() (const uint16_t& lhs, const uint16_t& rhs) const {
    CHECK_LT(rhs, strings_->size());
    const std::string& rhs_text = (*strings_)[rhs];
    return (*this)(lhs, rhs_text);
  }

  // Compares the table entry at `lhs` against the string `rhs`.
  bool operator() (const uint16_t& lhs, std::string_view rhs) const {
    CHECK_LT(lhs, strings_->size());
    const std::string& lhs_text = (*strings_)[lhs];
    return lhs_text == rhs;
  }

 private:
  // String table consulted for index keys; stored as a raw pointer.
  const std::vector<std::string>* strings_;
};
// Exercises a HashSet of uint16_t indices whose hash and equality functors
// look the indices up in a string table. The table holds "duplicate" twice, so
// the second such index is expected to be rejected on insertion and to resolve
// to the first one on lookup.
TEST_F(HashSetTest, StatefulHashSet) {
  std::vector<std::string> strings{
      "duplicate", "a", "b", "xyz", "___", "123", "placeholder", "duplicate"
  };
  const size_t duplicateFirstIndex = 0;
  const size_t duplicateSecondIndex = strings.size() - 1u;
  const size_t otherIndex = 1u;
  const size_t string_count = strings.size();
  // Index expected to be stored in the set for a given table index.
  auto stored_index = [&](size_t index) -> size_t {
    return index == duplicateSecondIndex ? duplicateFirstIndex : index;
  };

  StatefulHashFn hashfn(&strings);
  StatefulPred pred(&strings);
  HashSet<uint16_t, SmallIndexEmptyFn, StatefulHashFn, StatefulPred> hash_set(hashfn, pred);

  // Every index except the second duplicate must be newly inserted.
  for (size_t index = 0; index < string_count; ++index) {
    const bool inserted = hash_set.insert(index).second;
    ASSERT_EQ(index != duplicateSecondIndex, inserted) << index;
  }

  // Check search by string.
  for (size_t index = 0; index < string_count; ++index) {
    auto found = hash_set.find(strings[index]);
    ASSERT_FALSE(found == hash_set.end());
    ASSERT_EQ(stored_index(index), *found) << index;
  }
  ASSERT_TRUE(hash_set.find("missing") == hash_set.end());

  // Check search by index.
  for (size_t index = 0; index < string_count; ++index) {
    auto found = hash_set.find(index);
    ASSERT_FALSE(found == hash_set.end());
    ASSERT_EQ(stored_index(index), *found) << index;
  }
  // Note: Searching for index >= strings.size() is not supported by Stateful{HashFn,Pred}.

  // Test removal and search by missing index.
  auto remove_it = hash_set.find(otherIndex);
  ASSERT_FALSE(remove_it == hash_set.end());
  hash_set.erase(remove_it);
  auto search_it = hash_set.find(otherIndex);
  ASSERT_TRUE(search_it == hash_set.end());
}
} // namespace art