You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
345 lines
8.9 KiB
345 lines
8.9 KiB
//===- StringHash.h -------------------------------------------------------===//
|
|
//
|
|
// The MCLinker Project
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#ifndef MCLD_ADT_STRINGHASH_H_
|
|
#define MCLD_ADT_STRINGHASH_H_
|
|
|
|
#include <llvm/ADT/StringRef.h>
|
|
#include <llvm/Support/DataTypes.h>
|
|
|
|
#include <cassert>
|
|
#include <cctype>
|
|
#include <functional>
|
|
|
|
namespace mcld {
|
|
namespace hash {
|
|
|
|
enum Type { RS, JS, PJW, ELF, BKDR, SDBM, DJB, DEK, BP, FNV, AP, ES };
|
|
|
|
/** \class template<uint32_t TYPE> StringHash
|
|
* \brief the template StringHash class, for specification
|
|
*/
|
|
template <uint32_t TYPE>
|
|
struct StringHash
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
assert(false && "Undefined StringHash function.\n");
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<RSHash>
|
|
* \brief RS StringHash funciton
|
|
*/
|
|
template <>
|
|
struct StringHash<RS>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
const unsigned int b = 378551;
|
|
uint32_t a = 63689;
|
|
uint32_t hash_val = 0;
|
|
|
|
for (unsigned int i = 0; i < pKey.size(); ++i) {
|
|
hash_val = hash_val * a + pKey[i];
|
|
a = a * b;
|
|
}
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<JSHash>
|
|
* \brief JS hash funciton
|
|
*/
|
|
template <>
|
|
struct StringHash<JS>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
uint32_t hash_val = 1315423911;
|
|
|
|
for (unsigned int i = 0; i < pKey.size(); ++i) {
|
|
hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
|
|
}
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<PJW>
|
|
* \brief P.J. Weinberger hash function
|
|
*/
|
|
template <>
|
|
struct StringHash<PJW>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
const unsigned int BitsInUnsignedInt =
|
|
(unsigned int)(sizeof(unsigned int) * 8);
|
|
const unsigned int ThreeQuarters =
|
|
(unsigned int)((BitsInUnsignedInt * 3) / 4);
|
|
const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
|
|
const unsigned int HighBits = (unsigned int)(0xFFFFFFFF)
|
|
<< (BitsInUnsignedInt - OneEighth);
|
|
uint32_t hash_val = 0;
|
|
uint32_t test = 0;
|
|
|
|
for (unsigned int i = 0; i < pKey.size(); ++i) {
|
|
hash_val = (hash_val << OneEighth) + pKey[i];
|
|
|
|
if ((test = hash_val & HighBits) != 0) {
|
|
hash_val = ((hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
|
|
}
|
|
}
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<ELF>
|
|
* \brief ELF hash function.
|
|
*/
|
|
template <>
|
|
struct StringHash<ELF>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
uint32_t hash_val = 0;
|
|
uint32_t x = 0;
|
|
|
|
for (unsigned int i = 0; i < pKey.size(); ++i) {
|
|
hash_val = (hash_val << 4) + pKey[i];
|
|
if ((x = hash_val & 0xF0000000L) != 0)
|
|
hash_val ^= (x >> 24);
|
|
hash_val &= ~x;
|
|
}
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<BKDR>
|
|
* \brief BKDR hash function
|
|
*/
|
|
template <>
|
|
struct StringHash<BKDR>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
const uint32_t seed = 131;
|
|
uint32_t hash_val = 0;
|
|
|
|
for (uint32_t i = 0; i < pKey.size(); ++i)
|
|
hash_val = (hash_val * seed) + pKey[i];
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<SDBM>
|
|
* \brief SDBM hash function
|
|
* 0.049s in 100000 test
|
|
*/
|
|
template <>
|
|
struct StringHash<SDBM>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
uint32_t hash_val = 0;
|
|
|
|
for (uint32_t i = 0; i < pKey.size(); ++i)
|
|
hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<DJB>
|
|
* \brief DJB hash function
|
|
* 0.057s in 100000 test
|
|
*/
|
|
template <>
|
|
struct StringHash<DJB>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
uint32_t hash_val = 5381;
|
|
|
|
for (uint32_t i = 0; i < pKey.size(); ++i)
|
|
hash_val = ((hash_val << 5) + hash_val) + pKey[i];
|
|
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<DEK>
|
|
* \brief DEK hash function
|
|
* 0.60s
|
|
*/
|
|
template <>
|
|
struct StringHash<DEK>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
uint32_t hash_val = pKey.size();
|
|
|
|
for (uint32_t i = 0; i < pKey.size(); ++i)
|
|
hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
|
|
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<BP>
|
|
* \brief BP hash function
|
|
* 0.057s
|
|
*/
|
|
template <>
|
|
struct StringHash<BP>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
uint32_t hash_val = 0;
|
|
for (uint32_t i = 0; i < pKey.size(); ++i)
|
|
hash_val = hash_val << 7 ^ pKey[i];
|
|
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<FNV>
|
|
* \brief FNV hash function
|
|
* 0.058s
|
|
*/
|
|
template <>
|
|
struct StringHash<FNV>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
const uint32_t fnv_prime = 0x811C9DC5;
|
|
uint32_t hash_val = 0;
|
|
for (uint32_t i = 0; i < pKey.size(); ++i) {
|
|
hash_val *= fnv_prime;
|
|
hash_val ^= pKey[i];
|
|
}
|
|
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<AP>
|
|
* \brief AP hash function
|
|
* 0.060s
|
|
*/
|
|
template <>
|
|
struct StringHash<AP>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pKey) const {
|
|
unsigned int hash_val = 0xAAAAAAAA;
|
|
|
|
for (uint32_t i = 0; i < pKey.size(); ++i) {
|
|
hash_val ^= ((i & 1) == 0)
|
|
? ((hash_val << 7) ^ pKey[i] * (hash_val >> 3))
|
|
: (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
|
|
}
|
|
|
|
return hash_val;
|
|
}
|
|
};
|
|
|
|
/** \class StringHash<ES>
|
|
* \brief This is a revision of Edward Sayers' string characteristic function.
|
|
*
|
|
* 31-28 27 26 25 - 0
|
|
* +----+---+---+------------+
|
|
* | . | N | - | a/A ~ z/Z |
|
|
* +----+---+---+------------+
|
|
*
|
|
* . (bit 31~28) - The number of '.' characters
|
|
* N (bit 27) - Are there any numbers in the string
|
|
* - (bit 26) - Does the string have '-' character
|
|
* bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and
|
|
* Bit 24 is set only if ... 'b' or 'B', ...
|
|
*/
|
|
template <>
|
|
struct StringHash<ES>
|
|
: public std::unary_function<const llvm::StringRef, uint32_t> {
|
|
uint32_t operator()(const llvm::StringRef pString) const {
|
|
uint32_t result = 0x0;
|
|
unsigned int dot = 0;
|
|
std::string::size_type idx;
|
|
for (idx = 0; idx < pString.size(); ++idx) {
|
|
int cur_char = pString[idx];
|
|
|
|
if ('.' == cur_char) {
|
|
++dot;
|
|
continue;
|
|
}
|
|
|
|
if (isdigit(cur_char)) {
|
|
result |= (1 << 27);
|
|
continue;
|
|
}
|
|
|
|
if ('_' == cur_char) {
|
|
result |= (1 << 26);
|
|
continue;
|
|
}
|
|
|
|
if (isupper(cur_char)) {
|
|
result |= (1 << (cur_char - 'A'));
|
|
continue;
|
|
}
|
|
|
|
if (islower(cur_char)) {
|
|
result |= (1 << (cur_char - 'a'));
|
|
continue;
|
|
}
|
|
}
|
|
result |= (dot << 28);
|
|
return result;
|
|
}
|
|
|
|
/** \func may_include
|
|
* \brief is it possible that pRule is a sub-string of pInString
|
|
*/
|
|
static bool may_include(uint32_t pRule, uint32_t pInString) {
|
|
uint32_t in_c = pInString << 4;
|
|
uint32_t r_c = pRule << 4;
|
|
|
|
uint32_t res = (in_c ^ r_c) & r_c;
|
|
if (0 != res)
|
|
return false;
|
|
|
|
uint32_t in_dot = pInString >> 28;
|
|
uint32_t r_dot = pRule >> 28;
|
|
if (r_dot > in_dot)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
};
|
|
|
|
/** \class template<uint32_t TYPE> StringCompare
|
|
* \brief the template StringCompare class, for specification
|
|
*/
|
|
template <typename STRING_TYPE>
|
|
struct StringCompare : public std::binary_function<const STRING_TYPE&,
|
|
const STRING_TYPE&,
|
|
bool> {
|
|
bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const {
|
|
return X == Y;
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct StringCompare<const char*>
|
|
: public std::binary_function<const char*, const char*, bool> {
|
|
bool operator()(const char* X, const char* Y) const {
|
|
return (std::strcmp(X, Y) == 0);
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct StringCompare<char*>
|
|
: public std::binary_function<const char*, const char*, bool> {
|
|
bool operator()(const char* X, const char* Y) const {
|
|
return (std::strcmp(X, Y) == 0);
|
|
}
|
|
};
|
|
|
|
} // namespace hash
|
|
} // namespace mcld
|
|
|
|
#endif // MCLD_ADT_STRINGHASH_H_
|