libmoost
/home/mhx/git/github/libmoost/include/moost/algorithm/fast_hash.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef MOOST_ALGORITHM_FAST_HASH_HPP
00029 #define MOOST_ALGORITHM_FAST_HASH_HPP
00030 
00055 #include <functional>
00056 #include <string>
00057 
00058 #include <boost/type_traits/is_pod.hpp>
00059 #include <boost/static_assert.hpp>
00060 
00061 namespace moost { namespace algorithm {
00062 
00064 // *** BIG IMPORTANT WARNING!!! ***
00065 // If you plan to use fast_hash to hash a custom type, including structs you
00066 // need to understand that they can and probably will contain padding. Since
00067 // the compiler is not at liberty to initialise padding, if you create two
00068 // instances of a struct and initialise each member with the same value there
00069 // is a very good change when you hash each instance you will generate
00070 // different digests. The only way to make this safe is to use memset to zero
00071 // out the whole object first. Note that struct foo = {0} does NOT set all
00072 // bytes in the objects memory to 0, it does a member by member initialisation
00073 // and thus padding is still uninitialised.
00074 // Also, this should NOT be used for non-POD types, since the memory layout
00075 // of a non-POD type is compiler specific and there can be no assumptions
00076 // made. If you need to hash a custom object you are better off making an
00077 // array of bytes out of the various member values that should form part
00078 // of the hash and then send that to fast_hash. For example, if we have
00079 // a non-POD class that contains of key/value pairs and the key is made up of
00080 // 2 integer values (a composite key) this would be one safe way to create
00081 // a hash from the key...
00082 //       int32_t i32s[] = {key.first, key.second};
00083 //       size_ t h = moost::algorithm::fast_hash(i32s, sizeof(i32s));
00084 
00086 
00087 static const size_t DEFAULT_SEED = 5381;
00088 
00089 //#define moost_fasthash_default_seed__ 5381
00090 
00092 inline size_t fast_hash(const void* data_in, size_t size, size_t seed = DEFAULT_SEED)
00093 {
00094    const unsigned char*    data = (const unsigned char*) data_in;
00095 
00096 // [13/6/2011 ricky] Not really sure why this is unsigned int (it's not portable)
00097 //                   but I've added the explicit cast to shut up Windows!
00098 #ifdef WIN32
00099    unsigned int h = (unsigned int) seed;
00100 #else
00101    unsigned int h = seed;
00102 #endif
00103 
00104    while (size > 0) {
00105       size--;
00106       h = (h << 16) + (h << 6) - h + (unsigned) data[size];
00107    }
00108    return h;
00109 }
00110 
00112 template <size_t TSeed = DEFAULT_SEED>
00113 struct FastHashFunctor
00114 {
00115    template <typename T>
00116    size_t operator()(const T& p) const
00117    {
00118 #ifndef MOOST_FASTHASH_NO_ISPOD_CHECK
00119       BOOST_STATIC_ASSERT((boost::is_pod<T>::value));
00120 #endif
00121 
00122       return fast_hash(&p, sizeof(T), TSeed);
00123    }
00124 
00125    size_t operator()( const void* key, size_t size ) const
00126    { return fast_hash(key, size, TSeed); }
00127 
00128    // overrides default seed
00129    size_t operator()( const void* key, size_t size, size_t seed ) const
00130    { return fast_hash(key, size, seed); }
00131 
00132    // specialization for strings
00133    size_t operator()( const std::string& str) const
00134    { return fast_hash( str.data(), str.size(), TSeed ); }
00135 
00136    // specialization for strings, override seed
00137    size_t operator()( const std::string& str, size_t seed) const
00138    { return fast_hash( str.data(), str.size(), seed ); }
00139 };
00140 
00141 //#undef moost_fasthash_default_seed__
00142 
00143 typedef FastHashFunctor<> FastHash;
00144 
00146 
00147 }} // end of namespaces
00148 
00149 #endif // MOOST_ALGORITHM_FAST_HASH_HPP