libmoost
|
00001 /* vim:set ts=3 sw=3 sts=3 et: */ 00028 #ifndef MOOST_ALGORITHM_KETAMA_PARTITIONER_HPP__ 00029 #define MOOST_ALGORITHM_KETAMA_PARTITIONER_HPP__ 00030 00031 #include <boost/random/mersenne_twister.hpp> 00032 00033 #include "partitioner.hpp" 00034 00035 #include <vector> 00036 #include <algorithm> 00037 #include <limits> 00038 00039 namespace moost { namespace algorithm { 00040 00044 template<typename T> 00045 class ketama_partitioner: public partitioner<T> 00046 { 00047 private: 00048 00049 /* erikf raised a concern about boost::mt19937 potentially changing and thus 00050 * included a copy of mersenne_twister.hpp in moost. Not only is this ugly, 00051 * it has also been the source of link-level errors with recent versions of 00052 * boost due to refactoring of the implementation. 00053 * 00054 * The algorithm itself is well-defined and extremely unlikely to ever be 00055 * changed. The only thing that's slightly more probable to change (even 00056 * though I don't think it ever will) is the default seed, which is why we 00057 * keep a copy of the "original" seed here and explicitly seed the RNGs. 00058 */ 00059 static uint32_t default_seed() 00060 { 00061 return 5489u; 00062 } 00063 00064 struct bucket_hash 00065 { 00066 bucket_hash(size_t bucket_, size_t hash_) : 00067 bucket(bucket_), hash(hash_) 00068 { 00069 } 00070 size_t bucket; 00071 size_t hash; 00072 bool operator <(const bucket_hash & other) const 00073 { 00074 return hash < other.hash; 00075 } 00076 }; 00077 00078 std::vector<bucket_hash> m_bhashes; 00079 00080 // stolen from http://isthe.com/chongo/tech/comp/fnv/ 00081 unsigned int fnv_hash(const void *key, size_t len) const 00082 { 00083 const unsigned char *p = (const unsigned char*) key; 00084 unsigned int h = 2166136261UL; 00085 00086 for (size_t i = 0; i != len; i++) 00087 h = (h * 16777619) ^ p[i]; 00088 00089 return h; 00090 } 00091 00092 public: 00093 00094 ketama_partitioner(size_t num_buckets, size_t num_hashes = 4096) 00095 : partitioner<T> (num_buckets) 00096 { 00097 boost::mt19937 gen(default_seed()); 00098 00099 // for each bucket 00100 for (size_t i = 0; i != num_buckets; ++i) 00101 { 00102 // for each hash 00103 for (size_t j = 0; j != num_hashes; ++j) 00104 { 00105 // add it to the ring 00106 m_bhashes.push_back(bucket_hash(i, gen())); 00107 } 00108 } 00109 00110 // now sort the whole darn thing 00111 std::sort(m_bhashes.begin(), m_bhashes.end()); 00112 } 00113 00114 template <typename Y> 00115 ketama_partitioner( const std::vector<Y>& buckets, size_t num_hashes = 4096) 00116 : partitioner<T>( buckets.size() ) 00117 { 00118 boost::mt19937 gen(default_seed()); 00119 typename std::vector<Y>::const_iterator it; 00120 // for each bucket 00121 size_t i = 0; 00122 for ( it = buckets.begin(); it != buckets.end(); ++it, ++i ) 00123 { 00124 gen.seed( fnv_hash( &(*it), sizeof(Y)) ); 00125 // for each hash 00126 for (size_t j = 0; j != num_hashes; ++j) 00127 { 00128 // add it to the ring 00129 m_bhashes.push_back(bucket_hash(i, gen())); 00130 } 00131 } 00132 00133 // now sort the whole darn thing 00134 std::sort(m_bhashes.begin(), m_bhashes.end()); 00135 } 00136 00138 ketama_partitioner( const std::vector<std::string>& buckets, size_t num_hashes = 4096) 00139 : partitioner<T>( buckets.size() ) 00140 { 00141 boost::mt19937 gen(default_seed()); 00142 std::vector<std::string>::const_iterator it; 00143 // for each bucket 00144 size_t i = 0; 00145 for ( it = buckets.begin(); it != buckets.end(); ++it, ++i ) 00146 { 00147 gen.seed( fnv_hash(it->c_str(), it->length()) ); 00148 // for each hash 00149 for (size_t j = 0; j != num_hashes; ++j) 00150 { 00151 // add it to the ring 00152 m_bhashes.push_back(bucket_hash(i, gen())); 00153 } 00154 } 00155 00156 // now sort the whole darn thing 00157 std::sort(m_bhashes.begin(), m_bhashes.end()); 00158 } 00159 00160 size_t partition(const T & key) const 00161 { 00162 typename std::vector<bucket_hash>::const_iterator it = std::lower_bound(m_bhashes.begin(), m_bhashes.end(), 00163 bucket_hash(0, fnv_hash(&key, sizeof(T)))); 00164 if (it == m_bhashes.end()) 00165 it = m_bhashes.begin(); 00166 return it->bucket; 00167 } 00168 }; 00169 00171 template <> 00172 inline size_t ketama_partitioner<std::string>::partition(const std::string& key) const 00173 { 00174 std::vector<bucket_hash>::const_iterator it = std::lower_bound(m_bhashes.begin(), m_bhashes.end(), 00175 bucket_hash(0, fnv_hash(key.c_str(), key.length()))); 00176 if (it == m_bhashes.end()) 00177 it = m_bhashes.begin(); 00178 return it->bucket; 00179 } 00180 00181 }} // moost::algorithm 00182 00183 #endif // MOOST_ALGORITHM_KETAMA_PARTITIONER_HPP__