libmoost
/home/mhx/git/github/libmoost/include/moost/algorithm/ketama_partitioner.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef MOOST_ALGORITHM_KETAMA_PARTITIONER_HPP__
00029 #define MOOST_ALGORITHM_KETAMA_PARTITIONER_HPP__
00030 
00031 #include <boost/random/mersenne_twister.hpp>
00032 
00033 #include "partitioner.hpp"
00034 
00035 #include <vector>
00036 #include <algorithm>
00037 #include <limits>
00038 
00039 namespace moost { namespace algorithm {
00040 
00044 template<typename T>
00045 class ketama_partitioner: public partitioner<T>
00046 {
00047 private:
00048 
00049   /* erikf raised a concern about boost::mt19937 potentially changing and thus
00050    * included a copy of mersenne_twister.hpp in moost. Not only is this ugly,
00051    * it has also been the source of link-level errors with recent versions of
00052    * boost due to refactoring of the implementation.
00053    *
00054    * The algorithm itself is well-defined and extremely unlikely to ever be
00055    * changed. The only thing that's slightly more probable to change (even
00056    * though I don't think it ever will) is the default seed, which is why we
00057    * keep a copy of the "original" seed here and explicitly seed the RNGs.
00058    */
00059   static uint32_t default_seed()
00060   {
00061      return 5489u;
00062   }
00063 
00064   struct bucket_hash
00065   {
00066     bucket_hash(size_t bucket_, size_t hash_) :
00067       bucket(bucket_), hash(hash_)
00068     {
00069     }
00070     size_t bucket;
00071     size_t hash;
00072     bool operator <(const bucket_hash & other) const
00073     {
00074       return hash < other.hash;
00075     }
00076   };
00077 
00078   std::vector<bucket_hash> m_bhashes;
00079 
00080   // stolen from http://isthe.com/chongo/tech/comp/fnv/
00081   unsigned int fnv_hash(const void *key, size_t len) const
00082   {
00083     const unsigned char *p = (const unsigned char*) key;
00084     unsigned int h = 2166136261UL;
00085 
00086     for (size_t i = 0; i != len; i++)
00087       h = (h * 16777619) ^ p[i];
00088 
00089     return h;
00090   }
00091 
00092 public:
00093 
00094   ketama_partitioner(size_t num_buckets, size_t num_hashes = 4096)
00095   : partitioner<T> (num_buckets)
00096   {
00097     boost::mt19937 gen(default_seed());
00098 
00099     // for each bucket
00100     for (size_t i = 0; i != num_buckets; ++i)
00101     {
00102       // for each hash
00103       for (size_t j = 0; j != num_hashes; ++j)
00104       {
00105         // add it to the ring
00106         m_bhashes.push_back(bucket_hash(i, gen()));
00107       }
00108     }
00109 
00110     // now sort the whole darn thing
00111     std::sort(m_bhashes.begin(), m_bhashes.end());
00112   }
00113 
00114   template <typename Y>
00115   ketama_partitioner( const std::vector<Y>& buckets, size_t num_hashes = 4096)
00116   : partitioner<T>( buckets.size() )
00117   {
00118      boost::mt19937 gen(default_seed());
00119      typename std::vector<Y>::const_iterator it;
00120      // for each bucket
00121      size_t i = 0;
00122      for ( it = buckets.begin(); it != buckets.end(); ++it, ++i )
00123      {
00124         gen.seed( fnv_hash( &(*it), sizeof(Y)) );
00125         // for each hash
00126         for (size_t j = 0; j != num_hashes; ++j)
00127         {
00128            // add it to the ring
00129            m_bhashes.push_back(bucket_hash(i, gen()));
00130         }
00131      }
00132 
00133      // now sort the whole darn thing
00134      std::sort(m_bhashes.begin(), m_bhashes.end());
00135   }
00136 
00138   ketama_partitioner( const std::vector<std::string>& buckets, size_t num_hashes = 4096)
00139      : partitioner<T>( buckets.size() )
00140   {
00141      boost::mt19937 gen(default_seed());
00142      std::vector<std::string>::const_iterator it;
00143      // for each bucket
00144      size_t i = 0;
00145      for ( it = buckets.begin(); it != buckets.end(); ++it, ++i )
00146      {
00147         gen.seed( fnv_hash(it->c_str(), it->length()) );
00148         // for each hash
00149         for (size_t j = 0; j != num_hashes; ++j)
00150         {
00151            // add it to the ring
00152            m_bhashes.push_back(bucket_hash(i, gen()));
00153         }
00154      }
00155 
00156      // now sort the whole darn thing
00157      std::sort(m_bhashes.begin(), m_bhashes.end());
00158   }
00159 
00160   size_t partition(const T & key) const
00161   {
00162     typename std::vector<bucket_hash>::const_iterator it = std::lower_bound(m_bhashes.begin(), m_bhashes.end(),
00163         bucket_hash(0, fnv_hash(&key, sizeof(T))));
00164     if (it == m_bhashes.end())
00165       it = m_bhashes.begin();
00166     return it->bucket;
00167   }
00168 };
00169 
00171 template <>
00172 inline size_t ketama_partitioner<std::string>::partition(const std::string& key) const
00173 {
00174    std::vector<bucket_hash>::const_iterator it = std::lower_bound(m_bhashes.begin(), m_bhashes.end(),
00175       bucket_hash(0, fnv_hash(key.c_str(), key.length())));
00176    if (it == m_bhashes.end())
00177       it = m_bhashes.begin();
00178    return it->bucket;
00179 }
00180 
00181 }} // moost::algorithm
00182 
00183 #endif // MOOST_ALGORITHM_KETAMA_PARTITIONER_HPP__