libmoost
/home/mhx/git/github/libmoost/include/moost/container/memory_mapped_dataset/hash_multimap.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef MOOST_CONTAINER_MEMORY_MAPPED_DATASET_HASH_MULTIMAP_HPP__
00029 #define MOOST_CONTAINER_MEMORY_MAPPED_DATASET_HASH_MULTIMAP_HPP__
00030 
00031 #include <string>
00032 #include <vector>
00033 #include <stdexcept>
00034 #include <iterator>
00035 
00036 #include <boost/type_traits/is_pod.hpp>
00037 #include <boost/noncopyable.hpp>
00038 #include <boost/cstdint.hpp>
00039 
00040 #include "section_writer_base.hpp"
00041 #include "pod_pair.hpp"
00042 
00043 namespace moost { namespace container {
00044 
00055 template <typename Key, typename T, class HashFcn = MMD_DEFAULT_HASH_FCN<Key>, typename IndexType = boost::uint64_t>
00056 class mmd_hash_multimap : public boost::noncopyable
00057 {
00058    BOOST_STATIC_ASSERT_MSG(boost::is_pod<Key>::value, "mmd_hash_multimap<> template can only handle POD key types");
00059    BOOST_STATIC_ASSERT_MSG(boost::is_pod<T>::value, "mmd_hash_multimap<> template can only handle POD value types");
00060 
00061 public:
00062    static const size_t MMD_HASH_ALIGNMENT = 16;
00063    static const size_t MMD_HASH_BITS = 10;   // you'll usually want to pick this larger for huge tables
00064 
00065    typedef Key key_type;
00066    typedef T mapped_type;
00067    typedef pod_pair<Key, T> value_type;
00068 
00069    typedef const value_type *const_iterator;
00070    typedef size_t size_type;
00071 
00072    typedef const T& const_reference;
00073    typedef const T* const_pointer;
00074 
00075    typedef IndexType index_type;
00076 
00077    typedef std::bidirectional_iterator_tag iterator_category;
00078 
00079 private:
00080    static bool compare(const value_type& a, const value_type& b)
00081    {
00082       return a.first < b.first;
00083    }
00084 
00085 public:
00086    class writer : public mmd_section_writer_base
00087    {
00088    public:
00089       writer(memory_mapped_dataset::writer& wr, const std::string& name, size_t hash_bits = MMD_HASH_BITS, size_t alignment = MMD_HASH_ALIGNMENT)
00090          : mmd_section_writer_base(wr, name, "mmd_hash_multimap", alignment)
00091          , m_size(0)
00092          , m_hash_mask((1 << hash_bits) - 1)
00093       {
00094          if (hash_bits < 1 || hash_bits > 8*sizeof(size_type))
00095          {
00096             throw std::runtime_error("invalid value for hash_bits");
00097          }
00098 
00099          m_values.resize(1 << hash_bits);
00100 
00101          setattr("key_size", sizeof(key_type));
00102          setattr("mapped_size", sizeof(mapped_type));
00103          setattr("elem_size", sizeof(value_type));
00104          setattr("index_elem_size", sizeof(index_type));
00105          setattr("hash_bits", hash_bits);
00106       }
00107 
00108       writer& operator<< (const value_type& e)
00109       {
00110          insert(e);
00111          return *this;
00112       }
00113 
00114       void insert(const value_type& e)
00115       {
00116          m_values[HashFcn()(e.first) & m_hash_mask].push_back(e);
00117          ++m_size;
00118       }
00119 
00120       size_type size() const
00121       {
00122          return m_size;
00123       }
00124 
00125    protected:
00126       void pre_commit()      // all the writing actually happens here
00127       {
00128          std::vector<index_type> index;
00129 
00130          index.reserve(m_values.size() + 1);
00131          index.push_back(0);
00132 
00133          for (typename std::vector< std::vector<value_type> >::iterator vi = m_values.begin(); vi != m_values.end(); ++vi)
00134          {
00135             std::sort(vi->begin(), vi->end(), compare);
00136             write(*vi);
00137             index.push_back(index.back() + vi->size());
00138          }
00139 
00140          write(index);
00141 
00142          setattr("size", m_size);
00143       }
00144 
00145    private:
00146       size_type m_size;
00147       const size_type m_hash_mask;
00148       std::vector< std::vector<value_type> > m_values;
00149    };
00150 
00151    mmd_hash_multimap()
00152       : m_index(0)
00153       , m_hash_mask(0)
00154       , m_begin(0)
00155       , m_end(0)
00156       , m_hash_bits(0)
00157    {
00158    }
00159 
00160    mmd_hash_multimap(const memory_mapped_dataset& mmd, const std::string& name)
00161    {
00162       set(mmd, name);
00163    }
00164 
00165    void set(const memory_mapped_dataset& mmd, const std::string& name)
00166    {
00167       const memory_mapped_dataset::section_info& info = mmd.find(name, "mmd_hash_multimap");
00168 
00169       if (info.getattr<size_t>("key_size") != sizeof(key_type))
00170       {
00171          throw std::runtime_error("wrong key size for hash_multimap " + name + " in dataset " + mmd.description());
00172       }
00173 
00174       if (info.getattr<size_t>("mapped_size") != sizeof(mapped_type))
00175       {
00176          throw std::runtime_error("wrong mapped size for hash_multimap " + name + " in dataset " + mmd.description());
00177       }
00178 
00179       if (info.getattr<size_t>("elem_size") != sizeof(value_type))
00180       {
00181          throw std::runtime_error("wrong element size for hash_multimap " + name + " in dataset " + mmd.description());
00182       }
00183 
00184       if (info.getattr<size_t>("index_elem_size") != sizeof(index_type))
00185       {
00186          throw std::runtime_error("wrong index element size for hash_multimap " + name + " in dataset " + mmd.description());
00187       }
00188 
00189       m_hash_bits = info.getattr<size_type>("hash_bits");
00190       m_hash_mask = (1 << m_hash_bits) - 1;
00191 
00192       size_type index_size = (1 << m_hash_bits) + 1;
00193       size_type table_size = info.getattr<size_type>("size");
00194 
00195       m_begin = mmd.data<value_type>(info.offset(), table_size);
00196       m_end = m_begin + table_size;
00197       m_index = mmd.data<index_type>(info.offset() + sizeof(value_type)*table_size, index_size);
00198    }
00199 
00200    size_type hash_bits() const
00201    {
00202       return m_hash_bits;
00203    }
00204 
00205    void warm_cache() const
00206    {
00207       memory_mapped_dataset::warm_cache(m_begin, m_end);
00208    }
00209 
00210    const_iterator begin() const
00211    {
00212       return m_begin;
00213    }
00214 
00215    const_iterator end() const
00216    {
00217       return m_end;
00218    }
00219 
00220    size_type size() const
00221    {
00222       return m_end - m_begin;
00223    }
00224 
00225    bool empty() const
00226    {
00227       return size() == 0;
00228    }
00229 
00230    const_iterator lower_bound(const key_type& x) const
00231    {
00232       if (m_index)
00233       {
00234          size_t hash = HashFcn()(x) & m_hash_mask;
00235          value_type search;
00236          search.first = x;
00237          return std::lower_bound(&m_begin[m_index[hash]], &m_begin[m_index[hash + 1]], search, compare);
00238       }
00239       else
00240       {
00241          return m_end;
00242       }
00243    }
00244 
00245    // TODO: anyone feel free to add more methods for better compatibility with std::multimap<> :-)
00246 
00247 private:
00248    const index_type *m_index;
00249    size_t m_hash_mask;
00250    const_iterator m_begin;
00251    const_iterator m_end;
00252    size_type m_hash_bits;
00253 };
00254 
00255 }}
00256 
00257 #endif