libmoost
|
00001 /* vim:set ts=3 sw=3 sts=3 et: */ 00028 #ifndef MOOST_CONTAINER_MEMORY_MAPPED_DATASET_HASH_MULTIMAP_HPP__ 00029 #define MOOST_CONTAINER_MEMORY_MAPPED_DATASET_HASH_MULTIMAP_HPP__ 00030 00031 #include <string> 00032 #include <vector> 00033 #include <stdexcept> 00034 #include <iterator> 00035 00036 #include <boost/type_traits/is_pod.hpp> 00037 #include <boost/noncopyable.hpp> 00038 #include <boost/cstdint.hpp> 00039 00040 #include "section_writer_base.hpp" 00041 #include "pod_pair.hpp" 00042 00043 namespace moost { namespace container { 00044 00055 template <typename Key, typename T, class HashFcn = MMD_DEFAULT_HASH_FCN<Key>, typename IndexType = boost::uint64_t> 00056 class mmd_hash_multimap : public boost::noncopyable 00057 { 00058 BOOST_STATIC_ASSERT_MSG(boost::is_pod<Key>::value, "mmd_hash_multimap<> template can only handle POD key types"); 00059 BOOST_STATIC_ASSERT_MSG(boost::is_pod<T>::value, "mmd_hash_multimap<> template can only handle POD value types"); 00060 00061 public: 00062 static const size_t MMD_HASH_ALIGNMENT = 16; 00063 static const size_t MMD_HASH_BITS = 10; // you'll usually want to pick this larger for huge tables 00064 00065 typedef Key key_type; 00066 typedef T mapped_type; 00067 typedef pod_pair<Key, T> value_type; 00068 00069 typedef const value_type *const_iterator; 00070 typedef size_t size_type; 00071 00072 typedef const T& const_reference; 00073 typedef const T* const_pointer; 00074 00075 typedef IndexType index_type; 00076 00077 typedef std::bidirectional_iterator_tag iterator_category; 00078 00079 private: 00080 static bool compare(const value_type& a, const value_type& b) 00081 { 00082 return a.first < b.first; 00083 } 00084 00085 public: 00086 class writer : public mmd_section_writer_base 00087 { 00088 public: 00089 writer(memory_mapped_dataset::writer& wr, const std::string& name, size_t hash_bits = MMD_HASH_BITS, size_t alignment = MMD_HASH_ALIGNMENT) 00090 : mmd_section_writer_base(wr, name, "mmd_hash_multimap", alignment) 00091 , m_size(0) 00092 , m_hash_mask((1 << hash_bits) - 1) 00093 { 00094 if (hash_bits < 1 || hash_bits > 8*sizeof(size_type)) 00095 { 00096 throw std::runtime_error("invalid value for hash_bits"); 00097 } 00098 00099 m_values.resize(1 << hash_bits); 00100 00101 setattr("key_size", sizeof(key_type)); 00102 setattr("mapped_size", sizeof(mapped_type)); 00103 setattr("elem_size", sizeof(value_type)); 00104 setattr("index_elem_size", sizeof(index_type)); 00105 setattr("hash_bits", hash_bits); 00106 } 00107 00108 writer& operator<< (const value_type& e) 00109 { 00110 insert(e); 00111 return *this; 00112 } 00113 00114 void insert(const value_type& e) 00115 { 00116 m_values[HashFcn()(e.first) & m_hash_mask].push_back(e); 00117 ++m_size; 00118 } 00119 00120 size_type size() const 00121 { 00122 return m_size; 00123 } 00124 00125 protected: 00126 void pre_commit() // all the writing actually happens here 00127 { 00128 std::vector<index_type> index; 00129 00130 index.reserve(m_values.size() + 1); 00131 index.push_back(0); 00132 00133 for (typename std::vector< std::vector<value_type> >::iterator vi = m_values.begin(); vi != m_values.end(); ++vi) 00134 { 00135 std::sort(vi->begin(), vi->end(), compare); 00136 write(*vi); 00137 index.push_back(index.back() + vi->size()); 00138 } 00139 00140 write(index); 00141 00142 setattr("size", m_size); 00143 } 00144 00145 private: 00146 size_type m_size; 00147 const size_type m_hash_mask; 00148 std::vector< std::vector<value_type> > m_values; 00149 }; 00150 00151 mmd_hash_multimap() 00152 : m_index(0) 00153 , m_hash_mask(0) 00154 , m_begin(0) 00155 , m_end(0) 00156 , m_hash_bits(0) 00157 { 00158 } 00159 00160 mmd_hash_multimap(const memory_mapped_dataset& mmd, const std::string& name) 00161 { 00162 set(mmd, name); 00163 } 00164 00165 void set(const memory_mapped_dataset& mmd, const std::string& name) 00166 { 00167 const memory_mapped_dataset::section_info& info = mmd.find(name, "mmd_hash_multimap"); 00168 00169 if (info.getattr<size_t>("key_size") != sizeof(key_type)) 00170 { 00171 throw std::runtime_error("wrong key size for hash_multimap " + name + " in dataset " + mmd.description()); 00172 } 00173 00174 if (info.getattr<size_t>("mapped_size") != sizeof(mapped_type)) 00175 { 00176 throw std::runtime_error("wrong mapped size for hash_multimap " + name + " in dataset " + mmd.description()); 00177 } 00178 00179 if (info.getattr<size_t>("elem_size") != sizeof(value_type)) 00180 { 00181 throw std::runtime_error("wrong element size for hash_multimap " + name + " in dataset " + mmd.description()); 00182 } 00183 00184 if (info.getattr<size_t>("index_elem_size") != sizeof(index_type)) 00185 { 00186 throw std::runtime_error("wrong index element size for hash_multimap " + name + " in dataset " + mmd.description()); 00187 } 00188 00189 m_hash_bits = info.getattr<size_type>("hash_bits"); 00190 m_hash_mask = (1 << m_hash_bits) - 1; 00191 00192 size_type index_size = (1 << m_hash_bits) + 1; 00193 size_type table_size = info.getattr<size_type>("size"); 00194 00195 m_begin = mmd.data<value_type>(info.offset(), table_size); 00196 m_end = m_begin + table_size; 00197 m_index = mmd.data<index_type>(info.offset() + sizeof(value_type)*table_size, index_size); 00198 } 00199 00200 size_type hash_bits() const 00201 { 00202 return m_hash_bits; 00203 } 00204 00205 void warm_cache() const 00206 { 00207 memory_mapped_dataset::warm_cache(m_begin, m_end); 00208 } 00209 00210 const_iterator begin() const 00211 { 00212 return m_begin; 00213 } 00214 00215 const_iterator end() const 00216 { 00217 return m_end; 00218 } 00219 00220 size_type size() const 00221 { 00222 return m_end - m_begin; 00223 } 00224 00225 bool empty() const 00226 { 00227 return size() == 0; 00228 } 00229 00230 const_iterator lower_bound(const key_type& x) const 00231 { 00232 if (m_index) 00233 { 00234 size_t hash = HashFcn()(x) & m_hash_mask; 00235 value_type search; 00236 search.first = x; 00237 return std::lower_bound(&m_begin[m_index[hash]], &m_begin[m_index[hash + 1]], search, compare); 00238 } 00239 else 00240 { 00241 return m_end; 00242 } 00243 } 00244 00245 // TODO: anyone feel free to add more methods for better compatibility with std::multimap<> :-) 00246 00247 private: 00248 const index_type *m_index; 00249 size_t m_hash_mask; 00250 const_iterator m_begin; 00251 const_iterator m_end; 00252 size_type m_hash_bits; 00253 }; 00254 00255 }} 00256 00257 #endif