libmoost
|
00001 /* vim:set ts=3 sw=3 sts=3 et: */ 00028 #ifndef MOOST_CONTAINER_BIT_FILTER_HPP_ 00029 #define MOOST_CONTAINER_BIT_FILTER_HPP_ 00030 00031 #include <vector> 00032 #include <iostream> 00033 00034 #include <bm/bm.h> 00035 #include <bm/bmserial.h> 00036 00037 namespace moost { namespace container { 00038 00039 namespace bit_filter_types 00040 { 00041 00047 struct default_hash 00048 { 00049 template <typename itemT> 00050 size_t operator()(itemT const & t) const 00051 { 00052 return t; 00053 } 00054 }; 00055 00059 typedef std::vector<unsigned char> serial_buffer_t; 00060 00061 } 00062 00063 template <typename itemT, typename hashT = bit_filter_types::default_hash> 00064 class bit_filter 00065 { 00066 public: 00067 typedef itemT item_type; 00068 typedef hashT hash_type; 00069 typedef bit_filter<item_type, hash_type> this_type; 00070 typedef bit_filter_types::serial_buffer_t serial_buffer_t; 00071 00075 bit_filter( 00076 size_t const size, 00077 hash_type const & ht = hash_type() 00078 ) : size_(size), ht_(ht) 00079 { 00080 } 00081 00085 template <typename inputIteratorT> 00086 bit_filter( 00087 size_t const size, 00088 inputIteratorT beg, 00089 inputIteratorT end, 00090 bool boptimise = false, 00091 hash_type const & ht = hash_type() 00092 ) : size_(size), ht_(ht) 00093 { 00094 insert(beg, end, boptimise); 00095 } 00096 00100 bit_filter(bit_filter const & rhs) 00101 : size_(rhs.size_), bv_(rhs.bv_), ht_(rhs.ht_) 00102 { 00103 } 00104 00108 bit_filter & operator = (bit_filter const & rhs) 00109 { 00110 if(this != &rhs) 00111 { 00112 size_ = rhs.size_; 00113 bv_ = rhs.bv_; 00114 ht_ = rhs.ht_; 00115 } 00116 00117 return *this; 00118 } 00119 00123 void insert(item_type const & t, bool boptimise = false) 00124 { 00125 if(!find(t)) 00126 { 00127 bv_.set(ht_(t) % size_); 00128 } 00129 00130 if(boptimise) { optimize(); } 00131 } 00132 00136 template <typename inputIteratorT> 00137 void insert(inputIteratorT beg, inputIteratorT end, bool boptimise = false) 00138 { 00139 while(beg != end) 00140 { 00141 insert(*beg); 00142 ++beg; 00143 } 00144 00145 if(boptimise) { optimize(); } 00146 } 00147 00151 template <typename inputIteratorT> 00152 void replace(inputIteratorT beg, inputIteratorT end) 00153 { 00154 clear(); 00155 insert(beg, end); 00156 } 00157 00161 void clear() 00162 { 00163 bv_.clear(); 00164 } 00165 00169 size_t size() const 00170 { 00171 return size_; 00172 } 00173 00177 bool find(item_type const & t) const 00178 { 00179 return bv_.test(ht_(t) % size_); 00180 } 00181 00186 template <typename inputIteratorT> 00187 size_t find(inputIteratorT beg, inputIteratorT end) const 00188 { 00189 size_t cnt = 0; 00190 while(beg != end) 00191 { 00192 if(find(*beg)) 00193 { 00194 ++cnt; 00195 } 00196 00197 ++beg; 00198 } 00199 00200 return cnt; 00201 } 00202 00208 template <typename inputIteratorT, typename outputIteratorT> 00209 size_t find(inputIteratorT beg, inputIteratorT end, outputIteratorT out) const 00210 { 00211 size_t cnt = 0; 00212 while(beg != end) 00213 { 00214 if(find(*beg)) 00215 { 00216 ++cnt; 00217 *out = *beg; 00218 ++out; 00219 } 00220 00221 ++beg; 00222 } 00223 00224 return cnt; 00225 } 00226 00230 bool find(this_type const & rhs) const 00231 { 00232 return (this == &rhs) ? true : (bm::any_and(bv_, rhs.bv_) != 0); 00233 } 00234 00238 void optimize() 00239 { 00240 bv_.optimize(); 00241 } 00242 00248 size_t serialize(serial_buffer_t & buf, bool boptimise = true) 00249 { 00250 // Since we can't deallocate vector memory and since we initially 00251 // allocate far more than we'll need we use a temporary that can be 00252 // thrown away after, so we only used as much memory as needed. 00253 00254 if(boptimise) { optimize(); } 00255 00256 bm::bvector<>::statistics st; 00257 bm::serializer<bm::bvector<> > bvs; 00258 00259 // Optimize things 00260 bvs.byte_order_serialization(false); // We don't care about supporting different endianess 00261 bvs.gap_length_serialization(false); // We don't care about GAP levels 00262 bvs.set_compression_level(4); // We want maximum compression 00263 00264 bv_.calc_stat(&st); 00265 00266 serial_buffer_t tmp_buf(st.max_serialize_mem); 00267 size_t const used = bvs.serialize(bv_, &tmp_buf[0], tmp_buf.size()); 00268 buf.resize(used); 00269 buf.insert(buf.begin(), tmp_buf.begin(), tmp_buf.begin() + used); 00270 00271 return used; 00272 } 00273 00278 void deserialize(serial_buffer_t const & buf, bool bclear = true) 00279 { 00280 if(bclear) { bv_.clear(); } 00281 if(!buf.empty()) 00282 { 00283 bm::deserialize(bv_, &buf[0]); 00284 } 00285 } 00286 00290 bool operator == (this_type const & rhs) const 00291 { 00292 return bv_ == rhs.bv_; 00293 } 00294 00298 bool operator != (this_type const & rhs) const 00299 { 00300 return bv_ != rhs.bv_; 00301 } 00302 00307 size_t memory() const 00308 { 00309 bm::bvector<>::statistics st; 00310 bv_.calc_stat(&st); 00311 return st.memory_used; 00312 } 00313 00317 size_t count () const 00318 { 00319 return bv_.count(); 00320 } 00321 00322 private: 00323 size_t size_; 00324 bm::bvector<> bv_; 00325 hash_type ht_; 00326 }; 00327 00328 template <typename itemT, typename hashT> 00329 bit_filter<itemT, hashT> & operator >> ( 00330 bit_filter<itemT, hashT> & bf, 00331 typename bit_filter<itemT, hashT>::serial_buffer_t & buf 00332 ) 00333 { 00334 bf.serialize(buf); 00335 return bf; 00336 } 00337 00338 template <typename itemT, typename hashT> 00339 bit_filter<itemT, hashT> & operator << ( 00340 bit_filter<itemT, hashT> & bf, 00341 typename bit_filter<itemT, hashT>::serial_buffer_t const & buf 00342 ) 00343 { 00344 bf.deserialize(buf); 00345 return bf; 00346 } 00347 00348 }} 00349 00350 #endif