libmoost
/home/mhx/git/github/libmoost/include/moost/container/bit_filter.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef MOOST_CONTAINER_BIT_FILTER_HPP_
00029 #define MOOST_CONTAINER_BIT_FILTER_HPP_
00030 
00031 #include <vector>
00032 #include <iostream>
00033 
00034 #include <bm/bm.h>
00035 #include <bm/bmserial.h>
00036 
00037 namespace moost { namespace container {
00038 
00039 namespace bit_filter_types
00040 {
00041 
00047 struct default_hash
00048 {
00049    template <typename itemT>
00050    size_t operator()(itemT const & t) const
00051    {
00052       return t;
00053    }
00054 };
00055 
00059 typedef std::vector<unsigned char> serial_buffer_t;
00060 
00061 }
00062 
00063 template <typename itemT, typename hashT = bit_filter_types::default_hash>
00064 class bit_filter
00065 {
00066 public:
00067    typedef itemT item_type;
00068    typedef hashT hash_type;
00069    typedef bit_filter<item_type, hash_type> this_type;
00070    typedef bit_filter_types::serial_buffer_t serial_buffer_t;
00071 
00075    bit_filter(
00076       size_t const size,
00077       hash_type const & ht = hash_type()
00078       ) : size_(size), ht_(ht)
00079    {
00080    }
00081 
00085    template <typename inputIteratorT>
00086    bit_filter(
00087       size_t const size,
00088       inputIteratorT beg,
00089       inputIteratorT end,
00090       bool boptimise = false,
00091       hash_type const & ht = hash_type()
00092       ) : size_(size), ht_(ht)
00093    {
00094       insert(beg, end, boptimise);
00095    }
00096 
00100    bit_filter(bit_filter const & rhs)
00101        : size_(rhs.size_), bv_(rhs.bv_), ht_(rhs.ht_)
00102    {
00103    }
00104 
00108    bit_filter & operator = (bit_filter const & rhs)
00109    {
00110       if(this != &rhs)
00111       {
00112          size_ = rhs.size_;
00113          bv_ = rhs.bv_;
00114          ht_ = rhs.ht_;
00115       }
00116 
00117       return *this;
00118    }
00119 
00123    void insert(item_type const & t, bool boptimise = false)
00124    {
00125       if(!find(t))
00126       {
00127          bv_.set(ht_(t) % size_);
00128       }
00129 
00130       if(boptimise) { optimize(); }
00131    }
00132 
00136    template <typename inputIteratorT>
00137    void insert(inputIteratorT beg, inputIteratorT end, bool boptimise = false)
00138    {
00139       while(beg != end)
00140       {
00141          insert(*beg);
00142          ++beg;
00143       }
00144 
00145       if(boptimise) { optimize(); }
00146    }
00147 
00151    template <typename inputIteratorT>
00152    void replace(inputIteratorT beg, inputIteratorT end)
00153    {
00154       clear();
00155       insert(beg, end);
00156    }
00157 
00161    void clear()
00162    {
00163       bv_.clear();
00164    }
00165 
00169    size_t size() const
00170    {
00171       return size_;
00172    }
00173 
00177    bool find(item_type const & t) const
00178    {
00179       return bv_.test(ht_(t) % size_);
00180    }
00181 
00186    template <typename inputIteratorT>
00187    size_t find(inputIteratorT beg, inputIteratorT end) const
00188    {
00189       size_t cnt = 0;
00190       while(beg != end)
00191       {
00192          if(find(*beg))
00193          {
00194             ++cnt;
00195          }
00196 
00197          ++beg;
00198       }
00199 
00200       return cnt;
00201    }
00202 
00208    template <typename inputIteratorT, typename outputIteratorT>
00209    size_t find(inputIteratorT beg, inputIteratorT end, outputIteratorT out) const
00210    {
00211       size_t cnt = 0;
00212       while(beg != end)
00213       {
00214          if(find(*beg))
00215          {
00216             ++cnt;
00217             *out = *beg;
00218             ++out;
00219          }
00220 
00221          ++beg;
00222       }
00223 
00224       return cnt;
00225    }
00226 
00230    bool find(this_type const & rhs) const
00231    {
00232       return (this == &rhs) ? true : (bm::any_and(bv_, rhs.bv_) != 0);
00233    }
00234 
00238    void optimize()
00239    {
00240       bv_.optimize();
00241    }
00242 
00248    size_t serialize(serial_buffer_t & buf, bool boptimise = true)
00249    {
00250       // Since we can't deallocate vector memory and since we initially
00251       // allocate far more than we'll need we use a temporary that can be
00252       // thrown away after, so we only used as much memory as needed.
00253 
00254       if(boptimise) { optimize(); }
00255 
00256       bm::bvector<>::statistics st;
00257       bm::serializer<bm::bvector<> > bvs;
00258 
00259       // Optimize things
00260       bvs.byte_order_serialization(false); // We don't care about supporting different endianess
00261       bvs.gap_length_serialization(false); // We don't care about GAP levels
00262       bvs.set_compression_level(4);         // We want maximum compression
00263 
00264       bv_.calc_stat(&st);
00265 
00266       serial_buffer_t tmp_buf(st.max_serialize_mem);
00267       size_t const used = bvs.serialize(bv_, &tmp_buf[0], tmp_buf.size());
00268       buf.resize(used);
00269       buf.insert(buf.begin(), tmp_buf.begin(), tmp_buf.begin() + used);
00270 
00271       return used;
00272    }
00273 
00278    void deserialize(serial_buffer_t const & buf, bool bclear = true)
00279    {
00280       if(bclear) { bv_.clear(); }
00281       if(!buf.empty())
00282       {
00283          bm::deserialize(bv_, &buf[0]);
00284       }
00285    }
00286 
00290    bool operator == (this_type const & rhs) const
00291    {
00292       return bv_ == rhs.bv_;
00293    }
00294 
00298    bool operator != (this_type const & rhs) const
00299    {
00300       return bv_ != rhs.bv_;
00301    }
00302 
00307    size_t memory() const
00308    {
00309       bm::bvector<>::statistics st;
00310       bv_.calc_stat(&st);
00311       return st.memory_used;
00312    }
00313 
00317    size_t count () const
00318    {
00319       return bv_.count();
00320    }
00321 
00322 private:
00323    size_t size_;
00324    bm::bvector<> bv_;
00325    hash_type ht_;
00326 };
00327 
00328 template <typename itemT, typename hashT>
00329 bit_filter<itemT, hashT> &  operator >> (
00330    bit_filter<itemT, hashT> & bf,
00331    typename bit_filter<itemT, hashT>::serial_buffer_t & buf
00332    )
00333 {
00334    bf.serialize(buf);
00335    return bf;
00336 }
00337 
00338 template <typename itemT, typename hashT>
00339 bit_filter<itemT, hashT> & operator << (
00340    bit_filter<itemT, hashT> & bf,
00341    typename bit_filter<itemT, hashT>::serial_buffer_t const & buf
00342    )
00343 {
00344    bf.deserialize(buf);
00345    return bf;
00346 }
00347 
00348 }}
00349 
00350 #endif