libmoost
/home/mhx/git/github/libmoost/include/moost/container/neigh_multi_map.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef __NEIGH_MULTI_MAP_CONTAINER_H
00029 #define __NEIGH_MULTI_MAP_CONTAINER_H
00030 
00031 #include <string>
00032 #include <vector>
00033 #include <algorithm>
00034 #include <limits>
00035 #include <fstream>
00036 #include <stdexcept>
00037 
00038 #include <boost/cstdint.hpp>
00039 
00040 #include "multi_map.hpp"
00041 #include "dense_hash_map.hpp"
00042 
00043 namespace moost { namespace container {
00044 
00045 // -----------------------------------------------------------------------------
00046 
/// A single stored value: an (int, float) pair. create_map() reads these
/// straight from the data file, sizeof(entry_type) bytes each.
typedef std::pair<int, float> entry_type;
/// ID transformation policy that accepts every ID and leaves it unchanged.
/// Used by the create_map() overload that takes no policy.
struct IdentityPolicy
{
   /// Copies @p entry into @p res.
   /// @param entry  ID to transform.
   /// @param res    receives the same value as @p entry.
   /// @return always true.
   bool operator()(int entry, int& res) const
   {
      res = entry;
      return true;
   }
};
00056 
00057 // -----------------------------------------------------------------------------
00058 
00099 template < typename TLocMap = moost::container::dense_hash_map<int, multimap_value_type> >
00100 class neigh_multi_map :
00101    public multi_map< int, std::pair<int, float>, TLocMap >
00102 {
00103 public:
00104 
00105    typedef typename multi_map< int, std::pair<int, float>, TLocMap >::loc_map_policy_type loc_map_policy_type;
00106 
00107 public:
00108 
00109    //neigh_multi_map( const loc_map_policy_type& locHandlerPolicy = loc_map_policy_type() )
00110    //   : multi_map<int, std::pair<int, float>, TLocMap>(locHandlerPolicy)
00111    //{}
00112 
00113    // the default policy uses dense hash map, and the default empty key is 0,
00114    // so if your data has zeroes for keys you have to make sure
00115    // you are using a different policy, i.e.
00116    // neigh_multi_map<> m( neigh_multi_map<>::loc_map_policy_type(-1) );
00117    neigh_multi_map(const loc_map_policy_type& locHandlerPolicy )
00118       : multi_map<int, std::pair<int, float>, TLocMap>(locHandlerPolicy), m_externalLocMap(true)
00119    {}
00120 
00121    neigh_multi_map()
00122       : m_externalLocMap(false)
00123    {}
00124 
00125 
00126    // WhichKey is either 1 or 2
00127    // Expect to be sorted by key
00128    void create_map( const std::string& dataFileName,
00129                     int maxEntriesPerVec = (std::numeric_limits<int>::max)() )
00130    {
00131       IdentityPolicy ip;
00132       create_map(dataFileName, ip, maxEntriesPerVec);
00133    }
00134 
00135    // WhichKey is either 1 or 2
00136    // Expect to be sorted by key
00137 
00138    template <typename TranformIDPolicy>
00139    void create_map( const std::string& dataFileName,
00140                     const TranformIDPolicy& getIDPolicy,
00141                     int maxEntriesPerVec = (std::numeric_limits<int>::max)() );
00142 
00143    inline void create_map_from_vector( std::vector<std::pair<int, std::pair<int, float> > >& i2i )
00144    {
00145       multi_map< int, std::pair<int, float>, TLocMap >::template create_map<1>(i2i.begin(), i2i.end());
00146    }
00147 
00148 private:
00149 
00150    using multi_map< int, std::pair<int, float>, TLocMap >::m_data;
00151    using multi_map< int, std::pair<int, float>, TLocMap >::m_locations;
00152 
00153    using multi_map< int, std::pair<int, float>, TLocMap >::m_locHandlerPolicy;
00154 
00155    bool m_externalLocMap;
00156 
00157    // -----------------------------------------------------------------------------
00158 
00159 #ifdef _WIN32
00160    // In order to seek with large files in windows use
00161    // this function to "extract" the file pointer from
00162    // a stream:
00163    //
00164    // FILE* fp = getFilePointer(fileStream.rdbuf());
00165    //
00166    // then use the 64bits seek function:
00167    //
00168    // _fseeki64( fp, pos, SEEK_SET);
00169    //
00170    inline FILE* getFilePointer(std::filebuf* pFileBuf)
00171    {
00172       return reinterpret_cast<FILE*>(
00173          *reinterpret_cast<FILE**>(
00174          reinterpret_cast<char*>(pFileBuf)+76 ) );
00175    }
00176 #endif
00177 
00178 };
00179 
00180 // -----------------------------------------------------------------------------
00181 // -----------------------------------------------------------------------------
00182 
00183 template <typename TLocMap>
00184 template <typename TranformIDPolicy>
00185 void neigh_multi_map<TLocMap>::create_map( const std::string& dataFileName,
00186                                            const TranformIDPolicy& getIDPolicy,
00187                                            int maxEntriesPerVec /*= (std::numeric_limits<int>::max)() */ )
00188 {
00189    std::ifstream fileSource(dataFileName.c_str(), std::ios::binary);
00190    if ( !fileSource.is_open() )
00191       throw std::runtime_error("Cannot open file <" + dataFileName + ">!");
00192 
00193 #ifdef _WIN32
00194    FILE* fp = getFilePointer(fileSource.rdbuf());
00195 #endif
00196 
00197    int numKeys;
00198    fileSource.read( reinterpret_cast<char*>( &numKeys ), sizeof(int) );
00199    if ( fileSource.eof() )
00200       throw std::runtime_error("Empty source on <" + dataFileName + ">!");
00201 
00203    // scanning
00204    boost::int64_t currPos = sizeof(int);
00205    int tmpID, numEntries;
00206    int numToRead;
00207    int totEntries = 0;
00208 
00209    for (int i = 0;; ++i)
00210    {
00211       fileSource.read( reinterpret_cast<char*>(&tmpID), sizeof(int) );
00212       if ( fileSource.eof() )
00213          break;
00214       fileSource.read( reinterpret_cast<char*>( &numEntries ), sizeof(int) );
00215       numToRead = (std::min)( numEntries, maxEntriesPerVec );
00216       totEntries += numToRead;
00217 
00218       currPos += sizeof(int) + // tmpID
00219                  sizeof(int) + // numEntries
00220                  numEntries * sizeof(std::pair<int, float>);
00221 
00222 #ifdef _WIN32
00223       _fseeki64( fp, currPos, SEEK_SET);
00224 #else
00225       fileSource.seekg( currPos, std::ios::beg );
00226 #endif
00227    }
00228 
00229    // we've got the number!
00230    if ( totEntries == 0 )
00231       throw std::runtime_error("Empty source on <" + dataFileName + ">!");
00232 
00233    // allocating
00234    this->m_data.resize(totEntries);
00235 
00236    if ( !m_externalLocMap )
00237       m_locHandlerPolicy.resize(this->m_locations, numKeys);
00238    //TLocHandler::reserve(this->m_locations, numKeys);
00239    //this->m_locations.resize(numKeys);
00240 
00243 
00244    // now loading
00245    fileSource.clear();
00246    currPos = sizeof(int);
00247 
00248 #ifdef _WIN32
00249    fileSource.seekg( static_cast<long>(currPos), std::ios::beg );
00250 #else
00251    fileSource.seekg( currPos, std::ios::beg );
00252 #endif
00253    int entryPos = 0;
00254 
00255    int transformedID;
00256 
00257    for (int i = 0;; ++i)
00258    {
00259       fileSource.read( reinterpret_cast<char*>(&tmpID), sizeof(int) );
00260       if ( fileSource.eof() )
00261          break;
00262 
00263       fileSource.read( reinterpret_cast<char*>( &numEntries ), sizeof(int) );
00264       numToRead = (std::min)( numEntries, maxEntriesPerVec );
00265 
00266       if ( numToRead == 0 )
00267          continue;
00268 
00269       if ( entryPos + numToRead > totEntries )
00270          throw std::runtime_error("There were more entries than what was found during scan!");
00271 
00272       fileSource.read( reinterpret_cast<char*>( &(m_data[entryPos]) ), numToRead * sizeof(entry_type) );
00273 
00274       if ( getIDPolicy(tmpID, transformedID) )
00275          m_locations[transformedID] = std::make_pair(entryPos, numToRead);
00276 
00277       entryPos += numToRead;
00278       currPos += sizeof(int) + // tmpID
00279                  sizeof(int) + // numEntries
00280                  numEntries * sizeof(std::pair<int, float>);
00281 
00282 #ifdef _WIN32
00283       _fseeki64( fp, currPos, SEEK_SET);
00284 #else
00285       fileSource.seekg( currPos, std::ios::beg );
00286 #endif
00287    }
00288 }
00289 
00290 // -----------------------------------------------------------------------------
00291 
00292 }}
00293 
00294 #endif // __NEIGH_MULTI_MAP_CONTAINER_H
00295 
00296 // -----------------------------------------------------------------------------