libmoost
|
00001 /* vim:set ts=3 sw=3 sts=3 et: */ 00028 #ifndef __NEIGH_MULTI_MAP_CONTAINER_H 00029 #define __NEIGH_MULTI_MAP_CONTAINER_H 00030 00031 #include <string> 00032 #include <vector> 00033 #include <algorithm> 00034 #include <limits> 00035 #include <fstream> 00036 #include <stdexcept> 00037 00038 #include <boost/cstdint.hpp> 00039 00040 #include "multi_map.hpp" 00041 #include "dense_hash_map.hpp" 00042 00043 namespace moost { namespace container { 00044 00045 // ----------------------------------------------------------------------------- 00046 00047 typedef std::pair<int, float> entry_type; 00048 struct IdentityPolicy 00049 { 00050 inline bool operator()(int entry, int& res) const 00051 { 00052 res = entry; 00053 return true; 00054 } 00055 }; 00056 00057 // ----------------------------------------------------------------------------- 00058 00099 template < typename TLocMap = moost::container::dense_hash_map<int, multimap_value_type> > 00100 class neigh_multi_map : 00101 public multi_map< int, std::pair<int, float>, TLocMap > 00102 { 00103 public: 00104 00105 typedef typename multi_map< int, std::pair<int, float>, TLocMap >::loc_map_policy_type loc_map_policy_type; 00106 00107 public: 00108 00109 //neigh_multi_map( const loc_map_policy_type& locHandlerPolicy = loc_map_policy_type() ) 00110 // : multi_map<int, std::pair<int, float>, TLocMap>(locHandlerPolicy) 00111 //{} 00112 00113 // the default policy uses dense hash map, and the default empty key is 0, 00114 // so if your data has zeroes for keys you have to make sure 00115 // you are using a different policy, i.e. 00116 // neigh_multi_map<> m( neigh_multi_map<>::loc_map_policy_type(-1) ); 00117 neigh_multi_map(const loc_map_policy_type& locHandlerPolicy ) 00118 : multi_map<int, std::pair<int, float>, TLocMap>(locHandlerPolicy), m_externalLocMap(true) 00119 {} 00120 00121 neigh_multi_map() 00122 : m_externalLocMap(false) 00123 {} 00124 00125 00126 // WhichKey is either 1 or 2 00127 // Expect to be sorted by key 00128 void create_map( const std::string& dataFileName, 00129 int maxEntriesPerVec = (std::numeric_limits<int>::max)() ) 00130 { 00131 IdentityPolicy ip; 00132 create_map(dataFileName, ip, maxEntriesPerVec); 00133 } 00134 00135 // WhichKey is either 1 or 2 00136 // Expect to be sorted by key 00137 00138 template <typename TranformIDPolicy> 00139 void create_map( const std::string& dataFileName, 00140 const TranformIDPolicy& getIDPolicy, 00141 int maxEntriesPerVec = (std::numeric_limits<int>::max)() ); 00142 00143 inline void create_map_from_vector( std::vector<std::pair<int, std::pair<int, float> > >& i2i ) 00144 { 00145 multi_map< int, std::pair<int, float>, TLocMap >::template create_map<1>(i2i.begin(), i2i.end()); 00146 } 00147 00148 private: 00149 00150 using multi_map< int, std::pair<int, float>, TLocMap >::m_data; 00151 using multi_map< int, std::pair<int, float>, TLocMap >::m_locations; 00152 00153 using multi_map< int, std::pair<int, float>, TLocMap >::m_locHandlerPolicy; 00154 00155 bool m_externalLocMap; 00156 00157 // ----------------------------------------------------------------------------- 00158 00159 #ifdef _WIN32 00160 // In order to seek with large files in windows use 00161 // this function to "extract" the file pointer from 00162 // a stream: 00163 // 00164 // FILE* fp = getFilePointer(fileStream.rdbuf()); 00165 // 00166 // then use the 64bits seek function: 00167 // 00168 // _fseeki64( fp, pos, SEEK_SET); 00169 // 00170 inline FILE* getFilePointer(std::filebuf* pFileBuf) 00171 { 00172 return reinterpret_cast<FILE*>( 00173 *reinterpret_cast<FILE**>( 00174 reinterpret_cast<char*>(pFileBuf)+76 ) ); 00175 } 00176 #endif 00177 00178 }; 00179 00180 // ----------------------------------------------------------------------------- 00181 // ----------------------------------------------------------------------------- 00182 00183 template <typename TLocMap> 00184 template <typename TranformIDPolicy> 00185 void neigh_multi_map<TLocMap>::create_map( const std::string& dataFileName, 00186 const TranformIDPolicy& getIDPolicy, 00187 int maxEntriesPerVec /*= (std::numeric_limits<int>::max)() */ ) 00188 { 00189 std::ifstream fileSource(dataFileName.c_str(), std::ios::binary); 00190 if ( !fileSource.is_open() ) 00191 throw std::runtime_error("Cannot open file <" + dataFileName + ">!"); 00192 00193 #ifdef _WIN32 00194 FILE* fp = getFilePointer(fileSource.rdbuf()); 00195 #endif 00196 00197 int numKeys; 00198 fileSource.read( reinterpret_cast<char*>( &numKeys ), sizeof(int) ); 00199 if ( fileSource.eof() ) 00200 throw std::runtime_error("Empty source on <" + dataFileName + ">!"); 00201 00203 // scanning 00204 boost::int64_t currPos = sizeof(int); 00205 int tmpID, numEntries; 00206 int numToRead; 00207 int totEntries = 0; 00208 00209 for (int i = 0;; ++i) 00210 { 00211 fileSource.read( reinterpret_cast<char*>(&tmpID), sizeof(int) ); 00212 if ( fileSource.eof() ) 00213 break; 00214 fileSource.read( reinterpret_cast<char*>( &numEntries ), sizeof(int) ); 00215 numToRead = (std::min)( numEntries, maxEntriesPerVec ); 00216 totEntries += numToRead; 00217 00218 currPos += sizeof(int) + // tmpID 00219 sizeof(int) + // numEntries 00220 numEntries * sizeof(std::pair<int, float>); 00221 00222 #ifdef _WIN32 00223 _fseeki64( fp, currPos, SEEK_SET); 00224 #else 00225 fileSource.seekg( currPos, std::ios::beg ); 00226 #endif 00227 } 00228 00229 // we've got the number! 00230 if ( totEntries == 0 ) 00231 throw std::runtime_error("Empty source on <" + dataFileName + ">!"); 00232 00233 // allocating 00234 this->m_data.resize(totEntries); 00235 00236 if ( !m_externalLocMap ) 00237 m_locHandlerPolicy.resize(this->m_locations, numKeys); 00238 //TLocHandler::reserve(this->m_locations, numKeys); 00239 //this->m_locations.resize(numKeys); 00240 00243 00244 // now loading 00245 fileSource.clear(); 00246 currPos = sizeof(int); 00247 00248 #ifdef _WIN32 00249 fileSource.seekg( static_cast<long>(currPos), std::ios::beg ); 00250 #else 00251 fileSource.seekg( currPos, std::ios::beg ); 00252 #endif 00253 int entryPos = 0; 00254 00255 int transformedID; 00256 00257 for (int i = 0;; ++i) 00258 { 00259 fileSource.read( reinterpret_cast<char*>(&tmpID), sizeof(int) ); 00260 if ( fileSource.eof() ) 00261 break; 00262 00263 fileSource.read( reinterpret_cast<char*>( &numEntries ), sizeof(int) ); 00264 numToRead = (std::min)( numEntries, maxEntriesPerVec ); 00265 00266 if ( numToRead == 0 ) 00267 continue; 00268 00269 if ( entryPos + numToRead > totEntries ) 00270 throw std::runtime_error("There were more entries than what was found during scan!"); 00271 00272 fileSource.read( reinterpret_cast<char*>( &(m_data[entryPos]) ), numToRead * sizeof(entry_type) ); 00273 00274 if ( getIDPolicy(tmpID, transformedID) ) 00275 m_locations[transformedID] = std::make_pair(entryPos, numToRead); 00276 00277 entryPos += numToRead; 00278 currPos += sizeof(int) + // tmpID 00279 sizeof(int) + // numEntries 00280 numEntries * sizeof(std::pair<int, float>); 00281 00282 #ifdef _WIN32 00283 _fseeki64( fp, currPos, SEEK_SET); 00284 #else 00285 fileSource.seekg( currPos, std::ios::beg ); 00286 #endif 00287 } 00288 } 00289 00290 // ----------------------------------------------------------------------------- 00291 00292 }} 00293 00294 #endif // __NEIGH_MULTI_MAP_CONTAINER_H 00295 00296 // -----------------------------------------------------------------------------