libmoost
|
00001 /* vim:set ts=3 sw=3 sts=3 et: */ 00028 #ifndef MOOST_CONTAINER_MEMORY_MAPPED_DATASET_DATASET_HPP__ 00029 #define MOOST_CONTAINER_MEMORY_MAPPED_DATASET_DATASET_HPP__ 00030 00031 #include <string> 00032 #include <stdexcept> 00033 #include <fstream> 00034 #include <sstream> 00035 #include <algorithm> 00036 #include <cstring> 00037 00038 #include <boost/archive/text_iarchive.hpp> 00039 #include <boost/archive/text_oarchive.hpp> 00040 #include <boost/serialization/string.hpp> 00041 #include <boost/serialization/map.hpp> 00042 #include <boost/iostreams/device/mapped_file.hpp> 00043 #include <boost/type_traits/is_pod.hpp> 00044 #include <boost/lexical_cast.hpp> 00045 #include <boost/noncopyable.hpp> 00046 #include <boost/cstdint.hpp> 00047 00048 #include "config.hpp" 00049 00050 namespace moost { namespace container { 00051 00063 class memory_mapped_dataset : public boost::noncopyable 00064 { 00065 private: 00066 static const boost::uint32_t MMD_MAGIC = 0x7473614C; 00067 static const boost::uint32_t MMD_VERSION = 1; 00068 00069 static const size_t MAP_PAGE_SIZE = 4096; 00070 00079 struct mmd_header // must be POD 00080 { 00081 boost::uint32_t mmd_magic; 00082 boost::uint32_t mmd_version; 00083 boost::uint64_t index_offset; 00084 boost::uint64_t index_length; 00085 }; 00086 00087 public: 00101 class section_info 00102 { 00103 private: 00104 typedef std::map<std::string, std::string> attribute_map_type; 00105 00106 public: 00107 section_info() 00108 : m_offset(0) 00109 , m_alignment(0) 00110 { 00111 } 00112 00113 section_info(const std::string& type, size_t alignment) 00114 : m_type(type) 00115 , m_offset(0) 00116 , m_alignment(alignment) 00117 { 00118 } 00119 00120 boost::uint64_t offset() const 00121 { 00122 return m_offset; 00123 } 00124 00125 size_t alignment() const 00126 { 00127 return m_alignment; 00128 } 00129 00130 void set_offset(boost::uint64_t offset) 00131 { 00132 m_offset = offset; 00133 } 00134 00135 template <typename T> 00136 void setattr(const std::string& name, const T& value) 00137 { 00138 m_attributes[name] = boost::lexical_cast<std::string>(value); 00139 } 00140 00141 template <typename T> 00142 const T getattr(const std::string& name) const 00143 { 00144 attribute_map_type::const_iterator it = m_attributes.find(name); 00145 00146 if (it == m_attributes.end()) 00147 { 00148 throw std::runtime_error("no such attribute " + name); 00149 } 00150 00151 return boost::lexical_cast<T>(it->second); 00152 } 00153 00154 const std::string& type() const 00155 { 00156 return m_type; 00157 } 00158 00159 private: 00160 friend class boost::serialization::access; 00161 00162 template <class Archive> 00163 void serialize(Archive & ar, const unsigned int /* version */) 00164 { 00165 ar & m_type & m_offset & m_attributes; 00166 } 00167 00168 std::string m_type; 00169 boost::uint64_t m_offset; 00170 const size_t m_alignment; 00171 attribute_map_type m_attributes; 00172 }; 00173 00174 typedef std::map<std::string, section_info> section_map_type; 00175 00187 class writer 00188 { 00189 public: 00201 writer(const std::string& map_file_name, const std::string& dataset_name, boost::uint32_t format_version) 00202 : m_ofs(map_file_name.c_str(), std::ios::binary | std::ios::trunc) 00203 , m_dataset_name(dataset_name) 00204 , m_format_version(format_version) 00205 { 00206 if (!m_ofs) 00207 { 00208 throw std::runtime_error("failed to open file " + map_file_name); 00209 } 00210 00211 if (dataset_name.empty()) 00212 { 00213 throw std::runtime_error("empty dataset name"); 00214 } 00215 00216 m_header.mmd_magic = MMD_MAGIC; 00217 m_header.mmd_version = MMD_VERSION; 00218 m_header.index_offset = 0; 00219 m_header.index_length = 0; 00220 00221 write(m_header); 00222 } 00223 00224 ~writer() 00225 { 00226 try 00227 { 00228 close(); 00229 } 00230 catch (...) 00231 { 00232 } 00233 } 00234 00241 void close() 00242 { 00243 if (m_ofs.is_open()) 00244 { 00245 m_header.index_offset = m_ofs.tellp(); 00246 boost::archive::text_oarchive oa(m_ofs); 00247 oa << m_dataset_name << m_format_version << m_section_map; 00248 m_header.index_length = static_cast<boost::uint64_t>(m_ofs.tellp()) - m_header.index_offset; 00249 m_ofs.seekp(0); 00250 write(m_header); 00251 m_ofs.close(); 00252 } 00253 } 00254 00255 void create_section(const std::string& name, const std::string& type, size_t alignment) 00256 { 00257 if (name.empty()) 00258 { 00259 throw std::runtime_error("invalid empty section name"); 00260 } 00261 00262 if (type.empty()) 00263 { 00264 throw std::runtime_error("invalid empty section type"); 00265 } 00266 00267 if (alignment == 0 || (alignment & (alignment - 1)) != 0) 00268 { 00269 throw std::runtime_error("alignment must be a power of 2"); 00270 } 00271 00272 std::pair<section_map_type::iterator, bool> rv = m_section_map.insert(std::make_pair(name, section_info(type, alignment))); 00273 00274 if (!rv.second) 00275 { 00276 throw std::runtime_error("attempt to create duplicate section " + name); 00277 } 00278 } 00279 00280 void uncreate_section(const std::string& name) 00281 { 00282 if (name.empty()) 00283 { 00284 throw std::runtime_error("invalid empty section name"); 00285 } 00286 00287 section_map_type::iterator it = m_section_map.find(name); 00288 00289 if (it == m_section_map.end()) 00290 { 00291 throw std::runtime_error("attempt to uncreate non-existent section " + name); 00292 } 00293 00294 if (it->second.offset()) 00295 { 00296 throw std::runtime_error("cannot uncreate section " + name + " that has already been written to"); 00297 } 00298 00299 m_section_map.erase(it); 00300 } 00301 00302 void write(const std::string& section, const char *data, size_t size) 00303 { 00304 set_active_section(section); 00305 m_ofs.write(data, size); 00306 } 00307 00308 void commit_section(const std::string& section) 00309 { 00310 set_active_section(section); 00311 } 00312 00313 template <typename T> 00314 void setattr(const std::string& section, const std::string& attr, const T& value) 00315 { 00316 find(section).setattr(attr, value); 00317 } 00318 00319 private: 00320 section_info& find(const std::string& section) 00321 { 00322 section_map_type::iterator it = m_section_map.find(section); 00323 00324 if (it == m_section_map.end()) 00325 { 00326 throw std::runtime_error("no such section " + section); 00327 } 00328 00329 return it->second; 00330 } 00331 00332 template <typename T> 00333 void write(const T& data) 00334 { 00335 m_ofs.write(reinterpret_cast<const char *>(&data), sizeof(data)); 00336 } 00337 00338 void set_active_section(const std::string& section) 00339 { 00340 // Yeah, this is slightly inefficient, but it's only being used at 00341 // dataset creation time. 00342 if (section != m_active_section) 00343 { 00344 section_info& sec = find(section); 00345 00346 // We have to ensure that only one section is written at a time. 00347 if (sec.offset() > 0) 00348 { 00349 throw std::runtime_error("interleaved write access to section " + section); 00350 } 00351 00352 align_stream(sec.alignment()); 00353 sec.set_offset(m_ofs.tellp()); 00354 m_active_section = section; 00355 } 00356 } 00357 00358 void align_stream(size_t alignment) 00359 { 00360 while (m_ofs.tellp() % alignment) 00361 { 00362 m_ofs.put(0); 00363 } 00364 } 00365 00366 section_map_type m_section_map; 00367 std::string m_active_section; 00368 std::ofstream m_ofs; 00369 const std::string m_dataset_name; 00370 const boost::uint32_t m_format_version; 00371 mmd_header m_header; 00372 }; 00373 00385 memory_mapped_dataset(const std::string& map_file_name, 00386 const std::string& dataset_name, 00387 boost::uint32_t format_version) 00388 : m_file(map_file_name) 00389 , m_format(dataset_name) 00390 { 00391 try 00392 { 00393 m_map.open(m_file, boost::iostreams::mapped_file::readonly); 00394 } 00395 catch (const BOOST_IOSTREAMS_FAILURE& fail) 00396 { 00397 // otherwise it's a real pain to figure out which file it's actually complaining about 00398 throw BOOST_IOSTREAMS_FAILURE(m_file + ": " + fail.what()); 00399 } 00400 00401 const mmd_header *hdr = data<mmd_header>(); 00402 00403 if (hdr->mmd_magic != MMD_MAGIC) 00404 { 00405 throw std::runtime_error(m_file + ": invalid magic"); 00406 } 00407 00408 if (hdr->mmd_version != MMD_VERSION) 00409 { 00410 throw std::runtime_error(m_file + ": unsupported version"); 00411 } 00412 00413 if (hdr->index_offset == 0 || hdr->index_length == 0) 00414 { 00415 throw std::runtime_error(m_file + ": corrupted file"); 00416 } 00417 00418 std::string indexstr(data<char>(hdr->index_offset, hdr->index_length), hdr->index_length); 00419 std::istringstream iss(indexstr); 00420 00421 std::string dset_name; 00422 boost::uint32_t fmt_version; 00423 00424 boost::archive::text_iarchive ia(iss); 00425 ia >> dset_name >> fmt_version >> m_section_map; 00426 00427 if (dset_name != dataset_name) 00428 { 00429 throw std::runtime_error(m_file + ": unexpected format name: " + dset_name + " (expected " + dataset_name + ")"); 00430 } 00431 00432 if (fmt_version != format_version) 00433 { 00434 std::ostringstream oss; 00435 oss << m_file << ": unsupported format version: " << fmt_version << " (expected " << format_version << ")"; 00436 throw std::runtime_error(oss.str()); 00437 } 00438 } 00439 00440 std::string description() const 00441 { 00442 return m_format + " (" + m_file + ")"; 00443 } 00444 00455 const section_info& find(const std::string& section, const std::string& type) const 00456 { 00457 section_map_type::const_iterator it = m_section_map.find(section); 00458 00459 if (it == m_section_map.end()) 00460 { 00461 throw std::runtime_error(m_file + ": no such section " + section); 00462 } 00463 00464 if (it->second.offset() == 0) 00465 { 00466 throw std::runtime_error(m_file + ": corrupt section " + section); 00467 } 00468 00469 if (it->second.type() != type) 00470 { 00471 throw std::runtime_error(m_file + ": invalid section type " + it->second.type() + " (expected " + type + ")"); 00472 } 00473 00474 return it->second; 00475 } 00476 00488 template <typename T> 00489 const T *data(size_t offset = 0, size_t count = 1) const 00490 { 00491 BOOST_STATIC_ASSERT_MSG(boost::is_pod<T>::value, "data<>() called on non-POD type"); 00492 00493 if (offset + count*sizeof(T) > m_map.size()) 00494 { 00495 throw std::runtime_error(m_file + ": potential attempt to access data beyond end of mapping"); 00496 } 00497 00498 return reinterpret_cast<const T *>(m_map.const_data() + offset); 00499 } 00500 00504 const std::string& filename() const 00505 { 00506 return m_file; 00507 } 00508 00518 static void warm_cache(const void *beg, const void *end) 00519 { 00520 const char *b = reinterpret_cast<const char *>(beg); 00521 const char *e = reinterpret_cast<const char *>(end); 00522 char buf[MAP_PAGE_SIZE]; 00523 size_t page_off = static_cast<size_t>(b - static_cast<const char *>(0))%sizeof(buf); 00524 00525 if (page_off) 00526 { 00527 size_t len = std::min(sizeof(buf) - page_off, static_cast<size_t>(e - b)); 00528 std::memcpy(buf, beg, len); 00529 b += len; 00530 } 00531 00532 while (b < e) 00533 { 00534 size_t len = std::min(sizeof(buf), static_cast<size_t>(e - b)); 00535 std::memcpy(buf, b, len); 00536 b += len; 00537 } 00538 } 00539 00540 private: 00541 const std::string m_file; 00542 const std::string m_format; 00543 boost::iostreams::mapped_file m_map; 00544 section_map_type m_section_map; 00545 }; 00546 00547 }} 00548 00549 #endif