libmoost
/home/mhx/git/github/libmoost/include/moost/io/file_backed_data_source.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00091 #ifndef MOOST_IO_FILE_BACKED_DATA_SOURCE_HPP__
00092 #define MOOST_IO_FILE_BACKED_DATA_SOURCE_HPP__
00093 
#include <cstddef>
#include <ctime>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/shared_ptr.hpp>

#include "../safe_shared_ptr.hpp"
#include "file_watcher.hpp"
#include "../terminal_format.hpp"
#include "../xml/simple_parser.hpp"
#include "../logging/class_logger.hpp"
00108 
00109 namespace moost { namespace io {
00110 
00119 template <typename T>
00120 class data_policy_base
00121 {
00122 public:
00123    typedef T data_type;
00124    virtual ~data_policy_base() { }
00125    virtual std::string getName() const = 0;
00126    virtual boost::shared_ptr<T> loadFromFile(const std::string& filepath) const = 0;
00127    virtual size_t size(boost::shared_ptr<T> pData) const = 0;
00128 };
00129 
00130 
/**
 * Minimal interface for anything that can be asked to (re)load itself.
 *
 * file_backed_data_source implements it and also holds collections of other
 * loadable objects whose load() it invokes around its own reload.
 */
class loadable
{
public:
   virtual ~loadable() {}

   /// Perform the load.
   virtual void load() = 0;
};
00141 
00142 
/**
 * Settings controlling a file_backed_data_source.
 */
struct file_backed_data_source_config
{
   /// Path of the data file.
   std::string filepath;

   /// A reload arriving sooner than this many seconds after the last one is rejected.
   int minSecsSinceLastLoad;

   /// When true, a failure of the initial load raises an exception.
   bool throwOnFirstLoadFail;

   /// A reload whose size is below this proportion of the current size is rejected.
   double minProportionOfLastLoad;

   /**
    * Construct with some plausible defaults (30 seconds, throw on first
    * failure, 0.5). The filepath is left empty and must still be set.
    */
   file_backed_data_source_config()
      : filepath()
      , minSecsSinceLastLoad(30)
      , throwOnFirstLoadFail(true)
      , minProportionOfLastLoad(0.5)
   {
   }

   /**
    * Construct with every setting given explicitly.
    *
    * @param path           value for filepath
    * @param minSecs        value for minSecsSinceLastLoad
    * @param throwOnFirst   value for throwOnFirstLoadFail
    * @param minProportion  value for minProportionOfLastLoad
    */
   file_backed_data_source_config(const std::string& path, int minSecs, bool throwOnFirst,
      double minProportion)
      : filepath(path)
      , minSecsSinceLastLoad(minSecs)
      , throwOnFirstLoadFail(throwOnFirst)
      , minProportionOfLastLoad(minProportion)
   {
   }
};
00167 
00168 class file_backed_data_source_config_factory
00169 {
00170 public:
00171    file_backed_data_source_config createFromXml(const std::string& xmlFilepath)
00172    {
00173       tag_map_t tagMap;
00174       xmlToMap(tagMap, xmlFilepath);
00175       file_backed_data_source_config conf;
00176       setAndRemove(conf.filepath, "Filepath", tagMap);
00177       setAndRemove(conf.minSecsSinceLastLoad, "MinSecsSinceLastLoad", tagMap);
00178       setAndRemove(conf.throwOnFirstLoadFail, "ThrowOnFirstLoadFail", tagMap);
00179       setAndRemove(conf.minProportionOfLastLoad, "MinProportionOfLastLoad", tagMap);
00180       if (!tagMap.empty())
00181          throw std::runtime_error("Unexpected xml tag: " + tagMap.begin()->first);
00182       return conf;
00183    }
00184 
00185 private:
00186    typedef std::map<std::string, std::string> tag_map_t;
00187 
00188    void xmlToMap(std::map<std::string, std::string>& tagMap, const std::string& xmlFilepath)
00189    {
00190       moost::xml::simple_parser xmlParser;
00191       xmlParser.load(xmlFilepath);
00192       const moost::xml::simple_parser::tree_branch_t& root = xmlParser.get_root();
00193       if (!root.front())
00194          throw std::runtime_error("No xml root tag");
00195       std::string rootTag(root.front()->header);
00196       if (rootTag != "FileBackedDataSource")
00197          throw std::runtime_error("Unexpected xml root tag: " + rootTag);
00198       root.front()->leaves2map(tagMap);
00199    }
00200 
00201    template <typename T>
00202    void setAndRemove(T& value, const std::string& name, tag_map_t& tagMap)
00203    {
00204       tag_map_t::iterator it = tagMap.find(name);
00205       if (it == tagMap.end())
00206          return;
00207       value = boost::lexical_cast<T>(it->second);
00208       tagMap.erase(it);
00209    }
00210 
00211    void setAndRemove(bool& value, const std::string& name, tag_map_t& tagMap)
00212    {
00213       tag_map_t::iterator it = tagMap.find(name);
00214       if (it == tagMap.end())
00215          return;
00216       std::string lowerVal = boost::to_lower_copy(it->second);
00217       if (lowerVal == "true")
00218          value = true;
00219       else if (lowerVal == "false")
00220          value = false;
00221       else
00222          throw std::runtime_error("Unexpected value for boolean tag: " + lowerVal);
00223       tagMap.erase(it);
00224    }
00225 };
00226 
00227 template <typename DataPolicy>
00228 class file_backed_data_source : public loadable
00229 {
00230 public:
00231 
00232    typedef typename DataPolicy::data_type data_type;
00233 
00234    file_backed_data_source(const DataPolicy& dataPolicy) : m_dataPolicy(dataPolicy),
00235       m_firstLoad(true), m_lastLoadTime(-1) { }
00236 
00237    void configure(file_backed_data_source_config conf)
00238    {
00239       m_conf = conf;
00240    }
00241 
00242    const file_backed_data_source_config& getConfig() const
00243    {
00244       return m_conf;
00245    }
00246 
00247    size_t size() const
00248    {
00249       boost::shared_ptr<data_type> pData = m_pData.get_shared();
00250       return m_dataPolicy.size(pData);
00251    }
00252 
00253    int getLastLoadTime() const
00254    {
00255       return m_lastLoadTime;
00256    }
00257 
00258    void registerLoadable(boost::shared_ptr<loadable> pOther, bool loadOtherFirst = true)
00259    {
00260       if (loadOtherFirst)
00261          m_preRegistered.push_back(pOther);
00262       else
00263          m_postRegistered.push_back(pOther);
00264    }
00265 
00266    boost::shared_ptr<data_type> get_shared_ptr()
00267    {
00268       return m_pData.get_shared();
00269    }
00270 
00271    void load()
00272    {
00273       if (m_firstLoad)
00274       {
00275          m_fileWatcher.start();
00276          m_fileWatcher.insert(m_conf.filepath, boost::bind(&file_backed_data_source<DataPolicy>::reload, this, _1, _2));
00277       }
00278       reload(file_watcher::CHANGED, m_conf.filepath);
00279       if (m_firstLoad)
00280       {
00281          m_firstLoad = false;
00282       }
00283    }
00284 
00285 private:
00286    DataPolicy m_dataPolicy;
00287 
00288    file_backed_data_source_config m_conf;
00289    bool m_firstLoad;
00290    int m_lastLoadTime;
00291 
00292    file_watcher m_fileWatcher;
00293 
00294    moost::safe_shared_ptr<data_type> m_pData;
00295 
00296    typedef std::vector<boost::shared_ptr<loadable> > registered_t;
00297    registered_t m_preRegistered;
00298    registered_t m_postRegistered;
00299 
00300    void reload(file_watcher::file_action action, const std::string& filepath)
00301    {
00302       if (action != file_watcher::CHANGED)
00303          return;
00304 
00305       // abandon reload if it's too soon after previous one
00306       int timeNow = static_cast<int>(time(NULL));
00307       if (!m_firstLoad && timeNow < m_lastLoadTime + m_conf.minSecsSinceLastLoad)
00308          return;
00309 
00310       // force pre-registered sources to reload first
00311       for (registered_t::iterator it = m_preRegistered.begin(); it != m_preRegistered.end(); ++it)
00312          (*it)->load();
00313 
00314       MLOG_CLASS_INFO("Updating " << m_dataPolicy.getName() << "..");
00315 
00316       boost::shared_ptr<data_type> pData;
00317       loadWithErrorHandling(pData, filepath);
00318 
00319       // abandon new dataset if it looks too small
00320       size_t newSize = m_dataPolicy.size(pData);
00321       if (!m_firstLoad && newSize < size() * m_conf.minProportionOfLastLoad)
00322          return;
00323 
00324       MLOG_CLASS_INFO(moost::terminal_format::getOkay() << ": Loaded " << newSize);
00325 
00326       m_pData = pData;
00327       m_lastLoadTime = static_cast<int>(time(NULL));
00328 
00329       // force post-registered sources to reload afterwards
00330       for (registered_t::iterator it = m_postRegistered.begin(); it != m_postRegistered.end(); ++it)
00331          (*it)->load();
00332 
00333    }
00334 
00335    void loadWithErrorHandling(boost::shared_ptr<data_type>& pData, const std::string& filepath)
00336    {
00337       try
00338       {
00339          pData = m_dataPolicy.loadFromFile(filepath);
00340       }
00341       catch (std::runtime_error& ex)
00342       {
00343          MLOG_CLASS_WARN(ex.what() << " loading " << m_dataPolicy.getName()
00344             << " from " << filepath);
00345          if (m_firstLoad && m_conf.throwOnFirstLoadFail)
00346          {
00347             throw;
00348          }
00349       }
00350       catch (...)
00351       {
00352          MLOG_CLASS_WARN("exception loading " << m_dataPolicy.getName()
00353             << " from " << filepath);
00354          if (m_firstLoad && m_conf.throwOnFirstLoadFail)
00355          {
00356             throw;
00357          }
00358       }
00359    }
00360 };
00361 
00362 class file_backed_data_source_factory
00363 {
00364 public:
00365 
00366    template <typename DataPolicy>
00367    boost::shared_ptr<file_backed_data_source<DataPolicy> > createFromConfig(const DataPolicy& dataPolicy, file_backed_data_source_config conf)
00368    {
00369       boost::shared_ptr<file_backed_data_source<DataPolicy> > pSource(new file_backed_data_source<DataPolicy>(dataPolicy));
00370       pSource->configure(conf);
00371 
00372       return pSource;
00373    }
00374 
00375    template <typename DataPolicy>
00376    boost::shared_ptr<file_backed_data_source<DataPolicy> > createFromXml(const DataPolicy& dataPolicy, const std::string& xmlFilepath)
00377    {
00378       file_backed_data_source_config_factory config_factory;
00379       file_backed_data_source_config conf = config_factory.createFromXml(xmlFilepath);
00380       return createFromConfig(dataPolicy, conf);
00381    }
00382 };
00383 
00384 }}
00385 
00386 #endif