libmoost
/home/mhx/git/github/libmoost/include/moost/xml/simple_parser.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef MOOST_XML_PARSER_H
00029 #define MOOST_XML_PARSER_H
00030 
00084 #include <map>
00085 #include <string>
00086 #include <fstream>
00087 #include <vector>
00088 #include <cctype> // for tolower
00089 #include <stdexcept>
00090 #include <algorithm>
00091 
00092 #include <boost/shared_ptr.hpp>
00093 #include  <boost/algorithm/string/case_conv.hpp>
00094 #include <boost/lexical_cast.hpp>
00095 
00096 namespace moost { namespace xml {
00097 
00101 class simple_parser
00102 {
00103 public:
00104 
00110    struct tree_node
00111    {
00112       typedef std::vector< boost::shared_ptr<tree_node> > tree_branch_t;
00113 
00114       std::string    header; //< the header tag of the node
00115       std::string    value;  //< the value between the header tags
00116       tree_branch_t  leaves; //< a list of subnodes (if any)
00117 
00122       inline void leaves2map(std::map<std::string, std::string>& destMap) const;
00123    };
00124 
00125    typedef boost::shared_ptr<tree_node> shared_node_t;
00126    typedef tree_node::tree_branch_t     tree_branch_t;
00127 
00128 public:
00129 
00135    inline void
00136       load(const std::string& fileName, bool makeLowercaseTags = false);
00137 
00141    inline const tree_branch_t&
00142       get_root() const { return m_root; }
00143 
00144 private:
00145 
00152    inline void recursive_parse_tag( const std::string& header,
00153                                     std::istream& xmlFile,
00154                                     boost::shared_ptr<tree_node>& pCurrNode,
00155                                     bool makeLowercaseTags = false);
00156 
00168    inline bool parse_token( std::istream& xmlFile,
00169                             std::string& token, bool& isComment);
00170 
00171 private:
00172 
00173    tree_branch_t m_root; //< the stored xml tree
00174 };
00175 
00176 // -----------------------------------------------------------------------------
00177 
00178 void simple_parser::load(const std::string& fileName, bool makeLowercaseTags)
00179 {
00180    std::ifstream xmlFile(fileName.c_str());
00181 
00182    if ( !xmlFile.is_open() )
00183       throw std::runtime_error("Cannot open file <" + fileName + ">!" );
00184 
00185    xmlFile.exceptions(std::ios::badbit);
00186 
00187    std::string tokenName;
00188    bool isComment;
00189    bool isInTag;
00190    for (;;)
00191    {
00192       isInTag = parse_token(xmlFile, tokenName, isComment);
00193       if ( xmlFile.eof() )
00194          break;
00195 
00196       if ( isInTag )
00197       {
00198          if ( isComment )
00199             continue;
00200 
00201          boost::shared_ptr<tree_node> pTmpMap(new tree_node());
00202          m_root.push_back(pTmpMap);
00203 
00204          recursive_parse_tag(tokenName, xmlFile, pTmpMap, makeLowercaseTags);
00205       }
00206    }
00207 }
00208 
00209 // -----------------------------------------------------------------------------
00210 
00211 bool simple_parser::parse_token( std::istream& xmlFile,
00212                                  std::string& token, bool& isComment)
00213 {
00214    token.clear();
00215    bool isInTag = false;
00216    isComment = false;
00217    char c = 0;
00218 
00219    for(;;)
00220    {
00221       c = xmlFile.get();
00222       if ( xmlFile.eof() )
00223          break;
00224 
00225       if ( isspace(c) && token.empty() )
00226          continue;
00227 
00228       if ( c == '<' && !isComment )
00229       {
00230          if ( !token.empty() ) // end of a in-token
00231          {
00232             xmlFile.putback(c);
00233             break;
00234          }
00235 
00236          isInTag = true;
00237          continue;
00238       }
00239       else if ( c == '>' )
00240       {
00241          if ( isComment )
00242          {
00243             if ( token.substr(token.size()-2,2) == "--" )
00244                break;
00245          }
00246          else
00247             break;
00248       }
00249 
00250       token.append(1, c);
00251       if ( token.size() == 3 && token == "!--" )
00252          isComment = true;
00253    }
00254 
00255    return isInTag;
00256 }
00257 
00258 // -----------------------------------------------------------------------------
00259 
00260 void simple_parser::recursive_parse_tag( const std::string& header,
00261                                          std::istream& xmlFile,
00262                                          boost::shared_ptr<tree_node>& pCurrNode,
00263                                          bool makeLowercaseTags)
00264 {
00265    pCurrNode->header = header;
00266 
00267    if ( makeLowercaseTags )
00268    {
00269       std::transform( pCurrNode->header.begin(), pCurrNode->header.end(),
00270                       pCurrNode->header.begin(), (int(*)(int)) std::tolower);
00271    }
00272 
00273    bool isInTag = false;
00274    bool isComment = false;
00275 
00276    std::string tokenName;
00277    std::string closeToken = "/" + header;
00278 
00279    for (;;)
00280    {
00281       isInTag = parse_token(xmlFile, tokenName, isComment);
00282       if ( xmlFile.eof() )
00283          throw std::runtime_error("EOF Before finding the right token!" );
00284 
00285       if ( isInTag )
00286       {
00287          if ( tokenName.empty() )
00288             continue;
00289 
00290          if ( isComment )
00291             continue;
00292 
00293          if ( tokenName == closeToken )
00294             return;
00295 
00296          if ( tokenName[0] == '/' )
00297             throw std::runtime_error("Cannot find closing token for <" + header + ">! Found <" + tokenName + "> instead!" );
00298 
00299          boost::shared_ptr<tree_node> pTmpMap(new tree_node());
00300          pCurrNode->leaves.push_back( pTmpMap );
00301          recursive_parse_tag(tokenName, xmlFile, pTmpMap, makeLowercaseTags);
00302       }
00303       else
00304          pCurrNode->value = tokenName;
00305    }
00306 
00307    throw std::runtime_error("simple_parser::recursive_parse_tag. Should never end here!");
00308 }
00309 
00310 // -----------------------------------------------------------------------------
00311 
00312 void simple_parser::tree_node::leaves2map( std::map<std::string, std::string>& currentLevel ) const
00313 {
00314    tree_branch_t::const_iterator leafIt;
00315    for ( leafIt = leaves.begin(); leafIt != leaves.end(); ++leafIt )
00316       currentLevel[(*leafIt)->header] = (*leafIt)->value;
00317 }
00318 
00319 // -----------------------------------------------------------------------------
00320 // -----------------------------------------------------------------------------
00321 
00322 template <typename T>
00323 struct get_opt_detail
00324 {
00325    static T lexical_cast(const std::string& val)
00326    {
00327       return boost::lexical_cast<T>(val);
00328    }
00329 };
00330 
00331 template <>
00332 struct get_opt_detail<bool>
00333 {
00334    static bool lexical_cast(const std::string& val)
00335    {
00336       if (val == "true")
00337          return true;
00338       if (val == "false")
00339          return false;
00340       return boost::lexical_cast<bool>(val);
00341    }
00342 };
00343 
00352 template <typename T>
00353 static void get_opt( T& value,
00354                      const std::string& key,
00355                      const std::map<std::string, std::string>& optMap,
00356                      bool throwIfNotFound = true )
00357 {
00358    std::string localKey = boost::to_lower_copy(key);
00359    std::map<std::string, std::string>::const_iterator f = optMap.find(localKey);
00360    if ( f == optMap.end() )
00361    {
00362       if ( throwIfNotFound )
00363          throw std::runtime_error("mandatory key <" + key + "> not found!");
00364    }
00365    else
00366    {
00367       try
00368       {
00369          value = get_opt_detail<T>::lexical_cast(f->second);
00370       }
00371       catch (const boost::bad_lexical_cast&)
00372       {
00373          throw std::runtime_error("bad cast for key <" + key + ">");
00374       }
00375    }
00376 }
00377 
00378 }}
00379 
00380 // -----------------------------------------------------------------------------
00381 
00382 #endif // MOOST_XML_PARSER_H