libmoost
/home/mhx/git/github/libmoost/include/moost/utils/histogram.hpp
Go to the documentation of this file.
00001 /* vim:set ts=3 sw=3 sts=3 et: */
00028 #ifndef MOOST_UTILS_HISTOGRAM_HPP__
00029 #define MOOST_UTILS_HISTOGRAM_HPP__
00030 
00095 
00100 
00102 
00109 
00123 #include <algorithm>
00124 #include <iostream>
00125 #include <sstream>
00126 #include <stdexcept>
00127 #include <string>
00128 #include <vector>
00129 #include <cmath>
00130 #include <map>
00131 #include <limits>
00132 
00133 #include "foreach.hpp"
00134 
00135 namespace moost { namespace utils {
00136 
00137 template <typename FloatType>
00138 class histogram
00139 {
00140 public:
00141    typedef FloatType float_type;
00142 
00143 private:
00144    struct data_info
00145    {
00146       std::vector<float_type> vec;
00147       std::string id;
00148       std::string sym;
00149 
00150       data_info(const std::string& id, const std::string& sym)
00151          : id(id)
00152          , sym(sym)
00153       {}
00154    };
00155 
00156    struct range_info
00157    {
00158       float_type offset;
00159       float_type bin_width;
00160       float_type disp_factor;
00161       std::string unit_prefix;
00162    };
00163 
00164    struct stats
00165    {
00166       float_type min;
00167       float_type max;
00168       float_type mean;
00169       float_type dev;
00170    };
00171 
00172 public:
00173    histogram(const std::string& unit, size_t bins = 120, size_t height = 25)
00174       : m_count(0)
00175       , m_unit(unit)
00176       , m_bins(bins)
00177       , m_height(height)
00178       , m_lo_cutoff(0.01)
00179       , m_hi_cutoff(0.01)
00180       , m_prec(4)
00181    {
00182       if (m_bins < 1)
00183       {
00184          throw std::range_error("invalid number of bins");
00185       }
00186 
00187       if (m_height < 1)
00188       {
00189          throw std::range_error("invalid histogram height");
00190       }
00191    }
00192 
00193    void set_display_range(float_type min, float_type max)
00194    {
00195       if (min >= max || min < 0.0 || max > 1.0)
00196       {
00197          throw std::range_error("invalid display range");
00198       }
00199 
00200       m_lo_cutoff = min;
00201       m_hi_cutoff = 1.0 - max;
00202    }
00203 
00204    void set_precision(size_t prec)
00205    {
00206       m_prec = prec;
00207    }
00208 
00209    template <class InputIterator>
00210    void add(InputIterator first, InputIterator last, const std::string& id, const std::string& sym)
00211    {
00212       m_data.push_back(data_info(id, sym));
00213       std::copy(first, last, std::back_inserter(m_data.back().vec));
00214       m_count += m_data.back().vec.size();
00215    }
00216 
00217    float_type mean() const
00218    {
00219       stats st;
00220       get_stats(st, m_data);
00221       return st.mean;
00222    }
00223 
00224    void draw(std::ostream& os, bool legend = true) const
00225    {
00226       if (m_count == 0)
00227       {
00228          // maybe we can do something more useful here?
00229          return;
00230       }
00231 
00232       range_info ri;
00233 
00234       optimum_range(ri);
00235 
00236       std::map< std::string, std::vector<size_t> > binvec;
00237       std::vector<size_t> total(m_bins + 1);
00238 
00239       foreach (const data_info& di, m_data)
00240       {
00241          std::vector<size_t>& bv = binvec[di.id];
00242 
00243          bv.resize(m_bins + 1);
00244 
00245          foreach (float_type v, di.vec)
00246          {
00247             size_t bin = std::min(m_bins, static_cast<size_t>(std::max(float_type(0.0), (v - ri.offset)/ri.bin_width)));
00248             ++bv[bin];
00249             ++total[bin];
00250          }
00251       }
00252 
00253       size_t max_total = *std::max_element(total.begin(), total.end());
00254 
00255       typedef std::vector< std::vector<std::string> > mx_t;
00256       mx_t mx(m_height);
00257 
00258       foreach (std::vector<std::string>& v, mx)
00259       {
00260          v.resize(m_bins + 1);
00261          std::fill(v.begin(), v.end(), " ");
00262       }
00263 
00264       for (size_t bin = 0; bin <= m_bins; ++bin)
00265       {
00266          size_t accu = 0;
00267          size_t top = 0;
00268 
00269          foreach (const data_info& di, m_data)
00270          {
00271             accu += binvec.find(di.id)->second[bin];
00272 
00273             for (size_t new_top = (accu*m_height + max_total/2)/max_total; top < new_top; ++top)
00274             {
00275                mx[top][bin] = di.sym;
00276             }
00277          }
00278       }
00279 
00280       std::ostream_iterator<std::string> osi(os);
00281       std::fill_n(osi, m_bins + 1, "-");
00282       os << std::endl;
00283 
00284       for (mx_t::reverse_iterator it = mx.rbegin(); it != mx.rend(); ++it)
00285       {
00286          std::copy(it->begin(), it->end(), osi);
00287          os << std::endl;
00288       }
00289 
00290       std::fill_n(osi, m_bins + 1, "-");
00291       os << std::endl;
00292 
00293       std::string ticks, labels;
00294       ticks.resize(m_bins + 1, ' ');
00295       add_ticks(ticks, ri, 1 + m_bins/2, '\'');
00296       add_ticks(ticks, ri, 1 + m_bins/8, '|', &labels);
00297 
00298       os << ticks << std::endl;
00299       os << labels << std::endl;
00300 
00301       if (legend)
00302       {
00303          stats st;
00304          os << std::endl;
00305 
00306          foreach (const data_info& di, m_data)
00307          {
00308             get_stats(st, di.vec);
00309 
00310             std::ostringstream oss;
00311             oss.setf(std::ios_base::fixed, std::ios_base::floatfield);
00312             oss.precision(2);
00313             oss << 100.0*di.vec.size()/m_count << " %";
00314 
00315             os << " [" << di.sym << "] " << di.id << " (" << oss.str() << "): " << stats2str(st, ri) << std::endl;
00316          }
00317 
00318          if (m_data.size() > 1)
00319          {
00320             get_stats(st, m_data);
00321             os << std::endl << " overall: " << stats2str(st, ri) << std::endl;
00322          }
00323 
00324          os << std::endl;
00325          std::fill_n(osi, m_bins + 1, "-");
00326          os << std::endl;
00327       }
00328    }
00329 
00330 private:
00331    std::string stats2str(const stats& st, const range_info& ri) const
00332    {
00333       std::ostringstream oss;
00334 
00335       oss.precision(m_prec);
00336 
00337       oss << st.mean/ri.disp_factor << " ± " << st.dev/ri.disp_factor << " " << ri.unit_prefix << m_unit
00338           << " [" << st.min/ri.disp_factor << " .. " <<  st.max/ri.disp_factor<< " " << ri.unit_prefix << m_unit << "]";
00339 
00340       return oss.str();
00341    }
00342 
00343    static void get_stats(stats& s, const std::vector<data_info>& vdvec)
00344    {
00345       std::vector<const std::vector<float_type> *> vv;
00346       foreach (const data_info& di, vdvec)
00347       {
00348          vv.push_back(&di.vec);
00349       }
00350       get_stats(s, vv);
00351    }
00352 
00353    static void get_stats(stats& s, const std::vector<float_type>& vec)
00354    {
00355       std::vector<const std::vector<float_type> *> vv;
00356       vv.push_back(&vec);
00357       get_stats(s, vv);
00358    }
00359 
00360    static float_type invalid_value()
00361    {
00362       return std::numeric_limits<float_type>::has_quiet_NaN
00363              ? std::numeric_limits<float_type>::quiet_NaN()
00364              : float_type(0);
00365    }
00366 
00367    static void get_stats(stats& s, const std::vector<const std::vector<float_type> *>& vv)
00368    {
00369       float_type min = invalid_value();
00370       float_type max = invalid_value();
00371       float_type sum = 0;
00372       float_type sum_of_squares = 0;
00373       size_t count = 0;
00374 
00375       foreach (const std::vector<float_type> *vec, vv)
00376       {
00377          foreach (float_type v, *vec)
00378          {
00379             sum += v;
00380             sum_of_squares += v*v;
00381 
00382             if (count == 0 || v < min)
00383                min = v;
00384 
00385             if (count == 0 || v > max)
00386                max = v;
00387 
00388             ++count;
00389          }
00390       }
00391 
00392       if (count > 0)
00393       {
00394          s.mean = sum/count;
00395 
00396          if (count > 1)
00397          {
00398             s.dev = std::sqrt((sum_of_squares - s.mean*sum)/(count - 1));
00399          }
00400          else
00401          {
00402             s.dev = invalid_value();
00403          }
00404       }
00405       else
00406       {
00407          s.mean = invalid_value();
00408          s.dev = invalid_value();
00409       }
00410 
00411       s.min = min;
00412       s.max = max;
00413    }
00414 
00415    void optimum_range(range_info& ri) const
00416    {
00417       const size_t num_lower = m_count*m_lo_cutoff + 1;
00418       const size_t num_upper = m_count*m_hi_cutoff + 1;
00419 
00420       std::vector<float_type> small(num_lower*m_data.size()), large(num_upper*m_data.size());
00421 
00422       for (size_t i = 0; i < m_data.size(); ++i)
00423       {
00424          std::partial_sort_copy(m_data[i].vec.begin(), m_data[i].vec.end(),
00425                                 small.begin() + i*num_lower, small.begin() + (i + 1)*num_lower);
00426          std::partial_sort_copy(m_data[i].vec.begin(), m_data[i].vec.end(),
00427                                 large.begin() + i*num_upper, large.begin() + (i + 1)*num_upper, std::greater<float_type>());
00428       }
00429 
00430       std::partial_sort(small.begin(), small.begin() + num_lower, small.end());
00431       std::partial_sort(large.begin(), large.begin() + num_upper, large.end(), std::greater<float_type>());
00432 
00433       float_type xmin = small[m_count*m_lo_cutoff];
00434       float_type xmax = large[m_count*m_hi_cutoff];
00435 
00436       float_type delta = nicenum((xmax - xmin)/(1 + m_bins/8), true);
00437 
00438       ri.offset = std::floor(xmin/delta)*delta;
00439       float_type upper = std::ceil(xmax/delta)*delta;
00440       ri.bin_width = nice_bin_width((upper - ri.offset)/m_bins);
00441 
00442       float_type exp = std::log10(std::max(std::abs(xmin), std::abs(xmax)));
00443 
00444       if (exp < 0.0)
00445       {
00446          const char *prefix[] = { "m", "u", "n", "p" };
00447          int ix = std::min(3, static_cast<int>((0.0 - exp)/3.0));
00448          ri.unit_prefix = prefix[ix];
00449          ri.disp_factor = std::pow(1e-3, ix + 1);
00450       }
00451       else if (exp > 3.0)
00452       {
00453          const char *prefix[] = { "k", "M", "T", "P" };
00454          int ix = std::min(3, static_cast<int>((exp - 3.0)/3.0));
00455          ri.unit_prefix = prefix[ix];
00456          ri.disp_factor = std::pow(1e3, ix + 1);
00457       }
00458       else
00459       {
00460          ri.unit_prefix.clear();
00461          ri.disp_factor = 1.0;
00462       }
00463    }
00464 
00465    void add_ticks(std::string& ticks, const range_info& ri, size_t desired_count, char tickmark, std::string *labels = 0) const
00466    {
00467       // Adapted from: Andrew S. Glassner "Graphics Gems", p.61
00468 
00469       const size_t min_label_sep = 2;
00470       const float_type xmin = ri.offset/ri.disp_factor;
00471       const float_type xmax = (ri.offset + ri.bin_width*m_bins)/ri.disp_factor;
00472       const float_type range = nicenum(xmax - xmin, false, ri.bin_width/ri.disp_factor);
00473       const float_type delta = nicenum(range/desired_count, true, ri.bin_width/ri.disp_factor);
00474       const float_type gmin = std::floor(xmin/delta)*delta;
00475       const float_type gmax = std::ceil(xmax/delta)*delta;
00476       const int prec = std::max(-static_cast<int>(std::log10(delta) - 0.8), 0);
00477       std::string labelstr;
00478 
00479       for (float_type gx = gmin; gx < gmax + 0.5*delta; gx += delta)
00480       {
00481          int x = (gx*ri.disp_factor - ri.offset)/ri.bin_width + 0.5;
00482 
00483          if (x >= 0 && x < static_cast<int>(ticks.size()))
00484          {
00485             ticks[x] = tickmark;
00486 
00487             std::ostringstream oss;
00488             oss.setf(std::ios_base::fixed, std::ios_base::floatfield);
00489             oss.precision(prec);
00490             oss << gx;
00491 
00492             if (labelstr.size() == 0 || static_cast<size_t>(x) >= labelstr.size() + min_label_sep)
00493             {
00494                labelstr.append(static_cast<size_t>(x) - labelstr.size(), ' ');
00495                labelstr.append(oss.str());
00496             }
00497          }
00498       }
00499 
00500       labelstr.append(" ");
00501       labelstr.append(ri.unit_prefix);
00502       labelstr.append(m_unit);
00503 
00504       if (labels)
00505       {
00506          labels->swap(labelstr);
00507       }
00508    }
00509 
00510    static float_type nicenum(float_type x, bool round, float_type multiple_of = -1.0)
00511    {
00512       // Adapted from: Andrew S. Glassner "Graphics Gems", p.61
00513 
00514       float_type exp = std::floor(std::log10(x));
00515       float_type frac = x/std::pow(10.0, exp);
00516       float_type nice;
00517       bool check_multiple = multiple_of > 0.0;
00518 
00519       if (check_multiple)
00520       {
00521          multiple_of /= std::pow(10.0, exp);
00522       }
00523 
00524       const float_type (*nn)[2];
00525 
00526       if (round)
00527       {
00528          static const float_type cand[][2] = {
00529             { 1.5,  1.0},
00530             { 3.0,  2.0},
00531             { 7.0,  5.0},
00532             {-1.0, 10.0},
00533          };
00534 
00535          nn = &cand[0];
00536       }
00537       else
00538       {
00539          static const float_type cand[][2] = {
00540             { 1.0,  1.0},
00541             { 2.0,  2.0},
00542             { 5.0,  5.0},
00543             {-1.0, 10.0},
00544          };
00545 
00546          nn = &cand[0];
00547       }
00548 
00549       for (;; ++nn)
00550       {
00551          float_type integral;
00552 
00553          if ((*nn)[0] < 0.0 || (frac < (*nn)[0] && (!check_multiple || std::fabs(std::modf((*nn)[1]/multiple_of, &integral)) < 1e-6)))
00554          {
00555             nice = (*nn)[1];
00556             break;
00557          }
00558       }
00559 
00560       return nice*std::pow(10.0, exp);
00561    }
00562 
00563    static float_type nice_bin_width(float_type x)
00564    {
00565       float_type exp = std::floor(std::log10(x));
00566       float_type frac = x/std::pow(10.0, exp);
00567       float_type nice;
00568 
00569       if (frac < 1.0)
00570          nice = 1.0;
00571       else if (frac < 2.0)
00572          nice = 2.0;
00573       else if (frac < 2.5)
00574          nice = 2.5;
00575       else if (frac < 5.0)
00576          nice = 5.0;
00577       else
00578          nice = 10.0;
00579 
00580       return nice*std::pow(10.0, exp);
00581    }
00582 
00583    std::vector<data_info> m_data;
00584    size_t m_count;
00585    const std::string m_unit;
00586    const size_t m_bins;
00587    const size_t m_height;
00588    float_type m_lo_cutoff;
00589    float_type m_hi_cutoff;
00590    size_t m_prec;
00591 };
00592 
00593 }}
00594 
00595 #endif