libmoost
|
00001 /* vim:set ts=3 sw=3 sts=3 et: */ 00028 #ifndef MOOST_UTILS_HISTOGRAM_HPP__ 00029 #define MOOST_UTILS_HISTOGRAM_HPP__ 00030 00095 00100 00102 00109 00123 #include <algorithm> 00124 #include <iostream> 00125 #include <sstream> 00126 #include <stdexcept> 00127 #include <string> 00128 #include <vector> 00129 #include <cmath> 00130 #include <map> 00131 #include <limits> 00132 00133 #include "foreach.hpp" 00134 00135 namespace moost { namespace utils { 00136 00137 template <typename FloatType> 00138 class histogram 00139 { 00140 public: 00141 typedef FloatType float_type; 00142 00143 private: 00144 struct data_info 00145 { 00146 std::vector<float_type> vec; 00147 std::string id; 00148 std::string sym; 00149 00150 data_info(const std::string& id, const std::string& sym) 00151 : id(id) 00152 , sym(sym) 00153 {} 00154 }; 00155 00156 struct range_info 00157 { 00158 float_type offset; 00159 float_type bin_width; 00160 float_type disp_factor; 00161 std::string unit_prefix; 00162 }; 00163 00164 struct stats 00165 { 00166 float_type min; 00167 float_type max; 00168 float_type mean; 00169 float_type dev; 00170 }; 00171 00172 public: 00173 histogram(const std::string& unit, size_t bins = 120, size_t height = 25) 00174 : m_count(0) 00175 , m_unit(unit) 00176 , m_bins(bins) 00177 , m_height(height) 00178 , m_lo_cutoff(0.01) 00179 , m_hi_cutoff(0.01) 00180 , m_prec(4) 00181 { 00182 if (m_bins < 1) 00183 { 00184 throw std::range_error("invalid number of bins"); 00185 } 00186 00187 if (m_height < 1) 00188 { 00189 throw std::range_error("invalid histogram height"); 00190 } 00191 } 00192 00193 void set_display_range(float_type min, float_type max) 00194 { 00195 if (min >= max || min < 0.0 || max > 1.0) 00196 { 00197 throw std::range_error("invalid display range"); 00198 } 00199 00200 m_lo_cutoff = min; 00201 m_hi_cutoff = 1.0 - max; 00202 } 00203 00204 void set_precision(size_t prec) 00205 { 00206 m_prec = prec; 00207 } 00208 00209 template <class InputIterator> 00210 void add(InputIterator first, InputIterator last, const std::string& id, const std::string& sym) 00211 { 00212 m_data.push_back(data_info(id, sym)); 00213 std::copy(first, last, std::back_inserter(m_data.back().vec)); 00214 m_count += m_data.back().vec.size(); 00215 } 00216 00217 float_type mean() const 00218 { 00219 stats st; 00220 get_stats(st, m_data); 00221 return st.mean; 00222 } 00223 00224 void draw(std::ostream& os, bool legend = true) const 00225 { 00226 if (m_count == 0) 00227 { 00228 // maybe we can do something more useful here? 00229 return; 00230 } 00231 00232 range_info ri; 00233 00234 optimum_range(ri); 00235 00236 std::map< std::string, std::vector<size_t> > binvec; 00237 std::vector<size_t> total(m_bins + 1); 00238 00239 foreach (const data_info& di, m_data) 00240 { 00241 std::vector<size_t>& bv = binvec[di.id]; 00242 00243 bv.resize(m_bins + 1); 00244 00245 foreach (float_type v, di.vec) 00246 { 00247 size_t bin = std::min(m_bins, static_cast<size_t>(std::max(float_type(0.0), (v - ri.offset)/ri.bin_width))); 00248 ++bv[bin]; 00249 ++total[bin]; 00250 } 00251 } 00252 00253 size_t max_total = *std::max_element(total.begin(), total.end()); 00254 00255 typedef std::vector< std::vector<std::string> > mx_t; 00256 mx_t mx(m_height); 00257 00258 foreach (std::vector<std::string>& v, mx) 00259 { 00260 v.resize(m_bins + 1); 00261 std::fill(v.begin(), v.end(), " "); 00262 } 00263 00264 for (size_t bin = 0; bin <= m_bins; ++bin) 00265 { 00266 size_t accu = 0; 00267 size_t top = 0; 00268 00269 foreach (const data_info& di, m_data) 00270 { 00271 accu += binvec.find(di.id)->second[bin]; 00272 00273 for (size_t new_top = (accu*m_height + max_total/2)/max_total; top < new_top; ++top) 00274 { 00275 mx[top][bin] = di.sym; 00276 } 00277 } 00278 } 00279 00280 std::ostream_iterator<std::string> osi(os); 00281 std::fill_n(osi, m_bins + 1, "-"); 00282 os << std::endl; 00283 00284 for (mx_t::reverse_iterator it = mx.rbegin(); it != mx.rend(); ++it) 00285 { 00286 std::copy(it->begin(), it->end(), osi); 00287 os << std::endl; 00288 } 00289 00290 std::fill_n(osi, m_bins + 1, "-"); 00291 os << std::endl; 00292 00293 std::string ticks, labels; 00294 ticks.resize(m_bins + 1, ' '); 00295 add_ticks(ticks, ri, 1 + m_bins/2, '\''); 00296 add_ticks(ticks, ri, 1 + m_bins/8, '|', &labels); 00297 00298 os << ticks << std::endl; 00299 os << labels << std::endl; 00300 00301 if (legend) 00302 { 00303 stats st; 00304 os << std::endl; 00305 00306 foreach (const data_info& di, m_data) 00307 { 00308 get_stats(st, di.vec); 00309 00310 std::ostringstream oss; 00311 oss.setf(std::ios_base::fixed, std::ios_base::floatfield); 00312 oss.precision(2); 00313 oss << 100.0*di.vec.size()/m_count << " %"; 00314 00315 os << " [" << di.sym << "] " << di.id << " (" << oss.str() << "): " << stats2str(st, ri) << std::endl; 00316 } 00317 00318 if (m_data.size() > 1) 00319 { 00320 get_stats(st, m_data); 00321 os << std::endl << " overall: " << stats2str(st, ri) << std::endl; 00322 } 00323 00324 os << std::endl; 00325 std::fill_n(osi, m_bins + 1, "-"); 00326 os << std::endl; 00327 } 00328 } 00329 00330 private: 00331 std::string stats2str(const stats& st, const range_info& ri) const 00332 { 00333 std::ostringstream oss; 00334 00335 oss.precision(m_prec); 00336 00337 oss << st.mean/ri.disp_factor << " ± " << st.dev/ri.disp_factor << " " << ri.unit_prefix << m_unit 00338 << " [" << st.min/ri.disp_factor << " .. " << st.max/ri.disp_factor<< " " << ri.unit_prefix << m_unit << "]"; 00339 00340 return oss.str(); 00341 } 00342 00343 static void get_stats(stats& s, const std::vector<data_info>& vdvec) 00344 { 00345 std::vector<const std::vector<float_type> *> vv; 00346 foreach (const data_info& di, vdvec) 00347 { 00348 vv.push_back(&di.vec); 00349 } 00350 get_stats(s, vv); 00351 } 00352 00353 static void get_stats(stats& s, const std::vector<float_type>& vec) 00354 { 00355 std::vector<const std::vector<float_type> *> vv; 00356 vv.push_back(&vec); 00357 get_stats(s, vv); 00358 } 00359 00360 static float_type invalid_value() 00361 { 00362 return std::numeric_limits<float_type>::has_quiet_NaN 00363 ? std::numeric_limits<float_type>::quiet_NaN() 00364 : float_type(0); 00365 } 00366 00367 static void get_stats(stats& s, const std::vector<const std::vector<float_type> *>& vv) 00368 { 00369 float_type min = invalid_value(); 00370 float_type max = invalid_value(); 00371 float_type sum = 0; 00372 float_type sum_of_squares = 0; 00373 size_t count = 0; 00374 00375 foreach (const std::vector<float_type> *vec, vv) 00376 { 00377 foreach (float_type v, *vec) 00378 { 00379 sum += v; 00380 sum_of_squares += v*v; 00381 00382 if (count == 0 || v < min) 00383 min = v; 00384 00385 if (count == 0 || v > max) 00386 max = v; 00387 00388 ++count; 00389 } 00390 } 00391 00392 if (count > 0) 00393 { 00394 s.mean = sum/count; 00395 00396 if (count > 1) 00397 { 00398 s.dev = std::sqrt((sum_of_squares - s.mean*sum)/(count - 1)); 00399 } 00400 else 00401 { 00402 s.dev = invalid_value(); 00403 } 00404 } 00405 else 00406 { 00407 s.mean = invalid_value(); 00408 s.dev = invalid_value(); 00409 } 00410 00411 s.min = min; 00412 s.max = max; 00413 } 00414 00415 void optimum_range(range_info& ri) const 00416 { 00417 const size_t num_lower = m_count*m_lo_cutoff + 1; 00418 const size_t num_upper = m_count*m_hi_cutoff + 1; 00419 00420 std::vector<float_type> small(num_lower*m_data.size()), large(num_upper*m_data.size()); 00421 00422 for (size_t i = 0; i < m_data.size(); ++i) 00423 { 00424 std::partial_sort_copy(m_data[i].vec.begin(), m_data[i].vec.end(), 00425 small.begin() + i*num_lower, small.begin() + (i + 1)*num_lower); 00426 std::partial_sort_copy(m_data[i].vec.begin(), m_data[i].vec.end(), 00427 large.begin() + i*num_upper, large.begin() + (i + 1)*num_upper, std::greater<float_type>()); 00428 } 00429 00430 std::partial_sort(small.begin(), small.begin() + num_lower, small.end()); 00431 std::partial_sort(large.begin(), large.begin() + num_upper, large.end(), std::greater<float_type>()); 00432 00433 float_type xmin = small[m_count*m_lo_cutoff]; 00434 float_type xmax = large[m_count*m_hi_cutoff]; 00435 00436 float_type delta = nicenum((xmax - xmin)/(1 + m_bins/8), true); 00437 00438 ri.offset = std::floor(xmin/delta)*delta; 00439 float_type upper = std::ceil(xmax/delta)*delta; 00440 ri.bin_width = nice_bin_width((upper - ri.offset)/m_bins); 00441 00442 float_type exp = std::log10(std::max(std::abs(xmin), std::abs(xmax))); 00443 00444 if (exp < 0.0) 00445 { 00446 const char *prefix[] = { "m", "u", "n", "p" }; 00447 int ix = std::min(3, static_cast<int>((0.0 - exp)/3.0)); 00448 ri.unit_prefix = prefix[ix]; 00449 ri.disp_factor = std::pow(1e-3, ix + 1); 00450 } 00451 else if (exp > 3.0) 00452 { 00453 const char *prefix[] = { "k", "M", "T", "P" }; 00454 int ix = std::min(3, static_cast<int>((exp - 3.0)/3.0)); 00455 ri.unit_prefix = prefix[ix]; 00456 ri.disp_factor = std::pow(1e3, ix + 1); 00457 } 00458 else 00459 { 00460 ri.unit_prefix.clear(); 00461 ri.disp_factor = 1.0; 00462 } 00463 } 00464 00465 void add_ticks(std::string& ticks, const range_info& ri, size_t desired_count, char tickmark, std::string *labels = 0) const 00466 { 00467 // Adapted from: Andrew S. Glassner "Graphics Gems", p.61 00468 00469 const size_t min_label_sep = 2; 00470 const float_type xmin = ri.offset/ri.disp_factor; 00471 const float_type xmax = (ri.offset + ri.bin_width*m_bins)/ri.disp_factor; 00472 const float_type range = nicenum(xmax - xmin, false, ri.bin_width/ri.disp_factor); 00473 const float_type delta = nicenum(range/desired_count, true, ri.bin_width/ri.disp_factor); 00474 const float_type gmin = std::floor(xmin/delta)*delta; 00475 const float_type gmax = std::ceil(xmax/delta)*delta; 00476 const int prec = std::max(-static_cast<int>(std::log10(delta) - 0.8), 0); 00477 std::string labelstr; 00478 00479 for (float_type gx = gmin; gx < gmax + 0.5*delta; gx += delta) 00480 { 00481 int x = (gx*ri.disp_factor - ri.offset)/ri.bin_width + 0.5; 00482 00483 if (x >= 0 && x < static_cast<int>(ticks.size())) 00484 { 00485 ticks[x] = tickmark; 00486 00487 std::ostringstream oss; 00488 oss.setf(std::ios_base::fixed, std::ios_base::floatfield); 00489 oss.precision(prec); 00490 oss << gx; 00491 00492 if (labelstr.size() == 0 || static_cast<size_t>(x) >= labelstr.size() + min_label_sep) 00493 { 00494 labelstr.append(static_cast<size_t>(x) - labelstr.size(), ' '); 00495 labelstr.append(oss.str()); 00496 } 00497 } 00498 } 00499 00500 labelstr.append(" "); 00501 labelstr.append(ri.unit_prefix); 00502 labelstr.append(m_unit); 00503 00504 if (labels) 00505 { 00506 labels->swap(labelstr); 00507 } 00508 } 00509 00510 static float_type nicenum(float_type x, bool round, float_type multiple_of = -1.0) 00511 { 00512 // Adapted from: Andrew S. Glassner "Graphics Gems", p.61 00513 00514 float_type exp = std::floor(std::log10(x)); 00515 float_type frac = x/std::pow(10.0, exp); 00516 float_type nice; 00517 bool check_multiple = multiple_of > 0.0; 00518 00519 if (check_multiple) 00520 { 00521 multiple_of /= std::pow(10.0, exp); 00522 } 00523 00524 const float_type (*nn)[2]; 00525 00526 if (round) 00527 { 00528 static const float_type cand[][2] = { 00529 { 1.5, 1.0}, 00530 { 3.0, 2.0}, 00531 { 7.0, 5.0}, 00532 {-1.0, 10.0}, 00533 }; 00534 00535 nn = &cand[0]; 00536 } 00537 else 00538 { 00539 static const float_type cand[][2] = { 00540 { 1.0, 1.0}, 00541 { 2.0, 2.0}, 00542 { 5.0, 5.0}, 00543 {-1.0, 10.0}, 00544 }; 00545 00546 nn = &cand[0]; 00547 } 00548 00549 for (;; ++nn) 00550 { 00551 float_type integral; 00552 00553 if ((*nn)[0] < 0.0 || (frac < (*nn)[0] && (!check_multiple || std::fabs(std::modf((*nn)[1]/multiple_of, &integral)) < 1e-6))) 00554 { 00555 nice = (*nn)[1]; 00556 break; 00557 } 00558 } 00559 00560 return nice*std::pow(10.0, exp); 00561 } 00562 00563 static float_type nice_bin_width(float_type x) 00564 { 00565 float_type exp = std::floor(std::log10(x)); 00566 float_type frac = x/std::pow(10.0, exp); 00567 float_type nice; 00568 00569 if (frac < 1.0) 00570 nice = 1.0; 00571 else if (frac < 2.0) 00572 nice = 2.0; 00573 else if (frac < 2.5) 00574 nice = 2.5; 00575 else if (frac < 5.0) 00576 nice = 5.0; 00577 else 00578 nice = 10.0; 00579 00580 return nice*std::pow(10.0, exp); 00581 } 00582 00583 std::vector<data_info> m_data; 00584 size_t m_count; 00585 const std::string m_unit; 00586 const size_t m_bins; 00587 const size_t m_height; 00588 float_type m_lo_cutoff; 00589 float_type m_hi_cutoff; 00590 size_t m_prec; 00591 }; 00592 00593 }} 00594 00595 #endif