00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_GENERIC_HISTOGRAM_HPP_
00016 #define CGA_TOOLS_GENERIC_HISTOGRAM_HPP_
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/Exception.hpp"
00022 #include <boost/cstdint.hpp>
00023 #include <boost/random/lagged_fibonacci.hpp>
00024 #include <boost/date_time/posix_time/posix_time.hpp>
00025
00026 namespace cgatools { namespace util {
00027
00028
00029
00030
00031 template <class Value=size_t, class Index=size_t>
00032 class GenericHistogram {
00033 public:
00034 typedef Value value_type;
00035 typedef Index index_type;
00036 typedef GenericHistogram<value_type,index_type> my_type;
00037
00038 typedef std::pair<Index, Value> Bucket;
00039 typedef std::vector<Bucket> Buckets;
00040
00041 typedef typename Buckets::iterator iterator;
00042 typedef typename Buckets::const_iterator const_iterator;
00043
00044 GenericHistogram() {
00045 initBuckets();
00046 }
00047
00048 GenericHistogram(const Index& min, const Index& max, const Index& bucketSize) {
00049 initBuckets();
00050 addRange(min, max, bucketSize);
00051 }
00052
00053
00054 void addRange(const Index& min, const Index& max, const Index& bucketSize) {
00055 CGA_ASSERT_MSG(bucketSize<bucketSize+bucketSize,"Invalid bucket size:"<<bucketSize);
00056 for(Index i=min; i<max; i+=bucketSize)
00057 addBucket(i);
00058 }
00059
00060 iterator begin() {return buckets_.begin();}
00061 const_iterator begin() const {return buckets_.begin();}
00062 iterator end() {return buckets_.end();}
00063 const_iterator end() const {return buckets_.end();}
00064 iterator last() {return buckets_.end()-1;}
00065 const_iterator last() const {return buckets_.end()-1;}
00066
00067 bool hasBucket(index_type index) {
00068 iterator b = findBucket(index);
00069 return b!=buckets_.end() && index==b->first;
00070 }
00071
00072 void addBucket(index_type index) {
00073 iterator b = findBucket(index);
00074 if (b==buckets_.end() || !(index==b->first)
00075
00076 || (buckets_.size()==1 && index==index_type())
00077 )
00078 buckets_.insert(b,std::make_pair(index,value_type()));
00079 }
00080
00081 value_type& operator[] (const index_type& index) {
00082 return findBucket(index)->second;
00083 }
00084
00085 const value_type& operator[] (const index_type& index) const {
00086 return findBucket(index)->second;
00087 }
00088
00089 iterator findBucket(Index index) {
00090 return std::lower_bound(buckets_.begin(), last(), index, compare_);
00091 }
00092
00093 const_iterator findBucket(Index index) const {
00094 return std::lower_bound(buckets_.begin(), last(), index, compare_);
00095 }
00096
00097 void merge(const my_type& src) {
00098 CGA_ASSERT_MSG(src.buckets_.size()==buckets_.size(),
00099 "Merge for different size histograms is not implemented. src:"
00100 <<src.buckets_.size() << " dst:"<<buckets_.size());
00101 for(int i=0, iEnd=src.buckets_.size(); i<iEnd; ++i) {
00102 const Bucket& srcObj = src.buckets_[i];
00103 Bucket& myObj = buckets_[i];
00104 CGA_ASSERT_MSG(srcObj.first == myObj.first,
00105 "Different bucket ranges. src:"<<srcObj.first<<" dst:"<<myObj.first);
00106 myObj.second+=srcObj.second;
00107 }
00108
00109 }
00110
00111
00112 value_type totalInBuckets(value_type initialValue) const {
00113 for (const_iterator it=buckets_.begin(), itEnd=buckets_.end(); it!=itEnd; ++it)
00114 initialValue+=it->second;
00115 return initialValue;
00116 }
00117
00118 void write( std::ostream& out, char separator = ',' ) const {
00119 out << ">bucket" << separator << "count" << separator << "cumulative count" << std::endl;
00120 if (buckets_.empty())
00121 return;
00122 Value cumulativeValue=begin()->second;
00123 for(typename GenericHistogram<Value,Index>::const_iterator it=begin(),
00124 itEnd=last(); it!=itEnd; ++it, cumulativeValue+=it->second)
00125 {
00126 out << it->first << separator << it->second << separator << cumulativeValue << std::endl;
00127 }
00128 out << "out of range" << separator << last()->second << separator << cumulativeValue <<std::endl;
00129 }
00130
00131 protected:
00132 struct CompareBuckets {
00133 bool operator() (const Bucket& b0, const Bucket& b1) {return b0.first < b1.first;}
00134 bool operator() (const index_type& i, const Bucket& b) {return i < b.first;}
00135 bool operator() (const Bucket& b, const index_type& i) {return b.first < i;}
00136 };
00137
00138 void initBuckets() {
00139 buckets_.push_back(std::make_pair(index_type(),value_type()));
00140 }
00141
00142 Buckets buckets_;
00143 CompareBuckets compare_;
00144 };
00145
00146 template <class Value, class Index>
00147 std::ostream& operator<<(std::ostream& out, const GenericHistogram<Value,Index>& r) {
00148 r.write(out);
00149 return out;
00150 }
00151
00152 class SimpleHistogram
00153 {
00154 friend std::ostream& operator<<(std::ostream& out, const SimpleHistogram& src);
00155 public:
00156 SimpleHistogram(size_t maxValue)
00157 : count_(maxValue+1, 0), sum_(0), number_(0)
00158 {}
00159
00160 void operator()(size_t val)
00161 {
00162 if (val+1 >= count_.size()) {
00163 ++( count_.back() );
00164 } else {
00165 ++( count_[val] );
00166 }
00167 ++number_;
00168 sum_ += val;
00169 }
00170
00171 void operator+=(const SimpleHistogram& other)
00172 {
00173 CGA_ASSERT(other.count_.size() == count_.size());
00174 for (size_t ii = 0; ii < count_.size(); ++ii) {
00175 count_[ii] += other.count_[ii];
00176 }
00177 sum_ += other.sum_;
00178 number_ += other.number_;
00179 }
00180
00181 void write(std::ostream& out) const;
00182 private:
00183 std::vector<size_t> count_;
00184 size_t sum_;
00185 size_t number_;
00186 };
00187
00188 } }
00189 #endif //CGA_TOOLS_GENERIC_HISTOGRAM_HPP_