00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_GENERIC_HISTOGRAM_HPP_
00016 #define CGA_TOOLS_GENERIC_HISTOGRAM_HPP_
00017 
00019 
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/Exception.hpp"
00022 #include <boost/cstdint.hpp>
00023 #include <boost/random/lagged_fibonacci.hpp>
00024 #include <boost/date_time/posix_time/posix_time.hpp>
00025 
00026 namespace cgatools { namespace util {
00027 
00028     
00029     
00030 
00031     template <class Value=size_t, class Index=size_t>
00032     class GenericHistogram {
00033     public:
00034         typedef Value value_type;
00035         typedef Index index_type;
00036         typedef GenericHistogram<value_type,index_type> my_type;
00037 
00038         typedef std::pair<Index, Value> Bucket;
00039         typedef std::vector<Bucket>     Buckets;
00040 
00041         typedef typename Buckets::iterator          iterator;
00042         typedef typename Buckets::const_iterator    const_iterator;
00043 
00044         GenericHistogram() {
00045             initBuckets();
00046         }
00047 
00048         GenericHistogram(const Index& min, const Index& max, const Index& bucketSize) {
00049             initBuckets();
00050             addRange(min, max, bucketSize);
00051         }
00052 
00053         
00054         void addRange(const Index& min, const Index& max, const Index& bucketSize) {
00055             CGA_ASSERT_MSG(bucketSize<bucketSize+bucketSize,"Invalid bucket size:"<<bucketSize);
00056             for(Index i=min; i<max; i+=bucketSize)
00057                 addBucket(i);
00058         }
00059 
00060         iterator begin() {return buckets_.begin();}
00061         const_iterator begin() const {return buckets_.begin();}
00062         iterator end() {return buckets_.end();}
00063         const_iterator end() const {return buckets_.end();}
00064         iterator last() {return buckets_.end()-1;}
00065         const_iterator last() const {return buckets_.end()-1;}
00066 
00067         bool hasBucket(index_type index) {
00068             iterator b = findBucket(index);
00069             return b!=buckets_.end() && index==b->first;
00070         }
00071 
00072         void addBucket(index_type index) {
00073             iterator b = findBucket(index);
00074             if (b==buckets_.end() || !(index==b->first)
00075                 
00076                 || (buckets_.size()==1 && index==index_type()) 
00077                 )
00078                 buckets_.insert(b,std::make_pair(index,value_type()));
00079         }
00080 
00081         value_type& operator[] (const index_type& index) {
00082             return findBucket(index)->second;
00083         }
00084 
00085         const value_type& operator[] (const index_type& index) const {
00086             return findBucket(index)->second;
00087         }
00088 
00089         iterator findBucket(Index index) {
00090             return std::lower_bound(buckets_.begin(), last(), index, compare_);
00091         }
00092 
00093         const_iterator findBucket(Index index) const {
00094             return std::lower_bound(buckets_.begin(), last(), index, compare_);
00095         }
00096 
00097         void merge(const my_type& src) {
00098             CGA_ASSERT_MSG(src.buckets_.size()==buckets_.size(),
00099                 "Merge for different size histograms is not implemented. src:"
00100                 <<src.buckets_.size() << " dst:"<<buckets_.size());
00101             for(int i=0, iEnd=src.buckets_.size(); i<iEnd; ++i) {
00102                 const Bucket& srcObj = src.buckets_[i];
00103                 Bucket& myObj = buckets_[i];
00104                 CGA_ASSERT_MSG(srcObj.first == myObj.first,
00105                     "Different bucket ranges. src:"<<srcObj.first<<" dst:"<<myObj.first);
00106                 myObj.second+=srcObj.second;
00107             }
00108 
00109         }
00110 
00111         
00112         value_type totalInBuckets(value_type initialValue) const {
00113             for (const_iterator it=buckets_.begin(), itEnd=buckets_.end(); it!=itEnd; ++it)
00114                 initialValue+=it->second;
00115             return initialValue;
00116         }
00117 
00118         void write( std::ostream& out, char separator = ',' ) const {
00119             out << ">bucket" << separator << "count" << separator << "cumulative count" << std::endl;
00120             if (buckets_.empty())
00121                 return;
00122             Value cumulativeValue=begin()->second;
00123             for(typename GenericHistogram<Value,Index>::const_iterator it=begin(), 
00124                 itEnd=last(); it!=itEnd; ++it, cumulativeValue+=it->second) 
00125             {
00126                 out << it->first << separator << it->second << separator << cumulativeValue << std::endl;
00127             }
00128             out << "out of range" << separator << last()->second << separator << cumulativeValue <<std::endl;
00129         }
00130 
00131     protected:
00132         struct CompareBuckets {
00133             bool operator() (const Bucket& b0, const Bucket& b1) {return b0.first < b1.first;}
00134             bool operator() (const index_type& i, const Bucket& b) {return i < b.first;}
00135             bool operator() (const Bucket& b, const index_type& i) {return b.first < i;}
00136         };
00137 
00138         void initBuckets() {
00139             buckets_.push_back(std::make_pair(index_type(),value_type()));
00140         }
00141 
00142         Buckets buckets_;
00143         CompareBuckets compare_;
00144     };
00145 
00146     template <class Value, class Index>
00147     std::ostream& operator<<(std::ostream& out, const GenericHistogram<Value,Index>& r) {
00148         r.write(out);
00149         return out;
00150     }
00151 
00152     class SimpleHistogram
00153     {
00154         friend std::ostream& operator<<(std::ostream& out, const SimpleHistogram& src);
00155     public:
00156         SimpleHistogram(size_t maxValue)
00157             : count_(maxValue+1, 0), sum_(0), number_(0)
00158         {}
00159 
00160         void operator()(size_t val)
00161         {
00162             if (val+1 >= count_.size()) {
00163                 ++( count_.back() );
00164             } else {
00165                 ++( count_[val] );
00166             }
00167             ++number_;
00168             sum_ += val;
00169         }
00170 
00171         void operator+=(const SimpleHistogram& other)
00172         {
00173             CGA_ASSERT(other.count_.size() == count_.size());
00174             for (size_t ii = 0; ii < count_.size(); ++ii) {
00175                 count_[ii] += other.count_[ii];
00176             }
00177             sum_ += other.sum_;
00178             number_ += other.number_;
00179         }
00180 
00181         void write(std::ostream& out) const;
00182     private:
00183         std::vector<size_t> count_;
00184         size_t sum_;
00185         size_t number_;
00186     };
00187 
00188 } }
00189 #endif //CGA_TOOLS_GENERIC_HISTOGRAM_HPP_