00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_CGDATA_LIBRARY_READER_HPP_
00016 #define CGATOOLS_CGDATA_LIBRARY_READER_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include <boost/noncopyable.hpp>
00022 #include <boost/filesystem.hpp>
00023 #include <boost/regex.hpp>
00024 #include <boost/array.hpp>
00025
00026 namespace cgatools { namespace cgdata {
00027
00029 template <class T>
00030 class GapTable
00031 {
00032 public:
00033 class GapsRecord
00034 {
00035 friend class GapTable;
00036 public:
00037 bool operator== (const T& gaps) const {
00038 return gaps_ == gaps;
00039 }
00040
00041 bool operator< (const GapsRecord& r) const {
00042 return gaps_ < r.gaps_;
00043 }
00044
00045 GapsRecord(const T &gaps, double frequency_ = 0)
00046 : gaps_(gaps), frequency_(frequency_) {}
00047
00048 T gaps_;
00049 double frequency_;
00050
00051 protected:
00052 GapsRecord()
00053 : frequency_(0) {}
00054
00055 };
00056 typedef std::vector<GapsRecord> Table;
00057
00058 GapTable(const std::string& fname, double outliersFraction = 0)
00059 :fileName_(fname),
00060 outliersFraction_(outliersFraction)
00061 {
00062 load(fileName_);
00063
00064 if (outliersFraction_!=0)
00065 cutOutliers();
00066 }
00067
00068 double getFrequency( const T& gaps, double outsideTheRange=0 )
00069 {
00070 GapsRecord record(gaps);
00071 typename Table::const_iterator it=std::lower_bound(records_.begin(),records_.end(),GapsRecord(gaps));
00072
00073 if (it!=records_.end())
00074 {
00075 if (it->gaps_==gaps)
00076 return it->frequency_;
00077 if (records_.front()<it->gaps_)
00078 return 0;
00079 }
00080 return outsideTheRange;
00081 }
00082
00083
00084 protected:
00085 void load(const std::string& fname);
00086
00087 void cutOutliers()
00088 {
00089 double sideCutFraction = outliersFraction_/2;
00090 size_t left=0,
00091 right=0;
00092
00093 double sideFractionLeft = 0;
00094 for (left = 0; left < records_.size(); ++left)
00095 {
00096 sideFractionLeft += records_[left].frequency_;
00097 if (sideFractionLeft > sideCutFraction) {
00098 break;
00099 }
00100 }
00101
00102 double sideFractionRight = 0;
00103 for (right = records_.size(); right > 0; )
00104 {
00105 --right;
00106 sideFractionRight += records_[right].frequency_;
00107 if (sideFractionRight > sideCutFraction) {
00108 ++right;
00109 break;
00110 }
00111 }
00112 records_.erase(records_.begin()+right,records_.end());
00113 records_.erase(records_.begin(),records_.begin()+left);
00114 }
00115
00116 Table records_;
00117
00118 std::string fileName_;
00119 double outliersFraction_;
00120 };
00121
00123 typedef boost::array<int8_t,3> SmallGapTuple;
00124 typedef int MateGapSize;
00125
00126 typedef GapTable<SmallGapTuple> WobbleGapTable;
00127 typedef GapTable<MateGapSize> MateGapTable;
00128
00129 } }
00130
00131 #endif // CGATOOLS_CGDATA_LIBRARY_READER_HPP_