00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_STAT_HPP_
00016 #define CGA_TOOLS_JUNCTION_STAT_HPP_
00017
00020
00021
00022 #include "cgatools/core.hpp"
00023 #include "cgatools/junctions/Junction.hpp"
00024 #include "cgatools/util/GenericHistogram.hpp"
00025
00026
00027 #include <string>
00028 #include <vector>
00029
00030 namespace cgatools { namespace junctions {
00031
00032 #define ADD_FNAME(names, N) names[N] = #N
00033
00034 class JunctionCounterFields {
00035 public:
00036 enum Fields {
00037 TOTAL_JUNCTIONS = 0,
00038 LEFT_SIDES_EQUAL,
00039 RIGHT_SIDES_EQUAL,
00040 BOTH_SIDES_EQUAL,
00041 LEFT_UNKNOWN,
00042 RIGHT_UNKNOWN,
00043 STRAND_BOTH_POSITIVE,
00044 STRAND_LEFT_NEGATIVE,
00045 STRAND_RIGHT_NEGATIVE,
00046 STRAND_BOTH_NEGATIVE,
00047
00048 NUMBER_OF_FIELDS
00049 };
00050 typedef boost::array<std::string,NUMBER_OF_FIELDS> Names;
00051
00052 static Names getFieldNames()
00053 {
00054 Names names;
00055 ADD_FNAME(names, TOTAL_JUNCTIONS);
00056 ADD_FNAME(names, LEFT_SIDES_EQUAL);
00057 ADD_FNAME(names, RIGHT_SIDES_EQUAL);
00058 ADD_FNAME(names, BOTH_SIDES_EQUAL);
00059 ADD_FNAME(names, LEFT_UNKNOWN);
00060 ADD_FNAME(names, RIGHT_UNKNOWN);
00061 ADD_FNAME(names, STRAND_BOTH_POSITIVE);
00062 ADD_FNAME(names, STRAND_LEFT_NEGATIVE);
00063 ADD_FNAME(names, STRAND_RIGHT_NEGATIVE);
00064 ADD_FNAME(names, STRAND_BOTH_NEGATIVE);
00065 return names;
00066 }
00067 };
00068
00069
00070 template <class Fields, class Type>
00071 class CounterSet
00072 {
00073 public:
00074 CounterSet() {
00075 values_.assign(Type(0));
00076 }
00077
00078 void print(std::ostream &out) const {
00079 typename Fields::Names fieldNames = Fields::getFieldNames();
00080 for (size_t i=0; i<values_.size(); ++i)
00081 out << fieldNames[i] << '\t' << values_[i] << std::endl;
00082 }
00083
00084 Type &operator[] (size_t i) {CGA_ASSERT_L(i,values_.size()); return values_[i];}
00085 boost::array<Type,Fields::NUMBER_OF_FIELDS> values_;
00086 };
00087
00088 class JuctionStatisticsCollector
00089 {
00090 public:
00091
00092 JuctionStatisticsCollector(uint32_t distanceTolerance)
00093 :distanceTolerance_(distanceTolerance),out_(NULL)
00094 {
00095 size_t buckets[7][3] =
00096 {
00097 {0,101,10},
00098 {200, 1001, 100},
00099 {2000,10001,1000},
00100 {20000, 100001, 10000},
00101 {2000000, 10000001, 1000000},
00102 {20000000,100000001, 10000000},
00103 {200000000,1000000001,100000000}
00104 };
00105
00106 scoreHistogram_.addRange(0,20,1);
00107 scoreHistogram_.addRange(20,200,10);
00108
00109 for (size_t i=0; i<6; ++i)
00110 oneSideNeighbourDist_.addRange(buckets[i][0],buckets[i][1],buckets[i][2]);
00111
00112 searchDistanceTolerance_ = distanceTolerance*5;
00113 twoSideNeighbourDist_.addRange(0,searchDistanceTolerance_+10,10);
00114
00115 for (size_t i=0; i<4; ++i)
00116 for (size_t j=0; j<clusterSizeInBases_.size(); ++j)
00117 clusterSizeInBases_[j].addRange(buckets[i][0],buckets[i][1],buckets[i][2]);
00118
00119 for (size_t j=0; j<clusterSizeInJunctions_.size(); ++j)
00120 clusterSizeInJunctions_[j].addRange(0,15,1);
00121 }
00122
00123 void findOneSideNeighbours(const Junctions& junctions, int firstSide);
00124 void findNeighbours(const Junctions& junctions, int sideToCompare=JUNCTION_BOTH_SIDES);
00125 void run(const Junctions& junctions, std::ostream &out);
00126 protected:
00127 uint32_t distanceTolerance_;
00128 uint32_t searchDistanceTolerance_;
00129 CounterSet<JunctionCounterFields,int64_t> counters_;
00130 util::GenericHistogram<size_t,size_t> scoreHistogram_;
00131 util::GenericHistogram<size_t,size_t> oneSideNeighbourDist_;
00132 util::GenericHistogram<size_t,size_t> twoSideNeighbourDist_;
00133
00134 boost::array<util::GenericHistogram<size_t,size_t>,3> clusterSizeInBases_;
00135 boost::array<util::GenericHistogram<size_t,size_t>,3> clusterSizeInJunctions_;
00136 std::ostream *out_;
00137 };
00138
00139 }}
00140
00141
00142 #endif //CGA_TOOLS_JUNCTION_STAT_HPP_
00143