00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_STAT_HPP_
00016 #define CGA_TOOLS_JUNCTION_STAT_HPP_
00017
00020
00021
00022 #include "cgatools/core.hpp"
00023 #include "cgatools/junctions/Junction.hpp"
00024 #include "cgatools/util/GenericHistogram.hpp"
00025
00026
00027 #include <string>
00028 #include <vector>
00029
00030 namespace cgatools { namespace junctions {
00031
00032 #define ADD_STAT_PARAMS() \
00033 ADD_STAT_PARAM(TOTAL_JUNCTIONS) \
00034 ADD_STAT_PARAM(LEFT_SIDES_EQUAL) \
00035 ADD_STAT_PARAM(RIGHT_SIDES_EQUAL) \
00036 ADD_STAT_PARAM(BOTH_SIDES_EQUAL) \
00037 ADD_STAT_PARAM(LEFT_UNKNOWN) \
00038 ADD_STAT_PARAM(RIGHT_UNKNOWN) \
00039 ADD_STAT_PARAM(INTERCHOMOSOMIAL) \
00040 ADD_STAT_PARAM(STRAND_BOTH_POSITIVE) \
00041 ADD_STAT_PARAM(STRAND_LEFT_NEGATIVE) \
00042 ADD_STAT_PARAM(STRAND_RIGHT_NEGATIVE) \
00043 ADD_STAT_PARAM(STRAND_BOTH_NEGATIVE)
00044
00045 class JunctionCounterFields {
00046 public:
00047 enum Fields {
00048 #undef ADD_STAT_PARAM
00049 #define ADD_STAT_PARAM(p) p,
00050 ADD_STAT_PARAMS()
00051 };
00052
00053 #undef ADD_STAT_PARAM
00054 #define ADD_STAT_PARAM(p) +1
00055 static const size_t NUMBER_OF_FIELDS = ADD_STAT_PARAMS();
00056
00057 typedef boost::array<std::string,NUMBER_OF_FIELDS> Names;
00058 static const Names names_;
00059 static const Names& getFieldNames() {
00060 return names_;
00061 }
00062 };
00063
00064 template <class Fields, class Type>
00065 class CounterSet
00066 {
00067 public:
00068 CounterSet() {
00069 values_.assign(Type(0));
00070 }
00071
00072 void print(std::ostream &out) const {
00073 const typename Fields::Names& fieldNames = Fields::getFieldNames();
00074 for (size_t i=0; i<values_.size(); ++i)
00075 out << fieldNames[i] << '\t' << values_[i] << std::endl;
00076 }
00077
00078 Type &operator[] (size_t i) {CGA_ASSERT_L(i,values_.size()); return values_[i];}
00079 boost::array<Type,Fields::NUMBER_OF_FIELDS> values_;
00080 };
00081
00082 class JuctionStatisticsCollector
00083 {
00084 public:
00085
00086 JuctionStatisticsCollector(uint32_t distanceTolerance)
00087 :distanceTolerance_(distanceTolerance),out_(NULL)
00088 {
00089 size_t buckets[7][3] =
00090 {
00091 {0,101,10},
00092 {200, 1001, 100},
00093 {2000,10001,1000},
00094 {20000, 100001, 10000},
00095 {2000000, 10000001, 1000000},
00096 {20000000,100000001, 10000000},
00097 {200000000,1000000001,100000000}
00098 };
00099
00100 scoreHistogram_.addRange(0,20,1);
00101 scoreHistogram_.addRange(20,200,10);
00102
00103 for (size_t i=0; i<6; ++i)
00104 oneSideNeighbourDist_.addRange(buckets[i][0],buckets[i][1],buckets[i][2]);
00105
00106 searchDistanceTolerance_ = distanceTolerance*5;
00107 twoSideNeighbourDist_.addRange(0,searchDistanceTolerance_+10,10);
00108
00109 for (size_t i=0; i<4; ++i)
00110 for (size_t j=0; j<clusterSizeInBases_.size(); ++j)
00111 clusterSizeInBases_[j].addRange(buckets[i][0],buckets[i][1],buckets[i][2]);
00112
00113 for (size_t j=0; j<clusterSizeInJunctions_.size(); ++j)
00114 clusterSizeInJunctions_[j].addRange(0,15,1);
00115 }
00116
00117 void findOneSideNeighbours(const Junctions& junctions, int firstSide);
00118 void findNeighbours(const Junctions& junctions, int sideToCompare=JUNCTION_BOTH_SIDES);
00119 void run(const Junctions& junctions, std::ostream &out);
00120 protected:
00121 uint32_t distanceTolerance_;
00122 uint32_t searchDistanceTolerance_;
00123 CounterSet<JunctionCounterFields,int64_t> counters_;
00124 util::GenericHistogram<size_t,size_t> scoreHistogram_;
00125 util::GenericHistogram<size_t,size_t> oneSideNeighbourDist_;
00126 util::GenericHistogram<size_t,size_t> twoSideNeighbourDist_;
00127
00128 boost::array<util::GenericHistogram<size_t,size_t>,3> clusterSizeInBases_;
00129 boost::array<util::GenericHistogram<size_t,size_t>,3> clusterSizeInJunctions_;
00130 std::ostream *out_;
00131 };
00132
00133 }}
00134
00135
00136 #endif //CGA_TOOLS_JUNCTION_STAT_HPP_
00137