00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_JUNCTION_STAT_HPP_
00016 #define CGA_TOOLS_JUNCTION_STAT_HPP_
00017 
00020 
00021 
00022 #include "cgatools/core.hpp"
00023 #include "cgatools/junctions/Junction.hpp"
00024 #include "cgatools/util/GenericHistogram.hpp"
00025 
00026 
00027 #include <string>
00028 #include <vector>
00029 
00030 namespace cgatools { namespace junctions {
00031 
00032     #define ADD_FNAME(names, N) names[N] = #N
00033 
00034     class JunctionCounterFields {
00035     public:
00036         enum Fields {
00037             TOTAL_JUNCTIONS = 0,
00038             LEFT_SIDES_EQUAL,
00039             RIGHT_SIDES_EQUAL,
00040             BOTH_SIDES_EQUAL,
00041             LEFT_UNKNOWN,
00042             RIGHT_UNKNOWN,
00043             STRAND_BOTH_POSITIVE,
00044             STRAND_LEFT_NEGATIVE,
00045             STRAND_RIGHT_NEGATIVE,
00046             STRAND_BOTH_NEGATIVE,
00047 
00048             NUMBER_OF_FIELDS
00049         };
00050         typedef boost::array<std::string,NUMBER_OF_FIELDS> Names;
00051 
00052         static Names getFieldNames()
00053         {
00054             Names names;
00055             ADD_FNAME(names, TOTAL_JUNCTIONS);
00056             ADD_FNAME(names, LEFT_SIDES_EQUAL);
00057             ADD_FNAME(names, RIGHT_SIDES_EQUAL);
00058             ADD_FNAME(names, BOTH_SIDES_EQUAL);
00059             ADD_FNAME(names, LEFT_UNKNOWN);
00060             ADD_FNAME(names, RIGHT_UNKNOWN);
00061             ADD_FNAME(names, STRAND_BOTH_POSITIVE);
00062             ADD_FNAME(names, STRAND_LEFT_NEGATIVE);
00063             ADD_FNAME(names, STRAND_RIGHT_NEGATIVE);
00064             ADD_FNAME(names, STRAND_BOTH_NEGATIVE);
00065             return names;
00066         }
00067     };
00068 
00069 
00070     template <class Fields, class Type>
00071     class CounterSet
00072     {
00073     public:
00074         CounterSet() {
00075             values_.assign(Type(0));
00076         }
00077 
00078         void print(std::ostream &out) const {
00079             typename Fields::Names fieldNames = Fields::getFieldNames();
00080             for (size_t i=0; i<values_.size(); ++i)
00081                 out << fieldNames[i] << '\t' << values_[i] << std::endl;
00082         }
00083 
00084         Type &operator[] (size_t i) {CGA_ASSERT_L(i,values_.size()); return values_[i];}
00085         boost::array<Type,Fields::NUMBER_OF_FIELDS> values_;
00086     };
00087 
00088     class JuctionStatisticsCollector 
00089     {
00090     public:
00091 
00092         JuctionStatisticsCollector(uint32_t distanceTolerance)
00093             :distanceTolerance_(distanceTolerance),out_(NULL)
00094         {
00095             size_t buckets[7][3] = 
00096             {
00097                 {0,101,10},
00098                 {200, 1001, 100},
00099                 {2000,10001,1000},
00100                 {20000,   100001,     10000},
00101                 {2000000, 10000001,   1000000},
00102                 {20000000,100000001,  10000000},
00103                 {200000000,1000000001,100000000}
00104             };
00105             
00106             scoreHistogram_.addRange(0,20,1);
00107             scoreHistogram_.addRange(20,200,10);
00108 
00109             for (size_t i=0; i<6; ++i)
00110                 oneSideNeighbourDist_.addRange(buckets[i][0],buckets[i][1],buckets[i][2]);
00111 
00112             searchDistanceTolerance_ = distanceTolerance*5;
00113             twoSideNeighbourDist_.addRange(0,searchDistanceTolerance_+10,10);
00114 
00115             for (size_t i=0; i<4; ++i)
00116                 for (size_t j=0; j<clusterSizeInBases_.size(); ++j)
00117                     clusterSizeInBases_[j].addRange(buckets[i][0],buckets[i][1],buckets[i][2]);
00118 
00119             for (size_t j=0; j<clusterSizeInJunctions_.size(); ++j)
00120                 clusterSizeInJunctions_[j].addRange(0,15,1);
00121         }
00122 
00123         void findOneSideNeighbours(const Junctions& junctions, int firstSide);
00124         void findNeighbours(const Junctions& junctions, int sideToCompare=JUNCTION_BOTH_SIDES);
00125         void run(const Junctions& junctions, std::ostream &out);
00126     protected:
00127         uint32_t    distanceTolerance_;
00128         uint32_t    searchDistanceTolerance_;
00129         CounterSet<JunctionCounterFields,int64_t> counters_;
00130         util::GenericHistogram<size_t,size_t> scoreHistogram_;
00131         util::GenericHistogram<size_t,size_t> oneSideNeighbourDist_;
00132         util::GenericHistogram<size_t,size_t> twoSideNeighbourDist_;
00133 
00134         boost::array<util::GenericHistogram<size_t,size_t>,3> clusterSizeInBases_;
00135         boost::array<util::GenericHistogram<size_t,size_t>,3> clusterSizeInJunctions_;
00136         std::ostream *out_;
00137     };
00138 
00139 }}
00140 
00141 
00142 #endif //CGA_TOOLS_JUNCTION_STAT_HPP_
00143