00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_COMPARE_HPP_
00016 #define CGA_TOOLS_JUNCTION_COMPARE_HPP_
00017
00020
00021
00022
00023 #include "cgatools/core.hpp"
00024 #include "cgatools/junctions/Junction.hpp"
00025 #include "cgatools/util/GenericHistogram.hpp"
00026
00027
00028 #include <string>
00029 #include <vector>
00030 #include <map>
00031
00032 namespace cgatools { namespace reference {
00033 class CrrFile;
00034 }}
00035
00036 namespace cgatools { namespace junctions {
00037
00040 class JunctionRef
00041 {
00042 public:
00043 typedef Junctions::const_iterator It;
00044 JunctionRef() :it_(nullValue_.begin()) {}
00045 JunctionRef(It it) :it_(it) {}
00046
00047 const Junction& operator* () const {return *it_;}
00048 bool operator< (const JunctionRef& other) const
00049 {
00050 if (sourceId_ != other.sourceId_)
00051 return sourceId_ < other.sourceId_;
00052 else
00053 return it_->id_ < other.it_->id_;
00054 }
00055 bool operator==(const JunctionRef& other) const
00056 {
00057 return sourceId_ == other.sourceId_ &&
00058 it_ == other.it_;
00059 }
00060
00061 It it_;
00062 std::vector<JunctionRef> matchedJunctionIts_;
00063
00065 uint32_t sourceId_;
00066
00067 bool isNull() const {return it_==nullValue_.begin() && matchedJunctionIts_.empty();}
00069 std::string getId() const {return it_->id_ + "-" + boost::lexical_cast<std::string>(sourceId_);}
00070
00071 protected:
00072 static Junctions nullValue_;
00073
00074 friend std::ostream& operator<<(std::ostream &ostr, const JunctionRef::It& j);
00075 friend std::ostream& operator<<(std::ostream &ostr, const JunctionRef& j);
00076 };
00077
00078
00079
00080 class CompareBySide : public CompareJunctionsBySide {
00081 public:
00082 CompareBySide(int side=JUNCTION_LEFT_SIDE)
00083 : CompareJunctionsBySide(side)
00084 {}
00085 using CompareJunctionsBySide::operator();
00086
00087 bool operator() (JunctionRef::It j1,JunctionRef::It j2) const {
00088 return CompareJunctionsBySide::operator() (*j1,*j2);
00089 }
00090
00091 bool operator() (const JunctionRef &j1,const JunctionRef &j2) const {
00092 return CompareJunctionsBySide::operator() (*j1,*j2);
00093 }
00094 };
00095
00096 class JunctionRefs : public std::vector<JunctionRef>
00097 {
00098 public:
00099 JunctionRefs() : std::vector<JunctionRef>() {}
00100
00101 JunctionRefs(const Junctions& junctions, uint32_t id = 0)
00102 : std::vector<JunctionRef>()
00103 {
00104 addJunctions(junctions, id);
00105 }
00106
00107 void addJunctions(const Junctions& junctions, uint32_t id) {
00108 reserve(size()+junctions.size());
00109 for (Junctions::const_iterator it=junctions.begin(),endIt=junctions.end(); it!=endIt;++it) {
00110 push_back(it);
00111 back().sourceId_ = id;
00112 }
00113 }
00114 };
00115
00116 class FullGenomeJunctionComparator
00117 {
00118 public:
00119 typedef std::vector<Junctions> JunctionsArray;
00120 typedef std::vector<JunctionRefs> JunctionRefArray;
00121
00122
00123 FullGenomeJunctionComparator(
00124 uint32_t distanceTolerance,
00125 const std::vector<size_t> &scoreThresholds,
00126 const reference::CrrFile& reference,
00127 const std::string &outPrefix
00128 )
00129 : distanceTolerance_(distanceTolerance),
00130 scoreThresholds_(scoreThresholds),
00131 outPrefix_(outPrefix),
00132 reference_(reference)
00133 {}
00134
00135 static const char SEPARATOR = '\t';
00136
00137
00138 void compare(const JunctionFiles& junctionFiles);
00139
00141 void printList(
00142 const std::string& fileName,
00143 const JunctionRefs& list,
00144 const util::DelimitedFile::Metadata& sourceMetadata,
00145 const std::vector<std::string> &extraHeaderColumns
00146 ) const;
00147
00151 void printMultiList(const std::string& fileName,
00152 const JunctionRefs& list,
00153 const util::DelimitedFile::Metadata& sourceMetadata,
00154 size_t multiListSize = 1
00155 ) const;
00156
00158 const JunctionRefArray& getCompatible() const {return compatiblePerFile_;}
00160 const JunctionRefArray& getIncompatible() const {return incompatiblePerFile_;}
00161
00163 const JunctionRefs& getCompatibleCombined() const {return comatibleCombined_;}
00165 const JunctionsArray& getPrefiltered() const {return prefilteredJunctions_;}
00166
00167 protected:
00168 uint32_t distanceTolerance_;
00169 std::vector<size_t> scoreThresholds_;
00170 std::string outPrefix_;
00171
00172 const reference::CrrFile& reference_;
00173
00174 JunctionsArray prefilteredJunctions_;
00175
00176 JunctionRefArray compatiblePerFile_;
00177 JunctionRefArray incompatiblePerFile_;
00178 JunctionRefs comatibleCombined_;
00179
00185 double getCompatibility(const Junction& j0, const Junction& j1) const;
00186
00192 void compareBySide(const JunctionsArray& junctions, int firstSide,
00193 JunctionRefs &oneSideCompatibleCombined,
00194 JunctionRefs &twoSidesCompatibleCombined,
00195 JunctionRefs &twoSidesCompatible,
00196 JunctionRefs &twoSidesIncompatible) const;
00197
00199 void splitList(const JunctionRefs& list, JunctionRefArray& result) const;
00200 void prefilterByScore(JunctionsArray& junctionArray) const;
00201
00203 void mergeResults(JunctionRefArray &input, JunctionRefs &output) const;
00204
00205 };
00206
00207 }}
00208
00209
00210 #endif //CGA_TOOLS_JUNCTION_COMPARE_HPP_
00211