00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_JUNCTION_COMPARE_HPP_
00016 #define CGA_TOOLS_JUNCTION_COMPARE_HPP_
00017 
00020 
00021 
00022 
00023 #include "cgatools/core.hpp"
00024 #include "cgatools/junctions/Junction.hpp"
00025 #include "cgatools/util/GenericHistogram.hpp"
00026 
00027 
00028 #include <string>
00029 #include <vector>
00030 #include <map>
00031 
00032 namespace cgatools { namespace reference {
00033     class CrrFile;
00034 }}
00035 
00036 namespace cgatools { namespace junctions {
00037 
00040     class JunctionRef 
00041     {
00042     public:
00043         typedef Junctions::const_iterator It;
00044         JunctionRef() :it_(nullValue_.begin()) {}
00045         JunctionRef(It it) :it_(it) {}
00046 
00047         const Junction& operator* () const {return *it_;}
00048         bool operator< (const JunctionRef& other) {
00049             CGA_ERROR_EX("JunctionIt::operator< is disabled"); return it_<other.it_;
00050         }
00051 
00052         It                      it_;
00053         std::vector<JunctionRef> matchedJunctionIts_;
00054 
00056         uint32_t                sourceId_;
00057 
00058         bool isNull() const {return it_==nullValue_.begin() && matchedJunctionIts_.empty();}
00060         std::string getId() const {return it_->id_ + "-" + boost::lexical_cast<std::string>(sourceId_);}
00061 
00062     protected:
00063         static Junctions nullValue_;
00064 
00065         friend std::ostream& operator<<(std::ostream &ostr, const JunctionRef::It& j);
00066         friend std::ostream& operator<<(std::ostream &ostr, const JunctionRef& j);
00067     };
00068 
00069 
00070     
00071     class CompareBySide : public CompareJunctionsBySide {
00072     public:
00073         CompareBySide(int side=JUNCTION_LEFT_SIDE) 
00074             : CompareJunctionsBySide(side)
00075         {}
00076         using CompareJunctionsBySide::operator();
00077 
00078         bool operator() (JunctionRef::It j1,JunctionRef::It j2) const {
00079             return CompareJunctionsBySide::operator() (*j1,*j2);
00080         }
00081 
00082         bool operator() (const JunctionRef &j1,const JunctionRef &j2) const {
00083             return CompareJunctionsBySide::operator() (*j1,*j2);
00084         }
00085     };
00086 
00087     class JunctionRefs : public std::vector<JunctionRef> 
00088     {
00089     public:
00090         JunctionRefs() : std::vector<JunctionRef>() {}
00091 
00092         JunctionRefs(const Junctions& junctions, uint32_t id = 0) 
00093             : std::vector<JunctionRef>()
00094         {
00095             addJunctions(junctions, id);
00096         }
00097 
00098         void addJunctions(const Junctions& junctions, uint32_t id) {
00099             reserve(size()+junctions.size());
00100             for (Junctions::const_iterator it=junctions.begin(),endIt=junctions.end(); it!=endIt;++it) {
00101                 push_back(it);
00102                 back().sourceId_ = id;
00103             }
00104         }
00105     };
00106 
00107     class FullGenomeJunctionComparator
00108     {
00109     public:
00110         typedef std::vector<Junctions> JunctionsArray;
00111         typedef std::vector<JunctionRefs> JunctionRefArray;
00112 
00113 
00114         FullGenomeJunctionComparator(
00115             uint32_t distanceTolerance,
00116             const std::vector<size_t> &scoreThresholds,
00117             const reference::CrrFile& reference,
00118             const std::string &outPrefix
00119         )
00120         :   distanceTolerance_(distanceTolerance),
00121             scoreThresholds_(scoreThresholds),
00122             outPrefix_(outPrefix),
00123             reference_(reference)
00124         {}
00125 
00126         static const char SEPARATOR = '\t';
00127 
00128 
00129         void compare(const JunctionFiles& junctionFiles);
00130 
00134         void printList(const std::string& fileName, 
00135             const JunctionRefs& list,
00136             const util::DelimitedFile::Metadata& sourceMetadata,
00137             size_t multiListSize = 1
00138             ) const;
00139 
00141         const JunctionRefArray& getIncompatible() const {return incompatiblePerFile_;}
00142 
00144         const JunctionRefs&     getCompatible()  const  {return comatible_;}
00146         const JunctionsArray&     getPrefiltered()  const  {return prefilteredJunctions_;}
00147 
00148     protected:
00149         uint32_t            distanceTolerance_;
00150         std::vector<size_t> scoreThresholds_;
00151         std::string         outPrefix_;
00152 
00153         const reference::CrrFile& reference_;
00154 
00155         JunctionsArray      prefilteredJunctions_;
00156 
00157         JunctionRefArray    incompatiblePerFile_;
00158         JunctionRefs        comatible_;
00159 
00165         double getCompatibility(const Junction& j0, const Junction& j1) const;
00166 
00172         void compareBySide(const JunctionsArray& junctions, int firstSide,
00173             JunctionRefs &oneSideCompatible,
00174             JunctionRefs &twoSidesCompatible,
00175             JunctionRefs &twoSidesIncompatible) const;
00176 
00178         void splitList(const JunctionRefs& list, JunctionRefArray& result) const;
00179         void prefilterByScore(JunctionsArray& junctionArray) const; 
00180 
00182         void mergeResults(JunctionRefArray &input, JunctionRefs &output) const;
00183 
00184     };
00185 
00186 }}
00187 
00188 
00189 #endif //CGA_TOOLS_JUNCTION_COMPARE_HPP_
00190