00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_JUNCTION_HPP_
00016 #define CGA_TOOLS_JUNCTION_HPP_
00017 
00027 
00028 
00029 #include "cgatools/core.hpp"
00030 #include "cgatools/reference/CrrFile.hpp"
00031 #include "cgatools/util/DelimitedFile.hpp"
00032 
00033 
00034 #include <boost/array.hpp>
00035 
00036 
00037 #include <string>
00038 #include <vector>
00039 #include <map>
00040 
00041 
00042 namespace cgatools { namespace junctions {
00043 
00044     
00045     const int JUNCTION_LEFT_SIDE = 0;
00046     const int JUNCTION_RIGHT_SIDE = 1;
00047     const int JUNCTION_BOTH_SIDES = 2;
00048 
00049     
00050     enum JunctionStrand {
00051         JUNCTION_MINUS_STRAND = -1,
00052         JUNCTION_UNKNOWN_STRAND = 0,
00053         JUNCTION_PLUS_STRAND = +1
00054     };
00055 
00057     class JunctionSideSection {
00058     public:
00059 
00060         
00061         JunctionStrand strand_;
00062 
00063         
00064         reference::Location position_;
00065 
00066         
00067         int length_;
00068 
00069         
00070         std::string repeatClassification_;
00071 
00072         
00073         std::string genes_;
00074 
00076         JunctionSideSection();
00077 
00079         JunctionSideSection(JunctionStrand, const reference::Location& position, int length, 
00080                     const std::string& repeatClassification,
00081                     const std::string& genes
00082                     );
00083 
00085         void write(std::ostream&, const reference::CrrFile&) const;
00086     };
00087 
00088 
00089 
00091     class Junction {
00092     public:
00093         std::string id_;
00094 
00095         
00096         boost::array<JunctionSideSection,2> sideSections_;
00097 
00101         std::string transitionSequence_;
00102 
00105         size_t transitionLength_;
00106 
00109         bool            transitionIsKnown_;
00110 
00112         uint32_t        score_;
00113 
00115         std::string     xRef_;
00116         std::string     deletedTransposableElement_;
00117         std::string     knownUnderrepresentedRepeat_;
00118         double          frequencyInBaselineGenomeSet_;
00119 
00121         std::string     assembledSequence_;
00122 
00124         std::vector<std::string> annotations_;
00125 
00128         Junction();
00129 
00131         Junction(
00132             const std::string& junctionId,
00133             const JunctionSideSection& leftSection,
00134             const JunctionSideSection& rightSection,
00135             const std::string& transitionSequence,
00136             size_t transitionLength,
00137             bool transitionIsKnown,
00138             uint32_t score,
00139             const std::string& xRef,
00140             const std::string& deletedTransposableElement,
00141             const std::string& knownUnderrepresentedRepeat,
00142             double frequencyInBaselineGenomeSet,
00143             const std::string& assembledSequence
00144             );
00145 
00147         const reference::Location &getLocation(int side) const {
00148             return sideSections_[side].position_;
00149         }
00150 
00152         size_t getDistance() const;
00153 
00155         void write(std::ostream&, const reference::CrrFile&,
00156                    size_t expectedAnnotationCount) const;
00157     };
00158 
00160     typedef std::vector<Junction> Junctions;
00161 
00164     class CompareJunctionsBySide {
00165     public:
00166         CompareJunctionsBySide(int side=JUNCTION_LEFT_SIDE) 
00167             :side_(side), otherSide_(JUNCTION_RIGHT_SIDE-side)
00168         { 
00169             CGA_ASSERT_MSG(side==JUNCTION_LEFT_SIDE || side>=JUNCTION_RIGHT_SIDE, 
00170                 "wrong junction side: " << side);
00171         }
00172 
00173         static const reference::Location &getLocation(const Junction& j, int side) {
00174             return j.sideSections_[side].position_;
00175         }
00176 
00177         bool operator() (const Junction& j1,const Junction& j2) const {
00178             const reference::Location &f1 = j1.getLocation(side_);
00179             const reference::Location &f2 = j2.getLocation(side_);
00180             if (f1!=f2)
00181                 return f1<f2;
00182             const reference::Location &s1 = j1.getLocation(otherSide_);
00183             const reference::Location &s2 = j2.getLocation(otherSide_);
00184             if (s1!=s2)
00185                 return s1<s2;
00186             return j1.id_ < j2.id_; 
00187         }
00188 
00189     protected:
00190         int side_;
00191         int otherSide_;
00192     };
00193 
00195     class JunctionFile {
00196     public:
00197 
00198         JunctionFile() {}
00199 
00202         JunctionFile(const std::string& name, const reference::CrrFile&);
00203 
00205         void read(const std::string& name, const reference::CrrFile& reference);
00206 
00208         void write(const std::string& name, const reference::CrrFile&);
00209 
00211         void add(const Junction&);
00212 
00214         Junctions junctions_;
00215 
00217         static const std::string header_;
00218 
00220         util::DelimitedFile::Metadata metadata_;
00221 
00223         std::string fileName_;
00224 
00226         std::vector<std::string> annotationHeaders_;
00227     };
00228 
00230     typedef std::vector<JunctionFile> JunctionFiles;
00231 }}
00232 
00233 
00234 #endif //CGA_TOOLS_JUNCTION_HPP_
00235