00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_HPP_
00016 #define CGA_TOOLS_JUNCTION_HPP_
00017
00027
00028
00029 #include "cgatools/core.hpp"
00030 #include "cgatools/reference/CrrFile.hpp"
00031 #include "cgatools/util/DelimitedFile.hpp"
00032
00033
00034 #include <boost/array.hpp>
00035
00036
00037 #include <string>
00038 #include <vector>
00039 #include <map>
00040
00041
00042 namespace cgatools { namespace junctions {
00043
00044
/// Side selectors for a junction. The left and right values are used directly
/// as indices into the two-element Junction::sideSections_ array.
const int JUNCTION_LEFT_SIDE = 0;   ///< Left side; index 0 of sideSections_.
const int JUNCTION_RIGHT_SIDE = 1;  ///< Right side; index 1 of sideSections_.
/// Selector value distinct from the two single sides. NOTE: 2 is not a valid
/// index into sideSections_, so it must never be passed where one side is
/// indexed (e.g. Junction::getLocation or CompareJunctionsBySide).
const int JUNCTION_BOTH_SIDES = 2;
00048
00049
/// Strand orientation stored for one side of a junction
/// (the type of JunctionSideSection::strand_).
enum JunctionStrand {
    JUNCTION_MINUS_STRAND   = -1,  ///< Minus strand.
    JUNCTION_UNKNOWN_STRAND = 0,   ///< Strand is not known.
    JUNCTION_PLUS_STRAND    = +1   ///< Plus strand.
};
00055
00057 class JunctionSideSection {
00058 public:
00059
00060
00061 JunctionStrand strand_;
00062
00063
00064 reference::Location position_;
00065
00066
00067 int length_;
00068
00069
00070 std::string repeatClassification_;
00071
00072
00073 std::string genes_;
00074
00076 JunctionSideSection();
00077
00079 JunctionSideSection(JunctionStrand, const reference::Location& position, int length,
00080 const std::string& repeatClassification,
00081 const std::string& genes
00082 );
00083
00085 void write(std::ostream&, const reference::CrrFile&) const;
00086 };
00087
00088
00089
00091 class Junction {
00092 public:
00093 std::string id_;
00094
00095
00096 boost::array<JunctionSideSection,2> sideSections_;
00097
00101 std::string transitionSequence_;
00102
00105 size_t transitionLength_;
00106
00109 bool transitionIsKnown_;
00110
00112 uint32_t score_;
00113
00115 std::string xRef_;
00116 std::string deletedTransposableElement_;
00117 std::string knownUnderrepresentedRepeat_;
00118 double frequencyInBaselineGenomeSet_;
00119
00121 std::string assembledSequence_;
00122
00124 std::vector<std::string> annotations_;
00125
00128 Junction();
00129
00131 Junction(
00132 const std::string& junctionId,
00133 const JunctionSideSection& leftSection,
00134 const JunctionSideSection& rightSection,
00135 const std::string& transitionSequence,
00136 size_t transitionLength,
00137 bool transitionIsKnown,
00138 uint32_t score,
00139 const std::string& xRef,
00140 const std::string& deletedTransposableElement,
00141 const std::string& knownUnderrepresentedRepeat,
00142 double frequencyInBaselineGenomeSet,
00143 const std::string& assembledSequence
00144 );
00145
00147 const reference::Location &getLocation(int side) const {
00148 return sideSections_[side].position_;
00149 }
00150
00152 size_t getDistance() const;
00153
00155 void write(std::ostream&, const reference::CrrFile&,
00156 size_t expectedAnnotationCount) const;
00157 };
00158
00160 typedef std::vector<Junction> Junctions;
00161
00164 class CompareJunctionsBySide {
00165 public:
00166 CompareJunctionsBySide(int side=JUNCTION_LEFT_SIDE)
00167 :side_(side), otherSide_(JUNCTION_RIGHT_SIDE-side)
00168 {
00169 CGA_ASSERT_MSG(side==JUNCTION_LEFT_SIDE || side>=JUNCTION_RIGHT_SIDE,
00170 "wrong junction side: " << side);
00171 }
00172
00173 static const reference::Location &getLocation(const Junction& j, int side) {
00174 return j.sideSections_[side].position_;
00175 }
00176
00177 bool operator() (const Junction& j1,const Junction& j2) const {
00178 const reference::Location &f1 = j1.getLocation(side_);
00179 const reference::Location &f2 = j2.getLocation(side_);
00180 if (f1!=f2)
00181 return f1<f2;
00182 const reference::Location &s1 = j1.getLocation(otherSide_);
00183 const reference::Location &s2 = j2.getLocation(otherSide_);
00184 if (s1!=s2)
00185 return s1<s2;
00186 return j1.id_ < j2.id_;
00187 }
00188
00189 protected:
00190 int side_;
00191 int otherSide_;
00192 };
00193
00195 class JunctionFile {
00196 public:
00197
00198 JunctionFile() {}
00199
00202 JunctionFile(const std::string& name, const reference::CrrFile&);
00203
00205 void read(const std::string& name, const reference::CrrFile& reference);
00206
00208 void write(const std::string& name, const reference::CrrFile&);
00209
00211 void add(const Junction&);
00212
00214 Junctions junctions_;
00215
00217 static const std::string header_;
00218
00220 util::DelimitedFile::Metadata metadata_;
00221
00223 std::string fileName_;
00224
00226 std::vector<std::string> annotationHeaders_;
00227 };
00228
00230 typedef std::vector<JunctionFile> JunctionFiles;
00231 }}
00232
00233
00234 #endif //CGA_TOOLS_JUNCTION_HPP_
00235