00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_HPP_
00016 #define CGA_TOOLS_JUNCTION_HPP_
00017
00027
00028
00029 #include "cgatools/core.hpp"
00030 #include "cgatools/reference/CrrFile.hpp"
00031 #include "cgatools/util/DelimitedFile.hpp"
00032
00033
00034 #include <boost/array.hpp>
00035
00036
00037 #include <string>
00038 #include <vector>
00039 #include <map>
00040
00041
00042 namespace cgatools { namespace junctions {
00043
00044
/// Index of the left side of a junction; used to subscript the
/// two-element per-junction side array.
const int JUNCTION_LEFT_SIDE  = 0;

/// Index of the right side of a junction; used to subscript the
/// two-element per-junction side array.
const int JUNCTION_RIGHT_SIDE = 1;

/// Selector value distinct from the two side indices.
/// NOTE(review): presumably means "apply to both sides"; it is not a valid
/// array index and its consumers are not visible in this header.
const int JUNCTION_BOTH_SIDES = 2;
00048
00049
/// Strand designation for one side of a junction.
/// The numeric values are signed so that minus/plus are -1/+1 and
/// "unknown" is 0.
enum JunctionStrand {
    JUNCTION_MINUS_STRAND   = -1,
    JUNCTION_UNKNOWN_STRAND =  0,
    JUNCTION_PLUS_STRAND    =  1
};
00055
00057 class JunctionSideSection {
00058 public:
00059
00060
00061 JunctionStrand strand_;
00062
00063
00064 reference::Location position_;
00065
00066
00067 int length_;
00068
00069
00070 std::string repeatClassification_;
00071
00072
00073 std::string genes_;
00074
00076 JunctionSideSection();
00077
00079 JunctionSideSection(JunctionStrand, const reference::Location& position, int length,
00080 const std::string& repeatClassification,
00081 const std::string& genes
00082 );
00083
00085 void write(std::ostream&, const reference::CrrFile&) const;
00086
00089 int getDir ( size_t side ) const;
00090
00093 reference::Location getBasePos( size_t side ) const;
00094 };
00095
00096
00097
00099 class Junction {
00100 public:
00101 std::string id_;
00102
00103
00104 boost::array<JunctionSideSection,2> sideSections_;
00105
00109 std::string transitionSequence_;
00110
00113 size_t transitionLength_;
00114
00117 bool transitionIsKnown_;
00118
00120 uint32_t score_;
00121
00123 std::string xRef_;
00124 std::string deletedTransposableElement_;
00125 std::string knownUnderrepresentedRepeat_;
00126 double frequencyInBaselineGenomeSet_;
00127
00129 std::string assembledSequence_;
00130
00132 std::vector<std::string> annotations_;
00133
00136 Junction();
00137
00139 Junction(
00140 const std::string& junctionId,
00141 const JunctionSideSection& leftSection,
00142 const JunctionSideSection& rightSection,
00143 const std::string& transitionSequence,
00144 size_t transitionLength,
00145 bool transitionIsKnown,
00146 uint32_t score,
00147 const std::string& xRef,
00148 const std::string& deletedTransposableElement,
00149 const std::string& knownUnderrepresentedRepeat,
00150 double frequencyInBaselineGenomeSet,
00151 const std::string& assembledSequence
00152 );
00153
00155 const reference::Location &getLocation(int side) const {
00156 return sideSections_[side].position_;
00157 }
00158
00160 size_t getDistance() const;
00161
00163 void write(std::ostream&, const reference::CrrFile&,
00164 size_t expectedAnnotationCount) const;
00165 };
00166
00168 typedef std::vector<Junction> Junctions;
00169
00172 class CompareJunctionsBySide {
00173 public:
00174 CompareJunctionsBySide(int side=JUNCTION_LEFT_SIDE)
00175 :side_(side), otherSide_(JUNCTION_RIGHT_SIDE-side)
00176 {
00177 CGA_ASSERT_MSG(side==JUNCTION_LEFT_SIDE || side>=JUNCTION_RIGHT_SIDE,
00178 "wrong junction side: " << side);
00179 }
00180
00181 static const reference::Location &getLocation(const Junction& j, int side) {
00182 return j.sideSections_[side].position_;
00183 }
00184
00185 bool operator() (const Junction& j1,const Junction& j2) const {
00186 const reference::Location &f1 = j1.getLocation(side_);
00187 const reference::Location &f2 = j2.getLocation(side_);
00188 if (f1!=f2)
00189 return f1<f2;
00190 const reference::Location &s1 = j1.getLocation(otherSide_);
00191 const reference::Location &s2 = j2.getLocation(otherSide_);
00192 if (s1!=s2)
00193 return s1<s2;
00194 return j1.id_ < j2.id_;
00195 }
00196
00197 protected:
00198 int side_;
00199 int otherSide_;
00200 };
00201
00203 class JunctionFile {
00204 public:
00205 static const std::string SEP;
00206
00207 JunctionFile() {}
00208
00211 JunctionFile(const std::string& name, const reference::CrrFile&);
00212
00214 void read(const std::string& name, const reference::CrrFile& reference);
00215
00217 void write(const std::string& name, const reference::CrrFile&);
00218
00220 void add(const Junction&);
00221
00223 Junctions junctions_;
00224
00226 static const std::string header_;
00227
00229 util::DelimitedFile::Metadata metadata_;
00230
00232 std::string fileName_;
00233
00235 std::vector<std::string> annotationHeaders_;
00236 };
00237
00239 typedef std::vector<JunctionFile> JunctionFiles;
00240 }}
00241
00242
00243 #endif //CGA_TOOLS_JUNCTION_HPP_
00244