00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_HPP_
00016 #define CGA_TOOLS_JUNCTION_HPP_
00017
00029
00030
00031 #include "cgatools/core.hpp"
00032 #include "cgatools/reference/CrrFile.hpp"
00033
00034
00035 #include <boost/array.hpp>
00036
00037
00038 #include <string>
00039 #include <vector>
00040 #include <map>
00041
00042
00043 namespace cgatools { namespace junctions {
00044 /// Forward declaration of Junction and short aliases used by the declarations in this header.
00045 class Junction; // defined later in this header
00046 typedef reference::CrrFile ReferenceGenome; // alias for reference::CrrFile
00047 typedef uint8_t Base; // presumably one sequence base; the encoding is not visible here -- confirm
00048 typedef reference::Location Location; // alias for reference::Location
00049 /// reference::Range extended with a public setter for chromosome_.
00050 class Range : public reference::Range
00051 {
00052 public:
00053 void setContig(uint16_t contigId) { // assigns contigId to chromosome_ (not declared here, so presumably inherited)
00054 chromosome_ = contigId;
00055 }
00056 };
00057
00058 /// Side indices; presumably index Junction::sideSections_ and serve as the getRange(side) argument -- confirm.
00059 const int JUNCTION_LEFT_SIDE = 0;
00060 const int JUNCTION_RIGHT_SIDE = 1;
00061
00062 /// Sentinel for an unknown length; presumably the value of JunctionSideSection::length_ when unset -- confirm.
00063 const int JUNCTION_UNKNOWN_LENGTH = -1;
00064
00065
00066 /// Zygosity value carried by a junction side section (type of JunctionSideSection::zygosity_).
00067 enum JunctionSideSectionZygosity {
00068 JUNCTION_UNKNOWN_ZYGOSITY = 0,
00069 JUNCTION_HETEROZYGOUS = 1,
00070 JUNCTION_HOMOZYGOUS = 2
00071 };
00072 void read(std::vector<std::string>::const_iterator&, JunctionSideSectionZygosity&); // fills the enum from a token iterator; iterator is taken by non-const ref, presumably advanced -- confirm
00073 void write(std::ostream&, JunctionSideSectionZygosity, const std::string &separator); // writes the enum to the stream; exact use of separator is defined in the implementation
00074
00075
00076
00077 /// Strand of a junction side section: minus (-1), unknown (0) or plus (+1).
00078 enum JunctionStrand {
00079 JUNCTION_MINUS_STRAND = -1,
00080 JUNCTION_UNKNOWN_STRAND = 0,
00081 JUNCTION_PLUS_STRAND = +1
00082 };
00083 void read(std::vector<std::string>::const_iterator&, JunctionStrand&); // fills the enum from a token iterator; iterator is taken by non-const ref, presumably advanced -- confirm
00084 void write(std::ostream&, JunctionStrand, const std::string &separator); // writes the enum to the stream; exact use of separator is defined in the implementation
00085
00086
00087 /// One side of a junction (Junction holds two of these): strand, location, length and zygosity.
00088 class JunctionSideSection {
00089 public:
00090
00091 /// Strand of this side section.
00092 JunctionStrand strand_;
00093
00094 /// Reference location associated with this side section.
00095 Location position_;
00096
00097 /// Section length; JUNCTION_UNKNOWN_LENGTH is presumably stored when it is not known -- confirm.
00098 int length_;
00099
00100 /// Zygosity of this side section.
00101 JunctionSideSectionZygosity zygosity_;
00102
00103
00104
00105 /// Whether this section is known; presumably false for the absent side of a semi-junction -- confirm.
00106 bool isKnown_;
00107
00108 /// Default constructor; the initial member values are set in the implementation (not shown here).
00109 JunctionSideSection();
00110
00111 /// Builds a section from strand and location; defaults for length and zygosity come from the implementation.
00112 JunctionSideSection(JunctionStrand, const Location&);
00113
00114 /// Builds a section from strand, location and an explicit length.
00115 JunctionSideSection(JunctionStrand, const Location&, int length);
00116
00117 /// Builds a section from strand, location and an explicit zygosity.
00118 JunctionSideSection(JunctionStrand, const Location&, JunctionSideSectionZygosity);
00119
00120 /// Builds a section from strand, location, length and zygosity.
00121 JunctionSideSection(JunctionStrand, const Location&, int length, JunctionSideSectionZygosity);
00122
00123 /// Presumably the number of text tokens consumed by read() / produced by write() -- confirm.
00124 static const int TOKEN_COUNT = 5;
00125
00126 /// Returns a Range for this section; side is presumably JUNCTION_LEFT_SIDE or JUNCTION_RIGHT_SIDE -- confirm.
00127 Range getRange(int side) const;
00128
00129 /// Text I/O: read takes the token iterator by non-const ref (presumably advancing it); write emits to the stream.
00130 void read(std::vector<std::string>::const_iterator&, const ReferenceGenome&);
00131 void write(std::ostream&, const ReferenceGenome&, const std::string &separator) const;
00132 };
00133
00134
00135
00136 /// Transition section of a junction: a base sequence plus a known/unknown flag (Junction::transitionSection_).
00137 class JunctionTransitionSection {
00138 public:
00139
00140
00141
00142 /// Bases of the transition sequence.
00143 std::vector<Base> sequence_;
00144
00145
00146 /// Whether the transition sequence is known; how it is set is defined in the implementation.
00147 bool isKnown_;
00148
00149 /// Default constructor; initial member values are set in the implementation (not shown here).
00150 JunctionTransitionSection();
00151
00152
00153 /// Construct from a base vector or from a string; the string-to-Base conversion is not visible here.
00154 JunctionTransitionSection(const std::vector<Base>&);
00155 JunctionTransitionSection(const std::string&);
00156
00157 /// Reverse-complements this section in place (non-const, returns void).
00158 void reverseComplement();
00159
00160 /// Reverse-complements the given base vector in place (taken by non-const reference).
00161 static void reverseComplement(std::vector<Base>&);
00162
00163 /// Presumably the number of text tokens consumed by read() / produced by write() -- confirm.
00164 static const int TOKEN_COUNT = 2;
00165
00166 /// Text I/O: read takes the token iterator by non-const ref (presumably advancing it); write emits to the stream.
00167 void read(std::vector<std::string>::const_iterator&);
00168 void write(std::ostream&, const std::string &separator) const;
00169 };
00170
00171
00172
00173
00174 /// Score attached to a junction: a double value plus a one-character type tag.
00175 class JunctionScore {
00176 friend std::ostream& operator<<(std::ostream& ostr, const JunctionScore& o); // stream output, declared as a friend
00177 public:
00178 double score_; // numeric score
00179 char type_; // score type tag; the set of valid characters is not visible in this header
00180
00181 /// Default constructor; the default score and type are set in the implementation (not shown here).
00182 JunctionScore();
00183
00184 /// Builds a score from an explicit value and type tag.
00185 JunctionScore(double score, char type);
00186
00187 /// Presumably the number of text tokens consumed by read() / produced by write() -- confirm.
00188 static const int TOKEN_COUNT = 1;
00189
00190 /// Text I/O: read takes the token iterator by non-const ref (presumably advancing it); write emits to the stream.
00191 void read(std::vector<std::string>::const_iterator&);
00192 void write(std::ostream&, const std::string &separator) const;
00193 };
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204 /// Junction identifier stored as a vector of strings, with equality and ordering operators.
00205 class JunctionId {
00206 friend std::ostream& operator<<(std::ostream& ostr, const JunctionId& o); // stream output, declared as a friend
00207 public:
00208 typedef std::vector<std::string> IdType; // storage type of the identifier
00209 IdType id_; // identifier components; how a textual id is split into components is defined in the implementation
00210
00211 /// Default constructor; the initial content of id_ is set in the implementation (not shown here).
00212 JunctionId();
00213
00214 /// Builds an id from a string code and an integer; how the two are combined is defined in the implementation.
00215 JunctionId(const std::string& code, int id);
00216
00217 /// Builds an id from a single string; not explicit, so std::string converts implicitly to JunctionId.
00218 JunctionId(const std::string& id);
00219
00220 /// Comparison operators; operator< makes JunctionId usable as an ordered key.
00221 bool operator==(const JunctionId&) const;
00222 bool operator!=(const JunctionId&) const;
00223 bool operator<(const JunctionId&) const;
00224
00225 /// Text I/O plus string(), which returns the id as a std::string (exact format defined in the implementation).
00226 void read(std::vector<std::string>::const_iterator&);
00227 void write(std::ostream&, const std::string &separator) const;
00228 std::string string() const;
00229 };
00230
00231
00232
00233 /// Function object that returns a JunctionId on each call and keeps the next id in nextId_.
00234 class JunctionIdGenerator {
00235 public:
00236 /// Seeds the generator from a string; how the string maps to the first id is defined in the implementation.
00237 JunctionIdGenerator(const std::string&);
00238
00239 /// Seed the generator from an existing Junction, a JunctionId, or raw id components.
00240 JunctionIdGenerator(const Junction&);
00241 JunctionIdGenerator(const JunctionId&);
00242 JunctionIdGenerator(const std::vector<std::string>&);
00243
00244 /// State of the generator; presumably the components of the id returned by the next call -- confirm.
00245 std::vector<std::string> nextId_;
00246
00247 /// Returns a JunctionId; non-const, so presumably advances nextId_ as a side effect -- confirm.
00248 JunctionId operator()();
00249 };
00250
00251
00252
00253 /// A junction: id, two side sections, a transition section, a score and free-text comments.
00254 class Junction {
00255 public:
00256 JunctionId id_; // identifier of this junction
00257
00258 /// The two side sections; presumably indexed by JUNCTION_LEFT_SIDE / JUNCTION_RIGHT_SIDE -- confirm.
00259 boost::array<JunctionSideSection,2> sideSections_;
00260 JunctionTransitionSection transitionSection_; // transition section of this junction
00261
00262 JunctionScore score_; // score of this junction
00263 std::vector<std::string> comments_; // comments attached to this junction (see addComment)
00264
00265
00266 /// Default constructor; initial member values are set in the implementation (not shown here).
00267 Junction();
00268
00269 /// Construct from id, both side sections (as two arguments or as an array) and a transition section; score is optional.
00270 Junction(
00271 const JunctionId&,
00272 const JunctionSideSection& leftSection,
00273 const JunctionTransitionSection&,
00274 const JunctionSideSection& rightSection,
00275 const JunctionScore& score=JunctionScore());
00276 Junction(
00277 const JunctionId&,
00278 const boost::array<JunctionSideSection,2>&,
00279 const JunctionTransitionSection&,
00280 const JunctionScore& score=JunctionScore());
00281
00282
00283 /// Construct from id and both side sections without a transition section argument; score is optional.
00284 Junction(
00285 const JunctionId&,
00286 const JunctionSideSection& leftSection,
00287 const JunctionSideSection& rightSection,
00288 const JunctionScore& score=JunctionScore());
00289 Junction(
00290 const JunctionId&,
00291 const boost::array<JunctionSideSection,2>&,
00292 const JunctionScore& score=JunctionScore());
00293
00294 /// Construct with only a left side section and a transition section (presumably a semi-junction -- confirm).
00295 Junction(
00296 const JunctionId&,
00297 const JunctionSideSection& leftSection,
00298 const JunctionTransitionSection&,
00299 const JunctionScore& score=JunctionScore());
00300
00301 /// Construct with only a transition section and a right side section (presumably a semi-junction -- confirm).
00302 Junction(
00303 const JunctionId&,
00304 const JunctionTransitionSection&,
00305 const JunctionSideSection& rightSection,
00306 const JunctionScore& score=JunctionScore());
00307
00308
00309 /// Construct from a string, a reference genome and a separator; presumably parses one text record -- confirm.
00310 Junction(const std::string&, const ReferenceGenome& reference, const std::string &separator);
00311
00312 /// Adds a comment string to this junction (presumably appended to comments_ -- confirm).
00313 void addComment(const std::string&);
00314
00315 /// Reports whether the junction is in canonical form; the definition of canonical lives in the implementation.
00316 bool isCanonical() const;
00317
00318 /// Modifies this junction in place; presumably brings it to the form isCanonical() accepts -- confirm.
00319 void canonicalize();
00320
00321 /// Reports whether this is a semi-junction; presumably one whose side section is not known -- confirm.
00322 bool isSemiJunction() const;
00323
00324 /// Reverse-complements this junction in place (non-const, returns void).
00325 void reverseComplement();
00326
00327 /// Returns a Location for this junction; which side it is taken from is defined in the implementation.
00328 Location position() const;
00329
00330 /// Removes duplicate comments (presumably repeated entries of comments_ -- confirm).
00331 void removeDuplicateComments();
00332
00333 /// Strict ordering between junctions; the comparison key is defined in the implementation.
00334 bool operator<(const Junction& that) const;
00335
00336
00337 /// Computes an int distance using the reference genome; presumably between the two sides -- confirm.
00338 int computeDistance(const ReferenceGenome&) const;
00339
00340 /// Text I/O: read fills this junction from a token vector; write emits it to the stream using separator.
00341 void read(const std::vector<std::string>&, const ReferenceGenome&);
00342 void write(std::ostream&, const ReferenceGenome&, const std::string &separator) const;
00343 };
00344 /// Collection type for junctions: a std::vector of Junction.
00345 typedef std::vector<Junction> Junctions;
00346
00347 /// A set of junctions with operations to read it from and write it to named files.
00348 class JunctionFile {
00349 public:
00350
00351
00352
00353 /// Construct from a file name and reference genome (presumably reads the file -- confirm); separator defaults to tab.
00354 JunctionFile(const std::string& name, const ReferenceGenome&, std::string separator="\t");
00355
00356 /// Construct without a file name; separator defaults to tab. Not explicit, so std::string converts implicitly.
00357 JunctionFile(std::string separator="\t");
00358
00359
00360
00361 /// Reads from the named file; extraId defaults to "" and its effect is defined in the implementation.
00362 void read(const std::string& name, const ReferenceGenome& reference, std::string extraId="");
00363
00364
00365 /// Write to the named file; what "sorted" and "replication" mean is defined in the implementation.
00366 void write(const std::string& name, const ReferenceGenome&);
00367 void writeSortedWithoutReplication(const std::string& name, const ReferenceGenome&);
00368 void writeSortedWithReplication(const std::string& name, const ReferenceGenome&);
00369
00370 /// Adds a junction (presumably appended to junctions_ -- confirm).
00371 void add(const Junction&);
00372
00373 /// Non-const, no arguments; presumably applies Junction::canonicalize to the stored junctions -- confirm.
00374 void canonicalize();
00375
00376 /// Non-const, no arguments; presumably orders junctions_ with Junction::operator< -- confirm.
00377 void sort();
00378
00379 /// Non-const, no arguments; presumably applies Junction::removeDuplicateComments to each junction -- confirm.
00380 void removeDuplicateComments();
00381
00382 /// The junctions held by this object.
00383 Junctions junctions_;
00384
00385 /// Header string shared by all instances; its value is defined in the implementation file.
00386 static const std::string header_;
00387
00388 /// Field separator set through the constructors (default "\t").
00389 std::string separator_;
00390 };
00391
00392 /// Empty tag type with no members or base class; presumably thrown for invalid junctions -- confirm.
00393 class InvalidJunction {
00394 };
00395
00396 /// Namespace-scope declaration of the stream output operator that JunctionId declares as a friend.
00397 std::ostream& operator<<(std::ostream&, const JunctionId&);
00398 /// Splits inputString into tokens (output parameter) using sep; both flags default to false, presumably trimming whitespace / dropping empty tokens -- confirm.
00399 void parseTokenString(std::string const & inputString, char const * sep,
00400 std::vector<std::string> & tokens,
00401 bool trimWhitespace = false,
00402 bool compressEmpty = false);
00403
00404
00405
00406 }}
00407
00408
00409 #endif //CGA_TOOLS_JUNCTION_HPP_
00410