00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_CGDATA_EVIDENCEREADER_HPP_
00016 #define CGATOOLS_CGDATA_EVIDENCEREADER_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/DelimitedFile.hpp"
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/cgdata/GenomeMetadata.hpp"
00024 #include "cgatools/variants/Call.hpp"
00025
00026 #include <boost/noncopyable.hpp>
00027 #include <boost/scoped_ptr.hpp>
00028 #include <boost/shared_ptr.hpp>
00029
00030 namespace cgatools { namespace cgdata {
00031
/// Non-copyable reader (inherits boost::noncopyable) that exposes a "current"
/// evidence interval record together with the DNB records associated with it.
/// The reader keeps const references to a reference::CrrFile and a
/// GenomeMetadata (see crr_ and exp_ below).
/// NOTE(review): only declarations are visible in this header; the positioning
/// semantics of seek()/seekToChr()/nextInChr() are defined in the .cpp file.
00032 class EvidenceReader : boost::noncopyable
00033 {
00034 public:
/// Plain record describing one evidence interval. This is the type returned
/// by EvidenceReader::getInterval().
00035 struct IntervalRecord
00036 {
00037 IntervalRecord();
/// Static helper taking the parser, the record to bind and the reference.
/// NOTE(review): presumably registers the fields of rec as columns of parser,
/// using crr to resolve chromosomes -- confirm against the implementation.
00038 static void bindToParser(
00039 util::DelimitedFile &parser, IntervalRecord& rec, const reference::CrrFile& crr);
00040
// Data fields of the record.
// NOTE(review): field meanings are inferred from their names only; confirm
// units/coordinate conventions (e.g. whether offset_ is 0-based) elsewhere.
00041 int32_t intervalId_;
00042 uint16_t chromosome_;
00043 uint32_t offset_;
00044 uint32_t length_;
00045 uint16_t ploidy_;
// The two scores below are what getEvidenceScoreVAF()/getEvidenceScoreEAF()
// of the enclosing reader return while an interval is current.
00046 int32_t evidenceScoreVAF_;
00047 int32_t evidenceScoreEAF_;
00048 std::vector<uint16_t> alleleIndexes_;
// Note the differing fixed sizes: 4 allele strings but 3 alignment strings.
// NOTE(review): presumably one allele slot has no alignment of its own
// (e.g. the reference allele) -- TODO confirm.
00049 boost::array<std::string, 4> alleles_;
00050 boost::array<std::string, 3> alleleAlignments_;
00051
/// Returns the reference range of this interval.
/// NOTE(review): presumably derived from chromosome_, offset_ and length_.
00052 reference::Range getRange() const;
00053
/// Compatibility check of the allele selected by alleleIndex against a
/// variants::Call. NOTE(review): the exact compatibility criteria are
/// defined in the implementation, not visible here.
00056 bool isCompatible(
00057 uint16_t alleleIndex, const variants::Call& call,
00058 const reference::CrrFile& crr) const;
00059
/// Overload of the compatibility check that takes an explicit reference
/// range and sequence instead of a Call, plus a flag controlling whether
/// adjacent insertions are ignored (semantics defined in the implementation).
00065 bool isCompatible(
00066 uint16_t alleleIndex,
00067 const reference::Range& range,
00068 const std::string& seq,
00069 bool ignoreAdjacentInsertions) const;
00070
/// Returns an alignment string for the allele at alleleIndex.
/// NOTE(review): note the index type is size_t here but uint16_t in
/// isCompatible(); valid index range is not visible in this header.
00071 std::string getAlignment(size_t alleleIndex) const;
/// Static conversion from a CIGAR string to an alignment string.
/// NOTE(review): output format is defined in the implementation.
00072 static std::string cigarToAlignment(const std::string& cigar);
00073 };
00074
/// Plain record describing one DNB evidence entry. Instances are collected
/// in the vector returned by EvidenceReader::getDnbs().
00075 struct DnbRecord
00076 {
/// Stream insertion operator for DnbRecord, declared as a friend.
00077 friend std::ostream& operator<< (std::ostream& out, const DnbRecord& r);
00078
00079 DnbRecord();
/// Returns a string identifier for this DNB.
/// NOTE(review): presumably composed from slide_/lane_/file/offset fields.
00080 std::string getId() const;
00081
/// Overlap test of this DNB against reference range r; allowGap selects a
/// variant of the test (semantics defined in the implementation).
00084 bool hasOverlap(const reference::Range& r, bool allowGap) const;
00085
/// Static helper, the DnbRecord counterpart of IntervalRecord::bindToParser.
00086 static void bindToParser(util::DelimitedFile &parser,
00087 DnbRecord& rec, const reference::CrrFile& crr);
00088
// Data fields of the record.
// NOTE(review): meanings are inferred from names; confirm encodings of
// side_ and strand_ against the implementation / file format spec.
00089 int32_t intervalId_;
00090 uint16_t chromosome_;
00091 std::string slide_;
00092 std::string lane_;
00093 uint16_t fileNumInLane_;
00094 uint32_t dnbOffsetInLaneFile_;
00095 uint8_t alleleIndex_;
00096 uint8_t side_;
00097 bool strand_;
00098 int32_t offsetInAllele_;
00099 std::string alleleAlignment_;
// Two-element arrays. NOTE(review): presumably one entry per mate/arm.
00100 boost::array<int32_t,2> offsetInReference_;
00101 boost::array<std::string,2> referenceAlignment_;
00102
// Four scores, matching the size of IntervalRecord::alleles_.
00103 boost::array<int32_t,4> scoreAllele_;
00104 uint8_t mappingQuality_;
00105
00106 std::string sequence_;
00107 std::string scores_;
00108 };
00109
/// Constructs a reader from a reference and genome metadata. The class
/// stores const references of these types (crr_, exp_), so the arguments
/// presumably must outlive the reader -- confirm in the constructor body.
00110 EvidenceReader(const reference::CrrFile& crr,
00111 const GenomeMetadata& exp);
00112
/// Positions the reader relative to reference range r.
/// NOTE(review): check inInterval() afterwards before calling getInterval().
00113 void seek(const reference::Range& r);
00114
/// Positions the reader on chromosome chr (details in the implementation).
00117 void seekToChr(uint16_t chr);
00118
/// Advances the reader within the current chromosome (details in the
/// implementation).
00121 void nextInChr();
00122
/// Returns the inInterval_ flag, i.e. whether a current interval record is
/// available; getInterval() asserts on this flag.
00123 bool inInterval() const
00124 {
00125 return inInterval_;
00126 }
00127
/// Returns the current interval record (the rec_ member of intervalsFile_).
/// Precondition enforced with CGA_ASSERT: inInterval_ must be true.
00128 const IntervalRecord& getInterval() const
00129 {
00130 CGA_ASSERT(inInterval_);
00131 return intervalsFile_->rec_;
00132 }
00133
/// Returns the DNB records buffer. Non-const because it first calls
/// seekDnbs() and only then returns the dnbs_ member by reference.
00134 const std::vector<DnbRecord>& getDnbs()
00135 {
00136 seekDnbs();
00137 return dnbs_;
00138 }
00139
/// Returns evidenceScoreVAF_ of the current interval record, or 0 when the
/// reader is not in an interval.
00143 int32_t getEvidenceScoreVAF() const
00144 {
00145 if (!inInterval_)
00146 return 0;
00147 else
00148 return intervalsFile_->rec_.evidenceScoreVAF_;
00149 }
00150
/// Returns evidenceScoreEAF_ of the current interval record, or 0 when the
/// reader is not in an interval.
00154 int32_t getEvidenceScoreEAF() const
00155 {
00156 if (!inInterval_)
00157 return 0;
00158 else
00159 return intervalsFile_->rec_.evidenceScoreEAF_;
00160 }
00161
/// Returns a count of supporting DNBs for the given allele index and score
/// threshold. NOTE(review): the comparison used against scoreThreshold and
/// which score is compared are defined in the implementation -- confirm.
00166 uint32_t countSupportingDnbs(uint16_t alleleIndex, int32_t scoreThreshold);
00167
00168 private:
/// Bundles the state of one intervals file: its name, input stream,
/// delimited-file parser and the record instance exposed via getInterval().
00169 struct IntervalsFile
00170 {
00171 std::string filename_;
00172 boost::shared_ptr<std::istream> f_;
00173 util::DelimitedFile parser_;
00174 IntervalRecord rec_;
00175
00176 IntervalsFile(const std::string& fn, const reference::CrrFile& crr);
00177 };
00178
/// Bundles the state of one DNBs file: its name, input stream,
/// delimited-file parser and a DnbRecord instance.
00179 struct DnbsFile
00180 {
00181 std::string filename_;
00182 boost::shared_ptr<std::istream> f_;
00183 util::DelimitedFile parser_;
00184 DnbRecord rec_;
00185
00186 DnbsFile(const std::string& fn, const reference::CrrFile& crr);
00187 };
00188
// Internal helpers; definitions are not part of this header.
// NOTE(review): presumably openIntervals() (re)creates intervalsFile_ for
// the given chromosome -- confirm.
00190 void openIntervals(uint16_t chr);
00191
00192 void openDnbs();
// Called by getDnbs() before dnbs_ is handed out.
00193 void seekDnbs();
00194 bool nextDnbs();
00195
// Non-owning references; the referenced objects are not owned by the reader.
00196 const reference::CrrFile& crr_;
00197 const GenomeMetadata& exp_;
00198
// Owned state of the intervals file; dereferenced by getInterval() and the
// evidence score getters.
00199 boost::scoped_ptr<IntervalsFile> intervalsFile_;
00200
// NOTE(review): presumably record which chromosome / interval the DNB file
// state and the dnbs_ buffer currently correspond to -- confirm.
00201 uint16_t dnbsChromosome_;
00202 int32_t dnbsIntervalId_;
00203 boost::scoped_ptr<DnbsFile> dnbsFile_;
// Buffer returned by reference from getDnbs().
00204 std::vector<DnbRecord> dnbs_;
00205
// Flag reported by inInterval(); guards getInterval() and the score getters.
00206 bool inInterval_;
00207 };
00208
00209 } }
00210
00211 #endif // CGATOOLS_CGDATA_EVIDENCEREADER_HPP_