00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_CGDATA_EVIDENCEREADER_HPP_
00016 #define CGATOOLS_CGDATA_EVIDENCEREADER_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/DelimitedFile.hpp"
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/cgdata/GenomeMetadata.hpp"
00024 #include "cgatools/variants/Call.hpp"
00025
00026 #include <boost/noncopyable.hpp>
00027 #include <boost/scoped_ptr.hpp>
00028 #include <boost/shared_ptr.hpp>
00029
00030 namespace cgatools { namespace cgdata {
00031
00032 class EvidenceReader : boost::noncopyable
00033 {
00034 public:
00035 struct IntervalRecord
00036 {
00037 IntervalRecord();
00038 static void bindToParser(
00039 util::DelimitedFile &parser, IntervalRecord& rec, const reference::CrrFile& crr);
00040
00041 int32_t intervalId_;
00042 uint16_t chromosome_;
00043 uint32_t offset_;
00044 uint32_t length_;
00045 uint16_t ploidy_;
00046 int32_t score_;
00047 std::vector<uint16_t> alleleIndexes_;
00048 boost::array<std::string, 3> alleles_;
00049 boost::array<std::string, 2> alleleAlignments_;
00050
00051 reference::Range getRange() const;
00052
00055 bool isCompatible(
00056 uint16_t alleleIndex, const variants::Call& call,
00057 const reference::CrrFile& crr) const;
00058
00064 bool isCompatible(
00065 uint16_t alleleIndex,
00066 const reference::Range& range,
00067 const std::string& seq,
00068 bool ignoreAdjacentInsertions) const;
00069
00070 std::string getAlignment(size_t alleleIndex) const;
00071 static std::string cigarToAlignment(const std::string& cigar);
00072 };
00073
00074 struct DnbRecord
00075 {
00076 friend std::ostream& operator<< (std::ostream& out, const DnbRecord& r);
00077
00078 DnbRecord();
00079 std::string getId() const;
00080 static void bindToParser(util::DelimitedFile &parser,
00081 DnbRecord& rec, const reference::CrrFile& crr);
00082
00083 int32_t intervalId_;
00084 uint16_t chromosome_;
00085 std::string slide_;
00086 std::string lane_;
00087 uint16_t fileNumInLane_;
00088 uint32_t dnbOffsetInLaneFile_;
00089 uint8_t alleleIndex_;
00090 uint8_t side_;
00091 bool strand_;
00092 int32_t offsetInAllele_;
00093 std::string alleleAlignment_;
00094 boost::array<int32_t,2> offsetInReference_;
00095 boost::array<std::string,2> referenceAlignment_;
00096
00097 boost::array<int32_t,3> scoreAllele_;
00098 uint8_t mappingQuality_;
00099
00100 std::string sequence_;
00101 std::string scores_;
00102 };
00103
00104 EvidenceReader(const reference::CrrFile& crr,
00105 const GenomeMetadata& exp);
00106
00107 void seek(const reference::Range& r);
00108
00111 void seekToChr(uint16_t chr);
00112
00115 void nextInChr();
00116
00117 bool inInterval() const
00118 {
00119 return inInterval_;
00120 }
00121
00122 const IntervalRecord& getInterval() const
00123 {
00124 CGA_ASSERT(inInterval_);
00125 return intervalsFile_->rec_;
00126 }
00127
00128 const std::vector<DnbRecord>& getDnbs()
00129 {
00130 seekDnbs();
00131 return dnbs_;
00132 }
00133
00136 uint32_t getScore() const
00137 {
00138 if (!inInterval_)
00139 return 0;
00140 else
00141 return intervalsFile_->rec_.score_;
00142 }
00143
00148 uint32_t countSupportingDnbs(uint16_t alleleIndex, int32_t scoreThreshold);
00149
00150 private:
00151 struct IntervalsFile
00152 {
00153 std::string filename_;
00154 boost::shared_ptr<std::istream> f_;
00155 util::DelimitedFile parser_;
00156 IntervalRecord rec_;
00157
00158 IntervalsFile(const std::string& fn, const reference::CrrFile& crr);
00159 };
00160
00161 struct DnbsFile
00162 {
00163 std::string filename_;
00164 boost::shared_ptr<std::istream> f_;
00165 util::DelimitedFile parser_;
00166 DnbRecord rec_;
00167
00168 DnbsFile(const std::string& fn, const reference::CrrFile& crr);
00169 };
00170
00172 void openIntervals(uint16_t chr);
00173
00174 void openDnbs();
00175 void seekDnbs();
00176 bool nextDnbs();
00177
00178 const reference::CrrFile& crr_;
00179 const GenomeMetadata& exp_;
00180
00181 boost::scoped_ptr<IntervalsFile> intervalsFile_;
00182
00183 uint16_t dnbsChromosome_;
00184 int32_t dnbsIntervalId_;
00185 boost::scoped_ptr<DnbsFile> dnbsFile_;
00186 std::vector<DnbRecord> dnbs_;
00187
00188 bool inInterval_;
00189 };
00190
00191 } }
00192
00193 #endif // CGATOOLS_CGDATA_EVIDENCEREADER_HPP_