00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGATOOLS_CGDATA_EVIDENCEREADER_HPP_
00016 #define CGATOOLS_CGDATA_EVIDENCEREADER_HPP_ 1
00017 
00019 
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/DelimitedFile.hpp"
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/cgdata/GenomeMetadata.hpp"
00024 #include "cgatools/variants/Call.hpp"
00025 
00026 #include <boost/noncopyable.hpp>
00027 #include <boost/scoped_ptr.hpp>
00028 #include <boost/shared_ptr.hpp>
00029 
00030 namespace cgatools { namespace cgdata {
00031 
00032     class EvidenceReader : boost::noncopyable
00033     {
              // Reader giving access to evidence intervals (IntervalRecord) and
              // to the DNB records (DnbRecord) that belong to them.
              // Derives from boost::noncopyable, so instances cannot be copied.
              //
              // NOTE(review): crr_ and exp_ are held as references. Presumably the
              // constructor binds them to its arguments, in which case both
              // objects must outlive the reader -- confirm in the .cpp.
00034     public:
              /// A single evidence-interval record. An instance is held by the
              /// private IntervalsFile helper next to its util::DelimitedFile
              /// parser.
              /// NOTE(review): the meaning, units and coordinate base of the
              /// individual fields are not visible in this header; confirm them
              /// against bindToParser() in the .cpp before relying on them.
00035         struct IntervalRecord
00036         {
00037             IntervalRecord();

                  /// Presumably registers the fields of @p rec with @p parser so
                  /// that reading a line fills in the record; @p crr is
                  /// presumably used to resolve chromosome names -- TODO confirm.
00038             static void bindToParser(
00039                 util::DelimitedFile &parser, IntervalRecord& rec, const reference::CrrFile& crr);
00040 
00041             int32_t intervalId_;
00042             uint16_t chromosome_;
00043             uint32_t offset_;
00044             uint32_t length_;
00045             uint16_t ploidy_;
                  // Signed here; note that EvidenceReader::getScore() returns it
                  // as uint32_t.
00046             int32_t score_;
00047             std::vector<uint16_t> alleleIndexes_;
                  // Three allele strings but only two alignment strings;
                  // presumably one allele is the reference and needs no
                  // alignment -- TODO confirm.
00048             boost::array<std::string, 3> alleles_;
00049             boost::array<std::string, 2> alleleAlignments_;
00050 
                  /// Returns the interval as a reference::Range (presumably built
                  /// from chromosome_, offset_ and length_ -- confirm in .cpp).
00051             reference::Range getRange() const;
00052 
                  /// Tests whether @p call is compatible with the allele selected
                  /// by @p alleleIndex. The exact compatibility rule is defined
                  /// in the .cpp and is not visible here.
00055             bool isCompatible(
00056                 uint16_t alleleIndex, const variants::Call& call,
00057                 const reference::CrrFile& crr) const;
00058 
                  /// Overload taking an explicit reference range and sequence
                  /// instead of a variants::Call.
                  /// NOTE(review): the effect of @p ignoreAdjacentInsertions is
                  /// only suggested by its name; confirm in the .cpp.
00064             bool isCompatible(
00065                 uint16_t alleleIndex,
00066                 const reference::Range& range,
00067                 const std::string& seq,
00068                 bool ignoreAdjacentInsertions) const;
00069 
                  /// Returns an alignment string for the given allele.
                  /// NOTE(review): how @p alleleIndex maps onto the two-element
                  /// alleleAlignments_ array is not visible here.
00070             std::string getAlignment(size_t alleleIndex) const;

                  /// Static helper that derives an alignment string from
                  /// @p cigar; the input and output formats are defined in the
                  /// .cpp.
00071             static std::string cigarToAlignment(const std::string& cigar);
00072         };
00073 
              /// A single DNB evidence record. An instance is held by the private
              /// DnbsFile helper, and EvidenceReader collects them in dnbs_.
              /// NOTE(review): field semantics (e.g. the encoding of side_ and
              /// strand_) are not visible in this header; confirm in the .cpp.
00074         struct DnbRecord
00075         {
                  /// Stream output for a record; declared as a friend and defined
                  /// outside this header.
00076             friend std::ostream& operator<< (std::ostream& out, const DnbRecord& r);
00077 
00078             DnbRecord();

                  /// Returns a string identifier for the DNB (presumably composed
                  /// from slide_, lane_, fileNumInLane_ and dnbOffsetInLaneFile_
                  /// -- TODO confirm).
00079             std::string getId() const;

                  /// Counterpart of IntervalRecord::bindToParser() for DNB
                  /// records; presumably registers the fields of @p rec with
                  /// @p parser -- TODO confirm.
00080             static void bindToParser(util::DelimitedFile &parser, 
00081                                         DnbRecord& rec, const reference::CrrFile& crr);
00082 
00083             int32_t     intervalId_;
00084             uint16_t chromosome_;
00085             std::string slide_;
00086             std::string lane_;
00087             uint16_t    fileNumInLane_;
00088             uint32_t    dnbOffsetInLaneFile_;
00089             uint8_t     alleleIndex_;
00090             uint8_t     side_;
00091             bool        strand_;
00092             int32_t                     offsetInAllele_;
00093             std::string                 alleleAlignment_;
                  // Two entries each; presumably one per mate/arm of the DNB --
                  // TODO confirm.
00094             boost::array<int32_t,2>     offsetInReference_;
00095             boost::array<std::string,2> referenceAlignment_;
00096 
                  // Three entries, the same count as IntervalRecord::alleles_.
00097             boost::array<int32_t,3>     scoreAllele_;
00098             uint8_t                     mappingQuality_;
00099 
00100             std::string sequence_;
00101             std::string scores_;
00102         };
00103 
              /// Constructs a reader from a reference (@p crr) and genome
              /// metadata (@p exp). See the lifetime note at the top of the
              /// class regarding both arguments.
00104         EvidenceReader(const reference::CrrFile& crr,
00105                        const GenomeMetadata& exp);
00106 
              /// Positions the reader relative to range @p r.
              /// NOTE(review): whether the reader ends up on an overlapping
              /// interval or before it is not visible here; check inInterval()
              /// after the call.
00107         void seek(const reference::Range& r);
00108 
              /// Positions the reader on chromosome @p chr.
              /// NOTE(review): presumably leaves the reader before the first
              /// interval of the chromosome -- confirm in the .cpp.
00111         void seekToChr(uint16_t chr);
00112 
              /// Advances within the current chromosome; presumably requires a
              /// prior seekToChr() or seek() -- TODO confirm.
00115         void nextInChr();
00116 
              /// Returns the inInterval_ flag, i.e. whether the reader is
              /// currently positioned on an interval.
00117         bool inInterval() const
00118         {
00119             return inInterval_;
00120         }
00121 
              /// Returns the current interval record.
              /// Precondition: inInterval() is true (checked with CGA_ASSERT).
              /// The reference points into intervalsFile_, so it should not be
              /// kept across calls that reposition the reader.
00122         const IntervalRecord& getInterval() const
00123         {
00124             CGA_ASSERT(inInterval_);
00125             return intervalsFile_->rec_;
00126         }
00127 
              /// Returns the DNB records held in dnbs_.
              /// Not const: it calls seekDnbs() first, which may update dnbs_.
00128         const std::vector<DnbRecord>& getDnbs()
00129         {
00130             seekDnbs();
00131             return dnbs_;
00132         }
00133 
              /// Returns the score of the current interval, or 0 when the reader
              /// is not positioned on an interval.
              /// NOTE(review): IntervalRecord::score_ is int32_t while the return
              /// type is uint32_t, so a negative score would be converted to a
              /// large unsigned value. Confirm scores are never negative.
00136         uint32_t getScore() const
00137         {
00138             if (!inInterval_)
00139                 return 0;
00140             else
00141                 return intervalsFile_->rec_.score_;
00142         }
00143 
              /// Presumably counts the DNBs of the current interval that support
              /// allele @p alleleIndex by at least @p scoreThreshold; the exact
              /// criterion is defined in the .cpp -- TODO confirm.
00148         uint32_t countSupportingDnbs(uint16_t alleleIndex, int32_t scoreThreshold);
00149 
00150     private:
              // Bundles everything that belongs to one open intervals file:
              // its name, the input stream, the delimited-file parser and the
              // record the reader exposes through getInterval().
00151         struct IntervalsFile
00152         {
00153             std::string filename_;
00154             boost::shared_ptr<std::istream> f_;
00155             util::DelimitedFile parser_;
00156             IntervalRecord rec_;
00157 
00158             IntervalsFile(const std::string& fn, const reference::CrrFile& crr);
00159         };
00160 
              // Same bundle as IntervalsFile, for a DNB evidence file.
00161         struct DnbsFile
00162         {
00163             std::string filename_;
00164             boost::shared_ptr<std::istream> f_;
00165             util::DelimitedFile parser_;
00166             DnbRecord rec_;
00167 
00168             DnbsFile(const std::string& fn, const reference::CrrFile& crr);
00169         };
00170 
              // Presumably opens the intervals file for chromosome `chr` into
              // intervalsFile_ -- confirm in the .cpp.
00172         void openIntervals(uint16_t chr);
00173 
              // Helpers managing dnbsFile_ / dnbs_. seekDnbs() is what getDnbs()
              // calls before returning dnbs_; the exact division of work between
              // the three is defined in the .cpp.
00174         void openDnbs();
00175         void seekDnbs();
00176         bool nextDnbs();
00177 
              // Held by reference, not owned (see the lifetime note at the top
              // of the class).
00178         const reference::CrrFile& crr_;
00179         const GenomeMetadata& exp_;
00180 
              // Currently open intervals file, owned through scoped_ptr.
00181         boost::scoped_ptr<IntervalsFile> intervalsFile_;
00182 
              // State of the DNB side of the reader. Presumably
              // dnbsChromosome_/dnbsIntervalId_ record which interval dnbs_
              // currently corresponds to -- TODO confirm.
00183         uint16_t dnbsChromosome_;
00184         int32_t dnbsIntervalId_;
00185         boost::scoped_ptr<DnbsFile> dnbsFile_;
00186         std::vector<DnbRecord> dnbs_;
00187 
              // True while the reader is positioned on an interval; guards
              // getInterval() and getScore().
00188         bool inInterval_;
00189     };
00190 
00191 } } 
00192 
00193 #endif // CGATOOLS_CGDATA_EVIDENCEREADER_HPP_