00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_ALLELE_ALIGNMENT_HPP_
00016 #define CGA_TOOLS_ALLELE_ALIGNMENT_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/cgdata/EvidenceReader.hpp"
00024 #include "cgatools/util/BaseUtil.hpp"
00025
00026 namespace cgatools { namespace mapping {
00027
00031 class AlleleSequenceAligner
00032 {
00033 public:
00034 AlleleSequenceAligner(const reference::CrrFile& reference)
00035 :reference_(reference)
00036 ,chrSequence_(NULL)
00037 ,allele_(NULL)
00038 ,alleleLength_(0)
00039 {}
00040
00042 void setInterval(uint16_t chr, size_t begin, size_t length, const std::string* allele);
00043
00045 char operator[] (int i) const
00046 {
00047 if (i<0)
00048 return chrSequence_->getUnambiguousBase(begin_+i);
00049 if (i >= int(alleleLength_))
00050 return chrSequence_->getUnambiguousBase(begin_+length_-alleleLength_+i);
00051 return (*allele_)[i];
00052 }
00053
00055 int endSequencePosition() const {
00056 return chrSequence_->length()-length_+alleleLength_-begin_;
00057 }
00058
00059 int startSequencePosition() const {
00060 return -int(begin_);
00061 }
00062
00063 const reference::CompactDnaSequence& getChrSequence() const {return *chrSequence_;}
00064 protected:
00065 const reference::CrrFile& reference_;
00066 const reference::CompactDnaSequence* chrSequence_;
00067 const std::string * allele_;
00068 size_t alleleLength_;
00069 uint16_t chr_;
00070 int64_t begin_;
00071 size_t length_;
00072 };
00073
00076 class DnbSequenceIterator
00077 {
00078 public:
00079
00080
00081 DnbSequenceIterator(const std::string & sequence,
00082 size_t startFrom, int direction=1, bool needsComplementing=false)
00083 :sequence_(sequence)
00084 ,position_(startFrom)
00085 ,direction_(direction)
00086 ,needsComplementing_(needsComplementing && direction_<0)
00087 {
00088 CGA_ASSERT_LE(0,position_);
00089 CGA_ASSERT_L(position_,sequence_.size());
00090 }
00091
00092 DnbSequenceIterator& operator++ ()
00093 {
00094 position_+=direction_;
00095 return *this;
00096 }
00097
00099 char operator* () const {
00100 CGA_ASSERT_LE(0,position_);
00101 CGA_ASSERT_L(position_,sequence_.size());
00102 if (needsComplementing_)
00103 return util::baseutil::complement(sequence_[position_]);
00104 else
00105 return sequence_[position_];
00106 }
00107
00108 protected:
00109 std::string sequence_;
00110 size_t position_;
00111 int direction_;
00112 bool needsComplementing_;
00113 };
00114
00118 class GapAndConcordanceExtractor
00119 {
00120 public:
00121 GapAndConcordanceExtractor(const AlleleSequenceAligner& alleleAligner)
00122 :
00123 concordance_(-1)
00124 ,endOffsetInAllele_(0)
00125 ,mismatches_(0)
00126 ,alleleAligner_(alleleAligner)
00127 ,halfDnbSize_(35)
00128 {}
00129
00130 void run( uint8_t side, bool strand, int32_t offsetInAllele, const std::string & alignmentCigar,
00131 const std::string & readSequence, const std::string & readScores);
00132
00133 std::vector<int8_t> gaps_;
00134 double concordance_;
00135 int32_t endOffsetInAllele_;
00136 size_t mismatches_;
00137
00138 protected:
00139 const AlleleSequenceAligner& alleleAligner_;
00140 size_t halfDnbSize_;
00141 };
00142
00143 namespace GapEst {
00144 class GapsEstimator;
00145 };
00146
00149 class GapProbabilityAndConcordanceExtractor : public GapAndConcordanceExtractor
00150 {
00151 public:
00152
00154 GapProbabilityAndConcordanceExtractor(const AlleleSequenceAligner& alleleAligner,
00155 boost::array<boost::shared_ptr<GapEst::GapsEstimator>,2> gapsEstimators)
00156 : GapAndConcordanceExtractor(alleleAligner),gapsEstimators_(gapsEstimators)
00157 {}
00158 void run( uint8_t side, bool strand, int32_t offsetInAllele, const std::string & alignmentCigar,
00159 const std::string & readSequence, const std::string & readScores);
00160
00161 double gapProbability_;
00162 protected:
00163 boost::array<boost::shared_ptr<GapEst::GapsEstimator>,2> gapsEstimators_;
00164 };
00165
00166 } }
00167
00168 #endif // CGA_TOOLS_ALLELE_ALIGNMENT_HPP_