00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_GAPS_ESTIMATOR_HPP
00016 #define CGA_TOOLS_GAPS_ESTIMATOR_HPP
00017
00019
00020 #include "cgatools/core.hpp"
00021
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/cgdata/LibraryReader.hpp"
00024 #include "cgatools/cgdata/Dnb.hpp"
00025 #include "cgatools/util/BaseUtil.hpp"
00026
00027 namespace cgatools { namespace mapping { namespace GapEst {
00028
00029 inline bool isValidBase(char base) {
00030 return
00031 base=='A' || base=='C' || base=='G' || base=='T' ||
00032 base=='a' || base=='c' || base=='g' || base=='t';
00033 }
00034
00035 inline uint8_t baseTypeIndex(char base) {
00036 switch (base) {
00037 case 'A':
00038 case 'a':
00039 return 0;
00040 case 'C':
00041 case 'c':
00042 return 1;
00043 case 'G':
00044 case 'g':
00045 return 2;
00046 case 'T':
00047 case 't':
00048 return 3;
00049 default:
00050 return 4;
00051 }
00052 }
00054 class SequenceRetriever
00055 {
00056 public:
00057 static const size_t WOBBLE_SEQUENCE_BASE_COUNT = 10;
00058 typedef boost::array<char,WOBBLE_SEQUENCE_BASE_COUNT> Sequence;
00059
00060 virtual ~SequenceRetriever() { }
00061
00065 virtual void getSequence(Sequence& seq, int cloneEndOffset, int sequenceLength) const = 0;
00066 };
00067
00069 class NoCallSequenceRetriever : public SequenceRetriever
00070 {
00071 public:
00072 void getSequence(Sequence& seq, int cloneEndOffset, int sequenceLength) const;
00073 };
00074
00076 template <class TSequence>
00077 class TemplateSequenceRetriever : public SequenceRetriever
00078 {
00079 public:
00080 TemplateSequenceRetriever(const TSequence& sequence,
00081 bool isCircular,
00082 boost::uint32_t cloneEnd,
00083 int side,
00084 int strand)
00085 : sequence_(sequence),
00086 isCircular_(isCircular),
00087 cloneEnd_(cloneEnd),
00088 strand_(side == strand ? 0 : 1)
00089 {
00090 }
00091
00092 void getSequence(Sequence& seq, int cloneEndOffset, int sequenceLength) const;
00093
00094 private:
00095 const TSequence& sequence_;
00096 bool isCircular_;
00097 int cloneEnd_;
00098 int strand_;
00099 };
00100
00101 template <class TSequence>
00102 void TemplateSequenceRetriever<TSequence>::
00103 getSequence(Sequence& seq, int cloneEndOffset, int sequenceLength) const
00104 {
00105 if (0 == strand_)
00106 {
00107 int offset = cloneEnd_ + cloneEndOffset;
00108 for(int ii=0; ii<sequenceLength; ii++, offset++)
00109 {
00110 if (offset >= (int)sequence_.endSequencePosition())
00111 {
00112 if (!isCircular_)
00113 seq[ii] = 'N';
00114 else
00115 seq[ii] = sequence_[offset];
00116 }
00117 else if (offset < sequence_.startSequencePosition())
00118 {
00119 if (!isCircular_)
00120 seq[ii] = 'N';
00121 else
00122 seq[ii] = sequence_[offset+sequence_.endSequencePosition()];
00123 }
00124 else
00125 seq[ii] = sequence_[offset];
00126 }
00127 }
00128 else
00129 {
00130 int offset = cloneEnd_ - cloneEndOffset - 1;
00131 for(int ii=0; ii<sequenceLength; ii++, offset--)
00132 {
00133 if (offset >= (int)sequence_.endSequencePosition())
00134 {
00135 if (!isCircular_)
00136 seq[ii] = 'N';
00137 else
00138 seq[ii] = util::baseutil::complement(sequence_[offset]);
00139 }
00140 else if (offset < sequence_.startSequencePosition())
00141 {
00142 if (!isCircular_)
00143 seq[ii] = 'N';
00144 else
00145 seq[ii] = util::baseutil::
00146 complement(sequence_[offset+sequence_.endSequencePosition()]);
00147 }
00148 else
00149 seq[ii] = util::baseutil::complement(sequence_[offset]);
00150 }
00151 }
00152 }
00153
00154 class DependentGaps;
00155 struct GapsEstimatorData;
00156
00157 typedef cgdata::DnbStructure ArmData;
00158
00161 class GapsEstimator
00162 {
00163 public:
00164 typedef cgdata::SmallGapTuple GapTuple;
00165 typedef SequenceRetriever::Sequence Sequence;
00166
00167 GapsEstimator(const ArmData& armData, size_t side, boost::uint32_t seed);
00168
00169
00170 ~GapsEstimator();
00171
00173 void loadGaps(const std::string& gapFile0,const std::string& gapFile1);
00174
00176 double getProbability(GapTuple gaps, const SequenceRetriever& sr) const;
00177
00179 double getProbability(GapTuple gaps) const;
00180
00183 void getGapTuples(double fraction, std::vector<GapTuple>& result) const;
00184
00187 GapTuple randomGaps(const SequenceRetriever& sr) const;
00188
00190 size_t gapCount() const;
00191
00192 private:
00193 typedef GapEst::DependentGaps DependentGaps;
00194 typedef GapEst::GapsEstimatorData GapsEstimatorData;
00195
00196 std::vector<DependentGaps> dg_;
00197 boost::shared_ptr<GapsEstimatorData> data_;
00198 size_t side_;
00199 };
00200
00201 }}}
00202
00203 #endif // end of ifndef CGA_TOOLS_GAPS_ESTIMATOR_HPP