00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_SAM_RECORD_HPP_
00016 #define CGA_TOOLS_SAM_RECORD_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021
00022 #include <vector>
00023 #include <string>
00024
00025 namespace cgatools { namespace reference {
00026 class CrrFile;
00027 }}
00028
00029 namespace cgatools { namespace mapping {
00030
00031 typedef std::pair<uint16_t,uint16_t> UInt16Pair;
00032
00033 class SamRecord
00034 {
00035 friend std::ostream& operator<< (std::ostream& ost, const SamRecord& r);
00036 public:
00037 enum RecordType
00038 {
00039 DEFAULT = 0,
00040 BASE_MAPPING,
00041 EVIDENCE,
00042 EVIDENCE_CACHE
00043 };
00044
00045 typedef std::vector<SamRecord *> SamRecords;
00046
00047 SamRecord(
00048 const std::string& readName, bool isMapped, bool onNegativeStrand, bool isPrimary,
00049 uint8_t side, uint16_t chr, uint32_t position,
00050 const std::string& extCigar,
00051 uint8_t mappingQuality,
00052 const std::string& fullReadSequence,
00053 const std::string& fullReadScores,
00054 UInt16Pair sequenceStartAndLength
00055 );
00056
00057 virtual ~SamRecord() {}
00058
00059 bool correctPosition(const reference::CrrFile& reference);
00060
00061 RecordType typeId_;
00062
00063 std::string readName_;
00064 bool isMapped_;
00065 bool onNegativeStrand_;
00066 bool isPrimary_;
00067 bool isGroupPrimary_;
00068 uint8_t side_;
00069 uint16_t chr_;
00070 uint32_t position_;
00071 std::string extCigar_;
00072 uint8_t mappingQuality_;
00073
00074 std::string fullReadSequence_;
00075 std::string fullReadScores_;
00076 bool isSvCandidate_;
00077
00078
00079 UInt16Pair sequenceStartAndLength_[2];
00080
00081 SamRecords mates_;
00082
00083 virtual bool isConsistent() const;
00084 protected:
00085 };
00086
00089 class SamFileHeaderBlock
00090 {
00091 friend std::ostream& operator<< (std::ostream& ostr,const SamFileHeaderBlock& block);
00092 public:
00093 typedef std::vector<SamFileHeaderBlock> Children;
00094
00095 SamFileHeaderBlock(std::string id = "None")
00096 :id_(id),type_(id),separator_("\t")
00097 {}
00098 SamFileHeaderBlock(const std::string &id,
00099 const std::string& type, const std::string& separator, const std::string& value)
00100 :id_(id),type_(type),separator_(separator),value_(value)
00101 {}
00102 SamFileHeaderBlock(
00103 const std::string &id, const std::string& separator, const std::string& value)
00104 :id_(id),type_(id),separator_(separator),value_(value)
00105 {}
00106
00107 const SamFileHeaderBlock& getChild(const std::string &child) const;
00108
00109 bool operator== (const SamFileHeaderBlock& other) const {
00110 return id_ == other.id_;
00111 }
00112
00113 SamFileHeaderBlock& get(const SamFileHeaderBlock& b);
00114 SamFileHeaderBlock& add(const SamFileHeaderBlock& b);
00115
00116 std::string id_;
00117 std::string type_;
00118 std::string separator_;
00119 std::string value_;
00120 Children children_;
00121 };
00122
00123 class EvidenceSamRecord;
00124 class SamSequenceSplitter;
00125
00128 class SamRecordGenerator
00129 {
00130 public:
00131 static const char SAM_SEPARATOR = '\t';
00132
00133 SamRecordGenerator(
00134 std::ostream& outSamFile,
00135 const reference::CrrFile& reference,
00136 bool mateSvCandidates,
00137 bool addMateSequenceAndScore,
00138 bool addMateSequenceAndScoreIfMateUnmapped,
00139 bool printAlleleInfo
00140 )
00141 :
00142 outSamFile_(outSamFile), reference_(reference),
00143 mateSvCandidates_(mateSvCandidates),
00144 addMateSequenceAndScore_(addMateSequenceAndScore),
00145 addMateSequenceAndScoreIfMateUnmapped_(addMateSequenceAndScoreIfMateUnmapped),
00146 printAlleleInfo_(printAlleleInfo)
00147 {}
00148
00149 void mappingRecordToSam(const SamRecord& record);
00150
00151 void setHeader(const SamFileHeaderBlock& header);
00152 protected:
00153
00155 void printMateSequence(const std::string &mateSeq, const std::string &mateScore);
00156
00157 void printReadGroup();
00158 void printNegativeGapTag(const mapping::SamSequenceSplitter &splitter);
00159
00161 void flagAsSVCandidate();
00163 void printAlleleInfoTag(const mapping::EvidenceSamRecord& evidenceRecord);
00164
00165 uint32_t adjustedPosition(const SamRecord& record ) const;
00166
00167 std::ostream & outSamFile_;
00168 const reference::CrrFile& reference_;
00169 bool mateSvCandidates_;
00170 bool addMateSequenceAndScore_;
00171 bool addMateSequenceAndScoreIfMateUnmapped_;
00172 bool printAlleleInfo_;
00173 SamFileHeaderBlock header_;
00174 std::string readGroup_;
00175
00176 };
00177
00179 class Cigar {
00180 friend std::ostream& operator<< (std::ostream& out, const Cigar& cigar);
00181 public:
00182 class CigarElement {
00183 public:
00184 CigarElement(char type, size_t length)
00185 : type_(type), length_(length) {}
00186 int getSequenceLength() const;
00187 bool operator== (const CigarElement& e) const {return type_==e.type_ && length_==e.length_;}
00188
00189 char type_;
00190 size_t length_;
00191 };
00192 typedef std::vector<CigarElement> ParsedCigar;
00193
00194 Cigar(bool mergeNeighbours = false, bool removeZeros = false)
00195 :mergeNeighbours_(mergeNeighbours), removeZeros_(removeZeros)
00196 {}
00197
00198 Cigar(const std::string &cigar, bool mergeNeighbours = false, bool removeZeros = false)
00199 :mergeNeighbours_(mergeNeighbours), removeZeros_(removeZeros)
00200 {
00201 parse(cigar);
00202 }
00203
00204 void parse(const std::string &cigar);
00205
00207 size_t getSequenceLength() const;
00209 size_t getReferenceLength() const;
00210
00211 const ParsedCigar& getParsedCigar() const {return parsedCigar_;}
00212
00213 void add(const CigarElement &e);
00214
00215 CigarElement &operator[](size_t i) {return parsedCigar_[i];}
00216 CigarElement &back() {return parsedCigar_.back();}
00217
00218 size_t size() const {return parsedCigar_.size();}
00219
00220 void trancatePaddings();
00221
00223 Cigar pack() const;
00224
00225 bool operator== (const Cigar& c) const {return parsedCigar_==c.parsedCigar_;}
00226 protected:
00227 void push_back(const CigarElement &e) {parsedCigar_.push_back(e);}
00229 void add_back(const CigarElement &e);
00230
00231 bool mergeNeighbours_;
00232 bool removeZeros_;
00233 ParsedCigar parsedCigar_;
00234 };
00235
00236 } }
00237
00238 #endif // CGA_TOOLS_SAM_RECORD_HPP_