00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_SAM_RECORD_HPP_
00016 #define CGA_TOOLS_SAM_RECORD_HPP_ 1
00017 
00019 
00020 #include "cgatools/core.hpp"
00021 
00022 #include <vector>
00023 #include <string>
00024 
00025 namespace cgatools { namespace reference {
00026     class CrrFile;
00027 }}
00028 
00029 namespace cgatools { namespace mapping {
00030 
00031     typedef std::pair<uint16_t,uint16_t> UInt16Pair;
00032 
00033     class SamRecord
00034     {
00035         friend std::ostream& operator<< (std::ostream& ost, const SamRecord& r);
00036     public:
00037         enum RecordType
00038         {
00039             DEFAULT = 0,
00040             BASE_MAPPING,
00041             EVIDENCE,
00042             EVIDENCE_CACHE
00043         };
00044 
00045         typedef std::vector<SamRecord *> SamRecords;
00046 
00047         SamRecord(
00048             const std::string& readName, bool isMapped, bool onNegativeStrand, bool isPrimary,
00049             uint8_t side, uint16_t chr, uint32_t position,
00050             const std::string& extCigar,
00051             uint8_t mappingQuality,
00052             const std::string& fullReadSequence,
00053             const std::string& fullReadScores,
00054             UInt16Pair sequenceStartAndLength
00055             );
00056 
00057         virtual ~SamRecord() {}
00058 
00059         bool correctPosition(const reference::CrrFile& reference);
00060 
00061         RecordType  typeId_;
00062 
00063         std::string readName_;
00064         bool        isMapped_;
00065         bool        onNegativeStrand_;
00066         bool        isPrimary_;
00067         bool        isGroupPrimary_;
00068         uint8_t     side_;
00069         uint16_t    chr_;
00070         uint32_t    position_;
00071         std::string extCigar_;
00072         uint8_t     mappingQuality_;
00073 
00074         std::string fullReadSequence_;
00075         std::string fullReadScores_;
00076         bool        isSvCandidate_;
00077 
00078         
00079         UInt16Pair sequenceStartAndLength_[2];
00080 
00081         SamRecords  mates_;
00082 
00083         virtual bool isConsistent() const;
00084     protected:
00085     };
00086     
00089     class SamFileHeaderBlock
00090     {
00091         friend std::ostream& operator<< (std::ostream& ostr,const SamFileHeaderBlock& block);
00092     public:
00093         typedef std::vector<SamFileHeaderBlock> Children;
00094 
00095         SamFileHeaderBlock(std::string id = "None")
00096             :id_(id),type_(id),separator_("\t")
00097         {}
00098         SamFileHeaderBlock(const std::string &id, 
00099             const std::string& type, const std::string& separator, const std::string& value)
00100             :id_(id),type_(type),separator_(separator),value_(value)
00101         {}
00102         SamFileHeaderBlock(
00103             const std::string &id, const std::string& separator, const std::string& value)
00104             :id_(id),type_(id),separator_(separator),value_(value)
00105         {}
00106 
00107         const SamFileHeaderBlock& getChild(const std::string &child) const;
00108 
00109         bool operator== (const SamFileHeaderBlock& other) const {
00110             return id_ == other.id_;
00111         }
00112 
00113         SamFileHeaderBlock& get(const SamFileHeaderBlock& b);
00114         SamFileHeaderBlock& add(const SamFileHeaderBlock& b);
00115 
00116         std::string id_;
00117         std::string type_;
00118         std::string separator_;
00119         std::string value_;
00120         Children children_;
00121     };
00122 
00123     class EvidenceSamRecord;
00124     class SamSequenceSplitter;
00125 
00128     class SamRecordGenerator
00129     {
00130     public:
00131         static const char SAM_SEPARATOR = '\t';
00132 
00133         SamRecordGenerator(
00134             std::ostream& outSamFile, 
00135             const reference::CrrFile& reference,
00136             bool mateSvCandidates, 
00137             bool addMateSequenceAndScore,
00138             bool addMateSequenceAndScoreIfMateUnmapped,
00139             bool printAlleleInfo
00140             )
00141         :   
00142             outSamFile_(outSamFile), reference_(reference),
00143             mateSvCandidates_(mateSvCandidates),
00144             addMateSequenceAndScore_(addMateSequenceAndScore),
00145             addMateSequenceAndScoreIfMateUnmapped_(addMateSequenceAndScoreIfMateUnmapped),
00146             printAlleleInfo_(printAlleleInfo)
00147         {}
00148 
00149         void mappingRecordToSam(const SamRecord& record);
00150 
00151         void setHeader(const SamFileHeaderBlock& header);
00152     protected:
00153 
00155         void printMateSequence(const std::string &mateSeq, const std::string &mateScore);
00156 
00157         void printReadGroup();
00158         void printNegativeGapTag(const mapping::SamSequenceSplitter &splitter);
00159     
00161         void flagAsSVCandidate();
00163         void printAlleleInfoTag(const mapping::EvidenceSamRecord& evidenceRecord);
00164 
00165         uint32_t adjustedPosition(const SamRecord& record ) const;
00166 
00167         std::ostream &  outSamFile_;
00168         const reference::CrrFile& reference_;
00169         bool                mateSvCandidates_;
00170         bool                addMateSequenceAndScore_;
00171         bool                addMateSequenceAndScoreIfMateUnmapped_;
00172         bool                printAlleleInfo_;
00173         SamFileHeaderBlock  header_;
00174         std::string         readGroup_;
00175 
00176     };
00177 
00179     class Cigar {
00180         friend std::ostream& operator<< (std::ostream& out, const Cigar& cigar);
00181     public:
00182         class CigarElement {
00183         public:
00184             CigarElement(char type, size_t length)
00185                 : type_(type), length_(length) {}
00186             int getSequenceLength() const;
00187             bool operator== (const CigarElement& e) const {return type_==e.type_ && length_==e.length_;}
00188 
00189             char    type_;
00190             size_t  length_;
00191         };
00192         typedef std::vector<CigarElement> ParsedCigar;
00193 
00194         Cigar(bool mergeNeighbours = false, bool removeZeros = false) 
00195             :mergeNeighbours_(mergeNeighbours), removeZeros_(removeZeros)
00196         {}
00197 
00198         Cigar(const std::string &cigar, bool mergeNeighbours = false, bool removeZeros = false) 
00199             :mergeNeighbours_(mergeNeighbours), removeZeros_(removeZeros)
00200         {
00201             parse(cigar);
00202         }
00203 
00204         void parse(const std::string &cigar);
00205 
00207         size_t getSequenceLength() const;
00209         size_t getReferenceLength() const;
00210 
00211         const ParsedCigar& getParsedCigar() const {return parsedCigar_;}
00212 
00213         void add(const CigarElement &e);
00214 
00215         CigarElement &operator[](size_t i) {return parsedCigar_[i];}
00216         CigarElement &back() {return parsedCigar_.back();}
00217 
00218         size_t size() const {return parsedCigar_.size();}
00219         
00220         void trancatePaddings();
00221 
00223         Cigar pack() const;
00224 
00225         bool operator== (const Cigar& c) const {return parsedCigar_==c.parsedCigar_;}
00226     protected:
00227         void push_back(const CigarElement &e) {parsedCigar_.push_back(e);}
00229         void add_back(const CigarElement &e);
00230 
00231         bool mergeNeighbours_;
00232         bool removeZeros_;
00233         ParsedCigar parsedCigar_;
00234     };
00235 
00236 } } 
00237 
00238 #endif // CGA_TOOLS_SAM_RECORD_HPP_