00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_SAM_RECORD_HPP_
00016 #define CGA_TOOLS_SAM_RECORD_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "SamOptions.hpp"
00022
00023 #include <vector>
00024 #include <string>
00025 #include <map>
00026 #include <boost/shared_ptr.hpp>
00027
// Forward declaration only: this header refers to reference::CrrFile purely
// through const references, so the full class definition is not needed here.
namespace cgatools { namespace reference {
    class CrrFile;
}}
00031
00032 namespace cgatools { namespace mapping {
00033
/// Pair of two 16-bit unsigned values.
/// SamRecord uses it for its sequenceStartAndLength_ member and for the
/// matching constructor argument.
typedef std::pair<uint16_t, uint16_t> UInt16Pair;
00035
00036 class SamRecord
00037 {
00038 friend std::ostream& operator<< (std::ostream& ost, const SamRecord& r);
00039 public:
00040 enum RecordType
00041 {
00042 DEFAULT = 0,
00043 BASE_MAPPING,
00044 EVIDENCE,
00045 EVIDENCE_CACHE
00046 };
00047
00048 typedef std::vector<SamRecord *> SamRecords;
00049 typedef std::vector<const SamRecord *> ConstSamRecords;
00050
00051 SamRecord(
00052 const std::string& readName, bool isMapped, bool onNegativeStrand, bool isPrimary,
00053 uint8_t side, uint16_t chr, int32_t position,
00054 const std::string& extCigar,
00055 uint8_t mappingQuality,
00056 bool isConsistentMapQ,
00057 const std::string& fullReadSequence,
00058 const std::string& fullReadScores,
00059 UInt16Pair sequenceStartAndLength
00060 );
00061
00062 virtual ~SamRecord() {}
00063
00064 bool correctPosition(const reference::CrrFile& reference);
00065
00066 RecordType typeId_;
00067
00068 std::string readName_;
00069 bool isMapped_;
00070 bool onNegativeStrand_;
00071 bool isPrimary_;
00072 bool isGroupPrimary_;
00073 uint8_t side_;
00074 uint16_t chr_;
00075 int32_t position_;
00076 std::string extCigar_;
00077
00078 std::string fullReadSequence_;
00079 std::string fullReadScores_;
00080 bool isSvCandidate_;
00081
00082
00083 UInt16Pair sequenceStartAndLength_[2];
00084
00085 SamRecords mates_;
00086 SamRecords alternatives_;
00087
00088 uint8_t getMappingQuality() const {return mappingQuality_;}
00089 void setMappingQuality(uint8_t value, bool isConsistent) {
00090 mappingQuality_ = value; isConsistentMapQ_ = isConsistent;
00091 }
00092
00093 bool isConsistent() const {return isConsistentMapQ_;}
00094 protected:
00095 uint8_t mappingQuality_;
00096 bool isConsistentMapQ_;
00097 };
00098
/// One node of a SAM file header: an id, a type, a separator, a value and a
/// list of child blocks of the same class.
///
/// All fields are public. getChild(), get(), add() and the stream operator are
/// defined outside this header.
class SamFileHeaderBlock
{
    friend std::ostream& operator<< (std::ostream& ostr, const SamFileHeaderBlock& block);
public:
    typedef std::vector<SamFileHeaderBlock> Children;

    /// Creates a block whose type equals its id, with a tab separator and an
    /// empty value. With no argument the id (and type) is "None".
    ///
    /// The id is taken by const reference, like the other constructors, so it
    /// is not copied an extra time on the way in.
    SamFileHeaderBlock(const std::string& id = "None")
        : id_(id), type_(id), separator_("\t")
    {}

    /// Creates a block with every textual field given explicitly.
    SamFileHeaderBlock(const std::string& id,
                       const std::string& type,
                       const std::string& separator,
                       const std::string& value)
        : id_(id), type_(type), separator_(separator), value_(value)
    {}

    /// Creates a block whose type equals its id.
    SamFileHeaderBlock(const std::string& id,
                       const std::string& separator,
                       const std::string& value)
        : id_(id), type_(id), separator_(separator), value_(value)
    {}

    /// Defined elsewhere.
    /// NOTE(review): presumably looks a child up by id; behavior when no such
    /// child exists is not visible here.
    const SamFileHeaderBlock& getChild(const std::string& child) const;

    /// Two blocks compare equal when their ids match; type, separator, value
    /// and children are deliberately not part of the comparison.
    bool operator== (const SamFileHeaderBlock& other) const
    {
        return id_ == other.id_;
    }

    /// Defined elsewhere.
    /// NOTE(review): presumably get() returns the child matching b (possibly
    /// creating it) and add() appends or merges b; confirm in the .cpp.
    SamFileHeaderBlock& get(const SamFileHeaderBlock& b);
    SamFileHeaderBlock& add(const SamFileHeaderBlock& b);

    std::string id_;
    std::string type_;
    std::string separator_;
    std::string value_;
    Children    children_;
};
00134
// Forward declarations: SamRecordGenerator only takes these types by const
// reference in its member declarations, so their definitions are not needed
// in this header.
class EvidenceSamRecord;
class SamSequenceSplitter;
00137
00140 class SamRecordGenerator
00141 {
00142 public:
00143 class OutputFileDescriptor
00144 {
00145 public:
00146 OutputFileDescriptor(std::ostream & outStream)
00147 :hasHeader_(false), hasRecords_(false), outStream_(outStream)
00148 {}
00149 ~OutputFileDescriptor();
00150
00151 void writeHeader(const SamFileHeaderBlock& header);
00152
00153 bool hasHeader_;
00154 bool hasRecords_;
00155 std::ostream & outStream_;
00156 };
00157
00158 static const char SAM_SEPARATOR = '\t';
00159
00160 SamRecordGenerator(
00161 std::ostream& outSamFile,
00162 const reference::CrrFile& reference,
00163 const SamGeneratorConfig& config,
00164 const std::vector<std::string>& outStreams
00165 );
00166
00167 ~SamRecordGenerator();
00168
00169 void mappingRecordToSam(const SamRecord& record);
00170
00171 void setHeader(const SamFileHeaderBlock& header);
00172 protected:
00173
00174 bool isConsistent(const SamRecord &r) const;
00175
00176 OutputFileDescriptor& getOutputStream(const std::string &id);
00177
00179 void printMateSequence(OutputFileDescriptor& out,
00180 const std::string &mateSeq, const std::string &mateScore);
00181
00182 void printReadGroup(OutputFileDescriptor& out);
00183 void printNegativeGapTag(OutputFileDescriptor& out, const mapping::SamSequenceSplitter &splitter);
00184
00186 void flagAsSVCandidate(OutputFileDescriptor& out);
00188 void printAlleleInfoTag(OutputFileDescriptor& out, const mapping::EvidenceSamRecord& evidenceRecord);
00190 void printAlternatives(OutputFileDescriptor& out, const std::string& tag,
00191 const mapping::SamRecord::SamRecords& samRecords, size_t startFrom);
00192
00193 uint32_t getAdjustedSamPosition(const SamRecord& record ) const;
00194 std::string getSamChr(uint16_t chr) const;
00195
00196 typedef std::map<std::string,boost::shared_ptr<OutputFileDescriptor> > OutStreamMap;
00197 OutStreamMap outFiles_;
00198
00199 typedef boost::shared_ptr<std::ostream> OpenStreamPtr;
00200 typedef std::vector<OpenStreamPtr> OutStreams;
00201 OutStreams openStreams_;
00202
00203 const reference::CrrFile& reference_;
00204 SamFileHeaderBlock header_;
00205 std::string readGroup_;
00206 const SamGeneratorConfig & samGeneratorConfig_;
00207 };
00208
00209 } }
00210
00211 #endif // CGA_TOOLS_SAM_RECORD_HPP_