00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_SAM_RECORD_HPP_
00016 #define CGA_TOOLS_SAM_RECORD_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "SamOptions.hpp"
00022
00023 #include <vector>
00024 #include <string>
00025 #include <map>
00026 #include <boost/shared_ptr.hpp>
00027
// Forward declaration only: this header refers to CrrFile solely through
// const references, so the complete type is not needed here.
namespace cgatools {
namespace reference {

class CrrFile;

} // namespace reference
} // namespace cgatools
00031
00032 namespace cgatools { namespace mapping {
00033
/// Pair of two unsigned 16-bit values.
typedef std::pair<uint16_t, uint16_t> UInt16Pair;
00035
00036 class SamRecord
00037 {
00038 friend std::ostream& operator<< (std::ostream& ost, const SamRecord& r);
00039 public:
00040 enum RecordType
00041 {
00042 DEFAULT = 0,
00043 BASE_MAPPING,
00044 EVIDENCE,
00045 EVIDENCE_CACHE
00046 };
00047
00048 typedef std::vector<SamRecord *> SamRecords;
00049 typedef std::vector<const SamRecord *> ConstSamRecords;
00050
00051 SamRecord(
00052 const std::string& readName, bool isMapped, bool onNegativeStrand, bool isPrimary,
00053 uint8_t side, uint16_t chr, int32_t position,
00054 const std::string& extCigar,
00055 uint8_t mappingQuality,
00056 bool isConsistentMapQ,
00057 const std::string& fullReadSequence,
00058 const std::string& fullReadScores,
00059 UInt16Pair sequenceStartAndLength
00060 );
00061
00062 virtual ~SamRecord() {}
00063
00064 bool correctPosition(const reference::CrrFile& reference);
00065
00066 RecordType typeId_;
00067
00068 std::string readName_;
00069 bool isMapped_;
00070 bool onNegativeStrand_;
00071 bool isPrimary_;
00072 bool isGroupPrimary_;
00073 uint8_t side_;
00074 uint16_t chr_;
00075 int32_t position_;
00076 std::string extCigar_;
00077
00078 std::string fullReadSequence_;
00079 std::string fullReadScores_;
00080 bool isSvCandidate_;
00081
00082
00083 UInt16Pair sequenceStartAndLength_[2];
00084
00085 SamRecords mates_;
00086 SamRecords alternatives_;
00087
00088 uint8_t getMappingQuality() const {return mappingQuality_;}
00089 void setMappingQuality(uint8_t value, bool isConsistent) {
00090 mappingQuality_ = value; isConsistentMapQ_ = isConsistent;
00091 }
00092
00093
00094 bool isArtificialMateReported() const {return isArtificialMateReported_;}
00095 void setArtificialMateReported(bool isReported) {isArtificialMateReported_ = true;}
00096
00097 bool isConsistent() const {return isConsistentMapQ_;}
00098 protected:
00099 uint8_t mappingQuality_;
00100 bool isConsistentMapQ_;
00101 bool isArtificialMateReported_;
00102 };
00103
/// One node of a SAM file header: an id / type / separator / value tuple
/// that may carry nested child blocks.
class SamFileHeaderBlock
{
    /// Stream output for a block (defined out of line).
    friend std::ostream& operator<< (std::ostream& ostr,const SamFileHeaderBlock& block);
public:
    typedef std::vector<SamFileHeaderBlock> Children;

    /// Creates a block whose type equals its id, with a tab separator and an
    /// empty value. The id defaults to "None".
    SamFileHeaderBlock(std::string id = "None")
        : id_(id)
        , type_(id)
        , separator_("\t")
    {
    }

    /// Creates a block with every textual field given explicitly.
    SamFileHeaderBlock(const std::string &id,
                       const std::string& type,
                       const std::string& separator,
                       const std::string& value)
        : id_(id)
        , type_(type)
        , separator_(separator)
        , value_(value)
    {
    }

    /// Creates a block whose type equals its id, with the given separator
    /// and value.
    SamFileHeaderBlock(const std::string &id,
                       const std::string& separator,
                       const std::string& value)
        : id_(id)
        , type_(id)
        , separator_(separator)
        , value_(value)
    {
    }

    /// Declared only; looks up a child by the given string (defined out of line).
    const SamFileHeaderBlock& getChild(const std::string &child) const;

    /// Declared only (defined out of line).
    void removeNotMatchingChildren(const SamFileHeaderBlock &child);

    /// Two blocks are equal when both id and type match; separator, value
    /// and children take no part in the comparison.
    bool operator== (const SamFileHeaderBlock& other) const {
        if (!(id_ == other.id_)) {
            return false;
        }
        return type_ == other.type_;
    }

    /// Declared only (defined out of line).
    SamFileHeaderBlock& get(const SamFileHeaderBlock& b);
    /// Declared only (defined out of line).
    SamFileHeaderBlock& add(const SamFileHeaderBlock& b);

    std::string id_;
    std::string type_;
    std::string separator_;
    std::string value_;
    Children    children_;
};
00142
// Forward declarations: these types appear in this header only as
// const-reference parameters, so their definitions are not required here.
class EvidenceSamRecord;
class SamSequenceSplitter;
00145
00148 class SamRecordGenerator
00149 {
00150 public:
00151 class OutputFileDescriptor
00152 {
00153 public:
00154 OutputFileDescriptor(std::ostream & outStream)
00155 :hasHeader_(false), hasRecords_(false), outStream_(outStream)
00156 {}
00157 ~OutputFileDescriptor();
00158
00159 void writeHeader(const SamFileHeaderBlock& header);
00160
00161 bool hasHeader_;
00162 bool hasRecords_;
00163 std::ostream & outStream_;
00164 };
00165
00166 static const char SAM_SEPARATOR = '\t';
00167
00168 SamRecordGenerator(
00169 std::ostream& outSamFile,
00170 const reference::CrrFile& reference,
00171 const SamGeneratorConfig& config,
00172 const std::vector<std::string>& outStreams
00173 );
00174
00175 ~SamRecordGenerator();
00176
00177 void mappingRecordToSam(const SamRecord& record);
00178
00179 void setHeader(const SamFileHeaderBlock& header);
00180 protected:
00181
00182 bool isConsistent(const SamRecord &r) const;
00183
00184 OutputFileDescriptor& getOutputStream(const std::string &id);
00185
00187 void printMateSequence(OutputFileDescriptor& out,
00188 const std::string &mateSeq, const std::string &mateScore);
00189
00190 void printReadGroup(OutputFileDescriptor& out);
00191 void printNegativeGapTag(OutputFileDescriptor& out, const mapping::SamSequenceSplitter &splitter);
00192
00194 void flagAsSVCandidate(OutputFileDescriptor& out);
00196 void printAlleleInfoTag(OutputFileDescriptor& out, const mapping::EvidenceSamRecord& evidenceRecord);
00198 void printAlternatives(OutputFileDescriptor& out, const std::string& tag,
00199 const mapping::SamRecord::SamRecords& samRecords, size_t startFrom);
00200
00201 uint32_t getAdjustedSamPosition(const SamRecord& record ) const;
00202 std::string getSamChr(uint16_t chr) const;
00203
00204 typedef std::map<std::string,boost::shared_ptr<OutputFileDescriptor> > OutStreamMap;
00205 OutStreamMap outFiles_;
00206
00207 typedef boost::shared_ptr<std::ostream> OpenStreamPtr;
00208 typedef std::vector<OpenStreamPtr> OutStreams;
00209 OutStreams openStreams_;
00210
00211 const reference::CrrFile& reference_;
00212 SamFileHeaderBlock header_;
00213 std::string readGroup_;
00214 const SamGeneratorConfig & samGeneratorConfig_;
00215 };
00216
00217 } }
00218
00219 #endif // CGA_TOOLS_SAM_RECORD_HPP_