00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_COMMAND_MAP2SAM_CONVERTER_HPP_
00016 #define CGA_TOOLS_COMMAND_MAP2SAM_CONVERTER_HPP_ 1
00017
00019
#include "cgatools/core.hpp"
#include "cgatools/util/RangeSet.hpp"
#include "cgatools/reference/CrrFile.hpp"
#include "MapSamUtils.hpp"

#include <boost/array.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <cstddef>
#include <iosfwd>
#include <limits>
#include <map>
#include <string>
00031
00032 namespace cgatools { namespace mapping {
00033
00034 class Map2SamConfig {
00035 public:
00036 Map2SamConfig()
00037 :recordsFrom_(0)
00038 ,recordsTo_(std::numeric_limits<size_t>::max())
00039 ,skipNotMapped_(false)
00040 ,addMateSequenceAndScore_(false)
00041 ,addUnmappedMateInfo_(false)
00042 ,mateSvCandidates_(false)
00043 ,dumpDebugInfo_(true)
00044 {}
00045
00046 size_t recordsFrom_;
00047 size_t recordsTo_;
00048 bool skipNotMapped_;
00049 bool addMateSequenceAndScore_;
00050 bool addUnmappedMateInfo_;
00051 bool mateSvCandidates_;
00052 bool dumpDebugInfo_;
00053
00054 util::StringVector exportRegions_;
00055
00056 std::string exportRootDirName_;
00057 std::string inputReadsBatchId_;
00058 std::string inputReadsFileName_;
00059 std::string inputMappingsFileName_;
00060 std::string referenceFileName_;
00061 std::string outputFileName_;
00062
00063 std::string commandLine_;
00064 };
00065
00066 class SamStatistics {
00067 friend std::ostream& operator<< (std::ostream& out, const SamStatistics& r);
00068 public:
00069 enum {
00070 IND_SINGLE_BEST_MATES = 0,
00071 IND_MULTIPLE_BEST_MATES,
00072 IND_ONLY_ZERO_WEIGHT_BEST_MATES,
00073 IND_ONE_ARM_BEST_MAPPINGS_NO_MATES,
00074 IND_ONE_ARM_ONLY_MAPPED_HAS_BEST,
00075 IND_ONE_ARM_ONLY_MAPPED,
00076 IND_READ_NOT_MAPPED,
00077 IND_PAIRED_SINGLE_ARM_MAPPINGS,
00078 IND_EVIDENCE_MAPPINGS_FOUND,
00079 IND_EVIDENCE_MAPPINGS_LOADED,
00080 IND_DUPLICATE_MAPPINGS_FILTERED,
00081 IND_TOTAL_VALUES
00082 };
00083 SamStatistics() {
00084 counters_.assign(0);
00085 }
00086
00087 typedef boost::array<size_t, IND_TOTAL_VALUES> SamStatCounters;
00088 SamStatCounters counters_;
00089 };
00090
00091 class LibraryData;
00092
00093 class Map2SamConverter {
00094 public:
00095 static const char SAM_SEPARATOR = '\t';
00096 typedef boost::shared_ptr<std::istream> InStream;
00097 typedef boost::ptr_vector<SamRecord> SamRecordArray;
00098
00099 Map2SamConverter(const Map2SamConfig &config, std::ostream &outSamFile);
00100 virtual ~Map2SamConverter() {}
00101
00102 virtual void init();
00103 void run();
00104
00105 protected:
00106 SamFileHeaderBlock createHeader();
00107
00108
00109 virtual void filterMappingRecords(SamRecordArray& records) {}
00110
00111
00112 virtual void writeMappingRecord(const SamRecord &m);
00113
00114 void processDnbRecord(const ReadsRecord& readsRecord,
00115 const mapping::MappingsRecords& mappingsRecords);
00116
00120 virtual bool alternativeMappingProcessing(
00121 const mapping::ReadsRecord& readsRecord, SamRecordArray& records) {
00122 return false;
00123 }
00124
00125 reference::Range getMappingRange(const SamRecord &mapping);
00126
00127 size_t detectPrimaryMapping(MappingsRecords &mappingsRecords, bool oneArmOnly);
00128
00130 size_t getChunkNumber(const std::string &fileName, size_t formatVersion);
00131
00132 size_t batchNumber_;
00133 size_t formatVersion_;
00134 std::string slide_;
00135 std::string lane_;
00136 std::string laneId_;
00137
00138 InStream readsFileStream_;
00139 InStream mappingsFileStream_;
00140
00141 boost::shared_ptr<util::DelimitedFile> readsFile_;
00142 boost::shared_ptr<util::DelimitedFile> mappingsFile_;
00143
00144 boost::shared_ptr<LibraryData> library_;
00145
00146 reference::CrrFile reference_;
00147
00148 std::ostream & outSamFile_;
00149
00150 mutable SamStatistics statistics_;
00151 const Map2SamConfig & config_;
00152
00153 boost::scoped_ptr<util::FastRangeSet> exportRegions_;
00154 boost::scoped_ptr<mapping::SamRecordGenerator> mappingSamRecordGenerator_;
00155 };
00156
00157
00158 } }
00159
00160 #endif // CGA_TOOLS_COMMAND_MAP2SAM_CONVERTER_HPP_