00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_COMMAND_MAP2SAM_CONVERTER_HPP_
00016 #define CGA_TOOLS_COMMAND_MAP2SAM_CONVERTER_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/RangeSet.hpp"
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "MapSamUtils.hpp"
00024
00025 #include <boost/array.hpp>
00026 #include <boost/shared_ptr.hpp>
00027 #include <boost/scoped_ptr.hpp>
00028 #include <boost/ptr_container/ptr_vector.hpp>
00029 #include <string>
00030 #include <map>
00031
00032 namespace cgatools { namespace mapping {
00033
00034 class Map2SamConfig {
00035 public:
00036 Map2SamConfig()
00037 :recordsFrom_(0)
00038 ,recordsTo_(std::numeric_limits<size_t>::max())
00039 ,dumpDebugInfo_(true)
00040 {}
00041
00042 size_t recordsFrom_;
00043 size_t recordsTo_;
00044 bool dumpDebugInfo_;
00045
00046 util::StringVector exportRegions_;
00047
00048 std::string exportRootDirName_;
00049 std::string inputReadsBatchId_;
00050 std::string inputReadsFileName_;
00051 std::string inputMappingsFileName_;
00052 std::string referenceFileName_;
00053
00054 std::string outputFileName_;
00055 util::StringVector outputStreamNames_;
00056
00057 std::string commandLine_;
00058
00059 SamGeneratorConfig samGeneratorConfig_;
00060 };
00061
00062 class LibraryData;
00063
00064 class Map2SamConverter
00065 {
00066 public:
00067 typedef boost::shared_ptr<std::istream> InStream;
00068 typedef boost::ptr_vector<SamRecord> SamRecordArray;
00069 static const size_t MAX_SIDES = 2;
00070
00071 Map2SamConverter(const Map2SamConfig &config, std::ostream &outSamFile);
00072 virtual ~Map2SamConverter() {}
00073
00074 virtual void init();
00075 void run();
00076
00077 protected:
00078 SamFileHeaderBlock createHeader();
00079
00080
00081 virtual void writeMappingRecord(const SamRecord &m) const;
00082
00083 void processDnbRecord(const ReadsRecord& readsRecord,
00084 const mapping::MappingsRecords& mappingsRecords) const;
00085
00086 void outputSamMappings(const mapping::ReadsRecord& readsRecord,
00087 SamRecordArray& samMappings) const;
00088
00089 std::string generateDnbId(const mapping::ReadsRecord& readsRecord) const;
00090
00091 void convertBaseMappingsIntoSamMappings(const mapping::ReadsRecord& readsRecord,
00092 SamRecordArray& samMappings, const mapping::MappingsRecords& baseMappingRecords) const;
00093
00097 virtual bool processMappings(const mapping::ReadsRecord& readsRecord,
00098 SamRecordArray& samMappings, const mapping::MappingsRecords& baseMappingRecords) const = 0;
00099
00100 reference::Range getMappingRange(const SamRecord &mapping) const;
00101
00103 size_t getChunkNumber(const std::string &fileName, size_t formatVersion) const;
00104
00105 size_t batchNumber_;
00106 size_t formatVersion_;
00107 std::string slide_;
00108 std::string lane_;
00109 std::string laneId_;
00110
00111 InStream readsFileStream_;
00112 InStream mappingsFileStream_;
00113
00114 boost::shared_ptr<util::DelimitedFile> readsFile_;
00115 boost::shared_ptr<util::DelimitedFile> mappingsFile_;
00116
00117 boost::shared_ptr<LibraryData> library_;
00118
00119 reference::CrrFile reference_;
00120
00121 std::ostream & outSamFile_;
00122
00123 const Map2SamConfig & config_;
00124
00125 boost::scoped_ptr<util::FastRangeSet> exportRegions_;
00126 boost::scoped_ptr<mapping::SamRecordGenerator> mappingSamRecordGenerator_;
00127 };
00128
00129 class BaseMap2SamConverter : public Map2SamConverter
00130 {
00131 public:
00132 BaseMap2SamConverter(const Map2SamConfig &config, std::ostream &outSamFile)
00133 : Map2SamConverter(config, outSamFile) {}
00134
00135 protected:
00139 virtual bool processMappings(const mapping::ReadsRecord& readsRecord,
00140 SamRecordArray& samMappings, const mapping::MappingsRecords& baseMappingRecords) const;
00141
00142 size_t detectPrimaryMapping(const MappingsRecords &mappingsRecords,
00143 bool oneArmOnly, SamRecordArray& samMappings) const;
00144 };
00145
00146
00147 } }
00148
00149 #endif // CGA_TOOLS_COMMAND_MAP2SAM_CONVERTER_HPP_