00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_COMMAND_MERGEMAP2SAM_CONVERTER_HPP_
00016 #define CGA_TOOLS_COMMAND_MERGEMAP2SAM_CONVERTER_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/RangeSet.hpp"
00022 #include "Map2SamConverter.hpp"
00023 #include "EvidenceCache.hpp"
00024
00025 #include <boost/array.hpp>
00026
00027 namespace cgatools { namespace mapping {
00028
00029 class DoubleMatrix;
00030
00031 class MergedMap2SamConfig : public Map2SamConfig
00032 {
00033 public:
00034 MergedMap2SamConfig()
00035 : Map2SamConfig()
00036 ,mapqAlpha_(1E-12)
00037 ,minMateGapFrequency_(1E-7)
00038 ,minMapQ_(0)
00039 ,debugOutStream_(&std::cerr)
00040 {}
00041
00042 double mapqAlpha_;
00043 double minMateGapFrequency_;
00044
00045 uint8_t minMapQ_;
00046 std::string evidenceCacheRoot_;
00047 std::ostream* debugOutStream_;
00048 };
00049
00050 class MergedMap2SamConverter : public Map2SamConverter
00051 {
00052 public:
00053 typedef boost::array<std::vector<size_t>,2> MappingIndicesBySide;
00054 typedef boost::array<std::vector<double>,2> MappingWeightsBySide;
00055
00056 MergedMap2SamConverter(const MergedMap2SamConfig &config, std::ostream &outSamFile)
00057 : Map2SamConverter(config, outSamFile), config_(config)
00058 {}
00059
00060 virtual void init();
00061
00062 protected:
00064 virtual bool processMappings(const mapping::ReadsRecord& readsRecord,
00065 SamRecordArray& samMappings, const mapping::MappingsRecords& baseMappingRecords) const;
00066
00068 virtual void writeMappingRecord(const SamRecord &m) const;
00069
00071 void addEvidenceRecords(const mapping::ReadsRecord& readsRecord, SamRecordArray& records) const;
00073 void regroupMappings(const mapping::ReadsRecord& readsRecord, SamRecordArray& records) const;
00074
00076 void computeMappingWeights(const SamRecordArray& records,
00077 const MappingIndicesBySide& mappings, MappingWeightsBySide& weights) const;
00080 void filterDuplicatedMappings(SamRecordArray& records) const;
00081
00082 void computeWeightMatrix(
00083 const SamRecordArray& records, const MappingIndicesBySide& mappings,
00084 const MappingWeightsBySide& weights, DoubleMatrix& weightMatrix, DoubleMatrix& mateGaps) const;
00085
00086 double computeTotalWeight(const DoubleMatrix& weightMatrix, MappingWeightsBySide& sumAllPairs) const;
00087
00088 void findBestCandidates(const DoubleMatrix& m, const DoubleMatrix& mateGaps,
00089 const MappingIndicesBySide& mappingsBySide, const MappingWeightsBySide& sumAllPairs,
00090 MappingIndicesBySide& bestGroupMappings, MappingIndicesBySide& groups) const;
00091
00093 void reweightSingleMappings(SamRecordArray& records, const MappingIndicesBySide& mappingsBySide,
00094 const MappingWeightsBySide& dnbArmWeights,const MappingWeightsBySide& sumAllPairs) const;
00095
00097 void reweightDoubleArmMappings(SamRecordArray& records, const MappingIndicesBySide& mappingsBySide,
00098 const DoubleMatrix& weightMatrix, const DoubleMatrix& mateGapFrequency,
00099 const MappingWeightsBySide& sumAllPairs, const MappingIndicesBySide& bestFullMappedArms,
00100 const MappingIndicesBySide& groups, const double &totalWeight ) const;
00101
00103 void detectTheBestMappingAndMakeItPrimary(SamRecordArray& records) const;
00104
00106 void dumpBestLocalMatches(const mapping::ReadsRecord& readsRecord, const SamRecordArray& records,
00107 const MappingIndicesBySide& mappingsBySide, const MappingWeightsBySide& dnbArmWeights,
00108 const DoubleMatrix& weights, const DoubleMatrix& mateGaps,
00109 const MappingWeightsBySide& sumAllPairs, const MappingIndicesBySide& bestFullMappedArms,
00110 const MappingIndicesBySide& groups,
00111 const double& totalWeight) const;
00112
00113 void printMatrix(const DoubleMatrix& m, std::ostream& ostr) const;
00114
00115 const MergedMap2SamConfig & config_;
00116
00117 mapping::BatchRecords evidenceBatchRecords_;
00118 };
00119
00120 } }
00121
00122 #endif // CGA_TOOLS_COMMAND_MERGEMAP2SAM_CONVERTER_HPP_