00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_COMMAND_EVIDENCECACHE_HPP_
00016 #define CGA_TOOLS_COMMAND_EVIDENCECACHE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/Streams.hpp"
00022 #include "cgatools/util/parse.hpp"
00023 #include "cgatools/util/RangeSet.hpp"
00024
00025 #include <boost/filesystem/path.hpp>
00026 #include <boost/ptr_container/ptr_map.hpp>
00027 #include <boost/foreach.hpp>
00028 #include <boost/lexical_cast.hpp>
00029
00030 #include "cgatools/cgdata/EvidenceReader.hpp"
00031 #include "cgatools/cgdata/LibraryReader.hpp"
00032 #include "cgatools/cgdata/Dnb.hpp"
00033 #include "cgatools/mapping/GapsEstimator.hpp"
00034
// Forward declarations of types that this header only uses by reference,
// so their full definitions do not have to be included here.
namespace cgatools {

namespace cgdata { class GenomeMetadata; }

namespace reference { class CrrFile; }

namespace util { class DelimitedFile; }

} // namespace cgatools
00046
00047
00048 namespace cgatools { namespace mapping {
00049
00050
00051 class CacheOutStreams
00052 {
00053 public:
00054 typedef boost::ptr_map<uint64_t,util::OutputStream> OutputLaneBatches;
00055
00056 CacheOutStreams(const boost::filesystem::path& outputPrefix)
00057 :outputPrefix_(outputPrefix)
00058 {}
00059
00060 std::ostream& getBatchStream(const std::string& slide, const std::string& lane, size_t batchNo);
00061 static uint64_t getBatchStreamKey(const std::string& slide, const std::string& lane, size_t batchNo);
00062
00063 protected:
00064 boost::filesystem::path outputPrefix_;
00065 OutputLaneBatches outputLanes_;
00066 };
00067
00068 class LibraryMetadataContainer;
00069
00070 class EvidenceCacheBuilder
00071 {
00072 public:
00073 EvidenceCacheBuilder(const cgdata::GenomeMetadata& genomeMetadata,const reference::CrrFile& reference);
00074
00075 void processChrData(uint16_t chr, const boost::filesystem::path& outputPrefix,
00076 const util::FastRangeSet::RangeSet &exportRanges);
00077
00078 void exportRanges(const boost::filesystem::path& outputPrefix, const util::FastRangeSet& ranges);
00079
00080 protected:
00081 boost::shared_ptr<LibraryMetadataContainer> libraries_;
00082
00083 const cgdata::GenomeMetadata& genomeMetadata_;
00084 const reference::CrrFile& reference_;
00085 };
00086
00087 class EvidenceCacheDnbRecord : public cgdata::EvidenceReader::DnbRecord
00088 {
00089 public:
00090 double alleleConcordance_;
00091 };
00092
00093 typedef std::multimap<uint64_t,EvidenceCacheDnbRecord> BatchRecords;
00094
00095 class EvidenceCacheReader
00096 {
00097 public:
00098 typedef std::vector<boost::filesystem::path> InputBatchFiles;
00099 typedef boost::ptr_map<uint64_t,InputBatchFiles> InputLaneBatches;
00100
00101 EvidenceCacheReader(const boost::filesystem::path& rootDir)
00102 : rootDir_(rootDir)
00103 {
00104 collectFiles();
00105 }
00106
00107 void readBatchRecords(size_t batchKey, BatchRecords& result, const reference::CrrFile& crr);
00108
00109 protected:
00110 void collectFiles();
00111
00112 static void initCacheRecordParser(
00113 util::DelimitedFile& delimitedFile, EvidenceCacheDnbRecord& record, const reference::CrrFile& crr);
00114
00115 boost::filesystem::path rootDir_;
00116 InputLaneBatches inputBatches_;
00117 };
00118
00119
00120 } }
00121
00122 #endif // CGA_TOOLS_COMMAND_EVIDENCECACHE_HPP_