00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_COMMAND_EVIDENCECACHE_HPP_
00016 #define CGA_TOOLS_COMMAND_EVIDENCECACHE_HPP_ 1
00017 
00019 
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/Streams.hpp"
00022 #include "cgatools/util/parse.hpp"
00023 #include "cgatools/util/RangeSet.hpp"
00024 
00025 #include <boost/filesystem/path.hpp>
00026 #include <boost/ptr_container/ptr_map.hpp>
00027 #include <boost/foreach.hpp>
00028 #include <boost/lexical_cast.hpp>
00029 
00030 #include "cgatools/cgdata/EvidenceReader.hpp"
00031 #include "cgatools/cgdata/LibraryReader.hpp"
00032 #include "cgatools/cgdata/Dnb.hpp"
00033 #include "cgatools/mapping/GapsEstimator.hpp"
00034 
// Forward declarations. This header only names these types through
// references, so their complete definitions are not needed here.
namespace cgatools {

namespace cgdata {
    class GenomeMetadata;
}

namespace reference {
    class CrrFile;
}

namespace util {
    class DelimitedFile;
}

} // namespace cgatools
00046 
00047 
00048 namespace cgatools { namespace mapping {
00049 
00050 
00051 class CacheOutStreams
00052 {
00053 public:
00054     typedef boost::ptr_map<uint64_t,util::OutputStream>   OutputLaneBatches;
00055 
00056     CacheOutStreams(const boost::filesystem::path& outputPrefix)
00057         :outputPrefix_(outputPrefix)
00058     {}
00059 
00060     std::ostream& getBatchStream(const std::string& slide, const std::string& lane, size_t batchNo);
00061     static uint64_t getBatchStreamKey(const std::string& slide, const std::string& lane, size_t batchNo);
00062 
00063 protected:
00064     boost::filesystem::path outputPrefix_;
00065     OutputLaneBatches       outputLanes_;
00066 };
00067     
00068 class LibraryMetadataContainer;
00069 
00070 class EvidenceCacheBuilder 
00071 {
00072 public:
00073     EvidenceCacheBuilder(const cgdata::GenomeMetadata& genomeMetadata,const reference::CrrFile& reference);
00074 
00075     void processChrData(uint16_t chr, const boost::filesystem::path& outputPrefix, 
00076         const util::FastRangeSet::RangeSet &exportRanges);
00077 
00078     void exportRanges(const boost::filesystem::path& outputPrefix, const util::FastRangeSet& ranges);
00079 
00080 protected:
00081     boost::shared_ptr<LibraryMetadataContainer> libraries_;
00082 
00083     const cgdata::GenomeMetadata&           genomeMetadata_;
00084     const reference::CrrFile&               reference_;
00085 };
00086 
00087 class EvidenceCacheDnbRecord : public cgdata::EvidenceReader::DnbRecord
00088 {
00089 public:
00090     double                   alleleConcordance_;
00091 };
00092 
00093 typedef std::multimap<uint64_t,EvidenceCacheDnbRecord> BatchRecords;
00094 
00095 class EvidenceCacheReader
00096 {
00097 public:
00098     typedef std::vector<boost::filesystem::path>    InputBatchFiles;
00099     typedef boost::ptr_map<uint64_t,InputBatchFiles>  InputLaneBatches;
00100 
00101     EvidenceCacheReader(const boost::filesystem::path& rootDir)
00102         : rootDir_(rootDir)
00103     {
00104         collectFiles();
00105     }
00106 
00107     void readBatchRecords(size_t batchKey, BatchRecords& result, const reference::CrrFile& crr);
00108 
00109 protected:
00110     void collectFiles();
00111 
00112     static void initCacheRecordParser(
00113         util::DelimitedFile& delimitedFile, EvidenceCacheDnbRecord& record, const reference::CrrFile& crr);
00114 
00115     boost::filesystem::path rootDir_;
00116     InputLaneBatches        inputBatches_;
00117 };
00118 
00119 
00120 } } 
00121 
00122 #endif // CGA_TOOLS_COMMAND_EVIDENCECACHE_HPP_