00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_COPYNUMBER_CNVFILEVCFSOURCE_HPP_
00016 #define CGATOOLS_COPYNUMBER_CNVFILEVCFSOURCE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/conv/VcfRecordSource.hpp"
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/util/DelimitedFile.hpp"
00024
00025 #include <deque>
00026 #include <map>
00027 #include <set>
00028 #include <vector>
00029
00030
// Forward declaration only: nothing in the visible part of this header
// needs the complete cgdata::GenomeMetadata type.
namespace cgatools {
namespace cgdata {

class GenomeMetadata;

} // namespace cgdata
} // namespace cgatools
00034
00035 namespace cgatools { namespace copynumber {
00036
00037
00038
00039 struct diploidData {
00040 std::string chr;
00041 int begin;
00042 int end;
00043 cgatools::reference::Range range;
00044 float avgNormCvg;
00045 float gcCvg;
00046 float normedGcCvg;
00047 float relCvg;
00048 int cP;
00049 std::string cT;
00050 std::string pS;
00051 std::string tS;
00052 };
00053
00054 struct nondiploidData {
00055 std::string chr;
00056 int begin;
00057 int end;
00058 cgatools::reference::Range range;
00059 float cL;
00060 std::string lS;
00061 };
00062
00063 struct somaticData {
00064 std::string chr;
00065 int begin;
00066 int end;
00067 cgatools::reference::Range range;
00068 float cL;
00069 std::string lS;
00070 std::string laf;
00071 std::string llaf;
00072 std::string ulaf;
00073 };
00074
00075
00076 class CnvFileVcfRecordWriter : public cgatools::conv::VcfRecordWriter
00077 {
00078 public:
00079 CnvFileVcfRecordWriter(
00080 const std::vector< std::string> cnvFieldNames,
00081 const cgatools::reference::CrrFile& crr,
00082 int numGenomes,
00083 bool someSomatic);
00084
00085
00086 cgatools::reference::Location getLocation() const;
00087 void writeRef(std::ostream& out) const;
00088 void writeAlt(std::ostream& out) const;
00089 void writeInfo(std::ostream& out) const;
00090 void writeFormat(std::ostream& out) const;
00091 void writeSample(std::ostream& out, size_t gIdx) const;
00092
00093
00094 void setDiploidData(diploidData &data, int gIdx);
00095 void setNondiploidData(nondiploidData &data, int gIdx);
00096 void setSomaticData(somaticData &data, int gIdx);
00097
00098
00099 cgatools::reference::Range getDiploidRange(int gIdx) const { return dipData_[gIdx].range; }
00100 cgatools::reference::Range getNondiploidRange(int gIdx) const {return nondipData_[gIdx].range; }
00101 cgatools::reference::Range getSomaticRange(int gIdx) const {return somData_[gIdx].range; }
00102
00103
00104 bool isDeferred(int gIdx) const { return deferred_[gIdx]; }
00105
00106 void setDeferred(int gIdx,bool status) { deferred_[gIdx] = status; }
00107
00108 private:
00109 const cgatools::reference::CrrFile& crr_;
00110 bool someSomatic_;
00111 std::vector< diploidData > dipData_;
00112 std::vector< nondiploidData > nondipData_;
00113 std::vector< somaticData > somData_;
00114 std::vector< std::string > formatIds_;
00115 std::vector< bool > deferred_;
00116 };
00117
00118 class CnvFileVcfRecordSource : public cgatools::conv::VcfRecordSource
00119 {
00120 public:
00121 CnvFileVcfRecordSource(
00122 const std::vector< std::string >& dipdet,
00123 const std::vector< std::string >& nondipdet,
00124 const std::vector< std::string >& somnondipdet,
00125 const std::vector<std::string> fieldNames,
00126 const cgatools::reference::CrrFile& crr);
00127
00128
00129
00130 std::vector<cgatools::conv::VcfSubFieldHeaderRecord> getSubFieldHeaderRecords() const;
00131 std::string getSource(size_t idxGenome) const;
00132 std::vector<cgatools::conv::VcfKvHeaderRecord> getKeyValueHeaderRecords(size_t idxGenome) const;
00133 std::string getAssemblyId(size_t idxGenome) const;
00134
00135 bool eof() const;
00136 cgatools::conv::VcfRecordSource& operator++();
00137 const cgatools::conv::VcfRecordWriter& operator*() const;
00138 const cgatools::conv::VcfRecordWriter* operator->() const;
00139
00140 private:
00141
00142 void limitFieldNames(const std::vector< std::string > & fieldNames);
00143 void setUpDelimitedFiles();
00144 void setupDiploidDetails(boost::shared_ptr<cgatools::util::DelimitedFile> &df);
00145 void setupNondiploidDetails(boost::shared_ptr<cgatools::util::DelimitedFile> &df);
00146 void setupSomaticDetails(boost::shared_ptr<cgatools::util::DelimitedFile> &df);
00147 bool testForSomaticData();
00148 void computeGcCorrectedMeans();
00149
00150 private:
00151
00152 const cgatools::reference::CrrFile& crr_;
00153 cgatools::reference::Range genomeEnd_;
00154 std::vector< std::string > dipDetFn_;
00155 std::vector< std::string > nondipDetFn_;
00156 std::vector< std::string > somnondipDetFn_;
00157 std::vector< boost::shared_ptr<std::istream> > dipDetIStr_;
00158 std::vector< boost::shared_ptr<std::istream> > nondipDetIStr_;
00159 std::vector< boost::shared_ptr<std::istream> > somnondipDetIStr_;
00160 std::vector< boost::shared_ptr<cgatools::util::DelimitedFile> > dipDet_;
00161 std::vector< boost::shared_ptr<cgatools::util::DelimitedFile> > nondipDet_;
00162 std::vector< boost::shared_ptr<cgatools::util::DelimitedFile> > somnondipDet_;
00163 std::vector< float > gcCorrectedMean_;
00164 diploidData tmpDip_;
00165 nondiploidData tmpNondip_;
00166 somaticData tmpSom_;
00167 std::vector< std::string > cnvFieldNames_;
00168 std::set< std::string > cnvFieldNamesSet_;
00169 boost::shared_ptr<CnvFileVcfRecordWriter> writer_;
00170 bool someSomatic_;
00171 bool eof_;
00172 };
00173
00174 } }
00175
00176 #endif // CGATOOLS_COPYNUMBER_CNVFILEVCFSOURCE_HPP_