00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_COPYNUMBER_CNVFILEVCFSOURCE_HPP_
00016 #define CGATOOLS_COPYNUMBER_CNVFILEVCFSOURCE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/conv/VcfRecordSource.hpp"
00022 #include "cgatools/reference/CrrFile.hpp"
00023 #include "cgatools/util/DelimitedFile.hpp"
00024
00025 #include <deque>
00026 #include <map>
00027 #include <set>
00028 #include <vector>
00029
00030
// Forward declaration only: makes the name cgatools::cgdata::GenomeMetadata
// known to this header without pulling in the header that defines the class.
namespace cgatools {
namespace cgdata {

class GenomeMetadata;

} // namespace cgdata
} // namespace cgatools
00034
00035 namespace cgatools { namespace copynumber {
00036
00037
00038
00039 struct diploidData {
00040 std::string chr;
00041 int begin;
00042 int end;
00043 cgatools::reference::Range range;
00044 float avgNormCvg;
00045 float gcCvg;
00046 float normedGcCvg;
00047 float relCvg;
00048 int cP;
00049 std::string cT;
00050 std::string pS;
00051 std::string tS;
00052 };
00053
00054 struct nondiploidData {
00055 std::string chr;
00056 int begin;
00057 int end;
00058 cgatools::reference::Range range;
00059 float cL;
00060 std::string lS;
00061 std::string laf;
00062 std::string llaf;
00063 std::string ulaf;
00064 };
00065
00066 struct somaticData {
00067 std::string chr;
00068 int begin;
00069 int end;
00070 cgatools::reference::Range range;
00071 float cL;
00072 std::string lS;
00073 std::string laf;
00074 std::string llaf;
00075 std::string ulaf;
00076 };
00077
00078
00079 class CnvFileVcfRecordWriter : public cgatools::conv::VcfRecordWriter
00080 {
00081 public:
00082 CnvFileVcfRecordWriter(
00083 const std::vector< std::string> cnvFieldNames,
00084 const cgatools::reference::CrrFile& crr,
00085 int numGenomes,
00086 bool someSomatic,
00087 bool someNonsomaticLAF);
00088
00089
00090 cgatools::reference::Location getLocation() const;
00091 void writeRef(std::ostream& out) const;
00092 void writeAlt(std::ostream& out) const;
00093 void writeInfo(std::ostream& out) const;
00094 void writeFormat(std::ostream& out) const;
00095 void writeSample(std::ostream& out, size_t gIdx) const;
00096
00097
00098 void setDiploidData(diploidData &data, int gIdx);
00099 void setNondiploidData(nondiploidData &data, int gIdx);
00100 void setSomaticData(somaticData &data, int gIdx);
00101
00102
00103 cgatools::reference::Range getDiploidRange(int gIdx) const { return dipData_[gIdx].range; }
00104 cgatools::reference::Range getNondiploidRange(int gIdx) const {return nondipData_[gIdx].range; }
00105 cgatools::reference::Range getSomaticRange(int gIdx) const {return somData_[gIdx].range; }
00106
00107
00108 bool isDeferred(int gIdx) const { return deferred_[gIdx]; }
00109
00110 void setDeferred(int gIdx,bool status) { deferred_[gIdx] = status; }
00111
00112 private:
00113 const cgatools::reference::CrrFile& crr_;
00114 bool someSomatic_;
00115 bool someNonsomaticLAF_;
00116 std::vector< diploidData > dipData_;
00117 std::vector< nondiploidData > nondipData_;
00118 std::vector< somaticData > somData_;
00119 std::vector< std::string > formatIds_;
00120 std::vector< bool > deferred_;
00121 };
00122
00123 class CnvFileVcfRecordSource : public cgatools::conv::VcfRecordSource
00124 {
00125 public:
00126 CnvFileVcfRecordSource(
00127 const std::vector< std::string >& dipdet,
00128 const std::vector< std::string >& nondipdet,
00129 const std::vector< std::string >& somnondipdet,
00130 const std::vector<std::string> fieldNames,
00131 const cgatools::reference::CrrFile& crr);
00132
00133
00134
00135 std::vector<cgatools::conv::VcfSubFieldHeaderRecord> getSubFieldHeaderRecords() const;
00136 std::string getSource(size_t idxGenome) const;
00137 std::vector<cgatools::conv::VcfKvHeaderRecord> getKeyValueHeaderRecords(size_t idxGenome) const;
00138 std::string getAssemblyId(size_t idxGenome) const;
00139
00140 bool eof() const;
00141 cgatools::conv::VcfRecordSource& operator++();
00142 const cgatools::conv::VcfRecordWriter& operator*() const;
00143 const cgatools::conv::VcfRecordWriter* operator->() const;
00144
00145 private:
00146
00147 void limitFieldNames(const std::vector< std::string > & fieldNames);
00148 void setUpDelimitedFiles();
00149 void setupDiploidDetails(boost::shared_ptr<cgatools::util::DelimitedFile> &df);
00150 void setupNondiploidDetails(boost::shared_ptr<cgatools::util::DelimitedFile> &df);
00151 void setupSomaticDetails(boost::shared_ptr<cgatools::util::DelimitedFile> &df);
00152 bool testForSomaticData();
00153 bool testForNonsomaticLaf();
00154 void computeGcCorrectedMeans();
00155
00156 private:
00157
00158 const cgatools::reference::CrrFile& crr_;
00159 cgatools::reference::Range genomeEnd_;
00160 std::vector< std::string > dipDetFn_;
00161 std::vector< std::string > nondipDetFn_;
00162 std::vector< std::string > somnondipDetFn_;
00163 std::vector< boost::shared_ptr<std::istream> > dipDetIStr_;
00164 std::vector< boost::shared_ptr<std::istream> > nondipDetIStr_;
00165 std::vector< boost::shared_ptr<std::istream> > somnondipDetIStr_;
00166 std::vector< boost::shared_ptr<cgatools::util::DelimitedFile> > dipDet_;
00167 std::vector< boost::shared_ptr<cgatools::util::DelimitedFile> > nondipDet_;
00168 std::vector< boost::shared_ptr<cgatools::util::DelimitedFile> > somnondipDet_;
00169 std::vector< float > gcCorrectedMean_;
00170 diploidData tmpDip_;
00171 nondiploidData tmpNondip_;
00172 somaticData tmpSom_;
00173 std::vector< std::string > cnvFieldNames_;
00174 std::set< std::string > cnvFieldNamesSet_;
00175 boost::shared_ptr<CnvFileVcfRecordWriter> writer_;
00176 bool someSomatic_;
00177 bool someNonsomaticLAF_;
00178 bool eof_;
00179 };
00180
00181 } }
00182
00183 #endif // CGATOOLS_COPYNUMBER_CNVFILEVCFSOURCE_HPP_