00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_JUNCTION_VCF_RECORD_HPP_
00016 #define CGA_TOOLS_JUNCTION_VCF_RECORD_HPP_
00017
00019
00020
00021 #include "cgatools/core.hpp"
00022 #include "cgatools/junctions/Junction.hpp"
00023 #include "cgatools/junctions/JunctionCompare.hpp"
00024 #include "cgatools/junctions/JunctionVcfWriter.hpp"
00025 #include "cgatools/conv/VcfRecordSource.hpp"
00026
00027
00028 #include <string>
00029 #include <vector>
00030 #include <map>
00031 #include <set>
00032
00033
00034 #include <boost/shared_ptr.hpp>
00035
00036
00037 namespace cgatools { namespace cgdata { class GenomeMetadata; }}
00038
00039
00040 namespace cgatools { namespace junctions {
00041
00043
00044 class JunctionVcfRecordWriter : public cgatools::conv::VcfRecordWriter
00045 {
00046 public:
00047
00048 JunctionVcfRecordWriter();
00049
00050
00051 cgatools::reference::Location getLocation() const;
00052
00053 void writeId ( std::ostream& out ) const;
00054 void writeRef ( std::ostream& out ) const;
00055 void writeAlt ( std::ostream& out ) const;
00056 void writeQual ( std::ostream& out ) const;
00057 void writeFilter ( std::ostream& out ) const;
00058 void writeInfo ( std::ostream& out ) const;
00059 void writeFormat ( std::ostream& out ) const;
00060 void writeSample ( std::ostream& out, size_t gIdx ) const;
00061
00062 friend class JunctionVcfRecordSource;
00063
00064 protected:
00065
00066 cgatools::reference::Location pos_;
00067 std::vector<std::string> sample_;
00068 std::string id_,
00069 ref_,
00070 alt_,
00071 qual_,
00072 filter_,
00073 info_,
00074 format_;
00075 };
00076
00078
00079 class JunctionVcfRecordSource : public cgatools::conv::VcfRecordSource
00080 {
00081 public:
00082
00083 JunctionVcfRecordSource
00084 (
00085 const std::vector< boost::shared_ptr<cgatools::cgdata::GenomeMetadata> >& genomes,
00086 const std::vector<std::string>& junctionFileNames,
00087 const std::vector<std::string> fieldNames,
00088 const cgatools::reference::CrrFile& crr,
00089 size_t scoreThreshold = 10,
00090 size_t sideLengthThreshold_ = 70,
00091 size_t distanceTolerance = 200,
00092 size_t junctionLengthThreshold = 500,
00093 bool normalPriorityOutput = false,
00094 bool useHighConfidenceJunctionsForTumor = true
00095 );
00096
00097
00098
00099 std::vector<cgatools::conv::VcfSubFieldHeaderRecord> getSubFieldHeaderRecords() const;
00100 std::string getSource(size_t idxGenome) const;
00101 std::vector<cgatools::conv::VcfKvHeaderRecord> getKeyValueHeaderRecords
00102 ( size_t idxGenome) const;
00103 std::string getAssemblyId(size_t idxGenome) const;
00104
00105 bool eof() const;
00106 cgatools::conv::VcfRecordSource& operator++();
00107 const cgatools::conv::VcfRecordWriter& operator*() const;
00108 const cgatools::conv::VcfRecordWriter* operator->() const;
00109
00110
00111
00114 void writeAllFields ( bool v );
00115
00116 int run();
00117
00118 protected:
00119
00120 int run1Genome();
00121 int run2Genomes();
00122
00123 bool add
00124 (
00125 std::vector<cgatools::conv::VcfSubFieldHeaderRecord>& result,
00126 cgatools::conv::VcfSubFieldHeaderRecord::Key key,
00127 const std::string& id,
00128 const std::string& number,
00129 const std::string& type,
00130 const std::string& description
00131 ) const;
00132
00133 JunctionVcfRecordWriter createRecord
00134 (
00135 const JunctionRef& jref,
00136 size_t side,
00137 const JunctionCompatMapPerFile& compat
00138 ) const;
00139
00140 std::string getAltField
00141 (
00142 const JunctionRef& jref,
00143 size_t side,
00144 bool suppressChrom = false
00145 ) const;
00146
00147 std::string getPosition ( reference::Location pos, const std::string& sep ) const;
00148
00149 std::string getInfo ( const JunctionRef& jref, size_t side ) const;
00150 std::string getMEI ( const std::string& med ) const;
00151 std::string getId ( const JunctionRef& jref, size_t side) const;
00152 std::string getFormat( const JunctionRef& jref, size_t side) const;
00153
00154 std::string getSample
00155 (
00156 const JunctionRef& jref,
00157 size_t side,
00158 size_t idx,
00159 const JunctionCompatMapPerFile& compat
00160 ) const;
00161
00162 std::string getSample ( const JunctionRef& jref, size_t side ) const;
00163 std::string getSampleFilter( const JunctionRef& jref ) const;
00164 std::string addFilterFlag ( const std::string& flag, bool& filtered) const;
00165
00166 bool need ( const std::string& fieldName ) const;
00167
00168 void copyJunctionListForVcf
00169 (
00170 const junctions::JunctionRefs& jrl,
00171 std::vector<JunctionRefSide>& out,
00172 JunctionCompatMapPerFile& compat
00173 ) const;
00174
00175 void pickNormalPriorityMatch
00176 (
00177 const JunctionRef& jr,
00178 JunctionRefSet& junctionsToSuppress
00179 ) const;
00180
00181 void pickDefaultMatch
00182 (
00183 const JunctionRef& jr,
00184 JunctionRefSet& junctionsToSuppress
00185 ) const;
00186
00188 const cgatools::reference::CrrFile& crr_;
00189
00191 std::vector<std::string> sampleIds_;
00192
00194 std::vector<std::string> junctionFileNames_;
00195
00197 std::vector<std::string> fieldNames_;
00198 std::set<std::string> fieldNameSet_;
00199
00201 bool writeAllFields_;
00202
00204 size_t scoreThreshold_;
00205
00207 size_t sideLengthThreshold_;
00208
00210 size_t junctionLengthThreshold_;
00211
00213 size_t distanceTolerance_;
00214
00216 bool normalPriorityOutput_;
00217
00218 bool useHighConfidenceJunctionsForTumor_;
00219
00221 JunctionFiles junctionFiles_;
00222
00224 std::vector<JunctionVcfRecordWriter> records_;
00225
00227 size_t currentRecord_;
00228
00229 static const char* sampleFieldIDs_[];
00230 };
00231
00232 }}
00233 #endif // CGA_TOOLS_JUNCTION_VCF_RECORD_HPP_