00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_VARIANTS_LOCUS_HPP_
00016 #define CGATOOLS_VARIANTS_LOCUS_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/variants/Call.hpp"
00022 #include "cgatools/variants/Allele.hpp"
00023 #include "cgatools/reference/CrrFile.hpp"
00024 #include "cgatools/cgdata/EvidenceReader.hpp"
00025
00026 #include <map>
00027
00028 namespace cgatools { namespace variants {
00029
/// Describes the annotation columns associated with a locus: the column
/// names, a per-column "swap order" index, and a flag saying whether xRef
/// is allele specific.
class LocusAnnotations
{
public:
    /// Constructs an annotation set with no columns.
    ///
    /// The allele-specific-xRef flag is explicitly set to false here.
    /// Without that, a default-constructed object would hold an
    /// indeterminate bool and xRefIsAlleleSpecific() would be undefined
    /// behaviour to call.
    LocusAnnotations()
        : xRefIsAlleleSpecific_(false)
    {
    }

    /// Constructs an annotation set from column headers; defined out of
    /// line.
    /// NOTE(review): how the headers map onto names_ / swapOrder_ is not
    /// visible in this header - confirm against the implementation.
    LocusAnnotations(
        const std::vector<std::string>& annotationColumnHeaders,
        bool xRefIsAlleleSpecificInit);

    /// Returns the number of annotation names held.
    size_t count() const
    {
        return names_.size();
    }

    /// Returns the index of the first annotation whose name equals
    /// @p name, or count() if there is no such annotation.
    size_t getIndex(const std::string& name) const
    {
        for(size_t ii=0; ii<names_.size(); ii++)
        {
            if (names_[ii] == name)
                return ii;
        }
        // Not found: the one-past-the-end index doubles as the sentinel.
        return names_.size();
    }

    /// Returns the swap-order entry stored for annotation @p idx.
    /// @p idx is not range checked; it must be a valid index into the
    /// swap-order table.
    /// NOTE(review): presumably the index of the column this one trades
    /// places with when alleles are reordered - confirm.
    size_t getSwapOrder(size_t idx) const
    {
        return swapOrder_[idx];
    }

    /// Returns a reference to a string member owned by this object.
    /// NOTE(review): presumably always empty and used as the "no value"
    /// result for annotation lookups - confirm.
    const std::string& emptyString() const
    {
        return emptyString_;
    }

    /// Returns the allele-specific-xRef flag.
    bool xRefIsAlleleSpecific() const
    {
        return xRefIsAlleleSpecific_;
    }

private:
    std::vector< std::string > names_;     ///< Annotation names, in index order.
    std::vector< size_t > swapOrder_;      ///< Per-annotation swap-order entries.
    std::string emptyString_;              ///< Backing storage for emptyString().
    bool xRefIsAlleleSpecific_;            ///< Allele-specific-xRef flag.
};
00077
00080 class Locus
00081 {
00082 typedef reference::CrrFile CrrFile;
00083 typedef reference::Location Location;
00084 typedef reference::Range Range;
00085 public:
00087 Locus()
00088 : crr_(0),
00089 ann_(new LocusAnnotations())
00090 {
00091 }
00092
00096 Locus(const CrrFile& crr)
00097 : crr_(&crr),
00098 ann_(new LocusAnnotations())
00099 {
00100 }
00101
00104 Locus(const Locus& other);
00105
00108 Locus& operator=(const Locus& other);
00109
00112 const std::vector<Call>& getCalls() const
00113 {
00114 return calls_;
00115 }
00116
00119 std::vector<Call>& getCalls()
00120 {
00121 return calls_;
00122 }
00123
00125 const std::vector<Allele>& getAlleles() const
00126 {
00127 return alleles_;
00128 }
00129
00131 const Range& getRange() const
00132 {
00133 return range_;
00134 }
00135
00137 void setRange(const Range& range);
00138
00140 const CrrFile& getReference() const
00141 {
00142 return *crr_;
00143 }
00144
00146 boost::uint32_t getId() const;
00147
00150 void setId(boost::uint32_t locusId);
00151
00153 boost::uint16_t getPloidy() const;
00154
00156 void setHapLink(size_t alleleOffset, const std::string& hapLink);
00157
00160 bool isRefCallLocus() const;
00161
00163 bool isNoCallLocus() const;
00164
00167 bool isRefConsistent() const;
00168
00171 bool hasNoCalls() const;
00172
00179 void computeReadCounts(int32_t* allele1ReadCount,
00180 int32_t* allele2ReadCount,
00181 int32_t* referenceAlleleReadCount,
00182 int32_t* totalReadCount,
00183 cgdata::EvidenceReader& evidence) const;
00184
00186 void clearCalls();
00187
00189 void addCall(const Call& call);
00190
00193 void initFromCalls(bool relaxedReferenceValidation = true);
00194
00199 void locationCalls(const Location& loc, std::vector< std::pair<char, const Call*> >& calls) const;
00200
00202 std::string getType() const;
00203
00205 void setType(const std::string& olplType);
00206
00208 std::string getZygosity() const;
00209
00212 void reorderAlleles();
00213
00216 void writeAsOneLine(std::ostream& out, bool writeExtras = true, char sep = '\t') const;
00217
00219 bool hasAnnotation(const std::string& name) const;
00220
00223 const std::string& getAnnotation(const std::string& name) const;
00224
00227 std::string getAllXRef() const;
00228
00232 bool xRefIsAlleleSpecific() const;
00233
00235 void setLocusAnnotations(const LocusAnnotations& ann);
00236
00237 static void writeOneLineFileHeader(
00238 std::ostream& out, bool alleleSpecificXRef, char sep = '\t');
00239 private:
00240 void validateCalls(bool relaxedReferenceValidation) const;
00241 void validateAllelesAndInitRange();
00242 void callError (const std::string&, const Call&) const;
00243 void locusError(const std::string&) const;
00244
00245 Range range_;
00246 const CrrFile* crr_;
00247 std::vector<Call> calls_;
00248 std::vector<Allele> alleles_;
00249 std::string olplType_;
00250 boost::shared_ptr< LocusAnnotations > ann_;
00251
00252
00253
00254 typedef boost::array<bool, 4> EvidenceMatchMap;
00255
00256
00257
00258
00259 typedef std::vector< EvidenceMatchMap > AlleleMatchMap;
00260
00261
00262 bool hasCount(const AlleleMatchMap& m, size_t alleleIdx) const;
00263
00264
00265
00266 size_t countSupport(
00267 const cgdata::EvidenceReader::DnbRecord& dnb, const EvidenceMatchMap& mm) const;
00268
00269
00270
00271 void matchAlleles(const cgdata::EvidenceReader::IntervalRecord& interval,
00272 bool& hasLengthChange,
00273 AlleleMatchMap& m,
00274 EvidenceMatchMap& rm) const;
00275
00276 bool hasFullyCalledNonReferenceAllele() const;
00277
00278 public:
00280 std::vector<std::string> extras_;
00281 };
00282
00283 std::ostream& operator<<(std::ostream& out, const Locus& locus);
00284
00285 } }
00286
00287 #endif // CGATOOLS_VARIANTS_LOCUS_HPP_