00001 // Copyright 2010 Complete Genomics, Inc. 00002 // 00003 // Licensed under the Apache License, Version 2.0 (the "License"); you 00004 // may not use this file except in compliance with the License. You 00005 // may obtain a copy of the License at 00006 // 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // 00009 // Unless required by applicable law or agreed to in writing, software 00010 // distributed under the License is distributed on an "AS IS" BASIS, 00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 00012 // implied. See the License for the specific language governing 00013 // permissions and limitations under the License. 00014 00015 #ifndef CGATOOLS_VARIANTS_SUPERLOCUSITERATOR_HPP_ 00016 #define CGATOOLS_VARIANTS_SUPERLOCUSITERATOR_HPP_ 1 00017 00019 00020 #include "cgatools/core.hpp" 00021 #include "cgatools/variants/Superlocus.hpp" 00022 #include "cgatools/variants/VariantFileIterator.hpp" 00023 00024 #include <deque> 00025 00026 namespace cgatools { namespace variants { 00027 00028 class SuperlocusQueueIterator; 00029 00043 class SuperlocusIterator : private boost::noncopyable 00044 { 00045 public: 00053 SuperlocusIterator(uint32_t extend3Mers = 4, uint32_t extendBases = 0); 00054 00056 void setVariantFile(VariantFileIterator& iter); 00057 00059 void setVariantFiles(VariantFileIterator& iterA, VariantFileIterator& iterB); 00060 00062 void setVariantFiles(const std::vector<VariantFileIterator*> iters); 00063 00066 void skipToVariant(const reference::Range& range, const std::string& alleleSeq); 00067 00069 void seekFirst(); 00070 00072 bool eof() const 00073 { 00074 return eof_; 00075 } 00076 00078 const Superlocus& operator*() const 00079 { 00080 return sl_; 00081 } 00082 00084 const Superlocus* operator->() const 00085 { 00086 return &sl_; 00087 } 00088 00090 SuperlocusIterator& operator++() 00091 { 00092 next(); 00093 return *this; 00094 } 00095 00096 private: 00097 void next(); 00098 bool findVariant( 00099 const reference::Location& loc, reference::Range& range, reference::Range& searchRange); 00100 void retireLoci(); 00101 void extendVariant(); 00102 void extendVariant(reference::Range& slRange, reference::Range& searchRange); 00103 00104 reference::Range extendSearchRange( 00105 const reference::Range& range, const Locus& locus) const; 00106 uint32_t extendLeftBySuffixMatching(const Locus& locus) const; 00107 uint32_t extendRightByPrefixMatching(const Locus& locus) const; 00108 uint32_t extendLeftBySuffixMatching( 00109 const reference::Location& loc, const std::string& sequence) const; 00110 uint32_t extendRightByPrefixMatching( 00111 const reference::Location& loc, const std::string& sequence) const; 00112 size_t findContig(const reference::Location& loc) const; 00113 00114 Superlocus sl_; 00115 std::vector<VariantFileIterator*> iters_; 00116 std::vector< std::deque<Locus> >& queues_; 00117 std::vector< boost::shared_ptr<SuperlocusQueueIterator> > qIters_; 00118 bool started_; 00119 bool eof_; 00120 uint32_t extend3Mers_; 00121 uint32_t extendBases_; 00122 uint32_t padBases_; 00123 reference::Location queueCutoff_; 00124 reference::Location slRetired_; 00125 const reference::CrrFile* crr_; 00126 std::vector<reference::Range> contigs_; 00127 }; 00128 00129 } } // cgatools::variants 00130 00131 #endif // CGATOOLS_VARIANTS_SUPERLOCUSITERATOR_HPP_