00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_REFERENCE_COMPACTDNASEQUENCE_HPP_
00016 #define CGATOOLS_REFERENCE_COMPACTDNASEQUENCE_HPP_ 1
00017
00020
00021 #include "cgatools/core.hpp"
00022 #include "cgatools/util/Streams.hpp"
00023 #include "cgatools/util/Md5.hpp"
00024
00025 #include <vector>
00026 #include <string>
00027
00028 namespace cgatools { namespace reference {
00029
/// Plain record describing one ambiguous stretch of a sequence: an
/// ambiguity code together with the offset and length it applies to.
struct AmbiguousRegion
{
    /// Creates an empty region: code 'N', offset 0, length 0.
    AmbiguousRegion() : code_('N'), offset_(0), length_(0) {}

    /// Creates a region with the given code, starting at pos and
    /// spanning length positions.
    AmbiguousRegion(char code, uint32_t pos, uint32_t length)
        : code_(code), offset_(pos), length_(length)
    {
    }

    char     code_;    ///< Ambiguity code for the region ('N' by default).
    uint32_t offset_;  ///< Start of the region (the constructor's pos argument).
    uint32_t length_;  ///< Number of positions the region covers.
};
00052
00055 class CompactDnaSequence
00056 {
00057 public:
00058 CompactDnaSequence(const std::string& name,
00059 bool circular,
00060 const void* packedData,
00061 const util::Md5Digest& md5,
00062 size_t length,
00063 const std::vector<AmbiguousRegion> amb);
00064
00067 std::string getSequence (int64_t pos, int64_t length) const;
00068
00072 std::string getUnambiguousSequence(int64_t pos, int64_t length) const;
00073
00076 void appendSequence (std::string& seq, int64_t pos, int64_t length) const;
00077
00081 void appendUnambiguousSequence(std::string& seq, int64_t pos, int64_t length) const;
00082
00086 char getBase(int64_t pos) const;
00087
00093 char getUnambiguousBase(int64_t pos) const;
00094
00099 size_t extendLeftBy3Mers (size_t pos, size_t count) const;
00100
00105 size_t extendRightBy3Mers(size_t pos, size_t count) const;
00106
00110 void validate() const;
00111
00113 const std::string& getName() const
00114 {
00115 return name_;
00116 }
00117
00119 bool isCircular() const
00120 {
00121 return circular_;
00122 }
00123
00127 const util::Md5Digest& getMd5Digest() const
00128 {
00129 return md5_;
00130 }
00131
00133 size_t length() const
00134 {
00135 return length_;
00136 }
00137
00140 const std::vector<AmbiguousRegion>& getAmbiguousRegions() const
00141 {
00142 return amb_;
00143 }
00144
00145 private:
00146 inline int64_t fixCircularPos(int64_t pos) const;
00147 void applyAmbiguity(char* seq, int64_t pos, int64_t length) const;
00148
00149 std::string name_;
00150 bool circular_;
00151 const uint8_t* packedData_;
00152 util::Md5Digest md5_;
00153 int64_t length_;
00154 std::vector<AmbiguousRegion> amb_;
00155 };
00156
00157 } }
00158
00159 #endif // CGATOOLS_REFERENCE_COMPACTDNASEQUENCE_HPP_