00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_REFERENCE_RANGEANNOTATIONSTORE_HPP_
00016 #define CGATOOLS_REFERENCE_RANGEANNOTATIONSTORE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021
00022 #include <string>
00023 #include <vector>
00024 #include <boost/shared_ptr.hpp>
00025
00026 #include "cgatools/util/Streams.hpp"
00027 #include "cgatools/util/DelimitedFile.hpp"
00028 #include "cgatools/util/RangeIntersector.hpp"
00029 #include "cgatools/reference/CrrFile.hpp"
00030 #include "cgatools/reference/ChromosomeIdField.hpp"
00031
00032 using cgatools::util::InputStream;
00033
00034 namespace cgatools { namespace reference {
00035
00041 template <typename Derived, typename TValue>
00042 class RangeAnnotationStore
00043 {
00044 private:
00045 typedef util::RangeIntersector<reference::Range,
00046 TValue,
00047 reference::RangeOverlap> DataStore;
00048 public:
00049 typedef typename DataStore::MapType::const_iterator QueryResult;
00050
00051 RangeAnnotationStore(const reference::CrrFile& crr)
00052 : crr_(crr)
00053 {}
00054
00056 const util::DelimitedFileMetadata& getMetadata() const
00057 {
00058 return metadata_;
00059 }
00060
00063 void intersect(const reference::Range& range,
00064 std::vector<QueryResult>& result) const
00065 {
00066 data_.intersect(range, result);
00067 }
00068
00069 #if 0
00072 void bindColumns(util::DelimitedFile& df, reference::Range& range, TValue& data)
00073 #endif
00074
00075 protected:
00078 typedef RangeAnnotationStore<Derived, TValue> Base;
00079
00081 void load(const std::string& fn)
00082 {
00083 boost::shared_ptr<std::istream> in =
00084 InputStream::openCompressedInputStreamByExtension(fn);
00085 util::DelimitedFile df(*in);
00086 metadata_ = df.getMetadata();
00087
00088 reference::Range range;
00089 TValue payload;
00090 static_cast<Derived*>(this)->bindColumns(df, range, payload);
00091
00092 while (df.next())
00093 data_.put(range, payload);
00094 }
00095
00099 void bindRangeColumns(util::DelimitedFile& df,
00100 reference::Range& range,
00101 const std::string& chrColName = "chromosome",
00102 const std::string& begColName = "begin",
00103 const std::string& endColName = "end")
00104 {
00105 df.addField(ChromosomeIdField(chrColName, &range.chromosome_, crr_));
00106 df.addField(util::ValueField<uint32_t>(begColName, &range.begin_));
00107 df.addField(util::ValueField<uint32_t>(endColName, &range.end_));
00108 }
00109
00110 private:
00111 const reference::CrrFile& crr_;
00112 util::DelimitedFileMetadata metadata_;
00113 DataStore data_;
00114 };
00115
00116 }}
00117
00118 #endif