00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_REFERENCE_RANGEANNOTATIONSTORE_HPP_
00016 #define CGATOOLS_REFERENCE_RANGEANNOTATIONSTORE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021
00022 #include <string>
00023 #include <vector>
00024 #include <boost/shared_ptr.hpp>
00025
00026 #include "cgatools/util/Streams.hpp"
00027 #include "cgatools/util/DelimitedFile.hpp"
00028 #include "cgatools/util/RangeIntersector.hpp"
00029 #include "cgatools/reference/CrrFile.hpp"
00030 #include "cgatools/reference/ChromosomeIdField.hpp"
00031
00032 using cgatools::util::InputStream;
00033
00034 namespace cgatools { namespace reference {
00035
00041 template <typename Derived, typename TValue>
00042 class RangeAnnotationStore
00043 {
00044 private:
00045 typedef util::IntervalTree<reference::Range,
00046 reference::Location,
00047 TValue,
00048 reference::RangeOverlap,
00049 reference::GetRangeBoundary > DataStore;
00050 public:
00051 typedef typename DataStore::QueryResultType QueryResultType;
00052
00053 RangeAnnotationStore(const reference::CrrFile& crr)
00054 : crr_(crr)
00055 {}
00056
00058 const util::DelimitedFileMetadata& getMetadata() const
00059 {
00060 return metadata_;
00061 }
00062
00065 void intersect(const reference::Range& range,
00066 std::vector<QueryResultType>& result) const
00067 {
00068 data_.intersect(range, result);
00069 }
00070
00071 #if 0
00074 void bindColumns(util::DelimitedFile& df, reference::Range& range, TValue& data)
00075 #endif
00076
00077 protected:
00080 typedef RangeAnnotationStore<Derived, TValue> Base;
00081
00083 void load(const std::string& fn)
00084 {
00085 boost::shared_ptr<std::istream> in =
00086 InputStream::openCompressedInputStreamByExtension(fn);
00087 util::DelimitedFile df(*in, fn);
00088 metadata_ = df.getMetadata();
00089
00090 reference::Range range;
00091 TValue payload;
00092 static_cast<Derived*>(this)->bindColumns(df, range, payload);
00093
00094 while (df.next())
00095 data_.put(range, payload);
00096 }
00097
00101 void bindRangeColumns(util::DelimitedFile& df,
00102 reference::Range& range,
00103 const std::string& chrColName = "chromosome",
00104 const std::string& begColName = "begin",
00105 const std::string& endColName = "end")
00106 {
00107 df.addField(ChromosomeIdField(chrColName, &range.chromosome_, crr_));
00108 df.addField(util::ValueField<uint32_t>(begColName, &range.begin_));
00109 df.addField(util::ValueField<uint32_t>(endColName, &range.end_));
00110 }
00111
00112 private:
00113 const reference::CrrFile& crr_;
00114 util::DelimitedFileMetadata metadata_;
00115 DataStore data_;
00116 };
00117
00118 }}
00119
00120 #endif