00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGA_TOOLS_COMMAND_JOIN_HPP_
00016 #define CGA_TOOLS_COMMAND_JOIN_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/command/Command.hpp"
00022 #include "cgatools/util/DelimitedFile.hpp"
00023
00024 namespace cgatools { namespace command {
00025
// Declaration of the Join command, a subclass of Command. Only the interface
// is declared here; none of the member functions are defined in this header.
// NOTE(review): judging by the member names, this command joins the records of
// two delimited inputs ("a" and "b") on matching and/or overlapping fields --
// confirm against the implementation file.
// NOTE(review): std::string, std::vector, std::pair, std::ostream and int64_t
// are used without direct standard includes in this header; presumably they
// arrive via cgatools/core.hpp -- confirm.
00026 class Join : public Command
00027 {
00028 public:
// Constructs the command from a name string.
// NOTE(review): presumably the command's registered name, forwarded to the
// Command base class -- confirm.
00029 Join(const std::string& name);
00030
00031 protected:
// Command entry point; receives the option map `vm` and returns an int.
// NOTE(review): `po` is presumably an alias for boost::program_options
// declared in an included header, and the return value presumably an exit
// status -- confirm. Likely overrides a virtual in Command; verify there.
00032 int run(po::variables_map& vm);
00033
00034 private:
// Per-input bookkeeping used by the parse*/init* helpers below; one instance
// is stored per element of qp_.
00035 struct QueryPlan
00036 {
00037
// NOTE(review): presumably the column indices of the fields that must match
// exactly between the two inputs -- confirm.
00038 std::vector<int> matchIdx_;
00039
// NOTE(review): presumably the column indices of the (begin, end) fields
// describing the range tested for overlap -- confirm, including which
// sentinel (if any) marks "no overlap fields".
00040 std::pair<int,int> overlapIdx_;
00041 };
00042
// Takes an output stream and the field vectors of one "a" record and one "b"
// record. NOTE(review): presumably writes one joined output record -- confirm.
00043 void dumpRecord(std::ostream& out,
00044 const std::vector<std::string>& aFields,
00045 const std::vector<std::string>& bFields);
// Boolean predicate over two int64 pairs.
// NOTE(review): presumably true when the two ranges overlap under the
// configured overlap mode; whether the pairs are half-open is not visible
// here -- confirm.
00046 bool overlap(const std::pair<int64_t, int64_t>& lhs,
00047 const std::pair<int64_t, int64_t>& rhs);
// Reads `fields` under the plan `qp`; `matchKey` and `range` are non-const
// references and therefore act as outputs.
// NOTE(review): presumably fills the match key and the overlap range for one
// record -- confirm.
00048 void parseJoinFields(const std::vector<std::string>& fields,
00049 const QueryPlan& qp,
00050 std::string& matchKey,
00051 std::pair<int64_t, int64_t>& range);
// Takes both delimited files by non-const reference.
// NOTE(review): presumably populates qp_ (and possibly transform_) from the
// files' headers -- confirm.
00052 void initQueryPlan(util::DelimitedFile& aa, util::DelimitedFile& bb);
// Takes a file, a field-name list string and a QueryPlan to update.
// NOTE(review): presumably resolves the names in `fieldNameList` to column
// indices stored in qp.matchIdx_; the list's delimiter is not visible here
// -- confirm.
00053 void parseMatchFields(const util::DelimitedFile& df,
00054 const std::string& fieldNameList,
00055 QueryPlan& qp);
// Same parameter shape as parseMatchFields.
// NOTE(review): presumably resolves the overlap field names into
// qp.overlapIdx_ -- confirm.
00056 void parseOverlapFields(const util::DelimitedFile& df,
00057 const std::string& fieldNameList,
00058 QueryPlan& qp);
00059
// NOTE(review): the members below are presumably bound to command-line
// options in the constructor -- confirm option names and defaults there.
00060 std::vector<std::string> inputFileNames_;
00061 std::string outputFileName_;
00062
00063 std::vector<std::string> match_;
00064 std::string overlapSpec_;
00065 std::string outputMode_;
00066 std::string overlapMode_;
00067 std::string select_;
00068 bool alwaysDump_;
// Two values each, suffixed A and B.
// NOTE(review): presumably one per input file -- confirm units and valid
// ranges.
00069 double overlapFractionA_, overlapFractionB_;
00070 int64_t boundaryUncertaintyA_, boundaryUncertaintyB_;
00071
// Working state. NOTE(review): qp_ presumably holds one QueryPlan per input,
// aFields_/bFields_ the current records' fields, and transform_ pairs of
// indices selecting output columns -- confirm.
00072 std::vector<QueryPlan> qp_;
00073 std::vector<std::string> aFields_, bFields_;
00074 std::vector< std::pair<size_t, size_t> > transform_;
00075 };
00076
00077 } }
00078
00079 #endif // CGA_TOOLS_COMMAND_JOIN_HPP_