00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 #ifndef CGA_TOOLS_COMMAND_JOIN_HPP_
00016 #define CGA_TOOLS_COMMAND_JOIN_HPP_ 1
00017 
00019 
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/command/Command.hpp"
00022 #include "cgatools/util/DelimitedFile.hpp"
00023 
00024 namespace cgatools { namespace command {
00025     // Command subclass for the "join" operation. Presumably joins two delimited files (A and B) -- confirm in Join.cpp.
00026     class Join : public Command
00027     {
00028     public:
00029         Join(const std::string& name);  // name: presumably the command name handed to Command -- TODO confirm. NOTE(review): single-arg ctor is not explicit.
00030 
00031     protected:
00032         int run(po::variables_map& vm);  // vm: option map (po presumably aliases boost::program_options); int result presumably the exit status -- confirm against Command.
00033         
00034     private:
00035         struct QueryPlan  // Field-index bookkeeping passed to the parse* helpers below; stored in qp_.
00036         {
00037             // Presumably the column indexes of the fields to match on -- TODO confirm.
00038             std::vector<int> matchIdx_;
00039             // Presumably the (begin, end) column indexes of the overlap range -- TODO confirm.
00040             std::pair<int,int> overlapIdx_;
00041         };
00042         // Private helpers. All are defined out of line, so the notes on them are inferred from the signatures only.
00043         void dumpRecord(std::ostream& out,
00044                         const std::vector<std::string>& aFields,
00045                         const std::vector<std::string>& bFields);  // Presumably writes one output record built from an A record and a B record to out.
00046         bool overlap(const std::pair<int64_t, int64_t>& lhs,
00047                      const std::pair<int64_t, int64_t>& rhs);  // Presumably true when ranges lhs and rhs overlap; exact rule is not visible here.
00048         void parseJoinFields(const std::vector<std::string>& fields,
00049                              const QueryPlan& qp,
00050                              std::string& matchKey,
00051                              std::pair<int64_t, int64_t>& range);  // fields and qp are read-only; matchKey and range are non-const refs, presumably outputs.
00052         void initQueryPlan(util::DelimitedFile& aa, util::DelimitedFile& bb);  // Takes both input files by non-const ref; presumably fills qp_ -- TODO confirm.
00053         void parseMatchFields(const util::DelimitedFile& df,
00054                               const std::string& fieldNameList,
00055                               QueryPlan& qp);  // qp is non-const: presumably receives match indexes resolved from fieldNameList in df.
00056         void parseOverlapFields(const util::DelimitedFile& df,
00057                                 const std::string& fieldNameList,
00058                                 QueryPlan& qp);  // qp is non-const: presumably receives overlap indexes resolved from fieldNameList in df.
00059         // Input and output file names; presumably set from command-line options -- confirm in Join.cpp.
00060         std::vector<std::string> inputFileNames_;
00061         std::string outputFileName_;
00062         // Join settings kept as raw values; their accepted values and semantics are not visible in this header.
00063         std::vector<std::string> match_;
00064         std::string overlapSpec_;
00065         std::string outputMode_;
00066         std::string overlapMode_;
00067         std::string select_;
00068         bool alwaysDump_;
00069         double overlapFractionA_, overlapFractionB_;  // One value each for side A and side B.
00070         int64_t boundaryUncertaintyA_, boundaryUncertaintyB_;  // One value each for side A and side B.
00071         // Working state.
00072         std::vector<QueryPlan> qp_;  // Presumably one QueryPlan per input file -- TODO confirm.
00073         std::vector<std::string> aFields_, bFields_;  // String buffers for side A and side B; presumably the current records' fields.
00074         std::vector< std::pair<size_t, size_t> > transform_;  // Pairs of size_t; meaning not evident from this header -- see Join.cpp.
00075     };
00076 
00077 } } // namespace command, namespace cgatools
00078 
00079 #endif // CGA_TOOLS_COMMAND_JOIN_HPP_