00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_UTIL_DELIMITEDFILE_HPP_
00016 #define CGATOOLS_UTIL_DELIMITEDFILE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/DelimitedLineParser.hpp"
00022
00023 #include <boost/noncopyable.hpp>
00024
00025 namespace cgatools { namespace util {
00026
//! Key/value string metadata used as the Metadata type of DelimitedFile.
//!
//! Entries are stored as a vector of (key, value) pairs rather than in an
//! associative container, so the storage itself keeps element order and
//! does not prevent a key from appearing more than once.
//!
//! NOTE(review): apart from getMap(), the member functions are only
//! declared in this header; the remarks on them below are inferred from
//! names and signatures and should be confirmed against the implementation.
class DelimitedFileMetadata
{
public:
    //! Returns the format version as an integer.
    //! NOTE(review): the key it is derived from, and the result when no
    //! version is present, are not visible here.
    int getFormatVersion() const;

    //! Reports whether an entry with the given key exists.
    bool hasKey(const std::string& key) const;

    //! Returns a reference to the value stored for key.
    //! NOTE(review): behavior for a missing or repeated key is not visible
    //! here -- confirm before relying on it. The reference presumably
    //! points into this object's storage, so treat it as invalidated by
    //! any mutating call.
    const std::string& get(const std::string& key) const;

    //! Stores value under key.
    //! NOTE(review): presumably replaces an existing entry, in contrast to
    //! add() -- confirm.
    void set(const std::string& key, const std::string& value);

    //! Stores value under key.
    //! NOTE(review): presumably appends even if key is already present --
    //! confirm.
    void add(const std::string& key, const std::string& value);

    //! Removes entries for key.
    //! NOTE(review): presumably every entry with that key, per the name.
    void removeAll(const std::string& key);

    //! Read-only access to the underlying (key, value) sequence.
    //! The returned reference is to this object's own storage.
    const std::vector< std::pair<std::string, std::string> >& getMap() const
    {
        return kv_;
    }

    //! Populates default metadata entries.
    //! NOTE(review): which keys and values are written is not visible here.
    void initDefaults();

    //! Copies metadata from src into this object.
    //!
    //! \param src    metadata to read from.
    //! \param keys   selects which entries are transferred.
    //!               NOTE(review): the list syntax (delimiter) is not
    //!               visible here -- confirm.
    //! \param prefix defaults to the empty string.
    //!               NOTE(review): presumably prepended to each
    //!               transferred key -- confirm.
    void transfer(const DelimitedFileMetadata& src,
                  const std::string& keys,
                  const std::string& prefix = "");

    //! Reorders the stored entries.
    //! NOTE(review): sort criterion (presumably by key) is not visible here.
    void sort();

private:
    //! Metadata entries as (key, value) pairs.
    std::vector< std::pair<std::string, std::string> > kv_;
};
00062
00063 std::ostream& operator<< (std::ostream& out, const DelimitedFileMetadata& meta);
00064
00076 class DelimitedFile : boost::noncopyable
00077 {
00078 public:
00079 typedef DelimitedFileMetadata Metadata;
00080 typedef DelimitedLineParser::EmptyFieldHandling EmptyFieldHandling;
00081 typedef DelimitedLineParser::StrictnessChecking StrictnessChecking;
00082
00083 explicit DelimitedFile(
00084 std::istream& in,
00085 char delimiter = '\t',
00086 EmptyFieldHandling emptyHandling = DelimitedLineParser::PROCESS_EMPTY_FIELDS,
00087 StrictnessChecking strictnessChecking = DelimitedLineParser::RELAXED_CHECKING);
00088
00089 enum FieldParserType
00090 {
00091 REQUIRED = 0,
00092 OPTIONAL = 1
00093 };
00094 template <class Field>
00095 void addField(const Field& parser, FieldParserType ft = REQUIRED)
00096 {
00097 int count = 0;
00098 for(size_t ii=0; ii<columnHeaders_.size(); ii++)
00099 {
00100 if (columnHeadersEqual(columnHeaders_[ii], parser.getName()))
00101 {
00102 lp_.setField(ii, parser);
00103 count++;
00104 }
00105 }
00106 if (0 == count && OPTIONAL != ft)
00107 throw Exception("missing required field: "+parser.getName());
00108 if (count > 1)
00109 throw Exception("multiple fields with same name: "+parser.getName());
00110 }
00111
00112 void addAllFields(std::vector<std::string>& fields);
00113
00114 bool next();
00115
00116 const Metadata& getMetadata() const
00117 {
00118 return metadata_;
00119 }
00120
00121 const std::vector<std::string>& getColumnHeaders() const
00122 {
00123 return columnHeaders_;
00124 }
00125
00126 size_t getFieldOffset(const std::string& fieldName) const;
00127
00128 bool hasField(const std::string& fieldName) const;
00129
00130 DelimitedLineParser& getDelimitedLineParser()
00131 {
00132 return lp_;
00133 }
00134
00135 const std::string& getLine() const
00136 {
00137 return line_;
00138 }
00139
00140 private:
00141 void readHeader();
00142 bool columnHeadersEqual(const std::string& h1, const std::string& h2) const;
00143
00144 std::istream& in_;
00145 Metadata metadata_;
00146 std::vector<std::string> columnHeaders_;
00147 DelimitedLineParser lp_;
00148 std::string line_;
00149 char delimiter_;
00150 EmptyFieldHandling emptyHandling_;
00151 StrictnessChecking strictnessChecking_;
00152 size_t lineNo_;
00153 };
00154
00155 } }
00156
00157 #endif // CGATOOLS_UTIL_DELIMITEDFILE_HPP_