00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_UTIL_DELIMITEDFILE_HPP_
00016 #define CGATOOLS_UTIL_DELIMITEDFILE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/DelimitedLineParser.hpp"
00022
00023 #include <boost/noncopyable.hpp>
00024
00025 namespace cgatools { namespace util {
00026
00027 class DelimitedFileMetadata
00028 {
00029 public:
00031 static const std::string OUTPUT_FORMAT_VERSION;
00032
00036 static std::string PIPELINE_VERSION;
00037
00042 int getFormatVersion() const;
00043
00048 std::string getSoftwareVersionString() const;
00049
00050 bool hasKey(const std::string& key) const;
00051 const std::string& get(const std::string& key) const;
00052 template <class T>
00053 const T get(const std::string& key) const {
00054 return util::parseValue<T>(get(key));
00055 }
00056
00057 void set(const std::string& key, const std::string& value);
00058 void add(const std::string& key, const std::string& value);
00059 void removeAll(const std::string& key);
00060
00061 const std::vector< std::pair<std::string, std::string> >& getMap() const
00062 {
00063 return kv_;
00064 }
00065
00071 void initDefaults(const DelimitedFileMetadata& meta = DelimitedFileMetadata());
00072
00075 void transfer(const DelimitedFileMetadata& src,
00076 const std::string& keys,
00077 const std::string& prefix = "");
00078
00080 void sort();
00081
00082 void setFileName(const std::string& fn)
00083 {
00084 fileName_ = fn;
00085 }
00086
00087 private:
00088 std::vector< std::pair<std::string, std::string> > kv_;
00089 std::string fileName_;
00090
00091 void reportError(const std::string& error) const;
00092 };
00093
00094 std::ostream& operator<< (std::ostream& out, const DelimitedFileMetadata& meta);
00095
00107 class DelimitedFile : boost::noncopyable
00108 {
00109 public:
00110 typedef DelimitedFileMetadata Metadata;
00111 typedef DelimitedLineParser::EmptyFieldHandling EmptyFieldHandling;
00112 typedef DelimitedLineParser::StrictnessChecking StrictnessChecking;
00113
00114 explicit DelimitedFile(
00115 std::istream& in,
00116 const std::string& fileName ,
00117 char delimiter = '\t',
00118 EmptyFieldHandling emptyHandling = DelimitedLineParser::PROCESS_EMPTY_FIELDS,
00119 StrictnessChecking strictnessChecking = DelimitedLineParser::RELAXED_CHECKING);
00120
00121 enum FieldParserType
00122 {
00123 FPT_REQUIRED = 0,
00124 FPT_OPTIONAL = 1
00125 };
00126 template <class Field>
00127 void addField(const Field& parser, FieldParserType ft = FPT_REQUIRED)
00128 {
00129 int count = 0;
00130 for(size_t ii=0; ii<columnHeaders_.size(); ii++)
00131 {
00132 if (columnHeadersEqual(columnHeaders_[ii], parser.getName()))
00133 {
00134 lp_.setField(ii, parser);
00135 count++;
00136 }
00137 }
00138 if (0 == count && FPT_OPTIONAL != ft)
00139 reportError("missing required field: "+parser.getName());
00140 if (count > 1)
00141 reportError("multiple fields with same name: "+parser.getName());
00142 }
00143
00144 void addAllFields(std::vector<std::string>& fields);
00145
00146 bool next();
00147
00148 const Metadata& getMetadata() const
00149 {
00150 return metadata_;
00151 }
00152
00153 const std::vector<std::string>& getColumnHeaders() const
00154 {
00155 return columnHeaders_;
00156 }
00157
00158 size_t getFieldOffset(const std::string& fieldName) const;
00159
00160 bool hasField(const std::string& fieldName) const;
00161
00162 DelimitedLineParser& getDelimitedLineParser()
00163 {
00164 return lp_;
00165 }
00166
00167 const std::string& getLine() const
00168 {
00169 return line_;
00170 }
00171
00172 private:
00173 void readHeader();
00174 void reportError(const std::string& error) const;
00175 bool columnHeadersEqual(const std::string& h1, const std::string& h2) const;
00176
00177 std::istream& in_;
00178 std::string fileName_;
00179 Metadata metadata_;
00180 std::vector<std::string> columnHeaders_;
00181 DelimitedLineParser lp_;
00182 std::string line_;
00183 char delimiter_;
00184 EmptyFieldHandling emptyHandling_;
00185 StrictnessChecking strictnessChecking_;
00186 size_t lineNo_;
00187 std::string activeLineSetId_;
00188 bool withinActiveLineSet_;
00189 };
00190
00191 } }
00192
00193 #endif // CGATOOLS_UTIL_DELIMITEDFILE_HPP_