00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef CGATOOLS_UTIL_DELIMITEDFILE_HPP_
00016 #define CGATOOLS_UTIL_DELIMITEDFILE_HPP_ 1
00017
00019
00020 #include "cgatools/core.hpp"
00021 #include "cgatools/util/DelimitedLineParser.hpp"
00022
00023 #include <boost/noncopyable.hpp>
00024
00025 namespace cgatools { namespace util {
00026
00027 class DelimitedFileMetadata
00028 {
00029 public:
00031 static const std::string OUTPUT_FORMAT_VERSION;
00032
00036 static std::string PIPELINE_VERSION;
00037
00042 int getFormatVersion() const;
00043
00048 std::string getSoftwareVersionString() const;
00049
00050 bool hasKey(const std::string& key) const;
00051 const std::string& get(const std::string& key) const;
00052 template <class T>
00053 const T get(const std::string& key) const {
00054 return util::parseValue<T>(get(key));
00055 }
00056
00057 void set(const std::string& key, const std::string& value);
00058 void add(const std::string& key, const std::string& value);
00059 void removeAll(const std::string& key);
00060
00061 const std::vector< std::pair<std::string, std::string> >& getMap() const
00062 {
00063 return kv_;
00064 }
00065
00071 void initDefaults(const DelimitedFileMetadata& meta = DelimitedFileMetadata());
00072
00075 void transfer(const DelimitedFileMetadata& src,
00076 const std::string& keys,
00077 const std::string& prefix = "");
00078
00080 void sort();
00081
00082 void setFileName(const std::string& fn)
00083 {
00084 fileName_ = fn;
00085 }
00086
00087
00088 const std::string & getFileName() const{
00089 return fileName_;
00090 }
00091
00092 private:
00093 std::vector< std::pair<std::string, std::string> > kv_;
00094 std::string fileName_;
00095
00096 void reportError(const std::string& error) const;
00097 };
00098
// Stream-insertion operator for DelimitedFileMetadata; returns a
// std::ostream reference. Only the declaration is visible here.
// NOTE(review): presumably writes the metadata entries to `out` and
// returns `out` -- confirm the exact output format in the implementation.
00099 std::ostream& operator<< (std::ostream& out, const DelimitedFileMetadata& meta);
00100
00112 class DelimitedFile : boost::noncopyable
00113 {
00114 public:
00115 typedef DelimitedFileMetadata Metadata;
00116 typedef DelimitedLineParser::EmptyFieldHandling EmptyFieldHandling;
00117 typedef DelimitedLineParser::StrictnessChecking StrictnessChecking;
00118
00119 explicit DelimitedFile(
00120 std::istream& in,
00121 const std::string& fileName ,
00122 char delimiter = '\t',
00123 EmptyFieldHandling emptyHandling = DelimitedLineParser::PROCESS_EMPTY_FIELDS,
00124 StrictnessChecking strictnessChecking = DelimitedLineParser::RELAXED_CHECKING);
00125
00126 enum FieldParserType
00127 {
00128 FPT_REQUIRED = 0,
00129 FPT_OPTIONAL = 1
00130 };
00131 template <class Field>
00132 void addField(const Field& parser, FieldParserType ft = FPT_REQUIRED)
00133 {
00134 int count = 0;
00135 for(size_t ii=0; ii<columnHeaders_.size(); ii++)
00136 {
00137 if (columnHeadersEqual(columnHeaders_[ii], parser.getName()))
00138 {
00139 lp_.setField(ii, parser);
00140 count++;
00141 }
00142 }
00143 if (0 == count && FPT_OPTIONAL != ft)
00144 reportError("missing required field: "+parser.getName());
00145 if (count > 1)
00146 reportError("multiple fields with same name: "+parser.getName());
00147 }
00148
00149 void addAllFields(std::vector<std::string>& fields);
00150
00151 bool next();
00152
00153 const Metadata& getMetadata() const
00154 {
00155 return metadata_;
00156 }
00157
00158 const std::vector<std::string>& getColumnHeaders() const
00159 {
00160 return columnHeaders_;
00161 }
00162
00163 size_t getFieldOffset(const std::string& fieldName) const;
00164
00165 bool hasField(const std::string& fieldName) const;
00166
00167 DelimitedLineParser& getDelimitedLineParser()
00168 {
00169 return lp_;
00170 }
00171
00172 const std::string& getLine() const
00173 {
00174 return line_;
00175 }
00176
00177 private:
00178 void readHeader();
00179 void reportError(const std::string& error) const;
00180 bool columnHeadersEqual(const std::string& h1, const std::string& h2) const;
00181
00182 std::istream& in_;
00183 std::string fileName_;
00184 Metadata metadata_;
00185 std::vector<std::string> columnHeaders_;
00186 DelimitedLineParser lp_;
00187 std::string line_;
00188 char delimiter_;
00189 EmptyFieldHandling emptyHandling_;
00190 StrictnessChecking strictnessChecking_;
00191 size_t lineNo_;
00192 std::string activeLineSetId_;
00193 bool withinActiveLineSet_;
00194 };
00195
00196 } }
00197
00198 #endif // CGATOOLS_UTIL_DELIMITEDFILE_HPP_