124 lines
3.2 KiB
C++
124 lines
3.2 KiB
C++
|
|
#include "csvparser.h"
|
|
#include <climits>
|
|
|
|
|
|
// Creates a parser configured with the given CSV dialect.
//
//   skip_hdr   - when true, the first line handed to add_line() is discarded
//   field_sep  - character that separates fields within a line
//   quote_ch   - character that opens and closes a quoted section
//   line_sep   - first character treated as a line break
//   line_sep2  - second character treated as a line break
CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
    // Parsing state: no header line has been consumed yet.
    this->header_skiped = false;

    // Dialect settings, stored exactly as passed in.
    this->line_separator2 = line_sep2;
    this->line_separator = line_sep;
    this->quote_character = quote_ch;
    this->field_separator = field_sep;
    this->skip_header = skip_hdr;
}
|
|
|
|
// Parses CSV text into a list of rows, each row being a list of typed values.
//
// Fields are split on field_separator and rows on line_separator or
// line_separator2. Runs of consecutive line breaks (e.g. "\r\n" or blank
// lines) end only one row, so they never produce empty rows. Inside a quoted
// section both separators are ordinary data, and a doubled quote character
// stands for one literal quote character. Every finished field is converted
// with ivalualize(), quoted or not. When skip_header is set, the first row is
// dropped by add_line().
//
//   csvSource - the complete CSV document
//
// Returns the collected rows; the std::vector<MlValue> is converted to
// MlValue by the return statement.
MlValue CsvParser::parseCSV(const std::string &csvSource) {
    // Number of rows after which the total row count is estimated from the
    // average row length seen so far.
    const int kEstimateAfterLines = 16;

    // Every document has its own header; reset the flag so that a parser
    // instance can be used for more than one parseCSV() call.
    header_skiped = false;

    int linesRead = 0;
    bool inQuote = false;
    bool newLine = false;  // true while only line breaks were seen since the last finished row
    std::string field;

    std::vector<MlValue> parsed_data;
    parsed_data.reserve(128);

    std::vector<MlValue> line;
    line.reserve(32);

    const std::string::const_iterator end = csvSource.end();
    for (std::string::const_iterator aChar = csvSource.begin(); aChar != end; ++aChar) {
        const char c = *aChar;

        if (c == quote_character) {
            newLine = false;
            if (inQuote && (aChar + 1) != end && *(aChar + 1) == quote_character) {
                // Doubled quote inside a quoted section: one literal quote.
                field += c;
                ++aChar;
            } else {
                inQuote = !inQuote;
            }
        } else if (inQuote) {
            // Separators and line breaks are plain data while quoted.
            // newLine is already false here: the opening quote cleared it.
            field += c;
        } else if (c == field_separator) {
            newLine = false;
            line.push_back(ivalualize(field));
            field.clear();
        } else if (c == line_separator || c == line_separator2) {
            if (!newLine) {
                line.push_back(ivalualize(field));
                add_line(line, parsed_data);
                field.clear();
                line.clear();
                ++linesRead;

                if (linesRead == kEstimateAfterLines) {
                    // Estimate the row count from the average bytes per row
                    // and grow the result vector once instead of repeatedly.
                    const std::string::size_type consumed =
                        static_cast<std::string::size_type>(aChar - csvSource.begin());
                    const std::string::size_type bytesPerLine =
                        consumed / static_cast<std::string::size_type>(linesRead);
                    if (bytesPerLine > 0) {
                        // reserve() is a no-op when capacity is already sufficient.
                        parsed_data.reserve(csvSource.size() / bytesPerLine);
                    }
                }
                newLine = true;
            }
        } else {
            newLine = false;
            field += c;
        }
    }

    // Flush the last row when the input does not end with a line break.
    // A non-empty `line` means a field separator was already seen in this
    // row, so the pending field belongs to it even when it is empty
    // ("a,b," has three fields, exactly like "a,b,\n").
    if (!field.empty() || !line.empty())
        line.push_back(ivalualize(field));

    add_line(line, parsed_data);

    return parsed_data;
}
|
|
|
|
|
|
void CsvParser::add_line(const std::vector<MlValue> &line, std::vector<MlValue> &lines) {
|
|
if (skip_header && !header_skiped) {
|
|
header_skiped = true;
|
|
} else {
|
|
if (line.size())
|
|
lines.push_back(line);
|
|
}
|
|
}
|
|
|
|
// Converts the text of one CSV field into a typed value.
//
// Only text starting with a digit, '-' or '+' is considered numeric. Such
// text becomes an integer value if is_string_int() accepts it, otherwise a
// floating point value if is_string_float() accepts it. Everything else,
// including the empty string, is returned as a string value.
//
//   value - raw field text
MlValue CsvParser::ivalualize(const std::string &value) const {
    if (value.empty())
        return MlValue::string(value);

    // The character classification functions have undefined behaviour for
    // negative arguments, which a plain char holds for non-ASCII bytes, so
    // the character is passed as unsigned char.
    const unsigned char first = static_cast<unsigned char>(value[0]);
    const bool may_be_number = isdigit(first) || first == '-' || first == '+';
    if (!may_be_number)
        return MlValue::string(value);

    long int_val = 0;
    if (is_string_int(value, int_val))
        return MlValue(int_val);

    double float_val = 0.0;
    if (is_string_float(value, float_val))
        return MlValue(float_val);

    return MlValue::string(value);
}
|
|
|
|
// Is string representing int value
|
|
bool CsvParser::is_string_int(const std::string &s, long &val) const {
|
|
char *end_ptr;
|
|
errno = 0;
|
|
// if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false;
|
|
val = strtol(s.c_str(), &end_ptr, 10);
|
|
|
|
if ( *end_ptr != '\0' ) return false;
|
|
if (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX)) return false;
|
|
if (val == 0 && errno != 0) return false;
|
|
return true;
|
|
}
|
|
|
|
// Is string representing float value
|
|
bool CsvParser::is_string_float(const std::string &s, double &val) const {
|
|
char *end_ptr;
|
|
errno = 0;
|
|
// if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false;
|
|
val = strtod(s.c_str(), &end_ptr);
|
|
|
|
if ( *end_ptr != '\0' ) return false;
|
|
if (errno == ERANGE && (val == HUGE_VAL || val == -HUGE_VAL)) return false;
|
|
if (val == 0 && errno != 0) return false;
|
|
return true;
|
|
}
|