// mlisp/clib/csvparser.cpp (115 lines, 2.7 KiB, C++)

#include "csvparser.h"
// Creates a parser configured with the given header policy and delimiters.
//   skip_hdr   - when true, the first record handed to addLine is discarded
//   field_sep  - character that separates fields within a record
//   quote_ch   - character that toggles quoted mode
//   line_sep   - first character treated as a record terminator
//   line_sep2  - second character treated as a record terminator
CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
    // Delimiter configuration.
    this->field_separator = field_sep;
    this->quote_character = quote_ch;
    this->line_separator = line_sep;
    this->line_separator2 = line_sep2;

    // Header handling: nothing has been skipped yet on a fresh parser.
    this->skip_header = skip_hdr;
    this->header_skiped = false;
}
// Parses CSV text and returns the result of ivalualize() applied to the
// collected records.
//
// The input is scanned one character at a time:
//   * quote_character toggles quoted mode; inside quoted mode the field and
//     line separators are taken literally. A doubled quote inside a quoted
//     field yields one literal quote character (RFC 4180 style escaping).
//   * field_separator ends the current field.
//   * line_separator / line_separator2 end the current record. Consecutive
//     line separators (e.g. "\r\n", or blank lines) produce only one record.
//
// Every finished record goes through addLine(), which applies the
// skip_header policy. The header_skiped flag is object state and is not reset
// here, so it carries over between calls on the same parser.
//
// csvSource - complete CSV text to parse.
MlValue CsvParser::parseCSV(const std::string &csvSource) {
    bool inQuote = false;
    // True right after an unquoted line break was handled; suppresses the
    // empty records that "\r\n" pairs or blank lines would otherwise create.
    bool newLine = false;
    std::string field;
    // PERF optimize it for memory usage and performance
    std::vector<std::vector<std::string>> parsed_data; // TODO some default size here
    std::vector<std::string> line;

    const std::string::const_iterator end = csvSource.end();
    for (std::string::const_iterator aChar = csvSource.begin(); aChar != end; ++aChar) {
        const char ch = *aChar;

        if (ch == quote_character) {
            newLine = false;
            if (inQuote && (aChar + 1) != end && *(aChar + 1) == quote_character) {
                // Escaped quote ("") inside a quoted field: keep one quote
                // and step over the second one.
                field += ch;
                ++aChar;
            } else {
                inQuote = !inQuote;
            }
        } else if (inQuote) {
            // Separators are ordinary data while quoted. newLine is already
            // false here because entering quoted mode cleared it.
            field += ch;
        } else if (ch == field_separator) {
            newLine = false;
            line.push_back(field);
            field.clear();
        } else if (ch == line_separator || ch == line_separator2) {
            if (!newLine) {
                line.push_back(field);
                addLine(line, parsed_data);
                field.clear();
                line.clear();
                newLine = true;
            }
        } else {
            newLine = false;
            field += ch;
        }
    }

    // Flush the last record when the input does not end with a line break.
    // An empty pending field still counts when the record already has fields
    // ("a," is two fields), otherwise the last row would come out short.
    if (!field.empty() || !line.empty())
        line.push_back(field);
    addLine(line, parsed_data);

    return ivalualize(parsed_data);
}
// Converts the table of parsed strings into MlValue form: a list of rows,
// each row a list of cells, returned through MlValue's conversion from
// std::vector<MlValue>.
//
// Each cell becomes exactly one value:
//   * an int    when is_string_int() accepts it,
//   * a double  when is_string_float() accepts it,
//   * a string  otherwise.
//
// The first row fixes the column count. Longer rows are cut to that width;
// shorter rows contribute only the cells they actually have, so no row is
// ever indexed past its end. An empty table, or one whose first row is empty,
// yields an empty list.
//
// parsed_data - rows of raw field strings; not modified.
MlValue CsvParser::ivalualize(std::vector<std::vector<std::string> > &parsed_data) const {
    typedef std::vector<std::string>::size_type Size;

    std::vector<MlValue> result;
    if (parsed_data.empty() || parsed_data[0].empty())
        return result;

    const Size cols = parsed_data[0].size();
    result.reserve(parsed_data.size());

    for (const std::vector<std::string> &cells : parsed_data) {
        // Clamp to the row's own length: ragged input must not be read out of bounds.
        const Size usable = cells.size() < cols ? cells.size() : cols;

        std::vector<MlValue> row;
        row.reserve(usable);
        for (Size c = 0; c < usable; ++c) {
            const std::string &value = cells[c];
            // The branches are mutually exclusive so a cell is emitted once.
            if (is_string_int(value)) {
                // NOTE: std::stoi throws std::out_of_range when the digit
                // string does not fit in an int.
                row.push_back(MlValue(std::stoi(value)));
            } else if (is_string_float(value)) {
                row.push_back(MlValue(std::stod(value)));
            } else {
                row.push_back(MlValue::string(value));
            }
        }
        result.push_back(row);
    }
    return result;
}
// Appends one parsed record to the collected rows.
//
// When skip_header is set and no record has been skipped yet, this call only
// marks the header as skipped and stores nothing (this happens even if the
// record is empty). Otherwise the record is stored, unless it has no fields.
//
// line  - fields of the record that was just completed
// lines - accumulated records; receives a copy of line
void CsvParser::addLine(const std::vector<std::string> &line, std::vector<std::vector<std::string> > &lines) {
    const bool consume_as_header = skip_header && !header_skiped;
    if (consume_as_header) {
        header_skiped = true;
        return;
    }

    if (!line.empty()) {
        lines.push_back(line);
    }
}
// Disabled variant that would accept underscore-separated digits:
// std::regex int_underscored_regex("[0-9][0-9_]+[0-9]");

// Whole-string pattern for an unsigned integer: one or more decimal digits.
// No sign or surrounding whitespace is accepted.
std::regex int_regex{R"([0-9]+)"};

// Whole-string pattern for an unsigned decimal number: digits, a literal dot,
// digits. Both digit groups are required; no sign or exponent is accepted.
std::regex double_regex{R"([0-9]+\.[0-9]+)"};
// Reports whether the entire string matches int_regex, i.e. whether it can be
// treated as an integer literal.
bool CsvParser::is_string_int(const std::string &str) const {
    const bool whole_string_matches = std::regex_match(str.begin(), str.end(), int_regex);
    return whole_string_matches;
}
// Reports whether the entire string matches double_regex, i.e. whether it can
// be treated as a floating-point literal.
bool CsvParser::is_string_float(const std::string &str) const {
    const bool whole_string_matches = std::regex_match(str.begin(), str.end(), double_regex);
    return whole_string_matches;
}