#include "csvparser.h"

#include <stdexcept>

// Creates a parser configured for one CSV dialect.
//
// skip_hdr   - when true, the first line handed to addLine() is dropped.
// field_sep  - character separating fields within a line.
// quote_ch   - character toggling quoted mode; separators inside quotes are literal.
// line_sep   - first character treated as end of line.
// line_sep2  - second character treated as end of line.
CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
    skip_header = skip_hdr;
    field_separator = field_sep;
    quote_character = quote_ch;

    line_separator = line_sep;
    line_separator2 = line_sep2;

    header_skiped = false;
}

// Parses csvSource into rows of fields and converts them to an MlValue
// (see ivalualize()).
//
// Every quote character toggles quoted mode and is itself never stored.
// Field and line separators are copied verbatim while inside quotes.
// A run of consecutive line separators ends only one line, so blank lines
// in the middle of the input produce no rows.
//
// NOTE: header_skiped is a member and is not reset here, so with skip_header
// enabled only the first line seen over the lifetime of this object is dropped.
MlValue CsvParser::parseCSV(const std::string &csvSource) {
    bool inQuote(false);
    bool newLine(false);
    std::string field;

    // PERF optimize it for memory usage and performance
    std::vector<std::vector<std::string>> parsed_data;
    // TODO some default size here
    std::vector<std::string> line;

    for (std::string::const_iterator aChar = csvSource.begin(); aChar != csvSource.end(); ++aChar) {
        const char ch = *aChar;

        if (ch == quote_character) {
            newLine = false;
            inQuote = !inQuote;
        } else if (ch == field_separator) {
            newLine = false;
            if (inQuote) {
                field += ch;
            } else {
                line.push_back(field);
                field.clear();
            }
        } else if (ch == line_separator || ch == line_separator2) {
            if (inQuote) {
                field += ch;
            } else if (!newLine) {
                // First separator of a run terminates the current line; the
                // rest of the run (e.g. the '\n' of "\r\n") is ignored.
                line.push_back(field);
                addLine(line, parsed_data);
                field.clear();
                line.clear();
                newLine = true;
            }
        } else {
            newLine = false;
            field.push_back(ch);
        }
    }

    // Flush the last line when the input does not end with a line separator.
    // A non-empty `line` means a field separator was already seen on this
    // line, so an empty trailing field ("a,b,") must still be emitted, the
    // same way it is for a newline-terminated line.
    if (!field.empty() || !line.empty())
        line.push_back(field);

    addLine(line, parsed_data);

    return ivalualize(parsed_data);
}

// Converts parsed rows into a list of rows of MlValue cells.
//
// The column count is taken from the first row. Longer rows are truncated to
// that width; shorter rows are padded with empty strings so that no row is
// indexed past its end. Each cell becomes exactly one value:
//   - an int when it matches int_regex and fits into int,
//   - a double when it matches double_regex,
//   - a string otherwise (including numbers too large to convert).
// An empty result is returned when there are no rows or the first row is empty.
MlValue CsvParser::ivalualize(std::vector<std::vector<std::string> > &parsed_data) const {
    const std::size_t rows = parsed_data.size();
    const std::size_t cols = rows > 0 ? parsed_data[0].size() : 0;

    std::vector<MlValue> result;

    if (rows > 0 && cols > 0) {
        const std::string missing_cell;
        result.reserve(rows);

        for (std::size_t r = 0; r < rows; r++) {
            const std::vector<std::string> &source_row = parsed_data[r];

            std::vector<MlValue> row;
            row.reserve(cols);

            for (std::size_t c = 0; c < cols; c++) {
                // Ragged input: rows shorter than the first one have no cell here.
                const std::string &value = c < source_row.size() ? source_row[c] : missing_cell;

                bool converted = false;
                try {
                    if (is_string_int(value)) {
                        row.push_back(MlValue(std::stoi(value)));
                        converted = true;
                    } else if (is_string_float(value)) {
                        row.push_back(MlValue(std::stod(value)));
                        converted = true;
                    }
                } catch (const std::out_of_range &) {
                    // Digits only, but too large for the numeric type: keep
                    // the text instead of aborting the whole parse.
                }

                if (!converted)
                    row.push_back(MlValue::string(value));
            }
            result.push_back(row);
        }
    }

    return result;
}

// Appends line to lines, honouring the skip-header setting.
//
// When skip_header is set and no line has been skipped yet, the line is
// dropped and header_skiped is set. Otherwise the line is appended unless it
// contains no fields.
void CsvParser::addLine(const std::vector<std::string> &line, std::vector<std::vector<std::string> > &lines) {
    if (skip_header && !header_skiped) {
        header_skiped = true;
    } else {
        if (line.size())
            lines.push_back(line);
    }
}

// std::regex int_underscored_regex("[0-9][0-9_]+[0-9]");
// Unsigned decimal integer: one or more digits.
std::regex int_regex("[0-9]+");
// Unsigned decimal fraction: digits, a dot, digits.
std::regex double_regex("[0-9]+\\.[0-9]+");

// Is string representing int value
// (whole string must match int_regex; signs and whitespace are not accepted)
bool CsvParser::is_string_int(const std::string &str) const {
    return std::regex_match(str, int_regex);
}

// Is string representing float value
// (whole string must match double_regex; signs and exponents are not accepted)
bool CsvParser::is_string_float(const std::string &str) const {
    return std::regex_match(str, double_regex);
}