#include "csvparser.h"

// NOTE(review): the standard headers below cover the library names this file
// uses (isdigit, errno, LONG_MIN/LONG_MAX, HUGE_VAL, strtol/strtod) — confirm
// against the project's original include list.
#include <cctype>
#include <cerrno>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <string>
#include <vector>

/// @brief Creates a parser configured with the given delimiters.
/// @param skip_hdr   when true, the first row passed to add_row() is discarded
/// @param field_sep  character separating fields within a record
/// @param quote_ch   character toggling quoted mode
/// @param line_sep   first character recognised as a record terminator
/// @param line_sep2  second character recognised as a record terminator
CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
    skip_header = skip_hdr;
    field_separator = field_sep;
    quote_character = quote_ch;
    line_separator = line_sep;
    line_separator2 = line_sep2;

    header_skiped = false;
}

/// @brief Parses CSV text into a list of rows, each row being a list of typed values.
///
/// Every quote character toggles quoted mode and is itself dropped from the
/// field. Inside quotes, field and line separators are copied into the field
/// verbatim. Consecutive line separators outside quotes are collapsed, so
/// blank lines (and "\r\n" pairs) do not produce empty rows.
///
/// NOTE(review): header_skiped is never reset here, so with skip_header set
/// only the first row seen over the parser's lifetime is dropped — confirm
/// that reusing one parser for several inputs is intended to work this way.
///
/// @param csvSource complete CSV text to parse
/// @return the parsed rows, converted to MlValue
MlValue CsvParser::parseCSV(const std::string &csvSource) {
    constexpr std::size_t INITIAL_PARSED_ROWS_SIZE = 128;
    constexpr std::size_t INITIAL_COLUMNS_SIZE = 32;
    constexpr std::size_t ROWS_READ_FOR_SIZE_ESTIMATION = 16;

    std::size_t linesRead = 0;
    bool inQuote = false;
    // True right after a record terminator has been handled; used to collapse
    // runs of terminators into a single record boundary.
    bool newLine = false;
    std::string field;

    // NOTE(review): element type assumed to be MlValue — confirm against csvparser.h.
    std::vector<MlValue> parsed_rows;
    parsed_rows.reserve(INITIAL_PARSED_ROWS_SIZE);

    std::vector<MlValue> line;
    line.reserve(INITIAL_COLUMNS_SIZE);

    for (std::size_t pos = 0; pos < csvSource.size(); ++pos) {
        const char c = csvSource[pos];

        if (c == quote_character) {
            newLine = false;
            inQuote = !inQuote;
        } else if (c == field_separator) {
            newLine = false;
            if (inQuote) {
                field += c;
            } else {
                line.push_back(ivalualize(field));
                field.clear();
            }
        } else if (c == line_separator || c == line_separator2) {
            if (inQuote) {
                field += c;
            } else if (!newLine) {
                line.push_back(ivalualize(field));
                add_row(line, parsed_rows);
                field.clear();
                line.clear();
                ++linesRead;

                // After a few records, extrapolate the total row count from the
                // average record length seen so far and reserve once up front.
                if (linesRead == ROWS_READ_FOR_SIZE_ESTIMATION) {
                    const std::size_t bytesPerLine = pos / linesRead;
                    if (bytesPerLine > 0) {
                        const std::size_t linesEstimation = csvSource.size() / bytesPerLine;
                        if (linesEstimation > parsed_rows.capacity())
                            parsed_rows.reserve(linesEstimation);
                    }
                }
                newLine = true;
            }
        } else {
            newLine = false;
            field.push_back(c);
        }
    }

    // Flush the last record when the input does not end with a terminator.
    // The pending field is emitted even if empty as long as the record already
    // has columns, so "a," yields the same two columns as "a,\n".
    if (!field.empty() || !line.empty())
        line.push_back(ivalualize(field));
    add_row(line, parsed_rows);

    return parsed_rows;
}

/// @brief Appends a parsed record to the result, honouring header skipping.
///
/// When skip_header is set and no row has been skipped yet, the call only
/// marks the header as skipped (whether or not @p columns is empty).
/// Otherwise non-empty records are appended to @p rows.
///
/// @param columns fields of the record just parsed
/// @param rows    accumulated result rows
void CsvParser::add_row(const std::vector<MlValue> &columns, std::vector<MlValue> &rows) {
    if (skip_header && !header_skiped) {
        header_skiped = true;
        return;
    }

    if (!columns.empty())
        rows.emplace_back(columns);
}

/// @brief Converts a raw field into an integer, float or string MlValue.
///
/// Only fields starting with a digit, '+' or '-' are considered numeric
/// candidates; integer conversion is tried first, then floating point.
/// Everything else, including the empty field, is returned as a string.
///
/// @param value raw field text
/// @return the typed value
MlValue CsvParser::ivalualize(const std::string &value) {
    if (value.empty())
        return MlValue::string(value);

    // std::isdigit requires a value representable as unsigned char; passing a
    // negative plain char (any non-ASCII byte) is undefined behaviour.
    const unsigned char first = static_cast<unsigned char>(value[0]);
    if (!std::isdigit(first) && first != '-' && first != '+')
        return MlValue::string(value);

    long int_val;
    double float_val;
    if (is_string_int(value, int_val))
        return MlValue(int_val);
    if (is_string_float(value, float_val))
        return MlValue(float_val);

    return MlValue::string(value);
}

/// @brief Tests whether a string represents a base-10 integer that fits in long.
/// @param s   text to test
/// @param val receives the result of strtol (only meaningful when true is returned)
/// @return true when at least one character was parsed, the whole string was
///         consumed, and the value is in range
bool CsvParser::is_string_int(const std::string &s, long &val) {
    const char *const begin = s.c_str();
    char *end_ptr = nullptr;
    errno = 0;

    val = std::strtol(begin, &end_ptr, 10);

    // Nothing parsed (e.g. empty string or a lone sign).
    if (end_ptr == begin)
        return false;
    // Trailing garbage, or parsing stopped at an embedded NUL.
    if (end_ptr != begin + s.size())
        return false;
    if (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX))
        return false;
    if (val == 0 && errno != 0)
        return false;

    return true;
}

/// @brief Tests whether a string represents a floating-point value.
/// @param s   text to test
/// @param val receives the result of strtod (only meaningful when true is returned)
/// @return true when at least one character was parsed, the whole string was
///         consumed, and the conversion neither overflowed nor failed with a
///         zero result
bool CsvParser::is_string_float(const std::string &s, double &val) {
    const char *const begin = s.c_str();
    char *end_ptr = nullptr;
    errno = 0;

    val = std::strtod(begin, &end_ptr);

    // Nothing parsed (e.g. empty string or a lone sign).
    if (end_ptr == begin)
        return false;
    // Trailing garbage, or parsing stopped at an embedded NUL.
    if (end_ptr != begin + s.size())
        return false;
    if (errno == ERANGE && (val == HUGE_VAL || val == -HUGE_VAL))
        return false;
    if (val == 0 && errno != 0)
        return false;

    return true;
}