From a9316616083c4db6f5c82415bce3e088b3d57949 Mon Sep 17 00:00:00 2001 From: VaclavT Date: Mon, 22 Mar 2021 06:49:03 +0100 Subject: [PATCH] csv parser optimizations --- clib/csvparser.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/clib/csvparser.cpp b/clib/csvparser.cpp index f999d5d..2448504 100644 --- a/clib/csvparser.cpp +++ b/clib/csvparser.cpp @@ -13,12 +13,12 @@ CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep } MlValue CsvParser::parseCSV(const std::string &csvSource) { + int linesRead = 0; bool inQuote(false); bool newLine(false); std::string field; std::vector parsed_data; - // TODO when csvSource is really big use some bigger nr to prevent reallocations parsed_data.reserve(128); std::vector line; @@ -46,6 +46,12 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) { add_line(line, parsed_data); field.clear(); line.clear(); + linesRead++; + if (linesRead == 16) { + int linesEstimation = csvSource.size() / (std::distance(csvSource.begin(), aChar) / linesRead); + if (linesEstimation > parsed_data.capacity()) + parsed_data.reserve(linesEstimation); + } newLine = true; } } @@ -93,9 +99,10 @@ MlValue CsvParser::ivalualize(const std::string &value) const { bool CsvParser::is_string_int(const std::string &s, long &val) const { char *end_ptr; errno = 0; - if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false; + // if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false; val = strtol(s.c_str(), &end_ptr, 10); - if ( *end_ptr != '\0' ) return false; + + if ( *end_ptr != '\0' ) return false; if (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX)) return false; if (val == 0 && errno != 0) return false; return true; @@ -105,8 +112,9 @@ bool CsvParser::is_string_int(const std::string &s, long &val) const { bool CsvParser::is_string_float(const std::string &s, double &val) const { char *end_ptr; errno = 0; - if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false; + // if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false; val = strtod(s.c_str(), &end_ptr); + if ( *end_ptr != '\0' ) return false; if (errno == ERANGE && (val == HUGE_VAL || val == -HUGE_VAL)) return false; if (val == 0 && errno != 0) return false;