csv parser optimizations

This commit is contained in:
VaclavT 2021-03-22 06:49:03 +01:00
parent e981d6b67f
commit a931661608
1 changed file with 12 additions and 4 deletions

View File

@ -13,12 +13,12 @@ CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep
}
MlValue CsvParser::parseCSV(const std::string &csvSource) {
int linesRead = 0;
bool inQuote(false);
bool newLine(false);
std::string field;
std::vector<MlValue> parsed_data;
// TODO: when csvSource is really big, reserve a larger number of entries up front to prevent reallocations
parsed_data.reserve(128);
std::vector<MlValue> line;
@ -46,6 +46,12 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) {
add_line(line, parsed_data);
field.clear();
line.clear();
linesRead++;
if (linesRead == 16) {
int linesEstimation = csvSource.size() / (std::distance(csvSource.begin(), aChar) / linesRead);
if (linesEstimation > parsed_data.capacity())
parsed_data.reserve(linesEstimation);
}
newLine = true;
}
}
@ -93,9 +99,10 @@ MlValue CsvParser::ivalualize(const std::string &value) const {
bool CsvParser::is_string_int(const std::string &s, long &val) const {
char *end_ptr;
errno = 0;
if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false;
// if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false;
val = strtol(s.c_str(), &end_ptr, 10);
if ( *end_ptr != '\0' ) return false;
if ( *end_ptr != '\0' ) return false;
if (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX)) return false;
if (val == 0 && errno != 0) return false;
return true;
@ -105,8 +112,9 @@ bool CsvParser::is_string_int(const std::string &s, long &val) const {
bool CsvParser::is_string_float(const std::string &s, double &val) const {
char *end_ptr;
errno = 0;
if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false;
// if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) return false;
val = strtod(s.c_str(), &end_ptr);
if ( *end_ptr != '\0' ) return false;
if (errno == ERANGE && (val == HUGE_VAL || val == -HUGE_VAL)) return false;
if (val == 0 && errno != 0) return false;