some TODOs solved

This commit is contained in:
vaclavt
2022-02-17 20:41:47 +01:00
parent 2d26c59df6
commit 765f2bc673
6 changed files with 49 additions and 47 deletions

View File

@@ -14,16 +14,20 @@ CsvParser::CsvParser(bool skip_hdr, char field_sep, char quote_ch, char line_sep
}
MlValue CsvParser::parseCSV(const std::string &csvSource) {
int linesRead = 0;
constexpr size_t INITIAL_PARSED_ROWS_SIZE = 128;
constexpr size_t INITIAL_COLUMNS_SIZE = 32;
constexpr size_t ROWS_READ_FOR_SIZE_ESTIMATION = 16;
size_t linesRead = 0;
bool inQuote(false);
bool newLine(false);
std::string field;
std::vector<MlValue> parsed_data;
parsed_data.reserve(128); // TODO introduce constant here
std::vector<MlValue> parsed_rows;
parsed_rows.reserve(INITIAL_PARSED_ROWS_SIZE);
std::vector<MlValue> line;
line.reserve(32); // TODO introduce constant here
line.reserve(INITIAL_COLUMNS_SIZE);
std::string::const_iterator aChar = csvSource.begin();
std::string::const_iterator aEnd = csvSource.end();
@@ -45,14 +49,14 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) {
} else {
if (!newLine) {
line.push_back(ivalualize(field));
add_line(line, parsed_data);
add_row(line, parsed_rows);
field.clear();
line.clear();
linesRead++;
if (linesRead == 16) {
if (linesRead == ROWS_READ_FOR_SIZE_ESTIMATION) {
size_t linesEstimation = csvSource.size() / (std::distance(csvSource.begin(), aChar) / linesRead);
if (linesEstimation > parsed_data.capacity())
parsed_data.reserve(linesEstimation);
if (linesEstimation > parsed_rows.capacity())
parsed_rows.reserve(linesEstimation);
}
newLine = true;
}
@@ -68,18 +72,18 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) {
if (!field.empty())
line.push_back(ivalualize(field));
add_line(line, parsed_data);
add_row(line, parsed_rows);
return parsed_data;
return parsed_rows;
}
void CsvParser::add_line(const std::vector<MlValue> &line, std::vector<MlValue> &lines) {
void CsvParser::add_row(const std::vector<MlValue> &columns, std::vector<MlValue> &rows) {
if (skip_header && !header_skiped) {
header_skiped = true;
} else {
if (!line.empty())
lines.emplace_back(line);
if (!columns.empty())
rows.emplace_back(columns);
}
}