From 3a660c1783ea276b2d4540397b63ff15ebf65a78 Mon Sep 17 00:00:00 2001 From: vaclavt Date: Tue, 11 Jan 2022 23:34:49 +0100 Subject: [PATCH] csv parsing a bit simplified --- csvreader.cpp | 76 +++++++++++++++++++-------------------------------- csvreader.h | 4 +-- 2 files changed, 30 insertions(+), 50 deletions(-) diff --git a/csvreader.cpp b/csvreader.cpp index 2d6b72a..f5c6754 100644 --- a/csvreader.cpp +++ b/csvreader.cpp @@ -18,19 +18,15 @@ CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep } -int CsvReader::parseCSV(const std::string &filename, std::vector &cols_def, Table &table) { +size_t CsvReader::parseCSVFile(const std::string &filename, std::vector &cols_def, Table &table) { + size_t row_cnt = 0; std::vector cdefs; cdefs.reserve(cols_def.size()); for (auto &cd : cols_def) { cdefs.emplace_back(table.get_column_def(cd.name)); } - int row_cnt = 0; bool inQuote(false); - std::string field; - - std::vector line; - line.reserve(32); errno = 0; FILE* fp = fopen(filename.c_str(), "r"); @@ -40,67 +36,50 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co char* line_str = NULL; size_t len = 0; - - long read_chars; - while ((read_chars = getline(&line_str, &len, fp)) != -1) { - if (skip_header && !header_skiped) { - header_skiped = true; - continue; - } - if (read_chars > 0 && line_str[read_chars - 1] == '\n') { - line_str[read_chars - 1] = '\0'; - --read_chars; - } - std::string csvSource{line_str}; - - std::string::const_iterator aChar = csvSource.begin(); - std::string::const_iterator strEnd = csvSource.end(); - while (aChar != strEnd) { - if (*aChar == quote_character) { - inQuote = !inQuote; - } else if (*aChar == field_separator) { - if (inQuote) { - field += *aChar; - } else { - line.push_back(field); - field.clear(); - } - } else { - field.push_back(*aChar); + try { + long read_chars; + while ((read_chars = getline(&line_str, &len, fp)) != -1) { + if (skip_header && !header_skiped) { + header_skiped = true; + continue; + } + if (read_chars > 0 && line_str[read_chars - 1] == '\n') { + line_str[read_chars - 1] = '\0'; + --read_chars; } - ++aChar; + row_cnt += parseCSVString(line_str, cols_def, table); } - if (!field.empty()) - line.push_back(field); + fclose(fp); - table.create_row_from_vector(cols_def, line); - row_cnt++; + } catch (const std::exception &e) { + if (line_str) + free(line_str); - field.clear(); - line.clear(); + throw e; } - fclose(fp); if (line_str) - free(line_str); + free(line_str); return row_cnt; } -int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table) { - int row_cnt = 0; +size_t CsvReader::parseCSVString(const std::string &csvSource, std::vector &cols_def, Table& table) { + size_t row_cnt = 0; bool inQuote(false); bool newLine(false); - std::string field; std::vector line; - line.reserve(32); + std::string field; + line.reserve(256); + field.reserve(64); std::string::const_iterator aChar = csvSource.begin(); - while (aChar != csvSource.end()) { + std::string::const_iterator aEnd = csvSource.end(); + while (aChar != aEnd) { if (*aChar == quote_character) { newLine = false; inQuote = !inQuote; @@ -136,7 +115,8 @@ int CsvReader::parseCSV2(const std::string &csvSource, std::vector & aChar++; } - if (!field.empty()) line.push_back(field); + if (!field.empty()) + line.push_back(field); if (header_skiped) { table.create_row_from_vector(cols_def, line); diff --git a/csvreader.h b/csvreader.h index 1778437..5f5c8e6 100644 --- a/csvreader.h +++ b/csvreader.h @@ -26,9 +26,9 @@ namespace usql { public: explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); - int parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table); + size_t parseCSVString(const std::string &csvSource, std::vector &cols_def, Table& table); - int parseCSV(const std::string &filename, std::vector &cols_def, Table& table); + size_t parseCSVFile(const std::string &filename, std::vector &cols_def, Table& table); };