From cd92e2727093b88d0830f063b35c21e1278a382a Mon Sep 17 00:00:00 2001 From: VaclavT Date: Wed, 11 Aug 2021 23:40:18 +0200 Subject: [PATCH] insert rows as you go through csv --- csvreader.cpp | 120 +++++++++++++++++++++++--------------------------- csvreader.h | 15 ++++--- 2 files changed, 63 insertions(+), 72 deletions(-) diff --git a/csvreader.cpp b/csvreader.cpp index 08d2bdf..d042935 100644 --- a/csvreader.cpp +++ b/csvreader.cpp @@ -1,5 +1,6 @@ #include "csvreader.h" +#include "parser.h" namespace usql { @@ -10,78 +11,65 @@ namespace usql { line_separator = line_sep; line_separator2 = line_sep2; - header_skiped = false; + header_skiped = !skip_hdr; } - std::vector> CsvReader::parseCSV(const std::string &csvSource) { - int linesRead = 0; - bool inQuote(false); - bool newLine(false); - std::string field; + int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, + void (Table::*function)(const std::vector&, const std::vector&), Table& a) { + int row_cnt = 0; + bool inQuote(false); + bool newLine(false); + std::string field; - std::vector> parsed_data; - parsed_data.reserve(256); + std::vector line; + line.reserve(32); - std::vector line; - line.reserve(32); + std::string::const_iterator aChar = csvSource.begin(); + while (aChar != csvSource.end()) { + if (*aChar == quote_character) { + newLine = false; + inQuote = !inQuote; + } else if (*aChar == field_separator) { + newLine = false; + if (inQuote == true) { + field += *aChar; + } else { + line.push_back(field); + field.clear(); + } + } else if (*aChar == line_separator || *aChar == line_separator2) { + if (inQuote == true) { + field += *aChar; + } else { + if (newLine == false) { + line.push_back(field); + if (header_skiped) { + (a.*function)(cols_def, line); + row_cnt++; + } + header_skiped = true; + field.clear(); + line.clear(); + newLine = true; + } + } + } else { + newLine = false; + field.push_back(*aChar); + } - std::string::const_iterator aChar = csvSource.begin(); - while (aChar != csvSource.end()) { - if (*aChar == quote_character) { - newLine = false; - inQuote = !inQuote; - } else if (*aChar == field_separator) { - newLine = false; - if (inQuote == true) { - field += *aChar; - } else { - line.push_back(field); - field.clear(); - } - } else if (*aChar == line_separator || *aChar == line_separator2) { - if (inQuote == true) { - field += *aChar; - } else { - if (newLine == false) { - line.push_back(field); - add_line(line, parsed_data); - field.clear(); - line.clear(); - linesRead++; - if (linesRead == 16) { - int linesEstimation = - csvSource.size() / - (std::distance(csvSource.begin(), aChar) / linesRead); - if (linesEstimation > parsed_data.capacity()) - parsed_data.reserve(linesEstimation); - } - newLine = true; - } - } - } else { - newLine = false; - field.push_back(*aChar); - } + aChar++; + } - aChar++; - } + if (!field.empty()) line.push_back(field); - if (field.size()) - line.push_back(field); - - add_line(line, parsed_data); - - return parsed_data; - } - - - void CsvReader::add_line(const std::vector &line, std::vector> &lines) { - if (skip_header && !header_skiped) { - header_skiped = true; - } else { - if (line.size()) - lines.push_back(line); - } - } + if (header_skiped) { + (a.*function)(cols_def, line); + row_cnt++; + header_skiped = true; + } + return row_cnt; } + +} // namespace diff --git a/csvreader.h b/csvreader.h index 8166583..1769ebc 100644 --- a/csvreader.h +++ b/csvreader.h @@ -5,6 +5,10 @@ #include #include #include +#include + +#include "parser.h" +#include "table.h" namespace usql { @@ -20,12 +24,11 @@ namespace usql { bool header_skiped; public: - CsvReader(bool skip_hdr = false, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', - char line_sep2 = '\n'); + CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); - std::vector> parseCSV(const std::string &csvSource); + int parseCSV2(const std::string &csvSource, std::vector &cols_def, + void (Table::*function)(const std::vector&, const std::vector&), Table& a); - private: - void add_line(const std::vector &line, std::vector> &lines); }; -} + +} // namespace