insert rows as you go through csv

This commit is contained in:
VaclavT 2021-08-11 23:40:18 +02:00
parent 710531c455
commit cd92e27270
2 changed files with 63 additions and 72 deletions

View File

@ -1,5 +1,6 @@
#include "csvreader.h" #include "csvreader.h"
#include "parser.h"
namespace usql { namespace usql {
@ -10,78 +11,65 @@ namespace usql {
line_separator = line_sep; line_separator = line_sep;
line_separator2 = line_sep2; line_separator2 = line_sep2;
header_skiped = false; header_skiped = !skip_hdr;
} }
std::vector<std::vector<std::string>> CsvReader::parseCSV(const std::string &csvSource) { int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def,
int linesRead = 0; void (Table::*function)(const std::vector<ColDefNode>&, const std::vector<std::string>&), Table& a) {
bool inQuote(false); int row_cnt = 0;
bool newLine(false); bool inQuote(false);
std::string field; bool newLine(false);
std::string field;
std::vector<std::vector<std::string>> parsed_data; std::vector<std::string> line;
parsed_data.reserve(256); line.reserve(32);
std::vector<std::string> line; std::string::const_iterator aChar = csvSource.begin();
line.reserve(32); while (aChar != csvSource.end()) {
if (*aChar == quote_character) {
newLine = false;
inQuote = !inQuote;
} else if (*aChar == field_separator) {
newLine = false;
if (inQuote == true) {
field += *aChar;
} else {
line.push_back(field);
field.clear();
}
} else if (*aChar == line_separator || *aChar == line_separator2) {
if (inQuote == true) {
field += *aChar;
} else {
if (newLine == false) {
line.push_back(field);
if (header_skiped) {
(a.*function)(cols_def, line);
row_cnt++;
}
header_skiped = true;
field.clear();
line.clear();
newLine = true;
}
}
} else {
newLine = false;
field.push_back(*aChar);
}
std::string::const_iterator aChar = csvSource.begin(); aChar++;
while (aChar != csvSource.end()) { }
if (*aChar == quote_character) {
newLine = false;
inQuote = !inQuote;
} else if (*aChar == field_separator) {
newLine = false;
if (inQuote == true) {
field += *aChar;
} else {
line.push_back(field);
field.clear();
}
} else if (*aChar == line_separator || *aChar == line_separator2) {
if (inQuote == true) {
field += *aChar;
} else {
if (newLine == false) {
line.push_back(field);
add_line(line, parsed_data);
field.clear();
line.clear();
linesRead++;
if (linesRead == 16) {
int linesEstimation =
csvSource.size() /
(std::distance(csvSource.begin(), aChar) / linesRead);
if (linesEstimation > parsed_data.capacity())
parsed_data.reserve(linesEstimation);
}
newLine = true;
}
}
} else {
newLine = false;
field.push_back(*aChar);
}
aChar++; if (!field.empty()) line.push_back(field);
}
if (field.size()) if (header_skiped) {
line.push_back(field); (a.*function)(cols_def, line);
row_cnt++;
add_line(line, parsed_data); header_skiped = true;
}
return parsed_data;
}
void CsvReader::add_line(const std::vector<std::string> &line, std::vector<std::vector<std::string>> &lines) {
if (skip_header && !header_skiped) {
header_skiped = true;
} else {
if (line.size())
lines.push_back(line);
}
}
return row_cnt;
} }
} // namespace

View File

@ -5,6 +5,10 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <regex> #include <regex>
#include <functional>
#include "parser.h"
#include "table.h"
namespace usql { namespace usql {
@ -20,12 +24,11 @@ namespace usql {
bool header_skiped; bool header_skiped;
public: public:
CsvReader(bool skip_hdr = false, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
char line_sep2 = '\n');
std::vector<std::vector<std::string>> parseCSV(const std::string &csvSource); int parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def,
void (Table::*function)(const std::vector<ColDefNode>&, const std::vector<std::string>&), Table& a);
private:
void add_line(const std::vector<std::string> &line, std::vector<std::vector<std::string>> &lines);
}; };
}
} // namespace