faster csv loading

This commit is contained in:
2021-08-23 18:18:03 +02:00
parent a454e215eb
commit be89b55b17
5 changed files with 37 additions and 22 deletions

View File

@@ -2,7 +2,6 @@
#include "csvreader.h"
#include "parser.h"
#include <fstream>
namespace usql {
@@ -18,7 +17,6 @@ namespace usql {
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
std::vector<ColDefNode> cdefs;
cdefs.reserve(cols_def.size());
for (auto &cd : cols_def) {
@@ -28,22 +26,33 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
int row_cnt = 0;
bool inQuote(false);
std::string field;
std::string csvSource;
std::vector<std::string> line;
line.reserve(32);
std::fstream data_file;
data_file.open(filename, std::ios::in);
/// if (newfile.is_open()){ //checking whether the file is open
FILE* fp = fopen(filename.c_str(), "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while (getline(data_file, csvSource)) {
char* line_str = NULL;
size_t len = 0;
int read_chars;
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
if (skip_header && !header_skiped) {
header_skiped = true;
continue;
}
if (read_chars > 0 && line_str[read_chars - 1] == '\n') {
line_str[read_chars - 1] = '\0';
--read_chars;
}
std::string csvSource{line_str};
std::string::const_iterator aChar = csvSource.begin();
while (aChar != csvSource.end()) {
std::string::const_iterator strEnd = csvSource.end();
while (aChar != strEnd) {
if (*aChar == quote_character) {
inQuote = !inQuote;
} else if (*aChar == field_separator) {
@@ -57,7 +66,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
field.push_back(*aChar);
}
aChar++;
++aChar;
}
if (!field.empty())
@@ -73,7 +82,10 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
//
}
data_file.close();
fclose(fp);
if (line_str)
free(line_str);
return row_cnt;
}