CSV parsing simplified a bit

This commit is contained in:
vaclavt
2022-01-11 23:34:49 +01:00
parent 5c3ac683d8
commit 3a660c1783
2 changed files with 30 additions and 50 deletions

View File

@@ -18,19 +18,15 @@ CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep
} }
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) { size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
size_t row_cnt = 0;
std::vector<ColDefNode> cdefs; std::vector<ColDefNode> cdefs;
cdefs.reserve(cols_def.size()); cdefs.reserve(cols_def.size());
for (auto &cd : cols_def) { for (auto &cd : cols_def) {
cdefs.emplace_back(table.get_column_def(cd.name)); cdefs.emplace_back(table.get_column_def(cd.name));
} }
int row_cnt = 0;
bool inQuote(false); bool inQuote(false);
std::string field;
std::vector<std::string> line;
line.reserve(32);
errno = 0; errno = 0;
FILE* fp = fopen(filename.c_str(), "r"); FILE* fp = fopen(filename.c_str(), "r");
@@ -40,67 +36,50 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
char* line_str = NULL; char* line_str = NULL;
size_t len = 0; size_t len = 0;
try {
long read_chars; long read_chars;
while ((read_chars = getline(&line_str, &len, fp)) != -1) { while ((read_chars = getline(&line_str, &len, fp)) != -1) {
if (skip_header && !header_skiped) { if (skip_header && !header_skiped) {
header_skiped = true; header_skiped = true;
continue; continue;
} }
if (read_chars > 0 && line_str[read_chars - 1] == '\n') { if (read_chars > 0 && line_str[read_chars - 1] == '\n') {
line_str[read_chars - 1] = '\0'; line_str[read_chars - 1] = '\0';
--read_chars; --read_chars;
}
std::string csvSource{line_str};
std::string::const_iterator aChar = csvSource.begin();
std::string::const_iterator strEnd = csvSource.end();
while (aChar != strEnd) {
if (*aChar == quote_character) {
inQuote = !inQuote;
} else if (*aChar == field_separator) {
if (inQuote) {
field += *aChar;
} else {
line.push_back(field);
field.clear();
}
} else {
field.push_back(*aChar);
} }
++aChar; row_cnt += parseCSVString(line_str, cols_def, table);
} }
if (!field.empty()) fclose(fp);
line.push_back(field);
table.create_row_from_vector(cols_def, line); } catch (const std::exception &e) {
row_cnt++; if (line_str)
free(line_str);
field.clear(); throw e;
line.clear();
} }
fclose(fp);
if (line_str) if (line_str)
free(line_str); free(line_str);
return row_cnt; return row_cnt;
} }
int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) { size_t CsvReader::parseCSVString(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
int row_cnt = 0; size_t row_cnt = 0;
bool inQuote(false); bool inQuote(false);
bool newLine(false); bool newLine(false);
std::string field;
std::vector<std::string> line; std::vector<std::string> line;
line.reserve(32); std::string field;
line.reserve(256);
field.reserve(64);
std::string::const_iterator aChar = csvSource.begin(); std::string::const_iterator aChar = csvSource.begin();
while (aChar != csvSource.end()) { std::string::const_iterator aEnd = csvSource.end();
while (aChar != aEnd) {
if (*aChar == quote_character) { if (*aChar == quote_character) {
newLine = false; newLine = false;
inQuote = !inQuote; inQuote = !inQuote;
@@ -136,7 +115,8 @@ int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &
aChar++; aChar++;
} }
if (!field.empty()) line.push_back(field); if (!field.empty())
line.push_back(field);
if (header_skiped) { if (header_skiped) {
table.create_row_from_vector(cols_def, line); table.create_row_from_vector(cols_def, line);

View File

@@ -26,9 +26,9 @@ namespace usql {
public: public:
explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
int parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table); size_t parseCSVString(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table);
int parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table& table); size_t parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table& table);
}; };