CSV parsing simplified a bit
This commit is contained in:
@@ -18,19 +18,15 @@ CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
|
size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
|
||||||
|
size_t row_cnt = 0;
|
||||||
std::vector<ColDefNode> cdefs;
|
std::vector<ColDefNode> cdefs;
|
||||||
cdefs.reserve(cols_def.size());
|
cdefs.reserve(cols_def.size());
|
||||||
for (auto &cd : cols_def) {
|
for (auto &cd : cols_def) {
|
||||||
cdefs.emplace_back(table.get_column_def(cd.name));
|
cdefs.emplace_back(table.get_column_def(cd.name));
|
||||||
}
|
}
|
||||||
|
|
||||||
int row_cnt = 0;
|
|
||||||
bool inQuote(false);
|
bool inQuote(false);
|
||||||
std::string field;
|
|
||||||
|
|
||||||
std::vector<std::string> line;
|
|
||||||
line.reserve(32);
|
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
FILE* fp = fopen(filename.c_str(), "r");
|
FILE* fp = fopen(filename.c_str(), "r");
|
||||||
@@ -40,7 +36,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
|
|||||||
char* line_str = NULL;
|
char* line_str = NULL;
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
long read_chars;
|
long read_chars;
|
||||||
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
|
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
|
||||||
if (skip_header && !header_skiped) {
|
if (skip_header && !header_skiped) {
|
||||||
@@ -51,38 +47,19 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
|
|||||||
line_str[read_chars - 1] = '\0';
|
line_str[read_chars - 1] = '\0';
|
||||||
--read_chars;
|
--read_chars;
|
||||||
}
|
}
|
||||||
std::string csvSource{line_str};
|
|
||||||
|
|
||||||
std::string::const_iterator aChar = csvSource.begin();
|
row_cnt += parseCSVString(line_str, cols_def, table);
|
||||||
std::string::const_iterator strEnd = csvSource.end();
|
|
||||||
while (aChar != strEnd) {
|
|
||||||
if (*aChar == quote_character) {
|
|
||||||
inQuote = !inQuote;
|
|
||||||
} else if (*aChar == field_separator) {
|
|
||||||
if (inQuote) {
|
|
||||||
field += *aChar;
|
|
||||||
} else {
|
|
||||||
line.push_back(field);
|
|
||||||
field.clear();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
field.push_back(*aChar);
|
|
||||||
}
|
|
||||||
|
|
||||||
++aChar;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!field.empty())
|
|
||||||
line.push_back(field);
|
|
||||||
|
|
||||||
table.create_row_from_vector(cols_def, line);
|
|
||||||
row_cnt++;
|
|
||||||
|
|
||||||
field.clear();
|
|
||||||
line.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
|
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
if (line_str)
|
||||||
|
free(line_str);
|
||||||
|
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
if (line_str)
|
if (line_str)
|
||||||
free(line_str);
|
free(line_str);
|
||||||
|
|
||||||
@@ -90,17 +67,19 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
|
|||||||
return row_cnt;
|
return row_cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
|
size_t CsvReader::parseCSVString(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
|
||||||
int row_cnt = 0;
|
size_t row_cnt = 0;
|
||||||
bool inQuote(false);
|
bool inQuote(false);
|
||||||
bool newLine(false);
|
bool newLine(false);
|
||||||
std::string field;
|
|
||||||
|
|
||||||
std::vector<std::string> line;
|
std::vector<std::string> line;
|
||||||
line.reserve(32);
|
std::string field;
|
||||||
|
line.reserve(256);
|
||||||
|
field.reserve(64);
|
||||||
|
|
||||||
std::string::const_iterator aChar = csvSource.begin();
|
std::string::const_iterator aChar = csvSource.begin();
|
||||||
while (aChar != csvSource.end()) {
|
std::string::const_iterator aEnd = csvSource.end();
|
||||||
|
while (aChar != aEnd) {
|
||||||
if (*aChar == quote_character) {
|
if (*aChar == quote_character) {
|
||||||
newLine = false;
|
newLine = false;
|
||||||
inQuote = !inQuote;
|
inQuote = !inQuote;
|
||||||
@@ -136,7 +115,8 @@ int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &
|
|||||||
aChar++;
|
aChar++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!field.empty()) line.push_back(field);
|
if (!field.empty())
|
||||||
|
line.push_back(field);
|
||||||
|
|
||||||
if (header_skiped) {
|
if (header_skiped) {
|
||||||
table.create_row_from_vector(cols_def, line);
|
table.create_row_from_vector(cols_def, line);
|
||||||
|
|||||||
@@ -26,9 +26,9 @@ namespace usql {
|
|||||||
public:
|
public:
|
||||||
explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
|
explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
|
||||||
|
|
||||||
int parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table);
|
size_t parseCSVString(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table);
|
||||||
|
|
||||||
int parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table& table);
|
size_t parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table& table);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user