139 lines
3.0 KiB
C++
139 lines
3.0 KiB
C++
|
|
#include "csvreader.h"
|
|
#include "parser.h"
|
|
|
|
#include <fstream>
|
|
|
|
namespace usql {
|
|
|
|
/**
 * Builds a reader configured with the given CSV dialect.
 *
 * @param skip_hdr   when true, the first line/record parsed is treated as a
 *                   header and is not turned into a row
 * @param field_sep  character separating fields within a record
 * @param quote_ch   character that toggles quoted mode
 * @param line_sep   first character recognised as a record terminator
 * @param line_sep2  second character recognised as a record terminator
 */
CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
    // Dialect characters.
    this->field_separator = field_sep;
    this->quote_character = quote_ch;
    this->line_separator = line_sep;
    this->line_separator2 = line_sep2;

    // Header handling: when no header has to be skipped the reader starts
    // out in the "header already consumed" state.
    this->skip_header = skip_hdr;
    this->header_skiped = !skip_hdr;
}
|
|
|
|
|
|
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
|
|
|
|
std::vector<ColDefNode> cdefs;
|
|
cdefs.reserve(cols_def.size());
|
|
for (auto &cd : cols_def) {
|
|
cdefs.emplace_back(table.get_column_def(cd.name));
|
|
}
|
|
|
|
int row_cnt = 0;
|
|
bool inQuote(false);
|
|
std::string field;
|
|
std::string csvSource;
|
|
|
|
std::vector<std::string> line;
|
|
line.reserve(32);
|
|
|
|
std::fstream data_file;
|
|
data_file.open(filename, std::ios::in);
|
|
/// if (newfile.is_open()){ //checking whether the file is open
|
|
|
|
while (getline(data_file, csvSource)) {
|
|
if (skip_header && !header_skiped) {
|
|
header_skiped = true;
|
|
continue;
|
|
}
|
|
std::string::const_iterator aChar = csvSource.begin();
|
|
while (aChar != csvSource.end()) {
|
|
if (*aChar == quote_character) {
|
|
inQuote = !inQuote;
|
|
} else if (*aChar == field_separator) {
|
|
if (inQuote == true) {
|
|
field += *aChar;
|
|
} else {
|
|
line.push_back(field);
|
|
field.clear();
|
|
}
|
|
} else {
|
|
field.push_back(*aChar);
|
|
}
|
|
|
|
aChar++;
|
|
}
|
|
|
|
if (!field.empty())
|
|
line.push_back(field);
|
|
|
|
table.create_row_from_vector(cols_def, line);
|
|
row_cnt++;
|
|
|
|
field.clear();
|
|
line.clear();
|
|
// DEBUG
|
|
// if (row_cnt > 50000) break;
|
|
//
|
|
}
|
|
|
|
data_file.close();
|
|
|
|
return row_cnt;
|
|
}
|
|
|
|
int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
|
|
int row_cnt = 0;
|
|
bool inQuote(false);
|
|
bool newLine(false);
|
|
std::string field;
|
|
|
|
std::vector<std::string> line;
|
|
line.reserve(32);
|
|
|
|
std::string::const_iterator aChar = csvSource.begin();
|
|
while (aChar != csvSource.end()) {
|
|
if (*aChar == quote_character) {
|
|
newLine = false;
|
|
inQuote = !inQuote;
|
|
} else if (*aChar == field_separator) {
|
|
newLine = false;
|
|
if (inQuote == true) {
|
|
field += *aChar;
|
|
} else {
|
|
line.push_back(field);
|
|
field.clear();
|
|
}
|
|
} else if (*aChar == line_separator || *aChar == line_separator2) {
|
|
if (inQuote == true) {
|
|
field += *aChar;
|
|
} else {
|
|
if (newLine == false) {
|
|
line.push_back(field);
|
|
if (header_skiped) {
|
|
table.create_row_from_vector(cols_def, line);
|
|
row_cnt++;
|
|
}
|
|
header_skiped = true;
|
|
field.clear();
|
|
line.clear();
|
|
newLine = true;
|
|
}
|
|
}
|
|
} else {
|
|
newLine = false;
|
|
field.push_back(*aChar);
|
|
}
|
|
|
|
aChar++;
|
|
}
|
|
|
|
if (!field.empty()) line.push_back(field);
|
|
|
|
if (header_skiped) {
|
|
table.create_row_from_vector(cols_def, line);
|
|
row_cnt++;
|
|
header_skiped = true;
|
|
}
|
|
|
|
return row_cnt;
|
|
}
|
|
|
|
} // namespace
|