usql/csvreader.cpp

89 lines
2.2 KiB
C++

#include "csvreader.h"
#include <climits>
namespace usql {
/**
 * Creates a reader configured with the given CSV dialect.
 *
 * @param skip_hdr   when true, the first record handed to add_line() is discarded
 * @param field_sep  character that separates fields within a record
 * @param quote_ch   character that opens and closes a quoted section
 * @param line_sep   first character recognised as a record terminator
 * @param line_sep2  second character recognised as a record terminator
 */
CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
    // Dialect characters.
    this->field_separator = field_sep;
    this->quote_character = quote_ch;
    this->line_separator = line_sep;
    this->line_separator2 = line_sep2;

    // Header handling: nothing has been skipped yet on a fresh reader.
    this->skip_header = skip_hdr;
    this->header_skiped = false;
}
/**
 * Parses CSV text held in memory into rows of string fields.
 *
 * Rules applied while scanning:
 *  - `field_separator` ends a field; `line_separator` / `line_separator2` end a
 *    record. Inside a quoted section both are copied into the field verbatim.
 *  - `quote_character` opens/closes a quoted section and is not copied into the
 *    field. Two consecutive quote characters inside a quoted section produce
 *    one literal quote character (the usual CSV escape).
 *  - A line separator directly following another record terminator is ignored,
 *    so "\r\n" pairs and blank lines do not create extra records.
 *  - Every finished record is passed to add_line(), which decides whether it is
 *    stored (header skipping, empty records).
 *
 * @param csvSource CSV text to parse
 * @return the stored records, each one a vector of field values
 */
std::vector<std::vector<std::string>> CsvReader::parseCSV(const std::string &csvSource) {
    // Number of records after which the total row count is estimated.
    const std::size_t sampleLines = 16;

    std::size_t linesRead = 0;
    bool inQuote = false;
    // True right after a record terminator; used to swallow the second half of
    // "\r\n" and blank lines. Any other character resets it.
    bool newLine = false;

    std::string field;
    std::vector<std::string> line;
    line.reserve(32);
    std::vector<std::vector<std::string>> parsed_data;
    parsed_data.reserve(128);

    const std::size_t sourceSize = csvSource.size();
    for (std::size_t pos = 0; pos < sourceSize; ++pos) {
        const char ch = csvSource[pos];

        if (ch == quote_character) {
            newLine = false;
            if (inQuote && pos + 1 < sourceSize && csvSource[pos + 1] == quote_character) {
                // Doubled quote inside a quoted section: keep one literal quote
                // and consume the second one instead of closing the section.
                field += ch;
                ++pos;
            } else {
                inQuote = !inQuote;
            }
        } else if (inQuote) {
            // Separators and ordinary characters are all literal inside quotes.
            field += ch;
        } else if (ch == field_separator) {
            newLine = false;
            line.push_back(field);
            field.clear();
        } else if (ch == line_separator || ch == line_separator2) {
            if (!newLine) {
                line.push_back(field);
                add_line(line, parsed_data);
                field.clear();
                line.clear();
                ++linesRead;

                if (linesRead == sampleLines) {
                    // Estimate the total number of records from the average
                    // length of the first ones. Everything stays unsigned so a
                    // huge input cannot wrap into a bogus reserve request.
                    const std::size_t avgLineLength = pos / linesRead;
                    if (avgLineLength > 0) {
                        const std::size_t linesEstimation = sourceSize / avgLineLength;
                        if (linesEstimation > parsed_data.capacity() &&
                            linesEstimation <= parsed_data.max_size()) {
                            parsed_data.reserve(linesEstimation);
                        }
                    }
                }
                newLine = true;
            }
        } else {
            newLine = false;
            field += ch;
        }
    }

    // Flush the last record when the input does not end with a line separator.
    // The pending field is kept even if empty once the record already has
    // fields, so "a,b," yields three fields with or without a final newline.
    if (!field.empty() || !line.empty()) {
        line.push_back(field);
    }
    // Called unconditionally, exactly as before; add_line() ignores empty records.
    add_line(line, parsed_data);

    return parsed_data;
}
/**
 * Appends one parsed record to the result set, honouring header skipping.
 *
 * When header skipping is enabled and no record has been skipped yet, the
 * record is discarded and the reader remembers that the header was consumed.
 * Otherwise the record is stored unless it contains no fields.
 *
 * @param line   fields of the record that was just parsed
 * @param lines  collection receiving the accepted records
 */
void CsvReader::add_line(const std::vector<std::string> &line, std::vector<std::vector<std::string>> &lines) {
    const bool header_pending = skip_header && !header_skiped;
    if (header_pending) {
        // The first record offered is dropped, whatever it contains.
        header_skiped = true;
        return;
    }

    if (!line.empty()) {
        lines.push_back(line);
    }
}
}