faster csv loading

This commit is contained in:
VaclavT 2021-08-23 18:18:03 +02:00
parent a454e215eb
commit be89b55b17
5 changed files with 37 additions and 22 deletions

View File

@ -1,14 +1,14 @@
### TODO ### TODO
- date functions - now, add_date... - coalesce, date functions now, add_date; string functions rtrim, ltrim, rpad, lpad; math function round
- string functions rtrim, ltrim, rpad, lpad
- round function
- add pipe | concatenation - add pipe | concatenation
- support for order by, offset, limit (allow column name in order by, validate) - support for order by, offset, limit (allow column name in order by, validate)
- command line interface
- support for unique indexes (primary key) - support for unique indexes (primary key)
- support for btree indexes - support for btree indexes
- support for joining
- add count, min and max functions, e.g. aggregate functions - add count, min and max functions, e.g. aggregate functions
- add const wherever it should be - add const wherever it should be
- PERF in Row::Row(const Row &other), could be more efficient (memory and cpu) - PERF in Row::Row(const Row &other), could be more efficient (memory and cpu)
- use references where pointer cannot be nullptr - use references where pointer cannot be nullptr

View File

@ -2,7 +2,6 @@
#include "csvreader.h" #include "csvreader.h"
#include "parser.h" #include "parser.h"
#include <fstream>
namespace usql { namespace usql {
@ -18,7 +17,6 @@ namespace usql {
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) { int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
std::vector<ColDefNode> cdefs; std::vector<ColDefNode> cdefs;
cdefs.reserve(cols_def.size()); cdefs.reserve(cols_def.size());
for (auto &cd : cols_def) { for (auto &cd : cols_def) {
@ -28,22 +26,33 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
int row_cnt = 0; int row_cnt = 0;
bool inQuote(false); bool inQuote(false);
std::string field; std::string field;
std::string csvSource;
std::vector<std::string> line; std::vector<std::string> line;
line.reserve(32); line.reserve(32);
std::fstream data_file; FILE* fp = fopen(filename.c_str(), "r");
data_file.open(filename, std::ios::in); if (fp == NULL)
/// if (newfile.is_open()){ //checking whether the file is open exit(EXIT_FAILURE);
while (getline(data_file, csvSource)) { char* line_str = NULL;
size_t len = 0;
int read_chars;
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
if (skip_header && !header_skiped) { if (skip_header && !header_skiped) {
header_skiped = true; header_skiped = true;
continue; continue;
} }
if (read_chars > 0 && line_str[read_chars - 1] == '\n') {
line_str[read_chars - 1] = '\0';
--read_chars;
}
std::string csvSource{line_str};
std::string::const_iterator aChar = csvSource.begin(); std::string::const_iterator aChar = csvSource.begin();
while (aChar != csvSource.end()) { std::string::const_iterator strEnd = csvSource.end();
while (aChar != strEnd) {
if (*aChar == quote_character) { if (*aChar == quote_character) {
inQuote = !inQuote; inQuote = !inQuote;
} else if (*aChar == field_separator) { } else if (*aChar == field_separator) {
@ -57,7 +66,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
field.push_back(*aChar); field.push_back(*aChar);
} }
aChar++; ++aChar;
} }
if (!field.empty()) if (!field.empty())
@ -73,7 +82,10 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
// //
} }
data_file.close(); fclose(fp);
if (line_str)
free(line_str);
return row_cnt; return row_cnt;
} }

View File

@ -23,12 +23,16 @@ std::string date_to_string(const long datetime, const std::string format) {
return "invalid argument"; return "invalid argument";
} }
std::istringstream in_ss;
long string_to_date(const std::string &datestr, const std::string &format) { long string_to_date(const std::string &datestr, const std::string &format) {
// format for example "%d.%m.%Y"; // format for example "%d.%m.%Y";
std::istringstream in{datestr.c_str()}; in_ss.clear();
in_ss.str(datestr);
date::sys_seconds tp; date::sys_seconds tp;
in >> date::parse(format, tp); date::from_stream(in_ss, format.c_str(), tp);
return tp.time_since_epoch().count(); return tp.time_since_epoch().count();
} }

View File

@ -40,8 +40,7 @@ int ColBooleanValue::compare(ColValue &other) {
return m_bool == other.getBoolValue() ? 0 : m_bool && !other.getBoolValue() ? -1 : 1; // true first return m_bool == other.getBoolValue() ? 0 : m_bool && !other.getBoolValue() ? -1 : 1; // true first
} }
Row::Row(const Row &other) : m_columns(other.m_columns.size(), ColNullValue()) { Row::Row(const Row &other) : m_columns(other.m_columns.size()) {
// PERF here we first set cols null and then immediately replace it
for (int i = 0; i < other.m_columns.size(); i++) { for (int i = 0; i < other.m_columns.size(); i++) {
if (other[i].isNull()) if (other[i].isNull())
continue; // for null NOP continue; // for null NOP

6
row.h
View File

@ -127,7 +127,7 @@ namespace usql {
class Row { class Row {
public: public:
explicit Row(int cols_count) : m_columns(cols_count, ColNullValue()) {}; explicit Row(int cols_count) : m_columns(cols_count) {};
Row(const Row &other); Row(const Row &other);
Row &operator=(Row other); Row &operator=(Row other);
@ -145,7 +145,7 @@ namespace usql {
void setColumnValue(ColDefNode *col_def, ValueNode *col_value); void setColumnValue(ColDefNode *col_def, ValueNode *col_value);
ColValue &operator[](int i) const { ColValue &operator[](int i) const {
auto type_index = m_columns[i].index(); auto type_index = m_columns[i].index();
switch (type_index) { switch (type_index) {
case 0: case 0:
return (ColValue &) *std::get_if<ColNullValue>(&m_columns[i]); return (ColValue &) *std::get_if<ColNullValue>(&m_columns[i]);
@ -160,7 +160,7 @@ namespace usql {
case 5: case 5:
return (ColValue &) *std::get_if<ColBooleanValue>(&m_columns[i]); return (ColValue &) *std::get_if<ColBooleanValue>(&m_columns[i]);
} }
throw Exception("should not happen"); throw Exception("should not happen");
} }
int compare(const Row &other) const; int compare(const Row &other) const;