faster csv loading
This commit is contained in:
parent
a454e215eb
commit
be89b55b17
10
Readme.md
10
Readme.md
|
|
@ -1,14 +1,14 @@
|
|||
|
||||
### TODO
|
||||
- date functions - now, add_date...
|
||||
- string functions rtrim, ltrim, rpad, lpad
|
||||
- round function
|
||||
- coalesce, date functions now, add_date; string functions rtrim, ltrim, rpad, lpad; math function round
|
||||
- add pipe | concatenation
|
||||
|
||||
- support for order by, offset, limit (allow column name in order by, validate)
|
||||
- command line interface
|
||||
- support for unique indexes (primary key)
|
||||
- support for btree indexes
|
||||
- support for joining
|
||||
- add count, min and max functions, e.g. aggregate functions
|
||||
|
||||
- add const wherever should be
|
||||
- PERF in Row::Row(const Row &other), could be more efficient (memory and cpu)
|
||||
- use references where pointer cannot be nullptr
|
||||
- use references where pointer cannot be nullptr
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include "csvreader.h"
|
||||
#include "parser.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace usql {
|
||||
|
||||
|
|
@ -18,7 +17,6 @@ namespace usql {
|
|||
|
||||
|
||||
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
|
||||
|
||||
std::vector<ColDefNode> cdefs;
|
||||
cdefs.reserve(cols_def.size());
|
||||
for (auto &cd : cols_def) {
|
||||
|
|
@ -28,22 +26,33 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
|
|||
int row_cnt = 0;
|
||||
bool inQuote(false);
|
||||
std::string field;
|
||||
std::string csvSource;
|
||||
|
||||
std::vector<std::string> line;
|
||||
line.reserve(32);
|
||||
|
||||
std::fstream data_file;
|
||||
data_file.open(filename, std::ios::in);
|
||||
/// if (newfile.is_open()){ //checking whether the file is open
|
||||
FILE* fp = fopen(filename.c_str(), "r");
|
||||
if (fp == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
while (getline(data_file, csvSource)) {
|
||||
char* line_str = NULL;
|
||||
size_t len = 0;
|
||||
|
||||
|
||||
int read_chars;
|
||||
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
|
||||
if (skip_header && !header_skiped) {
|
||||
header_skiped = true;
|
||||
continue;
|
||||
}
|
||||
if (read_chars > 0 && line_str[read_chars - 1] == '\n') {
|
||||
line_str[read_chars - 1] = '\0';
|
||||
--read_chars;
|
||||
}
|
||||
std::string csvSource{line_str};
|
||||
|
||||
std::string::const_iterator aChar = csvSource.begin();
|
||||
while (aChar != csvSource.end()) {
|
||||
std::string::const_iterator strEnd = csvSource.end();
|
||||
while (aChar != strEnd) {
|
||||
if (*aChar == quote_character) {
|
||||
inQuote = !inQuote;
|
||||
} else if (*aChar == field_separator) {
|
||||
|
|
@ -57,7 +66,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
|
|||
field.push_back(*aChar);
|
||||
}
|
||||
|
||||
aChar++;
|
||||
++aChar;
|
||||
}
|
||||
|
||||
if (!field.empty())
|
||||
|
|
@ -73,7 +82,10 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
|
|||
//
|
||||
}
|
||||
|
||||
data_file.close();
|
||||
fclose(fp);
|
||||
if (line_str)
|
||||
free(line_str);
|
||||
|
||||
|
||||
return row_cnt;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,12 +23,16 @@ std::string date_to_string(const long datetime, const std::string format) {
|
|||
return "invalid argument";
|
||||
}
|
||||
|
||||
|
||||
std::istringstream in_ss;
|
||||
long string_to_date(const std::string &datestr, const std::string &format) {
|
||||
// format for example "%d.%m.%Y";
|
||||
|
||||
std::istringstream in{datestr.c_str()};
|
||||
in_ss.clear();
|
||||
in_ss.str(datestr);
|
||||
|
||||
date::sys_seconds tp;
|
||||
in >> date::parse(format, tp);
|
||||
date::from_stream(in_ss, format.c_str(), tp);
|
||||
return tp.time_since_epoch().count();
|
||||
}
|
||||
|
||||
|
|
|
|||
3
row.cpp
3
row.cpp
|
|
@ -40,8 +40,7 @@ int ColBooleanValue::compare(ColValue &other) {
|
|||
return m_bool == other.getBoolValue() ? 0 : m_bool && !other.getBoolValue() ? -1 : 1; // true first
|
||||
}
|
||||
|
||||
Row::Row(const Row &other) : m_columns(other.m_columns.size(), ColNullValue()) {
|
||||
// PERF here we first set cols null and then immediately replace it
|
||||
Row::Row(const Row &other) : m_columns(other.m_columns.size()) {
|
||||
for (int i = 0; i < other.m_columns.size(); i++) {
|
||||
if (other[i].isNull())
|
||||
continue; // for null NOP
|
||||
|
|
|
|||
6
row.h
6
row.h
|
|
@ -127,7 +127,7 @@ namespace usql {
|
|||
class Row {
|
||||
|
||||
public:
|
||||
explicit Row(int cols_count) : m_columns(cols_count, ColNullValue()) {};
|
||||
explicit Row(int cols_count) : m_columns(cols_count) {};
|
||||
Row(const Row &other);
|
||||
|
||||
Row &operator=(Row other);
|
||||
|
|
@ -145,7 +145,7 @@ namespace usql {
|
|||
void setColumnValue(ColDefNode *col_def, ValueNode *col_value);
|
||||
|
||||
ColValue &operator[](int i) const {
|
||||
auto type_index = m_columns[i].index();
|
||||
auto type_index = m_columns[i].index();
|
||||
switch (type_index) {
|
||||
case 0:
|
||||
return (ColValue &) *std::get_if<ColNullValue>(&m_columns[i]);
|
||||
|
|
@ -160,7 +160,7 @@ namespace usql {
|
|||
case 5:
|
||||
return (ColValue &) *std::get_if<ColBooleanValue>(&m_columns[i]);
|
||||
}
|
||||
throw Exception("should not happen");
|
||||
throw Exception("should not happen");
|
||||
}
|
||||
|
||||
int compare(const Row &other) const;
|
||||
|
|
|
|||
Loading…
Reference in New Issue