initial support for parallel load

This commit is contained in:
vaclavt
2022-01-15 14:12:15 +01:00
parent 51d888c680
commit ee24964057
5 changed files with 166 additions and 14 deletions

View File

@@ -4,6 +4,7 @@
#include "csvreader.h"
#include "parser.h"
#include "threadpoool.h"
namespace usql {
@@ -19,13 +20,8 @@ CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep
size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
size_t lines_cnt = 0;
size_t row_cnt = 0;
std::vector<ColDefNode> cdefs;
cdefs.reserve(cols_def.size());
for (auto &cd : cols_def) {
cdefs.emplace_back(table.get_column_def(cd.name));
}
bool inQuote(false);
errno = 0;
@@ -37,6 +33,13 @@ size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNo
size_t len = 0;
try {
// TODO handle it by settings
const std::size_t hw_concurrency = 2; // std::thread::hardware_concurrency();
const bool use_threadpool = hw_concurrency > 1;
thread_pool tp{hw_concurrency};
std::mutex row_cnt_mutex;
long read_chars;
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
if (skip_header && !header_skiped) {
@@ -47,8 +50,27 @@ size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNo
line_str[read_chars - 1] = '\0';
--read_chars;
}
lines_cnt++;
row_cnt += parseCSVString(line_str, cols_def, table);
if (!use_threadpool) {
row_cnt += parseCSVString(line_str, cols_def, table);
} else {
std::string csv_string(line_str);
dispatch(tp, std::function<void()>
([this, csv_string, &cols_def, &table, &row_cnt, &row_cnt_mutex]() {
int parsed = parseCSVString(csv_string, cols_def, table);
{
std::unique_lock<std::mutex> lock(row_cnt_mutex);
row_cnt++;
}
}
));
}
}
if (use_threadpool) {
tp.finish_tasks();
}
fclose(fp);
@@ -67,7 +89,7 @@ size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNo
return row_cnt;
}
size_t CsvReader::parseCSVString(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
size_t CsvReader::parseCSVString(const std::string csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
size_t row_cnt = 0;
bool inQuote(false);
bool newLine(false);