parallel wip
This commit is contained in:
@@ -20,9 +20,7 @@ CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep
|
|||||||
|
|
||||||
|
|
||||||
size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
|
size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
|
||||||
size_t lines_cnt = 0;
|
|
||||||
size_t row_cnt = 0;
|
size_t row_cnt = 0;
|
||||||
bool inQuote(false);
|
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
FILE* fp = fopen(filename.c_str(), "r");
|
FILE* fp = fopen(filename.c_str(), "r");
|
||||||
@@ -34,7 +32,8 @@ size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNo
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// TODO handle it by settings
|
// TODO handle it by settings
|
||||||
const std::size_t hw_concurrency = 2; // std::thread::hardware_concurrency();
|
const std::size_t hw_concurrency = std::max(0, (int)(std::thread::hardware_concurrency() - 2));
|
||||||
|
// std::cout << "pool size: " << hw_concurrency << "\n";
|
||||||
const bool use_threadpool = hw_concurrency > 1;
|
const bool use_threadpool = hw_concurrency > 1;
|
||||||
|
|
||||||
thread_pool tp{hw_concurrency};
|
thread_pool tp{hw_concurrency};
|
||||||
@@ -50,28 +49,24 @@ size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNo
|
|||||||
line_str[read_chars - 1] = '\0';
|
line_str[read_chars - 1] = '\0';
|
||||||
--read_chars;
|
--read_chars;
|
||||||
}
|
}
|
||||||
lines_cnt++;
|
|
||||||
|
|
||||||
if (!use_threadpool) {
|
if (use_threadpool) {
|
||||||
row_cnt += parseCSVString(line_str, cols_def, table);
|
//std::string csv_string(line_str);
|
||||||
} else {
|
|
||||||
std::string csv_string(line_str);
|
|
||||||
dispatch(tp, std::function<void()>
|
dispatch(tp, std::function<void()>
|
||||||
([this, csv_string, &cols_def, &table, &row_cnt, &row_cnt_mutex]() {
|
([this, line_str, &cols_def, &table, &row_cnt, &row_cnt_mutex]() {
|
||||||
int parsed = parseCSVString(csv_string, cols_def, table);
|
// std::cout << "thread: " << std::this_thread::get_id() << " rownum " << row_cnt << "\n";
|
||||||
{
|
auto parsed = parseCSVString(line_str, cols_def, table);
|
||||||
std::unique_lock<std::mutex> lock(row_cnt_mutex);
|
{
|
||||||
row_cnt++;
|
std::unique_lock<std::mutex> lock(row_cnt_mutex);
|
||||||
}
|
row_cnt += parsed;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
));
|
));
|
||||||
}
|
} else
|
||||||
|
row_cnt += parseCSVString(line_str, cols_def, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_threadpool) {
|
if (use_threadpool) tp.finish();
|
||||||
tp.finish_tasks();
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
|
|
||||||
@@ -85,7 +80,6 @@ size_t CsvReader::parseCSVFile(const std::string &filename, std::vector<ColDefNo
|
|||||||
if (line_str)
|
if (line_str)
|
||||||
free(line_str);
|
free(line_str);
|
||||||
|
|
||||||
|
|
||||||
return row_cnt;
|
return row_cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -94,9 +88,9 @@ size_t CsvReader::parseCSVString(const std::string csvSource, std::vector<ColDef
|
|||||||
bool inQuote(false);
|
bool inQuote(false);
|
||||||
bool newLine(false);
|
bool newLine(false);
|
||||||
|
|
||||||
std::vector<std::string> line;
|
std::vector<std::string> columns;
|
||||||
std::string field;
|
std::string field;
|
||||||
line.reserve(256);
|
columns.reserve(256);
|
||||||
field.reserve(64);
|
field.reserve(64);
|
||||||
|
|
||||||
std::string::const_iterator aChar = csvSource.begin();
|
std::string::const_iterator aChar = csvSource.begin();
|
||||||
@@ -110,7 +104,7 @@ size_t CsvReader::parseCSVString(const std::string csvSource, std::vector<ColDef
|
|||||||
if (inQuote) {
|
if (inQuote) {
|
||||||
field += *aChar;
|
field += *aChar;
|
||||||
} else {
|
} else {
|
||||||
line.push_back(field);
|
columns.push_back(field);
|
||||||
field.clear();
|
field.clear();
|
||||||
}
|
}
|
||||||
} else if (*aChar == line_separator || *aChar == line_separator2) {
|
} else if (*aChar == line_separator || *aChar == line_separator2) {
|
||||||
@@ -118,14 +112,14 @@ size_t CsvReader::parseCSVString(const std::string csvSource, std::vector<ColDef
|
|||||||
field += *aChar;
|
field += *aChar;
|
||||||
} else {
|
} else {
|
||||||
if (!newLine) {
|
if (!newLine) {
|
||||||
line.push_back(field);
|
columns.push_back(field);
|
||||||
if (header_skiped) {
|
if (header_skiped) {
|
||||||
table.create_row_from_vector(cols_def, line);
|
table.create_row_from_vector(cols_def, columns);
|
||||||
row_cnt++;
|
row_cnt++;
|
||||||
}
|
}
|
||||||
header_skiped = true;
|
header_skiped = true;
|
||||||
field.clear();
|
field.clear();
|
||||||
line.clear();
|
columns.clear();
|
||||||
newLine = true;
|
newLine = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -138,13 +132,13 @@ size_t CsvReader::parseCSVString(const std::string csvSource, std::vector<ColDef
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!field.empty())
|
if (!field.empty())
|
||||||
line.push_back(field);
|
columns.push_back(field);
|
||||||
|
|
||||||
if (header_skiped) {
|
if (header_skiped) {
|
||||||
table.create_row_from_vector(cols_def, line);
|
table.create_row_from_vector(cols_def, columns);
|
||||||
row_cnt++;
|
row_cnt++;
|
||||||
|
} else
|
||||||
header_skiped = true;
|
header_skiped = true;
|
||||||
}
|
|
||||||
|
|
||||||
return row_cnt;
|
return row_cnt;
|
||||||
}
|
}
|
||||||
|
|||||||
12
csvreader.h
12
csvreader.h
@@ -12,9 +12,9 @@
|
|||||||
|
|
||||||
namespace usql {
|
namespace usql {
|
||||||
|
|
||||||
class CsvReader {
|
class CsvReader {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
char field_separator;
|
char field_separator;
|
||||||
char line_separator;
|
char line_separator;
|
||||||
char line_separator2;
|
char line_separator2;
|
||||||
@@ -23,13 +23,11 @@ namespace usql {
|
|||||||
bool skip_header;
|
bool skip_header;
|
||||||
bool header_skiped;
|
bool header_skiped;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
|
explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
|
||||||
|
|
||||||
size_t parseCSVString(const std::string csvSource, std::vector<ColDefNode> &cols_def, Table& table);
|
|
||||||
|
|
||||||
size_t parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table& table);
|
size_t parseCSVFile(const std::string &filename, std::vector<ColDefNode> &cols_def, Table& table);
|
||||||
|
size_t parseCSVString(const std::string csvSource, std::vector<ColDefNode> &cols_def, Table& table);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|||||||
15
table.cpp
15
table.cpp
@@ -43,9 +43,8 @@ ColDefNode Table::get_column_def(int col_index) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::mutex insert_guard;
|
|
||||||
Row& Table::create_empty_row() {
|
Row& Table::create_empty_row() {
|
||||||
std::unique_lock guard(insert_guard);
|
std::unique_lock guard(m_insert_guard);
|
||||||
|
|
||||||
m_rows.emplace_back(columns_count(), false);
|
m_rows.emplace_back(columns_count(), false);
|
||||||
return m_rows.back();
|
return m_rows.back();
|
||||||
@@ -112,20 +111,20 @@ std::string Table::csv_string() {
|
|||||||
return out_string;
|
return out_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Table::load_csv_string(const std::string &content) {
|
size_t Table::load_csv_string(const std::string &content) {
|
||||||
std::vector<ColDefNode> &colDefs = m_col_defs;
|
std::vector<ColDefNode> &colDefs = m_col_defs;
|
||||||
|
|
||||||
CsvReader csvparser{};
|
CsvReader csvparser{};
|
||||||
int row_cnt = csvparser.parseCSVString(content, colDefs, *this);
|
auto row_cnt = csvparser.parseCSVString(content, colDefs, *this);
|
||||||
|
|
||||||
return row_cnt;
|
return row_cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Table::load_csv_file(const std::string &filename) {
|
size_t Table::load_csv_file(const std::string &filename) {
|
||||||
std::vector<ColDefNode> &colDefs = m_col_defs;
|
std::vector<ColDefNode> &colDefs = m_col_defs;
|
||||||
|
|
||||||
// allocate enough space
|
// allocate enough space
|
||||||
int line_size = 128;
|
int line_size = 256;
|
||||||
|
|
||||||
std::ifstream in(filename, std::ifstream::ate | std::ifstream::binary);
|
std::ifstream in(filename, std::ifstream::ate | std::ifstream::binary);
|
||||||
auto file_size = in.tellg();
|
auto file_size = in.tellg();
|
||||||
@@ -145,7 +144,7 @@ int Table::load_csv_file(const std::string &filename) {
|
|||||||
|
|
||||||
// load rows
|
// load rows
|
||||||
CsvReader csvparser{};
|
CsvReader csvparser{};
|
||||||
int row_cnt = csvparser.parseCSVFile(filename, colDefs, *this);
|
auto row_cnt = csvparser.parseCSVFile(filename, colDefs, *this);
|
||||||
|
|
||||||
return row_cnt;
|
return row_cnt;
|
||||||
}
|
}
|
||||||
@@ -279,6 +278,8 @@ void Table::reindex_row(Index &index, const ColDefNode &col_def, const Row &old_
|
|||||||
void Table::index_row(const Row &row) {
|
void Table::index_row(const Row &row) {
|
||||||
if (!m_indexes.empty()) {
|
if (!m_indexes.empty()) {
|
||||||
const size_t rowid = get_rowid(row);
|
const size_t rowid = get_rowid(row);
|
||||||
|
|
||||||
|
std::unique_lock guard(m_insert_guard);
|
||||||
for (auto &idx : m_indexes) {
|
for (auto &idx : m_indexes) {
|
||||||
ColDefNode cDef = get_column_def(idx.get_column_name());
|
ColDefNode cDef = get_column_def(idx.get_column_name());
|
||||||
index_row(idx, cDef, row, rowid);
|
index_row(idx, cDef, row, rowid);
|
||||||
|
|||||||
5
table.h
5
table.h
@@ -31,8 +31,8 @@ struct Table {
|
|||||||
void validate_row(Row &row);
|
void validate_row(Row &row);
|
||||||
|
|
||||||
std::string csv_string();
|
std::string csv_string();
|
||||||
int load_csv_string(const std::string &content);
|
size_t load_csv_string(const std::string &content);
|
||||||
int load_csv_file(const std::string &filename);
|
size_t load_csv_file(const std::string &filename);
|
||||||
|
|
||||||
void print();
|
void print();
|
||||||
|
|
||||||
@@ -40,6 +40,7 @@ struct Table {
|
|||||||
std::vector<ColDefNode> m_col_defs;
|
std::vector<ColDefNode> m_col_defs;
|
||||||
std::vector<Row> m_rows;
|
std::vector<Row> m_rows;
|
||||||
std::vector<Index> m_indexes;
|
std::vector<Index> m_indexes;
|
||||||
|
std::mutex m_insert_guard;
|
||||||
|
|
||||||
void create_row_from_vector(const std::vector<ColDefNode> &colDefs, const std::vector<std::string> &csv_line);
|
void create_row_from_vector(const std::vector<ColDefNode> &colDefs, const std::vector<std::string> &csv_line);
|
||||||
|
|
||||||
|
|||||||
186
threadpoool.h
186
threadpoool.h
@@ -4,129 +4,101 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <future>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class thread_pool {
|
struct thread_pool {
|
||||||
public:
|
explicit thread_pool(std::size_t size) {
|
||||||
thread_pool(std::size_t size) : stop(false), exit_on_empty(false) {
|
start(size);
|
||||||
for (std::size_t i = 0; i < size; ++i) {
|
finished.reserve(1024);
|
||||||
workers.emplace_back([this] { spawn(); });
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual ~thread_pool() {
|
std::mutex mutex;
|
||||||
if (!stop) join();
|
std::condition_variable condition;
|
||||||
}
|
std::deque<std::packaged_task<void()>> work;
|
||||||
|
|
||||||
// template<class F, class... Args>
|
std::vector<std::future<void>> finished;
|
||||||
// void post(F&& f, Args&&... args) {
|
|
||||||
void post(std::function<void()> f) {
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(mutex);
|
|
||||||
tasks.push(f);
|
|
||||||
}
|
|
||||||
|
|
||||||
condition.notify_one();
|
// queue( lambda ) will enqueue the lambda into the tasks for the threads
|
||||||
}
|
template<class F, class R=std::result_of_t<F &()>>
|
||||||
|
std::future<R> queue(F &&f) {
|
||||||
|
// wrap the function object into a packaged task, splitting
|
||||||
|
// execution from the return value:
|
||||||
|
std::packaged_task<R()> p(std::forward<F>(f));
|
||||||
|
|
||||||
void join() {
|
auto r = p.get_future();
|
||||||
{
|
{
|
||||||
std::unique_lock<std::mutex> lock(mutex);
|
std::unique_lock<std::mutex> l(mutex);
|
||||||
stop = true;
|
work.emplace_back(std::move(p));
|
||||||
}
|
}
|
||||||
|
condition.notify_one();
|
||||||
|
|
||||||
condition.notify_all();
|
return r; // return the future result of the task
|
||||||
|
}
|
||||||
|
|
||||||
for (std::size_t i = 0; i < workers.size(); ++i) {
|
// start threads_num threads in the thread pool.
|
||||||
workers[i].join();
|
void start(std::size_t threads_num = 1) {
|
||||||
}
|
for (std::size_t i = 0; i < threads_num; ++i) {
|
||||||
}
|
finished.push_back(
|
||||||
|
std::async(
|
||||||
|
std::launch::async,
|
||||||
|
[this] { thread_task(); }
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void finish_tasks() {
|
// abort() cancels all non-started tasks, and tells every working thread
|
||||||
{
|
// stop running, and waits for them to finish up.
|
||||||
std::unique_lock<std::mutex> lock(mutex);
|
void abort() {
|
||||||
exit_on_empty = true;
|
cancel_pending();
|
||||||
}
|
finish();
|
||||||
|
}
|
||||||
|
|
||||||
while (!tasks.empty()) {
|
// cancel_pending() merely cancels all non-started tasks:
|
||||||
condition.notify_all();
|
void cancel_pending() {
|
||||||
|
std::unique_lock<std::mutex> l(mutex);
|
||||||
|
work.clear();
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_lock<std::mutex> lock(mutex);
|
// finish enques a "stop the thread" message for every thread, then waits for them:
|
||||||
condition_empty.wait(lock, [this]() {
|
void finish() {
|
||||||
return (tasks.empty());
|
{
|
||||||
});
|
std::unique_lock<std::mutex> l(mutex);
|
||||||
}
|
for (auto &&unused:finished) {
|
||||||
|
work.emplace_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
condition.notify_all();
|
||||||
|
finished.clear();
|
||||||
|
}
|
||||||
|
|
||||||
// BLEJU, BLEJU
|
~thread_pool() {
|
||||||
// while (!tasks.empty()) condition.notify_all();
|
finish();
|
||||||
bool op = true;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void spawn() {
|
void thread_task() {
|
||||||
std::function<void()> task;
|
while (true) {
|
||||||
|
std::packaged_task<void()> f;
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> l(mutex);
|
||||||
|
if (work.empty()) {
|
||||||
|
condition.wait(l, [&] { return !work.empty(); });
|
||||||
|
}
|
||||||
|
f = std::move(work.front());
|
||||||
|
work.pop_front();
|
||||||
|
}
|
||||||
|
// if the task is invalid, it means we are asked to abort:
|
||||||
|
if (!f.valid()) return;
|
||||||
|
|
||||||
while (!stop && (!exit_on_empty || !tasks.empty())) {
|
f();
|
||||||
std::unique_lock<std::mutex> lock(mutex);
|
}
|
||||||
condition.wait(lock, [this]() {
|
}
|
||||||
return (!tasks.empty()) || (tasks.empty() && stop);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!tasks.empty()) {
|
|
||||||
task = std::move(tasks.front());
|
|
||||||
tasks.pop();
|
|
||||||
task();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (exit_on_empty) {
|
|
||||||
condition_empty.notify_one();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
std::vector<std::thread> workers;
|
|
||||||
std::queue<std::function<void()>> tasks;
|
|
||||||
|
|
||||||
std::mutex mutex;
|
|
||||||
std::condition_variable condition;
|
|
||||||
std::condition_variable condition_empty;
|
|
||||||
bool stop;
|
|
||||||
bool exit_on_empty;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// template<class F, class... Args>
|
inline void dispatch(thread_pool &pool, std::function<void()> f) {
|
||||||
// inline void dispatch(thread_pool& pool, F&& f, Args&&... args) {
|
pool.queue(f);
|
||||||
inline void dispatch(thread_pool& pool, std::function<void()> f) {
|
|
||||||
pool.post(f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// int main() {
|
|
||||||
// int cnt = 0;
|
|
||||||
|
|
||||||
// thread_pool tp{std::thread::hardware_concurrency()};
|
|
||||||
// std::mutex mutex;
|
|
||||||
|
|
||||||
// std::cout << "start" << std::endl;
|
|
||||||
|
|
||||||
// for(int i=0; i<100; i++) {
|
|
||||||
// dispatch(tp, std::function<void()>
|
|
||||||
// ([i, &cnt, &mutex]() {
|
|
||||||
// std::cout << "test " << i << std::endl;
|
|
||||||
// {
|
|
||||||
// std::unique_lock<std::mutex> lock(mutex);
|
|
||||||
// cnt++;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// ));
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
// std::cout << "end" << std::endl;
|
|
||||||
// tp.join();
|
|
||||||
// std::cout << "cnt:" << cnt << std::endl;
|
|
||||||
|
|
||||||
// }
|
|
||||||
|
|||||||
Reference in New Issue
Block a user