indexes WIP
This commit is contained in:
162
usql_dml.cpp
162
usql_dml.cpp
@@ -1,15 +1,13 @@
|
||||
#include "usql.h"
|
||||
#include "exception.h"
|
||||
#include "ml_date.h"
|
||||
#include "ml_string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
|
||||
namespace usql {
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) const {
|
||||
// find source table
|
||||
Table *table = find_table(node.table_name);
|
||||
|
||||
@@ -28,7 +26,7 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
|
||||
}
|
||||
|
||||
// check for aggregate function
|
||||
bool aggregate_funcs = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
|
||||
bool is_aggregated = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
|
||||
|
||||
// prepare result table structure
|
||||
auto result = std::make_unique<Table>("result", result_tbl_col_defs);
|
||||
@@ -36,41 +34,26 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
|
||||
// replace possible order by col names to col indexes and validate
|
||||
setup_order_columns(node.order_by, result.get());
|
||||
|
||||
|
||||
// execute access plan
|
||||
Row* new_row = nullptr;
|
||||
for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) {
|
||||
// eval where for row
|
||||
if (eval_where(node.where.get(), table, *row)) {
|
||||
// prepare empty row and copy column values
|
||||
// when agregate functions in result only one row for table
|
||||
if (!aggregate_funcs || result->rows_count()==0) {
|
||||
new_row = &result->create_empty_row();
|
||||
}
|
||||
|
||||
for (auto idx = 0; idx < result->columns_count(); idx++) {
|
||||
auto src_table_col_idx = source_table_col_index[idx];
|
||||
// look for index to use
|
||||
auto [use_index, rowids] = probe_index_scan(node.where.get(), table);
|
||||
|
||||
if (src_table_col_idx == FUNCTION_CALL) {
|
||||
auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[](idx).value.get(), &result_tbl_col_defs[idx], &new_row->operator[](idx));
|
||||
ValueNode *col_value = evaluated_value.get();
|
||||
// index scan
|
||||
if (use_index) {
|
||||
for (int & rowid : rowids) {
|
||||
evalRowWhere(node, table, (Row *) &table->get_row(rowid), result.get(), new_row, result_tbl_col_defs, source_table_col_index, is_aggregated);
|
||||
}
|
||||
|
||||
new_row->setColumnValue(&result_tbl_col_defs[idx], col_value);
|
||||
} else {
|
||||
ColValue &col_value = row->operator[](src_table_col_idx);
|
||||
new_row->setColumnValue(&result_tbl_col_defs[idx], col_value);
|
||||
}
|
||||
}
|
||||
|
||||
// add row to result
|
||||
if (aggregate_funcs == 0) {
|
||||
result->commit_row(*new_row);
|
||||
}
|
||||
// full scan
|
||||
} else {
|
||||
for (auto row = table->fs_begin(); row != table->fs_end(); ++row) {
|
||||
evalRowWhere(node, table, &(*row), result.get(), new_row, result_tbl_col_defs, source_table_col_index, is_aggregated);
|
||||
}
|
||||
}
|
||||
// when aggregates commit this one row
|
||||
if (aggregate_funcs && new_row != nullptr) {
|
||||
result->commit_row(*new_row);
|
||||
}
|
||||
|
||||
|
||||
execute_distinct(node, result.get());
|
||||
|
||||
@@ -81,7 +64,116 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
|
||||
return result;
|
||||
}
|
||||
|
||||
bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const {
|
||||
std::pair<bool, std::vector<int>> USql::probe_index_scan(const Node *where, Table *table) const {
|
||||
bool indexscan_possible = normalize_where(where);
|
||||
|
||||
if (indexscan_possible && Settings::get_bool_setting("USE_INDEXSCAN")) {
|
||||
// where->dump();
|
||||
return look_for_usable_index(where, table);
|
||||
}
|
||||
|
||||
// no index scan
|
||||
return std::make_pair(false, std::vector<int>{});
|
||||
}
|
||||
|
||||
std::pair<bool, std::vector<int>> USql::look_for_usable_index(const Node *where, Table *table) const {
|
||||
if (where->node_type == NodeType::relational_operator) {
|
||||
auto * ron = (RelationalOperatorNode *)where;
|
||||
if (ron->op == RelationalOperatorType::equal) {
|
||||
if (ron->left->node_type == NodeType::database_value &&
|
||||
((ron->right->node_type == NodeType::int_value) || (ron->right->node_type == NodeType::string_value))
|
||||
) {
|
||||
auto col_name = ((DatabaseValueNode *)ron->left.get())->col_name;
|
||||
|
||||
Index<IndexValue> * used_index = table->get_index_for_column(col_name);
|
||||
if (used_index != nullptr) {
|
||||
std::vector<int> rowids;
|
||||
|
||||
if (used_index->get_data_type() == IndexedDataType::integer)
|
||||
rowids = used_index->search(((ValueNode *) ron->right.get())->getIntegerValue());
|
||||
else if (used_index->get_data_type() == IndexedDataType::string)
|
||||
rowids = used_index->search(((ValueNode *) ron->right.get())->getStringValue());
|
||||
|
||||
std::cout << "using index " << table->m_name << "(" << used_index->get_column_name() << "), " << rowids.size() << "/" << table->rows_count() << std::endl;
|
||||
return std::make_pair(true, rowids);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (where->node_type == NodeType::logical_operator) {
|
||||
auto * operatorNode = (LogicalOperatorNode *)where;
|
||||
if (operatorNode->op == LogicalOperatorType::and_operator) {
|
||||
auto [use_index, rowids] = look_for_usable_index(operatorNode->left.get(), table);
|
||||
if (use_index) {
|
||||
return std::make_pair(true, rowids);
|
||||
}
|
||||
return look_for_usable_index(operatorNode->right.get(), table);
|
||||
}
|
||||
}
|
||||
|
||||
// no index available
|
||||
return std::make_pair(false, std::vector<int>{});
|
||||
}
|
||||
|
||||
bool USql::normalize_where(const Node *node) const {
|
||||
// normalize relational operators "layout" and check whether index scan even possible
|
||||
|
||||
// unify relational operators tha left node is always database value
|
||||
if (node->node_type == NodeType::relational_operator) {
|
||||
// TODO more optimizations here, for example node 1 = 2 etc
|
||||
auto * ron = (RelationalOperatorNode *)node;
|
||||
if (ron->right->node_type == NodeType::database_value && ((ron->left->node_type == NodeType::int_value) || (ron->left->node_type == NodeType::string_value)) ) {
|
||||
std::swap(ron->left, ron->right);
|
||||
}
|
||||
return true;
|
||||
} else if (node->node_type == NodeType::logical_operator) {
|
||||
auto * operatorNode = (LogicalOperatorNode *)node;
|
||||
if (operatorNode->op == LogicalOperatorType::or_operator) {
|
||||
return false;
|
||||
}
|
||||
bool left_subnode = normalize_where(operatorNode->left.get());
|
||||
bool right_subnode = normalize_where(operatorNode->left.get());
|
||||
return left_subnode && right_subnode;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Applies the select's WHERE condition to one source row and, when the row
// matches, fills a result row with the selected column values.
//
// where_node          - select node providing the WHERE expression and the column list
// src_table, src_row  - table and row being examined
// rslt_table          - table the result row is created in / committed to
// rslt_row            - result row to fill; replaced by a newly created row when the
//                       select is not aggregated or the result table has no rows yet
// rslt_tbl_col_defs   - column definitions of the result table
// src_table_col_index - per result column: source column index, or FUNCTION_CALL
//                       when the value has to be evaluated
// is_aggregated       - when true the row is filled but not committed here
//
// NOTE(review): rslt_row is received by value, so a row created here is not seen
// by the caller. execute_select passes its local new_row and later tests it
// against nullptr before committing the aggregate row - confirm whether a
// reference to the pointer was intended.
void USql::evalRowWhere(SelectFromTableNode &where_node,
                        Table *src_table, Row *src_row,
                        Table *rslt_table, Row *rslt_row,
                        const std::vector<ColDefNode> &rslt_tbl_col_defs,
                        const std::vector<int> &src_table_col_index,
                        bool is_aggregated) {
    // rows rejected by the where condition contribute nothing
    if (!eval_where(where_node.where.get(), src_table, *src_row)) {
        return;
    }

    // with aggregate functions there is a single result row for the whole table
    const bool needs_fresh_row = !is_aggregated || rslt_table->rows_count() == 0;
    if (needs_fresh_row) {
        rslt_row = &rslt_table->create_empty_row();
    }

    for (auto col = 0; col < rslt_table->columns_count(); col++) {
        auto *target_def = const_cast<ColDefNode *>(&rslt_tbl_col_defs[col]);
        const auto source_col = src_table_col_index[col];

        if (source_col == FUNCTION_CALL) {
            // computed column: evaluate the expression against the source row
            auto computed = eval_value_node(src_table, *src_row, (*where_node.cols_names)[col].value.get(),
                                            target_def, &(*rslt_row)[col]);
            ValueNode *computed_value = computed.get();

            rslt_row->setColumnValue(target_def, computed_value);
        } else {
            // plain column: take the value over from the source row
            ColValue &source_value = (*src_row)[source_col];
            rslt_row->setColumnValue(target_def, source_value);
        }
    }

    // non-aggregated selects commit one result row per matching source row
    if (!is_aggregated) {
        rslt_table->commit_row(*rslt_row);
    }
}
|
||||
|
||||
bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, size_t result_cols_cnt) {
|
||||
int aggregate_funcs = 0;
|
||||
for (int i = 0; i < node.cols_names->size(); i++) {
|
||||
SelectColNode * col_node = &node.cols_names->operator[](i);
|
||||
@@ -99,7 +191,7 @@ bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, int res
|
||||
return aggregate_funcs > 0;
|
||||
}
|
||||
|
||||
void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const {
|
||||
void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) {
|
||||
if (node.cols_names->size() == 1 && node.cols_names->operator[](0).name == "*") {
|
||||
node.cols_names->clear();
|
||||
node.cols_names->reserve(table->columns_count());
|
||||
@@ -109,7 +201,7 @@ void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const {
|
||||
}
|
||||
}
|
||||
|
||||
void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const {
|
||||
void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) {
|
||||
for (auto& order_node : node) {
|
||||
if (!order_node.col_name.empty()) {
|
||||
ColDefNode col_def = table->get_column_def(order_node.col_name);
|
||||
|
||||
Reference in New Issue
Block a user