usql/usql.cpp

643 lines
23 KiB
C++

#include "usql.h"
#include "exception.h"
#include "ml_date.h"
#include <algorithm>
#include <fstream>
namespace usql {
// Parses a single command and executes it.
//
// Every std::exception raised while parsing or executing is turned into a
// statement result table with code -1 and the exception text as description,
// so such errors are reported through the returned table.
//
// @param command text of the statement to run
// @return result table of the statement, or an error result table
std::unique_ptr<Table> USql::execute(const std::string &command) {
    try {
        std::unique_ptr<Node> node = m_parser.parse(command);
        // Guard the dereference below; an empty pointer must not end in undefined behaviour.
        if (!node)
            return create_stmt_result_table(-1, "unknown statement", 0);
        return execute(*node);
    } catch (const std::exception &e) {
        // the exception is only read here, hence caught by const reference
        return create_stmt_result_table(-1, e.what(), 0);
    }
}
// Dispatches an already parsed statement to the executor matching its node type.
// Each branch downcasts the generic node to the concrete statement node class.
// A node type without an executor produces an error result table (code -1).
std::unique_ptr<Table> USql::execute(Node &node) {
    // TODO optimize execution nodes here
    const auto statement_kind = node.node_type;

    // table definition statements
    if (statement_kind == NodeType::create_table)
        return execute_create_table(static_cast<CreateTableNode &>(node));
    if (statement_kind == NodeType::create_table_as_select)
        return execute_create_table_as_table(static_cast<CreateTableAsSelectNode &>(node));
    if (statement_kind == NodeType::drop_table)
        return execute_drop(static_cast<DropTableNode &>(node));

    // row level statements
    if (statement_kind == NodeType::insert_into)
        return execute_insert_into_table(static_cast<InsertIntoTableNode &>(node));
    if (statement_kind == NodeType::select_from)
        return execute_select(static_cast<SelectFromTableNode &>(node));
    if (statement_kind == NodeType::delete_from)
        return execute_delete(static_cast<DeleteFromTableNode &>(node));
    if (statement_kind == NodeType::update_table)
        return execute_update(static_cast<UpdateTableNode &>(node));

    // file import / export
    if (statement_kind == NodeType::load_table)
        return execute_load(static_cast<LoadIntoTableNode &>(node));
    if (statement_kind == NodeType::save_table)
        return execute_save(static_cast<SaveTableNode &>(node));

    // settings
    if (statement_kind == NodeType::set)
        return execute_set(static_cast<SetNode &>(node));
    if (statement_kind == NodeType::show)
        return execute_show(static_cast<ShowNode &>(node));

    return create_stmt_result_table(-1, "unknown statement", 0);
}
// Handles CREATE TABLE: after check_table_not_exists() accepted the name, a table
// built from the node's name and column definitions is appended to m_tables.
// Returns a result table with code 0 and zero affected rows.
std::unique_ptr<Table> USql::execute_create_table(CreateTableNode &node) {
    check_table_not_exists(node.table_name);

    Table created{node.table_name, node.cols_defs};
    m_tables.push_back(created);

    return create_stmt_result_table(0, "table created", 0);
}
// Handles CREATE TABLE ... AS SELECT: runs the embedded select, registers a new
// table using the select's column definitions and copies the selected rows into it.
// The name is checked with check_table_not_exists() first.
// Returns a result table whose affected row count is the row count of the new table.
std::unique_ptr<Table> USql::execute_create_table_as_table(CreateTableAsSelectNode &node) {
    check_table_not_exists(node.table_name);

    auto select = execute_select(static_cast<SelectFromTableNode &>(*node.select_table));

    // create table
    Table new_table{node.table_name, select->m_col_defs};
    m_tables.push_back(new_table);

    // copy rows
    // must happen on the stored table: rows put into new_table are lost during m_tables.push_back
    Table *table = find_table(node.table_name);
    for (Row &orig_row : select->m_rows) {
        table->commit_copy_of_row(orig_row);
    }

    // `select` is the temporary result returned by execute_select and is owned only by this
    // unique_ptr, not by the CreateTableAsSelectNode. It is destroyed when this function
    // returns; calling release() would give up ownership without deleting and leak the table.
    return create_stmt_result_table(0, "table created", table->m_rows.size());
}
// Handles LOAD: hands the file name from the node to the named table's
// load_csv_file() and reports the number it returns as affected rows.
// find_table() throws when the table does not exist.
std::unique_ptr<Table> USql::execute_load(LoadIntoTableNode &node) {
    // target table must already be defined
    Table *target = find_table(node.table_name);

    // reading and parsing of the file is done by the table itself
    const auto loaded_rows = target->load_csv_file(node.filename);

    return create_stmt_result_table(0, "load succeeded", loaded_rows);
}
// Handles SAVE: writes the csv representation of the named table into node.filename.
// Throws Exception when the table is unknown (find_table), when the file cannot be
// opened, or when writing/closing the file fails. Previously such I/O failures were
// silently reported as "save succeeded".
// Returns a result table whose affected row count is the table's row count.
std::unique_ptr<Table> USql::execute_save(SaveTableNode &node) {
    // find source table
    Table *table_def = find_table(node.table_name);

    // make csv string
    std::string csv_string = table_def->csv_string();

    // save data
    std::ofstream file(node.filename);
    if (!file.is_open())
        throw Exception(std::string("cannot open file for writing (") + node.filename + ")");

    file << csv_string;
    file.close();
    // close() flushes the buffer; an error state here means the data did not reach the file
    if (file.fail())
        throw Exception(std::string("cannot write file (") + node.filename + ")");

    return create_stmt_result_table(0, "save succeeded", table_def->rows_count());
}
// Handles DROP TABLE: removes the first table in m_tables whose name matches.
// Throws Exception("table not found (<name>)") when there is no such table.
std::unique_ptr<Table> USql::execute_drop(DropTableNode &node) {
    // The predicate is used only within this call, so it refers to the name instead of
    // capturing (and thereby copying) the whole node by value.
    const auto &wanted_name = node.table_name;
    auto name_cmp = [&wanted_name](const Table &t) { return t.m_name == wanted_name; };

    auto table_def = std::find_if(std::begin(m_tables), std::end(m_tables), name_cmp);
    if (table_def == std::end(m_tables))
        throw Exception("table not found (" + node.table_name + ")");

    m_tables.erase(table_def);
    return create_stmt_result_table(0, "drop succeeded", 0);
}
// Handles SET: forwards the name/value pair of the node to Settings::set_setting
// and reports one affected row.
std::unique_ptr<Table> USql::execute_set(SetNode &node) {
    Settings::set_setting(node.name, node.value);

    auto outcome = create_stmt_result_table(0, "set succeeded", 1);
    return outcome;
}
// Handles SHOW: reads the setting named by the node via Settings::get_setting and
// returns it appended to the description text of the result table.
std::unique_ptr<Table> USql::execute_show(ShowNode &node) {
    const std::string setting_value = Settings::get_setting(node.name);

    std::string description{"show succeeded: "};
    description += setting_value;

    return create_stmt_result_table(0, description, 1);
}
// Handles INSERT INTO: evaluates every value expression of the node, stores the
// results into a fresh row of the target table and commits that row.
// Throws Exception("Incorrect number of values") when the column list and the value
// list differ in size; find_table() throws for an unknown table.
std::unique_ptr<Table> USql::execute_insert_into_table(InsertIntoTableNode &node) {
    Table *target = find_table(node.table_name);

    const auto column_count = node.cols_names.size();
    if (column_count != node.cols_values.size())
        throw Exception("Incorrect number of values");

    // row to be filled; it becomes part of the table only by commit_row below
    Row &pending_row = target->create_empty_row();

    // evaluate the expressions one by one and store each result in its column
    for (size_t pos = 0; pos != column_count; ++pos) {
        ColDefNode definition = target->get_column_def(node.cols_names[pos].col_name);
        auto evaluated = eval_value_node(target, pending_row, node.cols_values[pos].get());
        pending_row.setColumnValue(&definition, evaluated.get());
    }

    target->commit_row(pending_row);

    return create_stmt_result_table(0, "insert succeeded", 1);
}
// Handles SELECT on one source table.
//
// Steps: expand a lone "*" into all source columns (this rewrites node.cols_names),
// derive the column definitions of the result, copy or compute the column values of
// every row accepted by the where clause, then apply distinct, order by and
// offset/limit on the result table.
// Returns the newly built result table (named "result").
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
    Table *source = find_table(node.table_name);
    auto &selected_cols = *node.cols_names;

    // a single "*" stands for every column of the source table
    if (selected_cols.size() == 1 && selected_cols[0].name == "*") {
        selected_cols.clear();
        selected_cols.reserve(source->columns_count());
        for (const auto &source_def : source->m_col_defs) {
            selected_cols.emplace_back(
                SelectColNode{std::make_unique<DatabaseValueNode>(source_def.name), source_def.name});
        }
    }

    // definitions of result columns plus, for each of them, the index of the source
    // column it is copied from (or FUNCTION_CALL when the value has to be evaluated)
    std::vector<ColDefNode> result_defs{};
    std::vector<int> source_indexes{};
    for (int pos = 0; pos < selected_cols.size(); pos++) {
        auto [source_index, result_def] = get_column_definition(source, &selected_cols[pos], pos);
        source_indexes.push_back(source_index);
        result_defs.push_back(result_def);
    }

    auto result = std::make_unique<Table>("result", result_defs);

    // full scan of the source table
    for (Row &source_row : source->m_rows) {
        if (!eval_where(node.where.get(), source, source_row))
            continue;

        Row &out_row = result->create_empty_row();
        for (auto col = 0; col < result->columns_count(); col++) {
            const auto source_index = source_indexes[col];
            if (source_index == FUNCTION_CALL) {
                // computed column: evaluate the expression against the source row
                auto computed = eval_value_node(source, source_row, selected_cols[col].value.get());
                ValueNode *computed_ptr = computed.get();
                out_row.setColumnValue(&result_defs[col], computed_ptr);
            } else {
                // plain column: take the stored value as is
                ColValue &stored = source_row[source_index];
                out_row.setColumnValue(&result_defs[col], stored);
            }
        }
        result->commit_row(out_row);
    }

    execute_distinct(node, result.get());
    execute_order_by(node, source, result.get());
    execute_offset_limit(node.offset_limit, result.get());

    return result;
}
// Removes duplicate rows from the result when the select is marked distinct.
// Rows are sorted first so that equal rows become neighbours, then adjacent
// duplicates are erased. Does nothing when node.distinct is false.
void USql::execute_distinct(SelectFromTableNode &node, Table *result) {
    if (!node.distinct) return;

    auto &rows = result->m_rows;

    // std::sort needs a strict weak ordering, i.e. comp(a, a) must be false.
    // The former `>= 0` returned true for equal rows, which is undefined behaviour.
    auto row_precedes = [](const Row &a, const Row &b) { return a.compare(b) > 0; };
    std::sort(rows.begin(), rows.end(), row_precedes);

    // use the same comparison for equality so it always agrees with the sort order
    auto same_row = [](const Row &a, const Row &b) { return a.compare(b) == 0; };
    rows.erase(std::unique(rows.begin(), rows.end(), same_row), rows.end());
}
// Sorts the result rows according to node.order_by.
//
// Each order by entry refers to a result column by its 1-based position (col_index)
// and a direction (ascending). Entries are applied in the given sequence; a later
// entry decides only when all earlier ones compare equal.
// All entries are validated and resolved before sorting starts, so an invalid index
// is reported by Exception without touching the rows, and the comparator does not
// look up column definitions on every comparison.
// The `table` parameter is not used by this function.
void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *result) {
    if (node.order_by.empty()) return;

    // one resolved sort criterion
    struct SortKey {
        ColDefNode col_def;
        bool ascending;
    };

    const auto result_columns = static_cast<long long>(result->columns_count());

    std::vector<SortKey> sort_keys;
    sort_keys.reserve(node.order_by.size());
    for (const auto &order_by_col_def : node.order_by) {
        const auto position = static_cast<long long>(order_by_col_def.col_index);
        if (position < 1 || position > result_columns)
            throw Exception("invalid column index in order by");
        sort_keys.push_back(
            SortKey{result->get_column_def(order_by_col_def.col_index - 1), order_by_col_def.ascending});
    }

    auto compare_rows = [&sort_keys](const Row &a, const Row &b) {
        for (const auto &key : sort_keys) {
            ColValue &a_val = a[key.col_def.order];
            ColValue &b_val = b[key.col_def.order];
            int compare = a_val.compare(b_val);
            if (compare < 0) return key.ascending;
            if (compare > 0) return !key.ascending;
        }
        // equal in all criteria: neither row precedes the other
        return false;
    };
    std::sort(result->m_rows.begin(), result->m_rows.end(), compare_rows);
}
// Applies OFFSET and then LIMIT to the result rows.
// A positive offset drops that many leading rows (all rows when the offset is not
// smaller than the row count). A positive limit smaller than the remaining row count
// truncates the result to that many rows. Values that are not positive are ignored.
void USql::execute_offset_limit(OffsetLimitNode &node, Table *result) {
    auto &rows = result->m_rows;

    if (node.offset > 0) {
        auto skipped_end = rows.end();
        if (result->rows_count() > node.offset)
            skipped_end = rows.begin() + node.offset;
        rows.erase(rows.begin(), skipped_end);
    }

    if (node.limit > 0 && node.limit < result->rows_count()) {
        rows.erase(rows.begin() + node.limit, rows.end());
    }
}
// Resolves the definition of one select list item by delegating to
// get_node_definition with the item's expression and its result column name.
std::tuple<int, ColDefNode> USql::get_column_definition(Table *table, SelectColNode *select_col_node, int col_order ) {
    auto *expression = select_col_node->value.get();
    const auto &result_name = select_col_node->name;
    return get_node_definition(table, expression, result_name, col_order);
}
// Derives the result column definition for an expression node of a select list.
//
// Returns a tuple of
//   - the `order` of the source column when the node is a plain database value,
//     otherwise -1 (execute_select compares this value with FUNCTION_CALL; presumably
//     that constant is -1 - TODO confirm), and
//   - the definition of the result column (named col_name, placed at col_order).
//
// Supported functions are to_string, to_date, lower and upper. lower/upper were added
// here because eval_function_value_node evaluates them, yet this function rejected them.
// Throws Exception for other functions and for node types not handled below.
std::tuple<int, ColDefNode> USql::get_node_definition(Table *table, Node * node, const std::string & col_name, int col_order ) {
    switch (node->node_type) {
        case NodeType::database_value: {
            // plain column: type, length and nullability come from the source column
            auto dbval_node = static_cast<DatabaseValueNode *>(node);
            ColDefNode src_col_def = table->get_column_def(dbval_node->col_name);
            ColDefNode col_def = ColDefNode{col_name, src_col_def.type, col_order, src_col_def.length, src_col_def.null};
            return std::make_tuple(src_col_def.order, col_def);
        }
        case NodeType::function: {
            auto func_node = static_cast<FunctionNode *>(node);
            if (func_node->function == "to_string") {
                ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, 32, true};
                return std::make_tuple(-1, col_def);
            }
            if (func_node->function == "to_date") {
                ColDefNode col_def = ColDefNode{col_name, ColumnType::integer_type, col_order, 1, true};
                return std::make_tuple(-1, col_def);
            }
            if (func_node->function == "lower" || func_node->function == "upper") {
                // Changing the case keeps the length, so reuse the length of a varchar argument.
                if (!func_node->params.empty()) {
                    ColDefNode arg_def = std::get<1>(
                        get_node_definition(table, func_node->params[0].get(), col_name, col_order));
                    if (arg_def.type == ColumnType::varchar_type) {
                        ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, arg_def.length, true};
                        return std::make_tuple(-1, col_def);
                    }
                }
                // fallback length, same as the one used for string literals below
                ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, 64, true};
                return std::make_tuple(-1, col_def);
            }
            throw Exception("Unsupported function");
        }
        case NodeType::arithmetical_operator: {
            auto ari_node = static_cast<ArithmeticalOperatorNode *>(node);
            ColDefNode left_tbl_col_def = std::get<1>(get_node_definition(table, ari_node->left.get(), col_name, col_order));
            ColDefNode right_tbl_col_def = std::get<1>(get_node_definition(table, ari_node->right.get(), col_name, col_order));
            // TODO handle varchar and its length
            // float wins as soon as one operand is float, otherwise the result is integer
            const bool any_float = left_tbl_col_def.type == ColumnType::float_type ||
                                   right_tbl_col_def.type == ColumnType::float_type;
            ColumnType col_type = any_float ? ColumnType::float_type : ColumnType::integer_type;
            ColDefNode col_def = ColDefNode{col_name, col_type, col_order, 1, true};
            return std::make_tuple(-1, col_def);
        }
        case NodeType::logical_operator: {
            ColDefNode col_def = ColDefNode{col_name, ColumnType::bool_type, col_order, 1, true};
            return std::make_tuple(-1, col_def);
        }
        case NodeType::int_value: {
            ColDefNode col_def = ColDefNode{col_name, ColumnType::integer_type, col_order, 1, true};
            return std::make_tuple(-1, col_def);
        }
        case NodeType::float_value: {
            ColDefNode col_def = ColDefNode{col_name, ColumnType::float_type, col_order, 1, true};
            return std::make_tuple(-1, col_def);
        }
        case NodeType::string_value: {
            // TODO right len
            ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, 64, true};
            return std::make_tuple(-1, col_def);
        }
        default:
            break;
    }
    throw Exception("Unsupported node type");
}
// Handles DELETE FROM: erases every row of the table for which the where clause
// evaluates to true and reports how many rows disappeared.
std::unique_ptr<Table> USql::execute_delete(DeleteFromTableNode &node) {
    Table *table = find_table(node.table_name);
    auto &rows = table->m_rows;

    const auto rows_before = table->rows_count();

    // move the rows to keep to the front, then cut off the tail
    auto matches_where = [&node, table](Row &row) { return eval_where(node.where.get(), table, row); };
    auto first_removed = std::remove_if(rows.begin(), rows.end(), matches_where);
    rows.erase(first_removed, rows.end());

    // number of deleted rows = difference of the row counts
    return create_stmt_result_table(0, "delete succeeded", rows_before - table->rows_count());
}
// Handles UPDATE: for every row accepted by the where clause, evaluates the value
// expression of each listed column, validates it and stores it into the row.
//
// The column list and the value list must have the same size, otherwise
// Exception("Incorrect number of values") is thrown (previously node.values was
// indexed without any check). Column definitions are resolved once, before the first
// row is touched, so an unknown column is reported before any row has been modified.
// Returns a result table with the number of updated rows.
std::unique_ptr<Table> USql::execute_update(UpdateTableNode &node) {
    // find source table
    Table *table = find_table(node.table_name);

    if (node.cols_names.size() != node.values.size())
        throw Exception("Incorrect number of values");

    // resolve the definitions of the updated columns only once
    std::vector<ColDefNode> col_defs;
    col_defs.reserve(node.cols_names.size());
    for (const auto &col : node.cols_names) {
        col_defs.push_back(table->get_column_def(col.col_name));
    }

    // execute access plan
    int affected_rows = 0;
    for (Row &row : table->m_rows) {
        // eval where for row
        if (!eval_where(node.where.get(), table, row))
            continue;

        for (size_t i = 0; i < col_defs.size(); i++) {
            ColDefNode &col_def = col_defs[i];
            std::unique_ptr<ValueNode> new_val = eval_arithmetic_operator(col_def.type,
                                                                          static_cast<ArithmeticalOperatorNode &>(*node.values[i]),
                                                                          table, row);
            usql::Table::validate_column(&col_def, new_val.get());
            row.setColumnValue(&col_def, new_val.get());
        }
        affected_rows++;
        // TODO the problem here is that when the update fails on one row,
        // the changes already made to the other rows should not be applied
    }
    return create_stmt_result_table(0, "update succeeded", affected_rows);
}
// Evaluates a where clause node for one row.
// true_node always accepts the row; relational and logical operator nodes are
// delegated to their evaluators. Any other node type throws Exception("Wrong node type").
bool USql::eval_where(Node *where, Table *table, Row &row) {
    switch (where->node_type) {
        case NodeType::true_node:
            return true;
        case NodeType::relational_operator: // just one condition
            return eval_relational_operator(*static_cast<RelationalOperatorNode *>(where), table, row);
        case NodeType::logical_operator:
            return eval_logical_operator(*static_cast<LogicalOperatorNode *>(where), table, row);
        default:
            throw Exception("Wrong node type");
    }
    // every path above returns or throws, so nothing can follow the switch
}
bool USql::eval_relational_operator(const RelationalOperatorNode &filter, Table *table, Row &row) {
std::unique_ptr<ValueNode> left_value = eval_value_node(table, row, filter.left.get());
std::unique_ptr<ValueNode> right_value = eval_value_node(table, row, filter.right.get());
double comparator;
if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::int_value) {
comparator = left_value->getIntegerValue() - right_value->getIntegerValue();
} else if ((left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::float_value) ||
(left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::int_value) ||
(left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::float_value)) {
comparator = left_value->getDoubleValue() - right_value->getDoubleValue();
} else if (left_value->node_type == NodeType::string_value || right_value->node_type == NodeType::string_value) {
comparator = left_value->getStringValue().compare(right_value->getStringValue());
} else if (left_value->node_type == NodeType::bool_value && right_value->node_type == NodeType::bool_value) {
bool bl = left_value->getBooleanValue();
bool br = right_value->getBooleanValue();
comparator = bl == br ? 0 : 1;
// date values are essentially int values so handled above
} else {
throw Exception("Undefined combination of types");
}
switch (filter.op) {
case RelationalOperatorType::equal:
return comparator == 0.0;
case RelationalOperatorType::not_equal:
return comparator != 0.0;
case RelationalOperatorType::greater:
return comparator > 0.0;
case RelationalOperatorType::greater_equal:
return comparator >= 0.0;
case RelationalOperatorType::lesser:
return comparator < 0.0;
case RelationalOperatorType::lesser_equal:
return comparator <= 0.0;
}
throw Exception("invalid relational operator");
}
// Evaluates an expression node for one row and returns the resulting value node.
// The work is delegated by node type; arithmetic expressions are evaluated with
// float as the requested output type. Node types not listed below throw
// Exception("unsupported node type").
std::unique_ptr<ValueNode> USql::eval_value_node(Table *table, Row &row, Node *node) {
    switch (node->node_type) {
        case NodeType::database_value:
            return eval_database_value_node(table, row, node);

        case NodeType::int_value:
        case NodeType::float_value:
        case NodeType::string_value:
        case NodeType::bool_value:
            return eval_literal_value_node(table, row, node);

        case NodeType::function:
            return eval_function_value_node(table, row, node);

        case NodeType::null_value:
            return std::make_unique<NullValueNode>();

        case NodeType::arithmetical_operator:
            return eval_arithmetic_operator(ColumnType::float_type, static_cast<ArithmeticalOperatorNode &>(*node), table, row);

        default:
            break;
    }
    throw Exception("unsupported node type");
}
// Reads the column referenced by a DatabaseValueNode from the row and wraps it in
// a value node matching the column type. A null column value gives a NullValueNode;
// integer and date columns both give an IntValueNode.
// Throws Exception("unknown database value type") for any other column type.
std::unique_ptr<ValueNode> USql::eval_database_value_node(Table *table, Row &row, Node *node) {
    auto *column_ref = static_cast<DatabaseValueNode *>(node);
    ColDefNode definition = table->get_column_def(column_ref->col_name); // TODO optimize it to just get this def once
    ColValue &stored = row[definition.order];

    if (stored.isNull())
        return std::make_unique<NullValueNode>();

    switch (definition.type) {
        case ColumnType::integer_type:
        case ColumnType::date_type:
            return std::make_unique<IntValueNode>(stored.getIntValue());
        case ColumnType::float_type:
            return std::make_unique<DoubleValueNode>(stored.getDoubleValue());
        case ColumnType::varchar_type:
            return std::make_unique<StringValueNode>(stored.getStringValue());
        case ColumnType::bool_type:
            return std::make_unique<BooleanValueNode>(stored.getBoolValue());
        default:
            break;
    }
    throw Exception("unknown database value type");
}
// Produces a new value node holding the same value as the given literal node
// (int, float, string or bool). The table and row arguments are not read.
// Throws Exception("invalid type") for any other node type.
std::unique_ptr<ValueNode> USql::eval_literal_value_node(Table *table, Row &row, Node *node) {
    switch (node->node_type) {
        case NodeType::int_value:
            return std::make_unique<IntValueNode>(static_cast<IntValueNode *>(node)->value);
        case NodeType::float_value:
            return std::make_unique<DoubleValueNode>(static_cast<DoubleValueNode *>(node)->value);
        case NodeType::string_value:
            return std::make_unique<StringValueNode>(static_cast<StringValueNode *>(node)->value);
        case NodeType::bool_value:
            return std::make_unique<BooleanValueNode>(static_cast<BooleanValueNode *>(node)->value);
        default:
            // Date has no value node of its own (it is passed around as string)
            break;
    }
    throw Exception("invalid type");
}
// Evaluates a function call node for one row.
//
// All parameters are evaluated first, then the function is selected by name:
//   lower(str), upper(str)        -> string value with changed case
//   to_date(str, format)          -> int value returned by string_to_date
//   to_string(date, format)       -> string value returned by date_to_string
// Throws Exception when fewer parameters than required were supplied (previously the
// parameter vector was indexed without a check) and Exception("invalid function")
// for an unknown function name. Surplus parameters are ignored, as before.
std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row, Node *node) {
    auto *fnc = static_cast<FunctionNode *>(node);

    std::vector<std::unique_ptr<ValueNode>> evaluatedPars;
    for (auto &param : fnc->params) {
        evaluatedPars.push_back(eval_value_node(table, row, param.get()));
    }

    // guards the indexed accesses to evaluatedPars below
    auto require_params = [&evaluatedPars, fnc](size_t required) {
        if (evaluatedPars.size() < required)
            throw Exception(std::string("invalid number of parameters for function ") + fnc->function);
    };

    // TODO use some enum
    if (fnc->function == "lower") {
        require_params(1);
        std::string str = evaluatedPars[0]->getStringValue();
        std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return std::tolower(c); });
        return std::make_unique<StringValueNode>(str);
    }
    if (fnc->function == "upper") {
        require_params(1);
        std::string str = evaluatedPars[0]->getStringValue();
        std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return std::toupper(c); });
        return std::make_unique<StringValueNode>(str);
    }
    if (fnc->function == "to_date") {
        require_params(2);
        std::string date = evaluatedPars[0]->getStringValue();
        std::string format = evaluatedPars[1]->getStringValue();
        long epoch_time = string_to_date(date, format);
        return std::make_unique<IntValueNode>(epoch_time); // No DateValueNode for now
    }
    if (fnc->function == "to_string") {
        require_params(2);
        long date = evaluatedPars[0]->getDateValue();
        std::string format = evaluatedPars[1]->getStringValue();
        std::string formatted_date = date_to_string(date, format);
        return std::make_unique<StringValueNode>(formatted_date);
    }
    throw Exception("invalid function");
}
// Evaluates a logical operator node for one row.
// The left operand is evaluated first. For `and` with a false left side and for `or`
// with a true left side the left result is final and the right operand is not
// evaluated; in every other case the result is the value of the right operand.
bool USql::eval_logical_operator(LogicalOperatorNode &node, Table *pTable, Row &row) {
    const bool lhs = eval_where(&(*node.left), pTable, row);

    const bool and_decided = node.op == LogicalOperatorType::and_operator && !lhs;
    const bool or_decided = node.op == LogicalOperatorType::or_operator && lhs;
    if (and_decided || or_decided)
        return lhs;

    return eval_where(&(*node.right), pTable, row);
}
// Evaluates an arithmetic operator node for one row.
//
// copy_value returns the evaluated left operand unchanged. Otherwise both operands
// are evaluated; if either is null the result is a NullValueNode. The computation
// is done in the domain given by outType:
//   float_type   -> + - * / on doubles
//   integer_type -> + - * / on integers; division by zero throws
//                   Exception("division by zero") instead of running into undefined behaviour
//   varchar_type -> + (concatenation) only
// Unsupported operator/type combinations throw Exception("implement me!!").
std::unique_ptr<ValueNode> USql::eval_arithmetic_operator(ColumnType outType, ArithmeticalOperatorNode &node, Table *table, Row &row) {
    if (node.op == ArithmeticalOperatorType::copy_value) {
        return eval_value_node(table, row, node.left.get());
    }

    std::unique_ptr<ValueNode> left = eval_value_node(table, row, node.left.get());
    std::unique_ptr<ValueNode> right = eval_value_node(table, row, node.right.get());

    if (left->isNull() || right->isNull())
        return std::make_unique<NullValueNode>();

    if (outType == ColumnType::float_type) {
        double l = left->getDoubleValue();
        double r = right->getDoubleValue();
        switch (node.op) {
            case ArithmeticalOperatorType::plus_operator:
                return std::make_unique<DoubleValueNode>(l + r);
            case ArithmeticalOperatorType::minus_operator:
                return std::make_unique<DoubleValueNode>(l - r);
            case ArithmeticalOperatorType::multiply_operator:
                return std::make_unique<DoubleValueNode>(l * r);
            case ArithmeticalOperatorType::divide_operator:
                return std::make_unique<DoubleValueNode>(l / r);
            default:
                throw Exception("implement me!!");
        }
    } else if (outType == ColumnType::integer_type) {
        long l = left->getIntegerValue();
        long r = right->getIntegerValue();
        switch (node.op) {
            case ArithmeticalOperatorType::plus_operator:
                return std::make_unique<IntValueNode>(l + r);
            case ArithmeticalOperatorType::minus_operator:
                return std::make_unique<IntValueNode>(l - r);
            case ArithmeticalOperatorType::multiply_operator:
                return std::make_unique<IntValueNode>(l * r);
            case ArithmeticalOperatorType::divide_operator:
                // integer division by zero is undefined behaviour, report it as an error
                if (r == 0)
                    throw Exception("division by zero");
                return std::make_unique<IntValueNode>(l / r);
            default:
                throw Exception("implement me!!");
        }
    } else if (outType == ColumnType::varchar_type) {
        std::string l = left->getStringValue();
        std::string r = right->getStringValue();
        switch (node.op) {
            case ArithmeticalOperatorType::plus_operator:
                return std::make_unique<StringValueNode>(l + r);
            default:
                throw Exception("implement me!!");
        }
    }
    // TODO date node should support addition and subtraction
    throw Exception("implement me!!");
}
// Builds the one-row table used to report the outcome of a statement.
//
// Columns: "code" (integer), "desc" (varchar, at most 48 characters; longer text is
// truncated) and "affected_rows" (integer).
// The "affected_rows" column is declared with order 2; it used to be declared with
// order 0, the same order as "code", although its value is stored at index 2.
//
// @param code           status code stored in the first column
// @param text           description stored in the second column
// @param affected_rows  number stored in the third column
std::unique_ptr<Table> USql::create_stmt_result_table(long code, const std::string &text, size_t affected_rows) {
    constexpr int desc_max_len = 48;

    std::vector<ColDefNode> result_tbl_col_defs{};
    result_tbl_col_defs.emplace_back("code", ColumnType::integer_type, 0, 1, false);
    result_tbl_col_defs.emplace_back("desc", ColumnType::varchar_type, 1, desc_max_len, false);
    result_tbl_col_defs.emplace_back("affected_rows", ColumnType::integer_type, 2, 1, true);

    auto table_def = std::make_unique<Table>("result", result_tbl_col_defs);

    Row &new_row = table_def->create_empty_row();
    new_row.setIntColumnValue(0, code);
    // substr clamps to the end of the string, so shorter texts are copied whole
    new_row.setStringColumnValue(1, text.substr(0, desc_max_len));
    new_row.setIntColumnValue(2, (long)affected_rows);
    table_def->commit_row(new_row);

    return table_def;
}
// Returns a pointer to the first table in m_tables with the given name.
// Throws Exception("table not found (<name>)") when no table matches.
// The pointer refers to the element stored in m_tables.
Table *USql::find_table(const std::string &name) {
    // the predicate lives only for this call, so refer to the name instead of copying it
    auto name_cmp = [&name](const Table &t) { return t.m_name == name; };

    auto table_def = std::find_if(std::begin(m_tables), std::end(m_tables), name_cmp);
    if (table_def == std::end(m_tables))
        throw Exception("table not found (" + name + ")");

    // &* works for every iterator kind, unlike calling operator-> explicitly
    return &(*table_def);
}
// Throws Exception("table already exists") when m_tables contains a table with the
// given name; returns normally otherwise.
void USql::check_table_not_exists(const std::string &name) {
    // the name is captured by reference, the predicate does not outlive this call
    const bool already_exists = std::any_of(std::begin(m_tables), std::end(m_tables),
                                            [&name](const Table &t) { return t.m_name == name; });
    if (already_exists) {
        throw Exception("table already exists");
    }
}
} // namespace