diff --git a/CMakeLists.txt b/CMakeLists.txt index 30407fc..a240e37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,15 +7,15 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14") -project(msql) +project(usql) -set(PROJECT_NAME msql) +set(PROJECT_NAME usql) set(SOURCE - exception.cpp lexer.cpp parser.cpp executor.cpp main.cpp table.cpp table.h row.cpp row.h) + exception.cpp lexer.cpp parser.cpp executor.cpp main.cpp table.cpp table.h row.cpp row.h csvreader.cpp csvreader.h) add_executable(${PROJECT_NAME} ${SOURCE}) target_link_libraries(${PROJECT_NAME} stdc++ m) -target_compile_options(msql PRIVATE -g) +target_compile_options(usql PRIVATE -g) diff --git a/Readme.md b/Readme.md index b6ff5a7..895b698 100644 --- a/Readme.md +++ b/Readme.md @@ -1,6 +1,5 @@ ### TODO -- rename it to usql - rename Exception to UException, Table to UTable, Row to URow etc - remove newlines from lexed string tokens - unify using of float and double keywords diff --git a/csvreader.cpp b/csvreader.cpp new file mode 100644 index 0000000..c22bb60 --- /dev/null +++ b/csvreader.cpp @@ -0,0 +1,88 @@ + +#include "csvreader.h" +#include + +namespace usql { + + CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) { + skip_header = skip_hdr; + field_separator = field_sep; + quote_character = quote_ch; + line_separator = line_sep; + line_separator2 = line_sep2; + + header_skiped = false; + } + + std::vector> CsvReader::parseCSV(const std::string &csvSource) { + int linesRead = 0; + bool inQuote(false); + bool newLine(false); + std::string field; + + std::vector> parsed_data; + parsed_data.reserve(128); + + std::vector line; + line.reserve(32); + + std::string::const_iterator aChar = csvSource.begin(); + while (aChar != csvSource.end()) { + if (*aChar == quote_character) { + newLine = false; + inQuote = !inQuote; + } else if (*aChar == field_separator) { + newLine = false; + if (inQuote == true) { + field += *aChar; + } else { + 
line.push_back(field); + field.clear(); + } + } else if (*aChar == line_separator || *aChar == line_separator2) { + if (inQuote == true) { + field += *aChar; + } else { + if (newLine == false) { + line.push_back(field); + add_line(line, parsed_data); + field.clear(); + line.clear(); + linesRead++; + if (linesRead == 16) { + int linesEstimation = + csvSource.size() / + (std::distance(csvSource.begin(), aChar) / linesRead); + if (linesEstimation > parsed_data.capacity()) + parsed_data.reserve(linesEstimation); + } + newLine = true; + } + } + } else { + newLine = false; + field.push_back(*aChar); + } + + aChar++; + } + + if (field.size()) + line.push_back(field); + + add_line(line, parsed_data); + + return parsed_data; + } + + + void CsvReader::add_line(const std::vector &line, std::vector> &lines) { + if (skip_header && !header_skiped) { + header_skiped = true; + } else { + if (line.size()) + lines.push_back(line); + } + } + +} diff --git a/csvreader.h b/csvreader.h new file mode 100644 index 0000000..706b544 --- /dev/null +++ b/csvreader.h @@ -0,0 +1,31 @@ + +#pragma once + +#include +#include +#include +#include + +namespace usql { + + class CsvReader { + + private: + char field_separator; + char line_separator; + char line_separator2; + char quote_character; + + bool skip_header; + bool header_skiped; + + public: + CsvReader(bool skip_hdr = false, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', + char line_sep2 = '\n'); + + std::vector> parseCSV(const std::string &csvSource); + + private: + void add_line(const std::vector &line, std::vector> &lines); + }; +} diff --git a/data.csv b/data.csv new file mode 100644 index 0000000..177e13f --- /dev/null +++ b/data.csv @@ -0,0 +1,3 @@ +Ticker,Price +FDX,257.3 +C,59.85 \ No newline at end of file diff --git a/exception.cpp b/exception.cpp index a5364a0..894cc3d 100644 --- a/exception.cpp +++ b/exception.cpp @@ -1,9 +1,11 @@ #include "exception.h" +namespace usql { -Exception::Exception(const std::string 
&msg) { - cause = msg; -} + Exception::Exception(const std::string &msg) { + cause = msg; + } -const char* Exception::what() const noexcept { return cause.c_str(); } \ No newline at end of file + const char *Exception::what() const noexcept { return cause.c_str(); } +} \ No newline at end of file diff --git a/exception.h b/exception.h index c44a483..12cd6fc 100644 --- a/exception.h +++ b/exception.h @@ -4,12 +4,16 @@ #include -class Exception : public std::exception { -private: - std::string cause; +namespace usql { -public: + class Exception : public std::exception { + private: + std::string cause; + + public: Exception(const std::string &msg); - const char* what() const noexcept; -}; + const char *what() const noexcept; + }; + +} \ No newline at end of file diff --git a/executor.cpp b/executor.cpp index 676cb24..f85f0fb 100644 --- a/executor.cpp +++ b/executor.cpp @@ -1,330 +1,419 @@ #include "executor.h" #include "exception.h" +#include "csvreader.h" #include +#include +namespace usql { -Executor::Executor() { - m_tables.clear(); -} - -Table* Executor::find_table(const std::string name) { - auto name_cmp = [name](Table t){ return t.m_name == name; }; - auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp ); - if (table_def != std::end(m_tables)) { - return table_def.operator->(); - } else { - throw Exception("table not found (" + name + ")"); - } -} - - -bool Executor::execute(Node& node) { - // TODO optimize node here - switch (node.node_type) { - case NodeType::create_table: - return execute_create_table(static_cast(node)); - case NodeType::insert_into: - return execute_insert_into_table(static_cast(node)); - case NodeType::select_from: - return execute_select(static_cast(node)); - case NodeType::delete_from: - return execute_delete(static_cast(node)); - case NodeType::update_table: - return execute_update(static_cast(node)); - default: - // TODO error message - return false; - } - -} - -bool Executor::execute_create_table(CreateTableNode& 
node) { - // TODO check table does not exists - Table table{node.table_name, node.cols_defs}; - m_tables.push_back(table); - - return true; -} - -bool Executor::execute_insert_into_table(InsertIntoTableNode& node) { - // TODO check column names.size = values.size - - // find table - Table* table_def = find_table(node.table_name); - - // prepare empty new_row - Row new_row = table_def->createEmptyRow(); - - // copy values - for(size_t i=0; iget_column_def(colNameNode.name); - - // TODO validate value - - if (col_def.type == ColumnType::integer_type) { - new_row.setColumnValue(col_def.order, std::stoi(node.cols_values[i].value)); - } else if (col_def.type == ColumnType::float_type) { - new_row.setColumnValue(col_def.order, std::stof(node.cols_values[i].value)); - } else { - new_row.setColumnValue(col_def.order, node.cols_values[i].value); - } - } - - // TODO check not null columns - - // append new_row - table_def->addRow(new_row); - - return true; -} - -bool Executor::execute_select(SelectFromTableNode& node) { - // TODO create plan for accessing rows - - // find source table - Table* table = find_table(node.table_name); - - // create result table - std::vector result_tbl_col_defs{}; - std::vector source_table_col_index{}; - int i = 0; // new column order - for(ColNameNode rc : node.cols_names) { - ColDefNode cdef = table->get_column_def(rc.name); - source_table_col_index.push_back(cdef.order); - - auto col = ColDefNode(rc.name, cdef.type, i, cdef.length, cdef.null); - result_tbl_col_defs.push_back(col); - - i++; - } - Table result {"result", result_tbl_col_defs}; - - // execute access plan - for (auto row = begin (table->m_rows); row != end (table->m_rows); ++row) { - // eval where for row - if (evalWhere(node.where.get(), table, row)) { - // prepare empty row - Row new_row = result.createEmptyRow(); - - // copy column values - for(auto idx=0; idxithColumn(row_col_index); - if (result_tbl_col_defs[idx].type == ColumnType::integer_type) - new_row.setColumnValue(idx, 
((ColIntegerValue*)col_value)->integerValue()); - if (result_tbl_col_defs[idx].type == ColumnType::float_type) - new_row.setColumnValue(idx, col_value->floatValue()); - if (result_tbl_col_defs[idx].type == ColumnType::varchar_type) - new_row.setColumnValue(idx, col_value->stringValue()); - } - - // add row to result - result.m_rows.push_back(new_row); - } - } - - result.print(); - - return true; -} - -bool Executor::execute_delete(DeleteFromTableNode& node) { - // TODO create plan for accessing rows - - // find source table - Table* table = find_table(node.table_name); - - // execute access plan - auto it = table->m_rows.begin(); - for ( ; it != table->m_rows.end(); ) { - if (evalWhere(node.where.get(), table, it)) { - // TODO this can be really expensive operation - it = table->m_rows.erase(it); - } else { - ++it; - } - } - - return true; -} - -bool Executor::execute_update(UpdateTableNode &node) { - // TODO create plan for accessing rows - - // find source table - Table* table = find_table(node.table_name); - - // execute access plan - for (auto row = begin (table->m_rows); row != end (table->m_rows); ++row) { - // eval where for row - if (evalWhere(node.where.get(), table, row)) { - // TODO do update - int i = 0; - for(auto col : node.cols_names) { - // TODO cache it like in select - ColDefNode cdef = table->get_column_def(col.name); - - std::unique_ptr new_val = evalArithmetic(static_cast(*node.values[i]), table, row); - - if (cdef.type == ColumnType::integer_type) { - row->setColumnValue(cdef.order, ((IntValueNode*)new_val.get())->value); - } else if (cdef.type == ColumnType::float_type) { - row->setColumnValue(cdef.order, ((FloatValueNode*)new_val.get())->value); - } else { - throw Exception("Implement me!"); - } - i++; - } - } - } - - return true; -} - - -bool Executor::evalWhere(Node *where, Table *table, - std::vector>::iterator &row) const { - switch (where->node_type) { // no where clause - case NodeType::true_node: - return true; - case 
NodeType::relational_operator: // just one condition - return evalRelationalOperator(*((RelationalOperatorNode *)where), table, row); - case NodeType::logical_operator: - return evalLogicalOperator(*((LogicalOperatorNode *)where), table, row); - default: - throw Exception("Wrong node type"); - } - - return false; -} - -bool Executor::evalRelationalOperator(const RelationalOperatorNode &filter, Table *table, std::vector>::iterator &row) const { - std::unique_ptr left_value = evalNode(table, row, filter.left.get()); - std::unique_ptr right_value = evalNode(table, row, filter.right.get()); - - double comparator; - - if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::int_value) { - auto lvalue = static_cast(left_value.get()); - auto rvalue = static_cast(right_value.get()); - comparator = lvalue->value - rvalue->value; - } - if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::float_value) { - auto *lvalue = static_cast(left_value.get()); - auto *rvalue = static_cast(right_value.get()); - comparator = (double)lvalue->value - rvalue->value; - } - if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::string_value) { - auto *lvalue = static_cast(left_value.get()); - auto *rvalue = static_cast(right_value.get()); - comparator = std::to_string(lvalue->value).compare(rvalue->value); + Executor::Executor() { + m_tables.clear(); } - if (left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::int_value) { - auto *lvalue = static_cast(left_value.get()); - auto *rvalue = static_cast(right_value.get()); - comparator = lvalue->value - (double)rvalue->value; - } - if (left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::float_value) { - auto *lvalue = static_cast(left_value.get()); - auto *rvalue = static_cast(right_value.get()); - comparator = lvalue->value - rvalue->value; - } - if (left_value->node_type == 
NodeType::float_value && right_value->node_type == NodeType::string_value) { - auto *lvalue = static_cast(left_value.get()); - auto *rvalue = static_cast(right_value.get()); - comparator = std::to_string(lvalue->value).compare(rvalue->value); + Table *Executor::find_table(const std::string name) { + auto name_cmp = [name](Table t) { return t.m_name == name; }; + auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp); + if (table_def != std::end(m_tables)) { + return table_def.operator->(); + } else { + throw Exception("table not found (" + name + ")"); + } } - if (left_value->node_type == NodeType::string_value && right_value->node_type == NodeType::int_value) { - StringValueNode *lvalue = static_cast(left_value.get()); - IntValueNode *rvalue = static_cast(right_value.get()); - comparator = lvalue->value.compare(std::to_string(rvalue->value)); - } - if (left_value->node_type == NodeType::string_value && right_value->node_type == NodeType::float_value) { - StringValueNode *lvalue = static_cast(left_value.get()); - FloatValueNode *rvalue = static_cast(right_value.get()); - comparator = lvalue->value.compare(std::to_string(rvalue->value)); - } - if (left_value->node_type == NodeType::string_value && right_value->node_type == NodeType::string_value) { - StringValueNode *lvalue = static_cast(left_value.get()); - StringValueNode *rvalue = static_cast(right_value.get()); - comparator = lvalue->value.compare(rvalue->value); + std::unique_ptr Executor::create_stmt_result_table(int code, std::string text) { + std::vector result_tbl_col_defs{}; + result_tbl_col_defs.push_back(ColDefNode("code", ColumnType::integer_type, 0, 1, false)); + result_tbl_col_defs.push_back(ColDefNode("desc", ColumnType::varchar_type, 1, 255, false)); + + auto table_def = std::make_unique
("result", result_tbl_col_defs); + + Row new_row = table_def->createEmptyRow(); + new_row.setColumnValue(0, code); + new_row.setColumnValue(1, text); + table_def->addRow(new_row); + + return std::move(table_def); } - switch (filter.op) { - case RelationalOperatorType::equal: - return comparator == 0.0; - case RelationalOperatorType::not_equal: - return comparator != 0.0; - case RelationalOperatorType::greater: - return comparator > 0.0; - case RelationalOperatorType::greater_equal: - return comparator >= 0.0; - case RelationalOperatorType::lesser: - return comparator < 0.0; - case RelationalOperatorType::lesser_equal: - return comparator <= 0.0; + std::unique_ptr
Executor::execute(Node &node) { + // TODO optimize execution nodes here + switch (node.node_type) { + case NodeType::create_table: + return execute_create_table(static_cast(node)); + case NodeType::insert_into: + return execute_insert_into_table(static_cast(node)); + case NodeType::select_from: + return execute_select(static_cast(node)); + case NodeType::delete_from: + return execute_delete(static_cast(node)); + case NodeType::update_table: + return execute_update(static_cast(node)); + case NodeType::load_table: + return execute_load(static_cast(node)); + default: + return create_stmt_result_table(-1, "unknown statement"); + } + } - throw Exception("invalid relational operator"); -} -std::unique_ptr Executor::evalNode(Table *table, std::vector>::iterator &row, Node *node) const { - if (node->node_type == NodeType::database_value) { - DatabaseValueNode *dvl = static_cast(node); - ColDefNode col_def = table->get_column_def(dvl->col_name); // TODO optimize it to just get this def once - auto db_value = row->ithColumn(col_def.order); + std::unique_ptr
Executor::execute_create_table(CreateTableNode &node) { + // TODO check table does not exists + Table table{node.table_name, node.cols_defs}; + m_tables.push_back(table); - if (col_def.type == ColumnType::integer_type) { - return std::make_unique(db_value->integerValue()); - } - if (col_def.type == ColumnType::float_type) { - return std::make_unique(db_value->floatValue()); - } - if (col_def.type == ColumnType::varchar_type) { - return std::make_unique(db_value->stringValue()); - } - - } else if (node->node_type == NodeType::int_value) { - IntValueNode *ivl = static_cast(node); - return std::make_unique(ivl->value); - - } else if (node->node_type == NodeType::float_value) { - FloatValueNode *ivl = static_cast(node); - return std::make_unique(ivl->value); - - } else if (node->node_type == NodeType::string_value) { - StringValueNode *ivl = static_cast(node); - return std::make_unique(ivl->value); + return create_stmt_result_table(0, "table created"); } - throw Exception("invalid type"); -} -bool Executor::evalLogicalOperator(LogicalOperatorNode &node, Table *pTable, - std::vector>::iterator &iter) const { - bool left = evalRelationalOperator(static_cast(*node.left), pTable, iter); + std::unique_ptr
Executor::execute_insert_into_table(InsertIntoTableNode &node) { + // TODO check column names.size = values.size - if ((node.op == LogicalOperatorType::and_operator && !left) || (node.op == LogicalOperatorType::or_operator && left)) - return left; + // find table + Table *table_def = find_table(node.table_name); - bool right = evalRelationalOperator(static_cast(*node.right), pTable, iter); - return right; -} + // prepare empty new_row + Row new_row = table_def->createEmptyRow(); -std::unique_ptr Executor::evalArithmetic(ArithmeticalOperatorNode &node, Table *table, - std::vector>::iterator &row) const { + // copy values + for (size_t i = 0; i < node.cols_names.size(); i++) { + ColDefNode col_def = table_def->get_column_def(node.cols_names[i].name); - switch (node.op) { - case ArithmeticalOperatorType::copy_value: - return evalNode(table, row, node.left.get()); - default: - throw Exception("implement me!!"); + // TODO validate value + + if (col_def.type == ColumnType::integer_type) { + new_row.setColumnValue(col_def.order, std::stoi(node.cols_values[i].value)); + } else if (col_def.type == ColumnType::float_type) { + new_row.setColumnValue(col_def.order, std::stof(node.cols_values[i].value)); + } else { + new_row.setColumnValue(col_def.order, node.cols_values[i].value); + } + } + + // append new_row + table_def->addRow(new_row); + + return create_stmt_result_table(0, "insert succeded"); } + + + std::unique_ptr
Executor::execute_select(SelectFromTableNode &node) {
+        // Returns a new table named "result" that holds, for every row of the source
+        // table accepted by the WHERE clause, the columns listed in node.cols_names.
+        // find_table() throws Exception when node.table_name is not a known table.
+        // TODO create plan for accessing rows
+
+        Table *source = find_table(node.table_name);
+
+        // Describe the result columns; alongside, remember where each of them lives
+        // in the source table so that rows can be copied by position.
+        std::vector<ColDefNode> result_tbl_col_defs{};
+        std::vector<int> source_table_col_index{};
+        int new_order = 0; // position of the column inside the result table
+        for (const ColNameNode &requested : node.cols_names) {
+            ColDefNode source_def = source->get_column_def(requested.name);
+            source_table_col_index.push_back(source_def.order);
+
+            result_tbl_col_defs.push_back(
+                    ColDefNode(requested.name, source_def.type, new_order, source_def.length, source_def.null));
+            ++new_order;
+        }
+        auto result = std::make_unique<Table>("result", result_tbl_col_defs);
+
+        // execute access plan
+        for (auto row = begin(source->m_rows); row != end(source->m_rows); ++row) {
+            // rows rejected by the where clause are left out of the result
+            if (!evalWhere(node.where.get(), source, row))
+                continue;
+
+            Row new_row = result->createEmptyRow();
+
+            // copy column values, converting by the declared type of each result column
+            for (auto idx = 0; idx < result->columns_count(); idx++) {
+                ColValue *col_value = row->ithColumn(source_table_col_index[idx]);
+
+                switch (result_tbl_col_defs[idx].type) {
+                    case ColumnType::integer_type:
+                        new_row.setColumnValue(idx, ((ColIntegerValue *) col_value)->integerValue());
+                        break;
+                    case ColumnType::float_type:
+                        new_row.setColumnValue(idx, col_value->floatValue());
+                        break;
+                    case ColumnType::varchar_type:
+                        new_row.setColumnValue(idx, col_value->stringValue());
+                        break;
+                    default: // columns of any other type are not copied
+                        break;
+                }
+            }
+
+            result->m_rows.push_back(new_row);
+        }
+
+        return result;
+    }
+
+
+    std::unique_ptr
Executor::execute_delete(DeleteFromTableNode &node) {
+        // Removes every row of the named table for which the WHERE clause holds and
+        // returns the one-row status table built by create_stmt_result_table (code 0).
+        // find_table() throws Exception when node.table_name is not a known table.
+        // TODO create plan for accessing rows
+
+        Table *target = find_table(node.table_name);
+
+        auto cursor = target->m_rows.begin();
+        while (cursor != target->m_rows.end()) {
+            if (!evalWhere(node.where.get(), target, cursor)) {
+                ++cursor;
+                continue;
+            }
+
+            // TODO this can be really expensive operation
+            // erase() returns the iterator that follows the removed row, so the
+            // cursor must not be advanced a second time
+            cursor = target->m_rows.erase(cursor);
+        }
+
+        return create_stmt_result_table(0, "delete succeded");
+    }
+
+
+    std::unique_ptr
Executor::execute_update(UpdateTableNode &node) {
+        // For every row accepted by the WHERE clause, evaluates node.values[i] and
+        // stores the result in the column named by node.cols_names[i].
+        // Returns a one-row status table (code 0, "update succeeded").
+        // Throws Exception when the table is unknown (see find_table) or when a target
+        // column is neither an integer, a float nor a varchar column.
+        // TODO create plan for accessing rows
+
+        // find source table
+        Table *table = find_table(node.table_name);
+
+        // execute access plan
+        for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) {
+            // eval where for row
+            if (!evalWhere(node.where.get(), table, row))
+                continue;
+
+            int i = 0; // index of the value expression paired with the current column
+            for (const auto &col : node.cols_names) {
+                // TODO cache it like in select
+                ColDefNode cdef = table->get_column_def(col.name);
+
+                auto new_val = evalArithmetic(cdef.type,
+                                              static_cast<ArithmeticalOperatorNode &>(*node.values[i]),
+                                              table, row);
+
+                if (cdef.type == ColumnType::integer_type) {
+                    row->setColumnValue(cdef.order, new_val->getIntValue());
+                } else if (cdef.type == ColumnType::float_type) {
+                    row->setColumnValue(cdef.order, new_val->getDoubleValue());
+                } else if (cdef.type == ColumnType::varchar_type) {
+                    row->setColumnValue(cdef.order, new_val->getStringValue());
+                } else {
+                    throw Exception("Implement me!");
+                }
+                i++;
+            }
+        }
+
+        // the status text has to name the statement that actually ran
+        return create_stmt_result_table(0, "update succeeded");
+    }
+
+
+    std::unique_ptr
Executor::execute_load(LoadIntoTableNode &node) {
+        // Appends the records of the CSV file node.filename to the table
+        // node.table_name and returns a one-row status table (code 0).
+        // The first CSV record is treated as a header and is not loaded; field i of
+        // every other record is converted to the type of the table's i-th column.
+        // Nothing is added to the table unless every record could be converted.
+        // Throws Exception when the table is unknown, the file cannot be opened or a
+        // record has fewer fields than the table has columns.
+
+        // find target table
+        Table *table_def = find_table(node.table_name);
+
+        // read data
+        std::ifstream ifs(node.filename);
+        if (!ifs) {
+            // an unreadable file must not be mistaken for an empty one
+            throw Exception(std::string("cannot open file (") + node.filename + ")");
+        }
+        std::string content((std::istreambuf_iterator<char>(ifs)),
+                            (std::istreambuf_iterator<char>()));
+
+        // skip_hdr = true makes the reader drop the header record itself, so an
+        // empty file simply yields no records instead of an out-of-range iterator
+        CsvReader csvparser{true};
+        auto csv = csvparser.parseCSV(content);
+
+        auto &colDefs = table_def->m_col_defs;
+        const size_t columns = table_def->columns_count();
+
+        std::vector<Row> new_rows;
+        new_rows.reserve(csv.size());
+
+        for (const auto &csv_line : csv) {
+            if (csv_line.size() < columns) {
+                throw Exception(std::string("csv record has fewer fields than table (") +
+                                node.table_name + ") has columns");
+            }
+
+            // prepare empty new_row
+            Row new_row = table_def->createEmptyRow();
+
+            // copy values
+            for (size_t i = 0; i < columns; i++) {
+                ColDefNode col_def = table_def->get_column_def(colDefs[i].name);
+
+                // TODO validate value
+                if (col_def.type == ColumnType::integer_type) {
+                    new_row.setColumnValue(col_def.order, std::stoi(csv_line[i]));
+                } else if (col_def.type == ColumnType::float_type) {
+                    new_row.setColumnValue(col_def.order, std::stof(csv_line[i]));
+                } else {
+                    new_row.setColumnValue(col_def.order, csv_line[i]);
+                }
+            }
+
+            new_rows.push_back(new_row);
+        }
+
+        // append the converted rows only now that the whole file has been accepted
+        for (Row &pending : new_rows) {
+            table_def->addRow(pending);
+        }
+
+        return create_stmt_result_table(0, "load succeeded");
+    }
+
+
+    bool Executor::evalWhere(Node *where, Table *table,
+                             std::vector<Row, std::allocator<Row>>::iterator &row) const {
+        // Tells whether `row` satisfies the WHERE tree rooted at `where`.
+        // Throws Exception("Wrong node type") when the root is none of the node
+        // kinds handled below.
+        switch (where->node_type) {
+            case NodeType::true_node: // no where clause
+                return true;
+            case NodeType::relational_operator: // just one condition
+                return evalRelationalOperator(*((RelationalOperatorNode *) where), table, row);
+            case NodeType::logical_operator:
+                return evalLogicalOperator(*((LogicalOperatorNode *) where), table, row);
+            default:
+                throw Exception("Wrong node type");
+        }
+
+        return false;
+    }
+
+
+    bool Executor::evalRelationalOperator(const RelationalOperatorNode &filter, Table *table,
+                                          std::vector<Row, std::allocator<Row>>::iterator &row) const {
+        // Evaluates both operands for `row` and applies filter.op to them.
+        // Two integers are compared as integers, any other pair of numbers as
+        // doubles; as soon as one operand is a string both are compared as strings.
+        // Throws Exception for any other operand combination or an unknown operator.
+        auto left_value = evalNode(table, row, filter.left.get());
+        auto right_value = evalNode(table, row, filter.right.get());
+
+        const auto left_type = left_value->node_type;
+        const auto right_type = right_value->node_type;
+        const bool left_is_number = left_type == NodeType::int_value || left_type == NodeType::float_value;
+        const bool right_is_number = right_type == NodeType::int_value || right_type == NodeType::float_value;
+
+        // negative, zero or positive as left sorts before, equal to or after right
+        double comparator;
+
+        if (left_type == NodeType::int_value && right_type == NodeType::int_value) {
+            // compare rather than subtract: the difference of two ints can overflow
+            const auto l = left_value->getIntValue();
+            const auto r = right_value->getIntValue();
+            comparator = (l > r) - (l < r);
+        } else if (left_is_number && right_is_number) {
+            comparator = left_value->getDoubleValue() - right_value->getDoubleValue();
+        } else if (left_type == NodeType::string_value || right_type == NodeType::string_value) {
+            comparator = left_value->getStringValue().compare(right_value->getStringValue());
+        } else {
+            // never reach the switch below with `comparator` left unset
+            throw Exception("invalid operand types for relational operator");
+        }
+
+        switch (filter.op) {
+            case RelationalOperatorType::equal:
+                return comparator == 0.0;
+            case RelationalOperatorType::not_equal:
+                return comparator != 0.0;
+            case RelationalOperatorType::greater:
+                return comparator > 0.0;
+            case RelationalOperatorType::greater_equal:
+                return comparator >= 0.0;
+            case RelationalOperatorType::lesser:
+                return comparator < 0.0;
+            case RelationalOperatorType::lesser_equal:
+                return comparator <= 0.0;
+        }
+
+        throw Exception("invalid relational operator");
+    }
+
+
+    std::unique_ptr<ValueNode>
+    Executor::evalNode(Table *table, std::vector<Row, std::allocator<Row>>::iterator &row, Node *node) const {
+        // Turns an operand node into a value node: a database_value node yields the
+        // value `row` holds in that column, an int/float/string literal yields a copy
+        // of itself. Throws Exception("invalid type") for anything else.
+        if (node->node_type == NodeType::database_value) {
+            DatabaseValueNode *dvl = static_cast<DatabaseValueNode *>(node);
+            ColDefNode col_def = table->get_column_def(dvl->col_name); // TODO optimize it to just get this def once
+            auto db_value = row->ithColumn(col_def.order);
+
+            if (col_def.type == ColumnType::integer_type) {
+                return std::make_unique<IntValueNode>(db_value->integerValue());
+            }
+            if (col_def.type == ColumnType::float_type) {
+                return std::make_unique<FloatValueNode>(db_value->floatValue());
+            }
+            if (col_def.type == ColumnType::varchar_type) {
+                return std::make_unique<StringValueNode>(db_value->stringValue());
+            }
+
+        } else if (node->node_type == NodeType::int_value) {
+            IntValueNode *ivl = static_cast<IntValueNode *>(node);
+            return std::make_unique<IntValueNode>(ivl->value);
+
+        } else if (node->node_type == NodeType::float_value) {
+            FloatValueNode *ivl = static_cast<FloatValueNode *>(node);
+            return std::make_unique<FloatValueNode>(ivl->value);
+
+        } else if (node->node_type == NodeType::string_value) {
+            StringValueNode *ivl = static_cast<StringValueNode *>(node);
+            return std::make_unique<StringValueNode>(ivl->value);
+        }
+
+        throw Exception("invalid type");
+    }
+
+
+    bool Executor::evalLogicalOperator(LogicalOperatorNode &node, Table *pTable,
+                                       std::vector<Row, std::allocator<Row>>::iterator &iter) const {
+        // Evaluates `left AND right` / `left OR right` for the row at `iter`; the
+        // right operand is skipped when the left one already decides the result.
+        // Operands go through evalWhere so that each one is dispatched on its own
+        // node type instead of being assumed to be a relational operator.
+        bool left = evalWhere(node.left.get(), pTable, iter);
+
+        if ((node.op == LogicalOperatorType::and_operator && !left) ||
+            (node.op == LogicalOperatorType::or_operator && left))
+            return left;
+
+        return evalWhere(node.right.get(), pTable, iter);
+    }
+
+
+    std::unique_ptr<ValueNode>
+    Executor::evalArithmetic(ColumnType outType, ArithmeticalOperatorNode &node, Table *table,
+                             std::vector<Row, std::allocator<Row>>::iterator &row) const {
+        // Evaluates an arithmetic expression for `row`, computing in the domain of
+        // outType: + - * / for float and integer columns, + (concatenation) for
+        // varchar columns. A copy_value node just returns its left operand.
+        // Throws Exception for unsupported operators or column types and for an
+        // integer division by zero.
+        if (node.op == ArithmeticalOperatorType::copy_value) {
+            return evalNode(table, row, node.left.get());
+        }
+
+        auto left = evalNode(table, row, node.left.get());
+        auto right = evalNode(table, row, node.right.get());
+
+        if (outType == ColumnType::float_type) {
+            double l = ((ValueNode *) left.get())->getDoubleValue();
+            double r = ((ValueNode *) right.get())->getDoubleValue();
+            switch (node.op) {
+                case ArithmeticalOperatorType::plus_operator:
+                    return std::make_unique<FloatValueNode>(l + r);
+                case ArithmeticalOperatorType::minus_operator:
+                    return std::make_unique<FloatValueNode>(l - r);
+                case ArithmeticalOperatorType::multiply_operator:
+                    return std::make_unique<FloatValueNode>(l * r);
+                case ArithmeticalOperatorType::divide_operator:
+                    return std::make_unique<FloatValueNode>(l / r);
+                default:
+                    throw Exception("implement me!!");
+            }
+        } else if (outType == ColumnType::integer_type) {
+            int l = ((ValueNode *) left.get())->getIntValue();
+            int r = ((ValueNode *) right.get())->getIntValue();
+            switch (node.op) {
+                case ArithmeticalOperatorType::plus_operator:
+                    return std::make_unique<IntValueNode>(l + r);
+                case ArithmeticalOperatorType::minus_operator:
+                    return std::make_unique<IntValueNode>(l - r);
+                case ArithmeticalOperatorType::multiply_operator:
+                    return std::make_unique<IntValueNode>(l * r);
+                case ArithmeticalOperatorType::divide_operator:
+                    // integer division by zero is undefined behaviour; report it instead
+                    if (r == 0)
+                        throw Exception("division by zero");
+                    return std::make_unique<IntValueNode>(l / r);
+                default:
+                    throw Exception("implement me!!");
+            }
+
+        } else if (outType == ColumnType::varchar_type) {
+            std::string l = ((ValueNode *) left.get())->getStringValue();
+            std::string r = ((ValueNode *) right.get())->getStringValue();
+            switch (node.op) {
+                case ArithmeticalOperatorType::plus_operator:
+                    return std::make_unique<StringValueNode>(l + r);
+
+                default:
+                    throw Exception("implement me!!");
+            }
+        }
+
+        throw Exception("implement me!!");
+    }
+ } \ No newline at end of file diff --git a/executor.h b/executor.h index 53e491e..3828160 100644 --- a/executor.h +++ b/executor.h @@ -5,38 +5,50 @@ #include -class Executor { -private: +namespace usql { -public: - Executor(); + class Executor { + private: - bool execute(Node& node); + public: + Executor(); -private: - bool execute_create_table(CreateTableNode& node); - bool execute_insert_into_table(InsertIntoTableNode& node); - bool execute_select(SelectFromTableNode& node); - bool execute_delete(DeleteFromTableNode& node); - bool execute_update(UpdateTableNode& node); + std::unique_ptr
execute(Node &node); - Table* find_table(const std::string name); -private: - std::vector
m_tables; + private: + std::unique_ptr
execute_create_table(CreateTableNode &node); - bool evalWhere(Node *where, Table *table, - std::vector>::iterator &row) const; + std::unique_ptr
execute_insert_into_table(InsertIntoTableNode &node); - std::unique_ptr - evalNode(Table *table, std::vector>::iterator &row, - Node *node) const; + std::unique_ptr
execute_select(SelectFromTableNode &node); - bool evalRelationalOperator(const RelationalOperatorNode &filter, Table *table, - std::vector>::iterator &row) const; + std::unique_ptr
execute_delete(DeleteFromTableNode &node); - bool evalLogicalOperator(LogicalOperatorNode &node, Table *pTable, - std::vector>::iterator &iter) const; + std::unique_ptr
execute_update(UpdateTableNode &node); - std::unique_ptr evalArithmetic(ArithmeticalOperatorNode &node, Table *table, - std::vector>::iterator &row) const; -}; + std::unique_ptr
execute_load(LoadIntoTableNode &node); + + Table *find_table(const std::string name); + + std::unique_ptr
create_stmt_result_table(int code, std::string text); + + private: + std::vector
m_tables; + + bool evalWhere(Node *where, Table *table, + std::vector>::iterator &row) const; + + std::unique_ptr evalNode(Table *table, std::vector>::iterator &row, + Node *node) const; + + bool evalRelationalOperator(const RelationalOperatorNode &filter, Table *table, + std::vector>::iterator &row) const; + + bool evalLogicalOperator(LogicalOperatorNode &node, Table *pTable, + std::vector>::iterator &iter) const; + + std::unique_ptr evalArithmetic(ColumnType outType, ArithmeticalOperatorNode &node, Table *table, + std::vector>::iterator &row) const; + }; + +} \ No newline at end of file diff --git a/lexer.cpp b/lexer.cpp index fb2eddd..a9477ee 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -3,395 +3,415 @@ #include +namespace usql { + + Token::Token(const std::string &token_str, TokenType typ) { + token_string = token_str; + type = typ; + } + + + Lexer::Lexer() { + k_words_regex = + "[0-9]+\\.[0-9]+|[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" + ",;:\?]|==|>=|<=|~=|>|<|=|;|~|\\||or|and|\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"; + k_int_regex = "[0-9]+"; + k_int_underscored_regex = "[0-9][0-9_]+[0-9]"; + k_double_regex = "[0-9]+\\.[0-9]+"; + k_identifier_regex = "[A-Za-z]+[A-Za-z0-9_#]*"; + } + + void Lexer::parse(const std::string &code) { + // TODO handle empty code + m_tokens.clear(); + + // PERF something like this to preallocate ?? 
+ if (code.size() > 100) { + m_tokens.reserve(code.size() / 10); + } + m_code_str = code; + if (!m_code_str.empty() && m_code_str.back() != '\n') { + m_code_str.append("\n"); // TODO temp solution to prevent possible situation when last line is a comment + } + + auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex); + auto words_end = std::sregex_iterator(); + + for (std::sregex_iterator i = words_begin; i != words_end; ++i) { + std::smatch match = *i; + std::string match_str = match.str(); + TokenType token_type = type(match_str); + if (token_type == TokenType::string_literal) + match_str = stringLiteral(match_str); + + if (token_type != TokenType::newline) + m_tokens.push_back(Token{match_str, token_type}); + } + + // DEBUG IT + // debugTokens(); + + m_index = 0; + } + + void Lexer::debugTokens() { + int i = 0; + for (std::vector::iterator it = m_tokens.begin(); it != m_tokens.end(); ++it) { + std::cerr << i << "\t" << it->token_string << std::endl; + i++; + } + } + + Token Lexer::currentToken() { return m_tokens[m_index]; } + + Token Lexer::consumeCurrentToken() { + int i = m_index; + nextToken(); + return m_tokens[i]; + } + + void Lexer::nextToken() { + if (m_index < m_tokens.size()) { + m_index++; + } + } + + void Lexer::skipToken(TokenType type) { + if (tokenType() == type) { + nextToken(); + } else { + throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " + + typeToString(type)); + } + } -Token::Token(const std::string &token_str, TokenType typ) { - token_string = token_str; - type = typ; -} - -void Lexer::parse(const std::string &code) { - // TODO handle empty code - m_tokens.clear(); - - // PERF something like this to preallocate ?? 
- if (code.size() > 100) { - m_tokens.reserve(code.size() / 10); - } - m_code_str = code; - if (!m_code_str.empty() && m_code_str.back() != '\n') { - m_code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment - } - - // TODO make it constant - std::regex words_regex("[0-9]+\\.[0-9]+|[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" - ",;:\?]|==|>=|<=|~=|>|<|=|;|~|\\||or|and|\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"); - - auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), words_regex); - auto words_end = std::sregex_iterator(); - - for (std::sregex_iterator i = words_begin; i != words_end; ++i) { - std::smatch match = *i; - std::string match_str = match.str(); - TokenType token_type = type(match_str); - if (token_type == TokenType::string_literal) - match_str = stringLiteral(match_str); - - if (token_type != TokenType::newline) - m_tokens.push_back(Token{match_str, token_type}); - } - - // DEBUG IT - // debugTokens(); - - m_index = 0; -} - -void Lexer::debugTokens() { - int i = 0; - for (std::vector::iterator it = m_tokens.begin(); it != m_tokens.end(); ++it) { - std::cerr << i << "\t" << it->token_string << std::endl; - i++; - } -} + void Lexer::skipTokenOptional(TokenType type) { + if (tokenType() == type) { + nextToken(); + } + } -Token Lexer::currentToken() { return m_tokens[m_index]; } - -Token Lexer::consumeCurrentToken() { - int i = m_index; - nextToken(); - return m_tokens[i]; -} + TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } -void Lexer::nextToken() { - if (m_index < m_tokens.size()) { - m_index++; - } -} + TokenType Lexer::nextTokenType() { + return m_index < m_tokens.size() - 1 ? 
m_tokens[m_index + 1].type : TokenType::eof; + } -void Lexer::skipToken(TokenType type) { - if (tokenType() == type) { - nextToken(); - } else { - throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " + typeToString(type)); - } -} + TokenType Lexer::prevTokenType() { return m_index > 0 ? m_tokens[m_index - 1].type : TokenType::undef; } -void Lexer::skipTokenOptional(TokenType type) { - if (tokenType() == type) { - nextToken(); - } -} + bool Lexer::isRelationalOperator(TokenType token_type) { + return (token_type == TokenType::equal || token_type == TokenType::not_equal || + token_type == TokenType::greater || token_type == TokenType::greater_equal || + token_type == TokenType::lesser || token_type == TokenType::lesser_equal); + } -TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } + bool Lexer::isLogicalOperator(TokenType token_type) { + return (token_type == TokenType::logical_and || token_type == TokenType::logical_or); + } -TokenType Lexer::nextTokenType() { return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; } + bool Lexer::isArithmeticalOperator(TokenType token_type) { + return (token_type == TokenType::plus || token_type == TokenType::minus || + token_type == TokenType::multiply || + token_type == TokenType::divide); + } -TokenType Lexer::prevTokenType() { return m_index > 0 ? 
m_tokens[m_index - 1].type : TokenType::undef; } + TokenType Lexer::type(const std::string &token) { + // TODO, FIXME 'one is evaluated as identifier + if (token == ";") + return TokenType::semicolon; -bool Lexer::isRelationalOperator(TokenType token_type) { - return (token_type == TokenType::equal || token_type == TokenType::not_equal || token_type == TokenType::greater || token_type == TokenType::greater_equal || - token_type == TokenType::lesser || token_type == TokenType::lesser_equal); -} + if (token == "+") + return TokenType::plus; -bool Lexer::isLogicalOperator(TokenType token_type) { - return (token_type == TokenType::logical_and || token_type == TokenType::logical_or); -} + if (token == "-") + return TokenType::minus; -bool Lexer::isArithmeticalOperator(TokenType token_type) { - return (token_type == TokenType::plus || token_type == TokenType::minus || token_type == TokenType::multiply || token_type == TokenType::divide); -} + if (token == "*") + return TokenType::multiply; -TokenType Lexer::type(const std::string &token) { - // TODO move it to class level not to reinit it again and again - std::regex int_regex("[0-9]+"); - std::regex int_underscored_regex("[0-9][0-9_]+[0-9]"); - std::regex double_regex("[0-9]+\\.[0-9]+"); - std::regex identifier_regex("[A-Za-z]+[A-Za-z0-9_#]*"); + if (token == "/") + return TokenType::divide; - // TODO 'one is evaluated as identifier - if (token == ";") - return TokenType::semicolon; + if (token == "(") + return TokenType::open_paren; - if (token == "+") - return TokenType::plus; + if (token == ")") + return TokenType::close_paren; - if (token == "-") - return TokenType::minus; + if (token == "=") + return TokenType::equal; - if (token == "*") - return TokenType::multiply; + if (token == "!=") + return TokenType::not_equal; - if (token == "/") - return TokenType::divide; + if (token == ">") + return TokenType::greater; - if (token == "(") - return TokenType::open_paren; + if (token == ">=") + return 
TokenType::greater_equal; - if (token == ")") - return TokenType::close_paren; + if (token == "<") + return TokenType::lesser; - if (token == "=") - return TokenType::equal; + if (token == "<=") + return TokenType::lesser_equal; - if (token == "!=") - return TokenType::not_equal; + if (token == "create") + return TokenType::keyword_create; - if (token == ">") - return TokenType::greater; + if (token == "where") + return TokenType::keyword_where; - if (token == ">=") - return TokenType::greater_equal; + if (token == "from") + return TokenType::keyword_from; - if (token == "<") - return TokenType::lesser; + if (token == "delete") + return TokenType::keyword_delete; - if (token == "<=") - return TokenType::lesser_equal; + if (token == "table") + return TokenType::keyword_table; - if (token == "create") - return TokenType::keyword_create; + if (token == "insert") + return TokenType::keyword_insert; - if (token == "where") - return TokenType::keyword_where; + if (token == "into") + return TokenType::keyword_into; - if (token == "from") - return TokenType::keyword_from; + if (token == "values") + return TokenType::keyword_values; - if (token == "delete") - return TokenType::keyword_delete; + if (token == "select") + return TokenType::keyword_select; - if (token == "table") - return TokenType::keyword_table; + if (token == "set") + return TokenType::keyword_set; - if (token == "insert") - return TokenType::keyword_insert; + if (token == "copy") + return TokenType::keyword_copy; - if (token == "into") - return TokenType::keyword_into; + if (token == "update") + return TokenType::keyword_update; - if (token == "values") - return TokenType::keyword_values; + if (token == "load") + return TokenType::keyword_load; - if (token == "select") - return TokenType::keyword_select; + if (token == "not") + return TokenType::keyword_not; - if (token == "set") - return TokenType::keyword_set; + if (token == "null") + return TokenType::keyword_null; - if (token == "copy") - return 
TokenType::keyword_copy; + if (token == "integer") + return TokenType::keyword_int; - if (token == "update") - return TokenType::keyword_update; + if (token == "float") + return TokenType::keyword_float; - if (token == "not") - return TokenType::keyword_not; - - if (token == "null") - return TokenType::keyword_null; - - if (token == "integer") - return TokenType::keyword_int; - - if (token == "float") - return TokenType::keyword_float; - - if (token == "varchar") - return TokenType::keyword_varchar; - - if (token == "or") - return TokenType::logical_or; - - if (token == "and") - return TokenType::logical_and; - - if (token == ",") - return TokenType::comma; - - if (token == "\n" || token == "\r\n" || token == "\r") - return TokenType::newline; - - if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) - return TokenType::comment; - - // if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') - // return TokenType::string_literal; - - if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') - return TokenType::string_literal; - - if (std::regex_match(token, int_regex)) - return TokenType::int_number; - - if (std::regex_match(token, int_underscored_regex)) - return TokenType::int_number; - - if (std::regex_match(token, double_regex)) - return TokenType::double_number; - - if (std::regex_match(token, identifier_regex)) - return TokenType::identifier; - - if (m_index + 1 >= m_tokens.size()) - return TokenType::eof; - - return TokenType::undef; -} - -std::string Lexer::stringLiteral(std::string token) { - // remove ' or " from the literal ends - bool replace = token[0]=='\'' && token[token.size()-1]=='\''; - - std::string str = token.substr(1, token.size() - 2); - if (!replace) { - return str; - } - std::string out = ""; - out.reserve(str.size()); - - - for(std::string::size_type i = 0; i < str.size(); ++i) { - if (str[i] == '\'' && i < 
str.size() - 1) { - if (str[i+1] == '\'') { - out.append(1, '\''); - i++; - } else { - out.append(1, str[i]); - } - } else if (str[i] == '\\' && i < str.size() - 1) { - if (str[i+1] == 'n') { - out.append(1, '\n'); - i++; - } else if (str[i+1] == 't') { - out.append(1, '\t'); - i++; - } else { - out.append(1, str[i]); - } - } else { - out.append(1, str[i]); - } - } - return out; -} - -std::string Lexer::typeToString(TokenType token_type) { - std::string txt; - switch (token_type) { - case TokenType::undef: - txt = "undef"; - break; - case TokenType::identifier: - txt = "identifier"; - break; - case TokenType::plus: - txt = "+"; - break; - case TokenType::minus: - txt = "-"; - break; - case TokenType::multiply: - txt = "*"; - break; - case TokenType::divide: - txt = "/"; - break; - case TokenType::equal: - txt = "=="; - break; - case TokenType::not_equal: - txt = "!="; - break; - case TokenType::greater: - txt = ">"; - break; - case TokenType::greater_equal: - txt = ">="; - break; - case TokenType::lesser: - txt = "<"; - break; - case TokenType::lesser_equal: - txt = "<="; - break; - case TokenType::keyword_create: - txt = "create"; - break; - case TokenType::keyword_where: - txt = "where"; - break; - case TokenType::keyword_table: - txt = "table"; - break; - case TokenType::keyword_into: - txt = "into"; - break; - case TokenType::keyword_values: - txt = "values"; - break; - case TokenType::keyword_select: - txt = "select"; - break; - case TokenType::keyword_set: - txt = "set"; - break; - case TokenType::keyword_copy: - txt = "copy"; - break; - case TokenType::keyword_not: - txt = "not"; - break; - case TokenType::keyword_null: - txt = "null"; - break; - case TokenType::keyword_int: - txt = "integer"; - break; - case TokenType::keyword_float: - txt = "float"; - break; - case TokenType::keyword_varchar: - txt = "varchar"; - break; - case TokenType::int_number: - txt = "int number"; - break; - case TokenType::double_number: - txt = "double number"; - break; - case 
TokenType::string_literal: - txt = "string literal"; - break; - case TokenType::open_paren: - txt = "("; - break; - case TokenType::close_paren: - txt = ")"; - break; - case TokenType::logical_and: - txt = "and"; - break; - case TokenType::logical_or: - txt = "or"; - break; - case TokenType::semicolon: - txt = ";"; - break; - case TokenType::comma: - txt = ","; - break; - case TokenType::newline: - txt = "newline"; - break; - case TokenType::comment: - txt = "comment"; - break; - case TokenType::eof: - txt = "eof"; - break; - default: - txt = "FIXME, unknown token type"; - break; - } - return txt; -} + if (token == "varchar") + return TokenType::keyword_varchar; + + if (token == "or") + return TokenType::logical_or; + + if (token == "and") + return TokenType::logical_and; + + if (token == ",") + return TokenType::comma; + + if (token == "\n" || token == "\r\n" || token == "\r") + return TokenType::newline; + + if (token.length() > 1 && token.at(0) == '%' && + (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) + return TokenType::comment; + + // if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') + // return TokenType::string_literal; + + if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') + return TokenType::string_literal; + + if (std::regex_match(token, k_int_regex)) + return TokenType::int_number; + + if (std::regex_match(token, k_int_underscored_regex)) + return TokenType::int_number; + + if (std::regex_match(token, k_double_regex)) + return TokenType::double_number; + + if (std::regex_match(token, k_identifier_regex)) + return TokenType::identifier; + + if (m_index + 1 >= m_tokens.size()) + return TokenType::eof; + + return TokenType::undef; + } + + std::string Lexer::stringLiteral(std::string token) { + // remove ' or " from the literal ends + bool replace = token[0] == '\'' && token[token.size() - 1] == '\''; + + std::string str = token.substr(1, token.size() - 
2); + if (!replace) { + return str; + } + std::string out = ""; + out.reserve(str.size()); + + + for (std::string::size_type i = 0; i < str.size(); ++i) { + if (str[i] == '\'' && i < str.size() - 1) { + if (str[i + 1] == '\'') { + out.append(1, '\''); + i++; + } else { + out.append(1, str[i]); + } + } else if (str[i] == '\\' && i < str.size() - 1) { + if (str[i + 1] == 'n') { + out.append(1, '\n'); + i++; + } else if (str[i + 1] == 't') { + out.append(1, '\t'); + i++; + } else { + out.append(1, str[i]); + } + } else { + out.append(1, str[i]); + } + } + return out; + } + + std::string Lexer::typeToString(TokenType token_type) { + std::string txt; + switch (token_type) { + case TokenType::undef: + txt = "undef"; + break; + case TokenType::identifier: + txt = "identifier"; + break; + case TokenType::plus: + txt = "+"; + break; + case TokenType::minus: + txt = "-"; + break; + case TokenType::multiply: + txt = "*"; + break; + case TokenType::divide: + txt = "/"; + break; + case TokenType::equal: + txt = "=="; + break; + case TokenType::not_equal: + txt = "!="; + break; + case TokenType::greater: + txt = ">"; + break; + case TokenType::greater_equal: + txt = ">="; + break; + case TokenType::lesser: + txt = "<"; + break; + case TokenType::lesser_equal: + txt = "<="; + break; + case TokenType::keyword_create: + txt = "create"; + break; + case TokenType::keyword_where: + txt = "where"; + break; + case TokenType::keyword_table: + txt = "table"; + break; + case TokenType::keyword_into: + txt = "into"; + break; + case TokenType::keyword_values: + txt = "values"; + break; + case TokenType::keyword_select: + txt = "select"; + break; + case TokenType::keyword_set: + txt = "set"; + break; + case TokenType::keyword_copy: + txt = "copy"; + break; + case TokenType::keyword_update: + txt = "update"; + break; + case TokenType::keyword_load: + txt = "load"; + break; + case TokenType::keyword_not: + txt = "not"; + break; + case TokenType::keyword_null: + txt = "null"; + break; + case 
TokenType::keyword_int: + txt = "integer"; + break; + case TokenType::keyword_float: + txt = "float"; + break; + case TokenType::keyword_varchar: + txt = "varchar"; + break; + case TokenType::int_number: + txt = "int number"; + break; + case TokenType::double_number: + txt = "double number"; + break; + case TokenType::string_literal: + txt = "string literal"; + break; + case TokenType::open_paren: + txt = "("; + break; + case TokenType::close_paren: + txt = ")"; + break; + case TokenType::logical_and: + txt = "and"; + break; + case TokenType::logical_or: + txt = "or"; + break; + case TokenType::semicolon: + txt = ";"; + break; + case TokenType::comma: + txt = ","; + break; + case TokenType::newline: + txt = "newline"; + break; + case TokenType::comment: + txt = "comment"; + break; + case TokenType::eof: + txt = "eof"; + break; + default: + txt = "FIXME, unknown token type"; + break; + } + return txt; + } + +} \ No newline at end of file diff --git a/lexer.h b/lexer.h index 0a71707..7fba978 100644 --- a/lexer.h +++ b/lexer.h @@ -5,7 +5,9 @@ #include #include -enum class TokenType { +namespace usql { + + enum class TokenType { undef, identifier, plus, @@ -21,8 +23,9 @@ enum class TokenType { keyword_create, keyword_table, keyword_where, - keyword_delete, - keyword_update, + keyword_delete, + keyword_update, + keyword_load, keyword_from, keyword_insert, keyword_into, @@ -48,46 +51,63 @@ enum class TokenType { newline, comment, eof -}; + }; -struct Token { + struct Token { std::string token_string; TokenType type; - Token(const std::string &token_str, TokenType typ); -}; -class Lexer { -public: - Lexer() {}; + Token(const std::string &token_str, TokenType typ); + }; + + class Lexer { + public: + Lexer(); void parse(const std::string &code); void debugTokens(); - Token currentToken(); + Token currentToken(); + Token consumeCurrentToken(); void nextToken(); void skipToken(TokenType type); + void skipTokenOptional(TokenType type); TokenType tokenType(); + TokenType 
nextTokenType(); + TokenType prevTokenType(); - static bool isRelationalOperator(TokenType token_type); - static bool isLogicalOperator(TokenType token_type); + static bool isRelationalOperator(TokenType token_type); + + static bool isLogicalOperator(TokenType token_type); + static bool isArithmeticalOperator(TokenType token_type); -private: + private: TokenType type(const std::string &token); + std::string stringLiteral(std::string token); + static std::string typeToString(TokenType token_type); -private: - std::string m_code_str; - std::vector m_tokens; - int m_index = 0; -}; + private: + std::string m_code_str; + std::vector m_tokens; + int m_index = 0; + + std::regex k_words_regex; + std::regex k_int_regex; + std::regex k_int_underscored_regex; + std::regex k_double_regex; + std::regex k_identifier_regex; + }; + +} \ No newline at end of file diff --git a/main.cpp b/main.cpp index fb40d20..59dc338 100644 --- a/main.cpp +++ b/main.cpp @@ -8,34 +8,41 @@ // drop table int main(int argc, char *argv[]) { - Parser parser{}; - Executor executor{}; + usql::Parser parser{}; + usql::Executor executor{}; - std::vector sql_commands { - "create table a (i integer not null, s varchar(64), f float null)", - "insert into a (i, s) values(1, 'one')", - "insert into a (i, s) values(2, 'two')", - "insert into a (i, s) values(3, 'two')", - "insert into a (i, s) values(4, 'four')", - "insert into a (i, s) values(5, 'five')", - "select i, s from a where i > 2", - "select i, s from a where i = 1", - "select i, s from a where s = 'two'", - "select i, s from a where i <= 3 and s = 'one'", - "select i, s from a where i > 0", - "delete from a where i = 4", - "select i, s from a where i > 0", - "update a set f = 9.99 where i = 3", -// "update a set s = 'three', f = 1.0 + 2.0 where i = 3", - "select i, s, f from a where i = 3" -// "select i, s from a where i > 0" - }; + std::vector sql_commands{ + "create table a (i integer not null, s varchar(64), f float null)", + "insert into a (i, s) 
values(1, 'one')", + "insert into a (i, s) values(2, 'two')", + "insert into a (i, s) values(3, 'two')", + "insert into a (i, s) values(4, 'four')", + "insert into a (i, s) values(5, 'five')", + "select i, s from a where i > 2", + "select i, s from a where i = 1", + "select i, s from a where s = 'two'", + "select i, s from a where i <= 3 and s = 'one'", + "select i, s from a where i > 0", + "delete from a where i = 4", + "select i, s from a where i > 0", + "update a set f = 9.99 where i = 3", + "select i, s, f from a where i = 3", + "update a set s = 'three', f = f + 0.01 where i = 3", + "select i, s, f from a where i = 3", + "create table data (ticker varchar(8), price float null)", + "load data from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/data.csv')", + "select ticker, price from data" + }; - for(auto command : sql_commands) { - auto node = parser.parse(command); - executor.execute(*node); - } + for (auto command : sql_commands) { + std::cout << command << std::endl; + auto node = parser.parse(command); + auto result = executor.execute(*node); + + result->print(); + // std::cout << std::endl; + } return 0; } diff --git a/parser.cpp b/parser.cpp index b4a5e25..a22a233 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1,279 +1,314 @@ #include "parser.h" #include "exception.h" +namespace usql { + // TODO handle premature eof -Parser::Parser() { - lexer = Lexer{}; -} - -std::unique_ptr Parser::parse(const std::string &code) { - lexer.parse(code); - // lexer.debugTokens(); - - if (lexer.tokenType() == TokenType::keyword_create && lexer.nextTokenType() == TokenType::keyword_table) { - return parse_create_table(); - } if (lexer.tokenType() == TokenType::keyword_insert) { - return parse_insert_into_table(); - } if (lexer.tokenType() == TokenType::keyword_select) { - return parse_select_from_table(); - } if (lexer.tokenType() == TokenType::keyword_delete) { - return parse_delete_from_table(); - } if (lexer.tokenType() == 
TokenType::keyword_update) { - return parse_update_table(); - } - - std::cout << "ERROR, token:" << lexer.currentToken().token_string << std::endl; - return std::make_unique(NodeType::error); -} - -std::unique_ptr Parser::parse_create_table() { - std::vector cols_def {}; - - lexer.skipToken(TokenType::keyword_create); - lexer.skipToken(TokenType::keyword_table); - - if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } - std::string table_name = lexer.consumeCurrentToken().token_string; - - lexer.skipToken(TokenType::open_paren); - int column_order = 0; - do { - std::string column_name; - ColumnType column_type; - int column_len {1}; - bool column_nullable {true}; - - // column name - if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } - column_name = lexer.consumeCurrentToken().token_string; - - // column type and optionaly len - if (lexer.tokenType() == TokenType::keyword_int) { - column_type = ColumnType::integer_type; - lexer.nextToken(); - } else if (lexer.tokenType() == TokenType::keyword_float) { - column_type = ColumnType::float_type; - lexer.nextToken(); - } else if (lexer.tokenType() == TokenType::keyword_varchar) { - column_type = ColumnType::varchar_type; - lexer.nextToken(); - lexer.skipToken(TokenType::open_paren); - if (lexer.tokenType() == TokenType::int_number) { - column_len = std::stoi(lexer.consumeCurrentToken().token_string); - } else { /* TODO handle error */ } - lexer.skipToken(TokenType::close_paren); - } else { /* TODO handle error */ } - - if (lexer.tokenType() == TokenType::keyword_not) { - lexer.nextToken(); - lexer.skipToken(TokenType::keyword_null); - column_nullable = false; - } else if (lexer.tokenType() == TokenType::keyword_null) { - lexer.nextToken(); - } - - cols_def.push_back(ColDefNode(column_name, column_type, column_order++, column_len, column_nullable)); - - lexer.skipTokenOptional(TokenType::comma); - - // TODO in future constraints - - } while (lexer.tokenType() != 
TokenType::close_paren); - - - return std::make_unique(table_name, cols_def); -} - - -std::unique_ptr Parser::parse_insert_into_table() { - std::vector exec_code {}; - std::vector cols_names {}; - std::vector cols_values {}; - - lexer.skipToken(TokenType::keyword_insert); - lexer.skipToken(TokenType::keyword_into); - - // table name - if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } - std::string table_name = lexer.consumeCurrentToken().token_string; - - // column names - lexer.skipToken(TokenType::open_paren); - do { - if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } - cols_names.push_back(lexer.consumeCurrentToken().token_string); - - lexer.skipTokenOptional(TokenType::comma); - } while (lexer.tokenType() != TokenType::close_paren); - lexer.skipToken(TokenType::close_paren); - - lexer.skipToken(TokenType::keyword_values); - - // column values - lexer.skipToken(TokenType::open_paren); - do { - cols_values.push_back(lexer.consumeCurrentToken().token_string); - - lexer.skipTokenOptional(TokenType::comma); - } while (lexer.tokenType() != TokenType::close_paren); - lexer.skipToken(TokenType::close_paren); - - return std::make_unique(table_name, cols_names, cols_values); -} - -std::unique_ptr Parser::parse_select_from_table() { - std::vector cols_names {}; - - lexer.skipToken(TokenType::keyword_select); - while (lexer.tokenType() != TokenType::keyword_from) { - cols_names.push_back(lexer.consumeCurrentToken().token_string); - lexer.skipTokenOptional(TokenType::comma); + Parser::Parser() { + lexer = Lexer{}; } - lexer.skipToken(TokenType::keyword_from); - std::string table_name = lexer.consumeCurrentToken().token_string; + std::unique_ptr Parser::parse(const std::string &code) { + lexer.parse(code); + // lexer.debugTokens(); - std::unique_ptr where_node = parse_where_clause(); + if (lexer.tokenType() == TokenType::keyword_create && lexer.nextTokenType() == TokenType::keyword_table) { + return parse_create_table(); + 
} + if (lexer.tokenType() == TokenType::keyword_insert) { + return parse_insert_into_table(); + } + if (lexer.tokenType() == TokenType::keyword_select) { + return parse_select_from_table(); + } + if (lexer.tokenType() == TokenType::keyword_delete) { + return parse_delete_from_table(); + } + if (lexer.tokenType() == TokenType::keyword_update) { + return parse_update_table(); + } + if (lexer.tokenType() == TokenType::keyword_load) { + return parse_load_table(); + } + + std::cout << "ERROR, token:" << lexer.currentToken().token_string << std::endl; + return std::make_unique(NodeType::error); + } + + std::unique_ptr Parser::parse_create_table() { + std::vector cols_def{}; + + lexer.skipToken(TokenType::keyword_create); + lexer.skipToken(TokenType::keyword_table); + + if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + std::string table_name = lexer.consumeCurrentToken().token_string; + + lexer.skipToken(TokenType::open_paren); + int column_order = 0; + do { + std::string column_name; + ColumnType column_type; + int column_len{1}; + bool column_nullable{true}; + + // column name + if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + column_name = lexer.consumeCurrentToken().token_string; + + // column type and optionally len + if (lexer.tokenType() == TokenType::keyword_int) { + column_type = ColumnType::integer_type; + lexer.nextToken(); + } else if (lexer.tokenType() == TokenType::keyword_float) { + column_type = ColumnType::float_type; + lexer.nextToken(); + } else if (lexer.tokenType() == TokenType::keyword_varchar) { + column_type = ColumnType::varchar_type; + lexer.nextToken(); + lexer.skipToken(TokenType::open_paren); + if (lexer.tokenType() == TokenType::int_number) { + column_len = std::stoi(lexer.consumeCurrentToken().token_string); + } else { /* TODO handle error */ } + lexer.skipToken(TokenType::close_paren); + } else { /* TODO handle error */ } + + if (lexer.tokenType() == TokenType::keyword_not) { + 
lexer.nextToken(); + lexer.skipToken(TokenType::keyword_null); + column_nullable = false; + } else if (lexer.tokenType() == TokenType::keyword_null) { + lexer.nextToken(); + } + + cols_def.push_back( + ColDefNode(column_name, column_type, column_order++, column_len, column_nullable)); + + lexer.skipTokenOptional(TokenType::comma); + + // TODO in future constraints + + } while (lexer.tokenType() != TokenType::close_paren); + + + return std::make_unique(table_name, cols_def); + } + + + std::unique_ptr Parser::parse_insert_into_table() { + std::vector exec_code{}; + std::vector cols_names{}; + std::vector cols_values{}; + + lexer.skipToken(TokenType::keyword_insert); + lexer.skipToken(TokenType::keyword_into); + + // table name + if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + std::string table_name = lexer.consumeCurrentToken().token_string; + + // column names + lexer.skipToken(TokenType::open_paren); + do { + if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + cols_names.push_back(lexer.consumeCurrentToken().token_string); + + lexer.skipTokenOptional(TokenType::comma); + } while (lexer.tokenType() != TokenType::close_paren); + lexer.skipToken(TokenType::close_paren); + + lexer.skipToken(TokenType::keyword_values); + + // column values + lexer.skipToken(TokenType::open_paren); + do { + cols_values.push_back(lexer.consumeCurrentToken().token_string); + + lexer.skipTokenOptional(TokenType::comma); + } while (lexer.tokenType() != TokenType::close_paren); + lexer.skipToken(TokenType::close_paren); + + return std::make_unique(table_name, cols_names, cols_values); + } + + std::unique_ptr Parser::parse_select_from_table() { + std::vector cols_names{}; + + lexer.skipToken(TokenType::keyword_select); + while (lexer.tokenType() != TokenType::keyword_from) { + cols_names.push_back(lexer.consumeCurrentToken().token_string); + lexer.skipTokenOptional(TokenType::comma); + } + + lexer.skipToken(TokenType::keyword_from); + 
std::string table_name = lexer.consumeCurrentToken().token_string; + + std::unique_ptr where_node = parse_where_clause(); // if (lexer.tokenType() == TokenType::keyword_order_by) {} // if (lexer.tokenType() == TokenType::keyword_offset) {} // if (lexer.tokenType() == TokenType::keyword_limit) {} - return std::make_unique(table_name, cols_names, std::move(where_node)); -} - -std::unique_ptr Parser::parse_delete_from_table() { - lexer.skipToken(TokenType::keyword_delete); - lexer.skipToken(TokenType::keyword_from); - - std::string table_name = lexer.consumeCurrentToken().token_string; - - std::unique_ptr where_node = parse_where_clause(); - - return std::make_unique(table_name, std::move(where_node)); -} - -std::unique_ptr Parser::parse_update_table() { - lexer.skipToken(TokenType::keyword_update); - lexer.skipTokenOptional(TokenType::keyword_table); - - std::string table_name = lexer.consumeCurrentToken().token_string; - - lexer.skipToken(TokenType::keyword_set); - - std::vector cols_names; - std::vector> values; - - do { - cols_names.push_back(lexer.consumeCurrentToken().token_string); - lexer.skipToken(TokenType::equal); - - std::unique_ptr left = Parser::parse_operand_node(); - if (Lexer::isArithmeticalOperator(lexer.tokenType())) { - ArithmeticalOperatorType op = parse_arithmetical_operator(); - std::unique_ptr right = Parser::parse_operand_node(); - - values.push_back(std::make_unique(op, std::move(left), std::move(right))); - } else { - std::unique_ptr right = std::make_unique(0); - values.push_back(std::make_unique(ArithmeticalOperatorType::copy_value, std::move(left), std::move(right))); - } - lexer.skipTokenOptional(TokenType::comma); - - } while (lexer.tokenType() != TokenType::keyword_where && lexer.tokenType() != TokenType::eof); - - std::unique_ptr where_node = parse_where_clause(); - - return std::make_unique(table_name, cols_names, std::move(values), std::move(where_node)); -} - -std::unique_ptr Parser::parse_where_clause() { - // TODO add support for 
multiple filters - // TODO add support for parenthesis - - if (lexer.tokenType() != TokenType::keyword_where) { - return std::make_unique(); + return std::make_unique(table_name, cols_names, std::move(where_node)); } - std::unique_ptr node; - lexer.skipToken(TokenType::keyword_where); - do { - node = parse_relational_expression(); + std::unique_ptr Parser::parse_delete_from_table() { + lexer.skipToken(TokenType::keyword_delete); + lexer.skipToken(TokenType::keyword_from); - if (Lexer::isLogicalOperator(lexer.tokenType())) { - auto operation = parse_logical_operator(); - std::unique_ptr node2 = parse_relational_expression(); - node = std::make_unique(operation, std::move(node), std::move(node2)); - } - } while (lexer.tokenType() != TokenType::eof); // until whole where clause parsed + std::string table_name = lexer.consumeCurrentToken().token_string; - return node; -} + std::unique_ptr where_node = parse_where_clause(); -std::unique_ptr Parser::parse_relational_expression() { - auto left = parse_operand_node(); - auto operation = parse_relational_operator(); - auto right = parse_operand_node(); - - return std::make_unique(operation, std::move(left), std::move(right)); -} - -std::unique_ptr Parser::parse_operand_node() { - // while not end or order or limit - auto token_type = lexer.tokenType(); - std::string tokenString = lexer.consumeCurrentToken().token_string; - switch (token_type) { - case TokenType::int_number: - return std::make_unique(std::stoi(tokenString)); - case TokenType::double_number: - return std::make_unique(std::stod(tokenString)); - case TokenType::string_literal: - return std::make_unique(tokenString); - case TokenType::identifier: - return std::make_unique(tokenString); - default: ; - throw Exception("Unknown operand node"); + return std::make_unique(table_name, std::move(where_node)); } -} -RelationalOperatorType Parser::parse_relational_operator() { - auto op = lexer.consumeCurrentToken(); - switch (op.type) { - case TokenType::equal: - return 
RelationalOperatorType::equal; - case TokenType::not_equal: - return RelationalOperatorType::not_equal; - case TokenType::greater: - return RelationalOperatorType::greater; - case TokenType::greater_equal: - return RelationalOperatorType::greater_equal; - case TokenType::lesser: - return RelationalOperatorType::lesser; - case TokenType::lesser_equal: - return RelationalOperatorType::lesser_equal; - default: - throw Exception("Unknown relational operator"); - } -} -LogicalOperatorType Parser::parse_logical_operator() { - auto op = lexer.consumeCurrentToken(); - switch (op.type) { - case TokenType::logical_and: - return LogicalOperatorType::and_operator; - case TokenType::logical_or: - return LogicalOperatorType::or_operator; - default: - throw Exception("Unknown logical operator"); - } -} + std::unique_ptr Parser::parse_update_table() { + lexer.skipToken(TokenType::keyword_update); + lexer.skipTokenOptional(TokenType::keyword_table); -ArithmeticalOperatorType Parser::parse_arithmetical_operator() { - auto op = lexer.consumeCurrentToken(); - switch (op.type) { - case TokenType::plus: - return ArithmeticalOperatorType::plus_operator; - default: - throw Exception("Unknown arithmetical operator"); + std::string table_name = lexer.consumeCurrentToken().token_string; + + lexer.skipToken(TokenType::keyword_set); + + std::vector cols_names; + std::vector> values; + + do { + cols_names.push_back(lexer.consumeCurrentToken().token_string); + lexer.skipToken(TokenType::equal); + + std::unique_ptr left = Parser::parse_operand_node(); + if (Lexer::isArithmeticalOperator(lexer.tokenType())) { + ArithmeticalOperatorType op = parse_arithmetical_operator(); + std::unique_ptr right = Parser::parse_operand_node(); + + values.push_back(std::make_unique(op, std::move(left), + std::move(right))); + } else { + std::unique_ptr right = std::make_unique(0); + values.push_back( + std::make_unique(ArithmeticalOperatorType::copy_value, + std::move(left), std::move(right))); + } + 
lexer.skipTokenOptional(TokenType::comma); + + } while (lexer.tokenType() != TokenType::keyword_where && lexer.tokenType() != TokenType::eof); + + std::unique_ptr where_node = parse_where_clause(); + + return std::make_unique(table_name, cols_names, std::move(values), std::move(where_node)); } + + std::unique_ptr Parser::parse_load_table() { + lexer.skipToken(TokenType::keyword_load); + lexer.skipTokenOptional(TokenType::keyword_into); + + std::string table_name = lexer.consumeCurrentToken().token_string; + + lexer.skipTokenOptional(TokenType::keyword_from); + + std::string file_name = lexer.consumeCurrentToken().token_string; + + return std::make_unique(table_name, file_name); + } + + std::unique_ptr Parser::parse_where_clause() { + // TODO add support for multiple filters + // TODO add support for parenthesis + + if (lexer.tokenType() != TokenType::keyword_where) { + return std::make_unique(); + } + + std::unique_ptr node; + lexer.skipToken(TokenType::keyword_where); + do { + node = parse_relational_expression(); + + if (Lexer::isLogicalOperator(lexer.tokenType())) { + auto operation = parse_logical_operator(); + std::unique_ptr node2 = parse_relational_expression(); + node = std::make_unique(operation, std::move(node), std::move(node2)); + } + } while (lexer.tokenType() != TokenType::eof); // until whole where clause parsed + + return node; + } + + std::unique_ptr Parser::parse_relational_expression() { + auto left = parse_operand_node(); + auto operation = parse_relational_operator(); + auto right = parse_operand_node(); + + return std::make_unique(operation, std::move(left), std::move(right)); + } + + std::unique_ptr Parser::parse_operand_node() { + // while not end or order or limit + auto token_type = lexer.tokenType(); + std::string tokenString = lexer.consumeCurrentToken().token_string; + switch (token_type) { + case TokenType::int_number: + return std::make_unique(std::stoi(tokenString)); + case TokenType::double_number: + return 
std::make_unique(std::stod(tokenString)); + case TokenType::string_literal: + return std::make_unique(tokenString); + case TokenType::identifier: + return std::make_unique(tokenString); + default:; + throw Exception("Unknown operand node"); + } + } + + RelationalOperatorType Parser::parse_relational_operator() { + auto op = lexer.consumeCurrentToken(); + switch (op.type) { + case TokenType::equal: + return RelationalOperatorType::equal; + case TokenType::not_equal: + return RelationalOperatorType::not_equal; + case TokenType::greater: + return RelationalOperatorType::greater; + case TokenType::greater_equal: + return RelationalOperatorType::greater_equal; + case TokenType::lesser: + return RelationalOperatorType::lesser; + case TokenType::lesser_equal: + return RelationalOperatorType::lesser_equal; + default: + throw Exception("Unknown relational operator"); + } + } + + LogicalOperatorType Parser::parse_logical_operator() { + auto op = lexer.consumeCurrentToken(); + switch (op.type) { + case TokenType::logical_and: + return LogicalOperatorType::and_operator; + case TokenType::logical_or: + return LogicalOperatorType::or_operator; + default: + throw Exception("Unknown logical operator"); + } + } + + ArithmeticalOperatorType Parser::parse_arithmetical_operator() { + auto op = lexer.consumeCurrentToken(); + switch (op.type) { + case TokenType::plus: + return ArithmeticalOperatorType::plus_operator; + case TokenType::minus: + return ArithmeticalOperatorType::minus_operator; + case TokenType::multiply: + return ArithmeticalOperatorType::multiply_operator; + case TokenType::divide: + return ArithmeticalOperatorType::divide_operator; + default: + throw Exception("Unknown arithmetical operator"); + } + } + } \ No newline at end of file diff --git a/parser.h b/parser.h index 810970a..1cf5dd6 100644 --- a/parser.h +++ b/parser.h @@ -6,223 +6,276 @@ #include +namespace usql { -enum class ColumnType { + + enum class ColumnType { integer_type, float_type, varchar_type -}; + }; 
-enum class NodeType { - true_node, - int_value, - float_value, - string_value, - database_value, - logical_operator, - relational_operator, - arithmetical_operator, + enum class NodeType { + true_node, + int_value, + float_value, + string_value, + database_value, + logical_operator, + relational_operator, + arithmetical_operator, create_table, insert_into, select_from, - delete_from, - update_table, - column_name, + delete_from, + update_table, + load_table, + column_name, column_value, column_def, not_implemented_yet, error -}; + }; -struct Node { + struct Node { NodeType node_type; Node(const NodeType type) : node_type(type) {} -}; + }; -struct ColNameNode : Node { - std::string name; + struct ColNameNode : Node { + std::string name; - ColNameNode(const std::string col_name) : - Node(NodeType::column_name), name(col_name) {} -}; + ColNameNode(const std::string col_name) : + Node(NodeType::column_name), name(col_name) {} + }; -struct ColValueNode : Node { - std::string value; + struct ColValueNode : Node { + std::string value; - ColValueNode(const std::string col_value) : - Node(NodeType::column_value), value(col_value) {} -}; + ColValueNode(const std::string col_value) : + Node(NodeType::column_value), value(col_value) {} + }; -// TODO add order in row -struct ColDefNode : Node { - std::string name; - ColumnType type; - int order; - int length; - bool null; + // TODO add order in row + struct ColDefNode : Node { + std::string name; + ColumnType type; + int order; + int length; + bool null; ColDefNode(const std::string col_name, const ColumnType col_type, int col_order, int col_len, bool nullable) : - Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len), null(nullable) {} -}; + Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len), + null(nullable) {} + }; + struct TrueNode : Node { + TrueNode() : Node(NodeType::true_node) {} + }; -struct TrueNode : Node { - TrueNode() : 
Node(NodeType::true_node) {} -}; + struct ValueNode : Node { + ValueNode(NodeType type) : Node(type) {} -struct IntValueNode : Node { - int value; + virtual int getIntValue() = 0; - IntValueNode(int value) : Node(NodeType::int_value), value(value) {} -}; + virtual double getDoubleValue() = 0; -struct FloatValueNode : Node { - double value; + virtual std::string getStringValue() = 0; - FloatValueNode(double value) : Node(NodeType::float_value), value(value) {} -}; + virtual ~ValueNode() {}; + }; -struct StringValueNode : Node { - std::string value; + struct IntValueNode : ValueNode { + int value; - StringValueNode(std::string value) : Node(NodeType::string_value), value(value) {} -}; + IntValueNode(int value) : ValueNode(NodeType::int_value), value(value) {} -struct DatabaseValueNode : Node { - std::string col_name; + int getIntValue() { return value; }; - DatabaseValueNode(std::string name) : Node(NodeType::database_value), col_name(name) {} -}; + double getDoubleValue() { return (double) value; }; -enum class LogicalOperatorType { - and_operator, - or_operator, - not_operator -}; + std::string getStringValue() { return std::to_string(value); } + }; -struct LogicalOperatorNode : Node { - LogicalOperatorType op; - std::unique_ptr left; - std::unique_ptr right; + struct FloatValueNode : ValueNode { + double value; - LogicalOperatorNode(LogicalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; -}; + FloatValueNode(double value) : ValueNode(NodeType::float_value), value(value) {} -enum class RelationalOperatorType { - equal, - greater, - greater_equal, - lesser, - lesser_equal, - not_equal - // like -}; + int getIntValue() { return (int) value; }; -struct RelationalOperatorNode : Node { - RelationalOperatorType op; + double getDoubleValue() { return value; }; - std::unique_ptr left; - std::unique_ptr right; + std::string getStringValue() { return 
std::to_string(value); } + }; - RelationalOperatorNode(RelationalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {}; -}; + struct StringValueNode : ValueNode { + std::string value; -enum class ArithmeticalOperatorType { - copy_value, // just copy lef value and do nothing with it - plus_operator, - minus_operator, - multiply_operator, - divide_operator -}; + StringValueNode(std::string value) : ValueNode(NodeType::string_value), value(value) {} -struct ArithmeticalOperatorNode : Node { - ArithmeticalOperatorType op; + int getIntValue() { return std::stoi(value); }; - std::unique_ptr left; - std::unique_ptr right; + double getDoubleValue() { return std::stod(value); }; - ArithmeticalOperatorNode(ArithmeticalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; -}; + std::string getStringValue() { return value; }; + }; + + struct DatabaseValueNode : Node { + std::string col_name; + + DatabaseValueNode(std::string name) : Node(NodeType::database_value), col_name(name) {} + }; + + enum class LogicalOperatorType { + and_operator, + or_operator, + not_operator + }; + + struct LogicalOperatorNode : Node { + LogicalOperatorType op; + std::unique_ptr left; + std::unique_ptr right; + + LogicalOperatorNode(LogicalOperatorType op, std::unique_ptr left, std::unique_ptr right) : + Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; + }; + + enum class RelationalOperatorType { + equal, + greater, + greater_equal, + lesser, + lesser_equal, + not_equal + // like + }; + + struct RelationalOperatorNode : Node { + RelationalOperatorType op; + + std::unique_ptr left; + std::unique_ptr right; + + RelationalOperatorNode(RelationalOperatorType op, std::unique_ptr left, std::unique_ptr right) : + Node(NodeType::relational_operator), 
op(op), left(std::move(left)), right(std::move(right)) {}; + }; + + enum class ArithmeticalOperatorType { + copy_value, // just copy lef value and do nothing with it + plus_operator, + minus_operator, + multiply_operator, + divide_operator + }; + + struct ArithmeticalOperatorNode : Node { + ArithmeticalOperatorType op; + + std::unique_ptr left; + std::unique_ptr right; + + ArithmeticalOperatorNode(ArithmeticalOperatorType op, std::unique_ptr left, std::unique_ptr right) : + Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; + }; -struct CreateTableNode : Node { + struct CreateTableNode : Node { std::string table_name; std::vector cols_defs; CreateTableNode(const std::string name, std::vector defs) : - Node(NodeType::create_table), table_name(name), cols_defs(defs) {} -}; + Node(NodeType::create_table), table_name(name), cols_defs(defs) {} + }; -struct InsertIntoTableNode : Node { - std::string table_name; - std::vector cols_names; - std::vector cols_values; + struct InsertIntoTableNode : Node { + std::string table_name; + std::vector cols_names; + std::vector cols_values; - InsertIntoTableNode(const std::string name, std::vector names, std::vector values) : - Node(NodeType::insert_into), table_name(name), cols_names(names), cols_values(values) {} -}; + InsertIntoTableNode(const std::string name, std::vector names, std::vector values) : + Node(NodeType::insert_into), table_name(name), cols_names(names), cols_values(values) {} + }; -struct SelectFromTableNode : Node { - std::string table_name; - std::vector cols_names; - std::unique_ptr where; + struct SelectFromTableNode : Node { + std::string table_name; + std::vector cols_names; + std::unique_ptr where; - SelectFromTableNode(std::string name, std::vector names, std::unique_ptr where_clause) : - Node(NodeType::select_from), table_name(name), cols_names(names), where(std::move(where_clause)) {} -}; + SelectFromTableNode(std::string name, std::vector names, 
std::unique_ptr where_clause) : + Node(NodeType::select_from), table_name(name), cols_names(names), where(std::move(where_clause)) {} + }; -struct UpdateTableNode : Node { - std::string table_name; - std::vector cols_names; - std::vector> values; - std::unique_ptr where; + struct UpdateTableNode : Node { + std::string table_name; + std::vector cols_names; + std::vector> values; + std::unique_ptr where; - UpdateTableNode(std::string name, std::vector names, std::vector> vals, - std::unique_ptr where_clause) : - Node(NodeType::update_table), table_name(name), cols_names(names), values(std::move(vals)), where(std::move(where_clause)) {} -}; + UpdateTableNode(std::string name, std::vector names, std::vector> vals, + std::unique_ptr where_clause) : + Node(NodeType::update_table), table_name(name), cols_names(names), values(std::move(vals)), + where(std::move(where_clause)) {} + }; -struct DeleteFromTableNode : Node { - std::string table_name; - std::unique_ptr where; + struct LoadIntoTableNode : Node { + std::string table_name; + std::string filename; - DeleteFromTableNode(const std::string name, std::unique_ptr where_clause) : - Node(NodeType::delete_from), table_name(name), where(std::move(where_clause)) {} + LoadIntoTableNode(const std::string name, std::string file) : + Node(NodeType::load_table), table_name(name), filename(file) {} -}; + }; + + struct DeleteFromTableNode : Node { + std::string table_name; + std::unique_ptr where; + + DeleteFromTableNode(const std::string name, std::unique_ptr where_clause) : + Node(NodeType::delete_from), table_name(name), where(std::move(where_clause)) {} + + }; + class Parser { + private: - -class Parser { -private: - -public: + public: Parser(); std::unique_ptr parse(const std::string &code); -private: - std::unique_ptr parse_create_table(); - std::unique_ptr parse_insert_into_table(); - std::unique_ptr parse_select_from_table(); - std::unique_ptr parse_delete_from_table(); - std::unique_ptr parse_update_table(); + private: + 
std::unique_ptr parse_create_table(); - std::unique_ptr parse_where_clause(); - std::unique_ptr parse_operand_node(); - RelationalOperatorType parse_relational_operator(); - LogicalOperatorType parse_logical_operator(); - ArithmeticalOperatorType parse_arithmetical_operator(); + std::unique_ptr parse_insert_into_table(); -private: + std::unique_ptr parse_select_from_table(); + + std::unique_ptr parse_delete_from_table(); + + std::unique_ptr parse_update_table(); + + std::unique_ptr parse_load_table(); + + std::unique_ptr parse_where_clause(); + + std::unique_ptr parse_operand_node(); + + RelationalOperatorType parse_relational_operator(); + + LogicalOperatorType parse_logical_operator(); + + ArithmeticalOperatorType parse_arithmetical_operator(); + + private: Lexer lexer; - std::unique_ptr parse_relational_expression(); + std::unique_ptr parse_relational_expression(); -}; + }; + +} \ No newline at end of file diff --git a/row.cpp b/row.cpp index 40e2f85..703114f 100644 --- a/row.cpp +++ b/row.cpp @@ -1,53 +1,59 @@ #include "row.h" +namespace usql { -Row::Row(int cols_count) { - m_columns.reserve(cols_count); - for (int i = 0; i < cols_count; i++) { - m_columns.push_back(std::make_unique()); - } -} - -Row::Row(const Row &other) { - m_columns.reserve(other.m_columns.size()); - // TODO fixme this is nonsense - for (int i = 0; i < other.m_columns.size(); i++) { - m_columns.push_back(std::make_unique()); + Row::Row(int cols_count) { + m_columns.reserve(cols_count); + for (int i = 0; i < cols_count; i++) { + m_columns.push_back(std::make_unique()); + } } - for (int i = 0; i < other.m_columns.size(); i++) { - if (ColIntegerValue* other_v = dynamic_cast(other.m_columns[i].get())) { - setColumnValue(i, other_v->integerValue()); - } - if (ColFloatValue* other_v = dynamic_cast(other.m_columns[i].get())) { - setColumnValue(i, other_v->floatValue()); - } - if (ColStringValue* other_v = dynamic_cast(other.m_columns[i].get())) { - setColumnValue(i, other_v->stringValue()); - } + 
Row::Row(const Row &other) { + m_columns.reserve(other.m_columns.size()); + // TODO fixme this is nonsense + for (int i = 0; i < other.m_columns.size(); i++) { + m_columns.push_back(std::make_unique()); + } + + for (int i = 0; i < other.m_columns.size(); i++) { + if (ColIntegerValue *other_v = dynamic_cast(other.m_columns[i].get())) { + setColumnValue(i, other_v->integerValue()); + } + if (ColFloatValue *other_v = dynamic_cast(other.m_columns[i].get())) { + setColumnValue(i, other_v->floatValue()); + } + if (ColStringValue *other_v = dynamic_cast(other.m_columns[i].get())) { + setColumnValue(i, other_v->stringValue()); + } + } } -} -Row& Row::operator=(Row other) { - std::swap(m_columns, other.m_columns); - return *this; -} - -void Row::setColumnValue(int col_index, int value) { - m_columns[col_index] = std::make_unique(value); -} - -void Row::setColumnValue(int col_index, double value) { - m_columns[col_index] = std::make_unique(value); -} - -void Row::setColumnValue(int col_index, std::string value) { - m_columns[col_index] = std::make_unique(value); -}; - -void Row::print() { - for(int i=0; iprint(); + Row &Row::operator=(Row other) { + std::swap(m_columns, other.m_columns); + return *this; } -} + + void Row::setColumnValue(int col_index, int value) { + m_columns[col_index] = std::make_unique(value); + } + + void Row::setColumnValue(int col_index, double value) { + m_columns[col_index] = std::make_unique(value); + } + + void Row::setColumnValue(int col_index, std::string value) { + m_columns[col_index] = std::make_unique(value); + }; + + void Row::print() { + for (int ci = 0; ci < m_columns.size(); ci++) { + if (ci > 0) std::cout << ","; + auto v = m_columns[ci]->stringValue(); + std::cout << v; + } + std::cout << std::endl; + } + +} \ No newline at end of file diff --git a/row.h b/row.h index 002e1a0..16dbca2 100644 --- a/row.h +++ b/row.h @@ -5,116 +5,128 @@ #include -class ColumnValue { +namespace usql { + + class ColumnValue { -private: - ColumnType m_type; 
- union { - int int_value; - double float_value; + private: + ColumnType m_type; + union { + int int_value; + double float_value; + }; }; -}; + struct ColValue { -struct ColValue { + virtual bool isNull() { return false; }; - virtual bool isNull() { return false; }; - virtual bool isInteger() { return false; }; - virtual bool isFloat() { return false; }; - virtual bool isString() { return false; }; + virtual bool isInteger() { return false; }; - virtual int integerValue() { throw Exception("Not supported"); }; - virtual double floatValue() { throw Exception("Not supported"); }; - virtual std::string stringValue() { throw Exception("Not supported"); }; + virtual bool isFloat() { return false; }; - virtual void print() {std::cout << "ColValue:" << std::endl; }; -}; + virtual bool isString() { return false; }; + + virtual int integerValue() { throw Exception("Not supported"); }; + + virtual double floatValue() { throw Exception("Not supported"); }; + + virtual std::string stringValue() { throw Exception("Not supported"); }; + }; -struct ColNullValue : ColValue { + struct ColNullValue : ColValue { - virtual bool isNull() { return true; }; + virtual bool isNull() { return true; }; - virtual void print() {std::cout << "ColNullValue:" << std::endl; }; -}; + virtual std::string stringValue() { return "null"; }; + }; -struct ColIntegerValue : ColValue { + struct ColIntegerValue : ColValue { - ColIntegerValue(int value) : m_integer(value) {}; - ColIntegerValue(const ColIntegerValue &other) : m_integer(other.m_integer) {} + ColIntegerValue(int value) : m_integer(value) {}; - virtual bool isInteger() { return true; }; + ColIntegerValue(const ColIntegerValue &other) : m_integer(other.m_integer) {} - virtual int integerValue() { return m_integer; }; - virtual double floatValue() { return (double) m_integer; }; - virtual std::string stringValue() { return std::to_string(m_integer); }; + virtual bool isInteger() { return true; }; - virtual void print() {std::cout << 
"ColIntegerValue: " << m_integer <> m_columns; -}; + ColValue &operator[](int i) { + return *m_columns[i]; + } + + ColValue *ithColumn(int i) { + return m_columns[i].get(); + } + + void print(); + + private: + std::vector> m_columns; + }; + +} \ No newline at end of file diff --git a/table.cpp b/table.cpp index 577ae7b..373abfc 100644 --- a/table.cpp +++ b/table.cpp @@ -1,46 +1,47 @@ #include "table.h" -Table::Table(const std::string name, const std::vector columns) { - m_name = name; - m_col_defs = columns; - m_rows.clear(); -} +namespace usql { -ColDefNode Table::get_column_def(const std::string& col_name) { - auto name_cmp = [col_name](ColDefNode cd){ return cd.name == col_name; }; - auto col_def = std::find_if(begin(m_col_defs), end(m_col_defs), name_cmp ); - if (col_def != std::end(m_col_defs)) { - return *col_def; - } else { - throw Exception("column not exists (" + col_name + ")"); + Table::Table(const std::string name, const std::vector columns) { + m_name = name; + m_col_defs = columns; + m_rows.clear(); } -} - -Row Table::createEmptyRow() { - return Row(columns_count()); -} - - -void Table::print() { - std::cout << "** " << m_name << " **" << std::endl; - for(auto row : m_rows) { - for(int ci = 0; ci < columns_count(); ci++) { - auto v = row[ci].stringValue(); - std::cout << v << ","; - } - std::cout << std::endl; + ColDefNode Table::get_column_def(const std::string &col_name) { + auto name_cmp = [col_name](ColDefNode cd) { return cd.name == col_name; }; + auto col_def = std::find_if(begin(m_col_defs), end(m_col_defs), name_cmp); + if (col_def != std::end(m_col_defs)) { + return *col_def; + } else { + throw Exception("column not exists (" + col_name + ")"); + } } -} -Table::Table(const Table& other) { - m_name = other.m_name; - m_col_defs = other.m_col_defs; - m_rows.clear(); // row not copied now -} -void Table::addRow(const Row &row) { - m_rows.push_back(row); -} + Row Table::createEmptyRow() { + return Row(columns_count()); + } + + void Table::print() 
{ + std::cout << "** " << m_name << " **" << std::endl; + for (auto row : m_rows) { + row.print(); + } + } + + Table::Table(const Table &other) { + m_name = other.m_name; + m_col_defs = other.m_col_defs; + m_rows.clear(); // row not copied now + } + + void Table::addRow(const Row &row) { + // TODO validate for not null values + // todo validate for length etc + m_rows.push_back(row); + } + +} \ No newline at end of file diff --git a/table.h b/table.h index afa8d97..428606b 100644 --- a/table.h +++ b/table.h @@ -5,25 +5,26 @@ #include -// TODO make it a class -struct Table { +namespace usql { -// public: - Table(const Table& other); + struct Table { - Table(const std::string name, const std::vector columns); + Table(const Table &other); - ColDefNode get_column_def(const std::string& col_name); - int columns_count() { return m_col_defs.size(); }; + Table(const std::string name, const std::vector columns); - Row createEmptyRow(); // TODO this means unnecessary copying - void addRow(const Row &row); + ColDefNode get_column_def(const std::string &col_name); - void print(); + int columns_count() { return m_col_defs.size(); }; + Row createEmptyRow(); // TODO this means unnecessary copying + void addRow(const Row &row); -// private: - std::string m_name; - std::vector m_col_defs; - std::vector m_rows; -}; + void print(); + + std::string m_name; + std::vector m_col_defs; + std::vector m_rows; + }; + +} \ No newline at end of file