From b55115f7c373c61664b18ee072508fff6cd1d186 Mon Sep 17 00:00:00 2001 From: VaclavT Date: Wed, 30 Jun 2021 23:29:09 +0200 Subject: [PATCH] a bit of further work --- CMakeLists.txt | 2 +- executor.cpp | 101 ++++++++++++++++++++++++++++++++++++++++++++++--- executor.h | 6 ++- lexer.cpp | 39 ++++++++++--------- lexer.h | 12 +++--- main.cpp | 24 +++++++----- parser.cpp | 81 ++++++++++++++++++++++++++++++++++----- parser.h | 51 +++++++++++++++++++++++-- table.cpp | 28 ++++++++++++++ table.h | 21 ++++++++++ 10 files changed, 309 insertions(+), 56 deletions(-) create mode 100644 table.cpp create mode 100644 table.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8303970..e09d4c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ project(msql) set(PROJECT_NAME msql) set(SOURCE - exception.cpp lexer.cpp parser.cpp executor.cpp main.cpp) + exception.cpp lexer.cpp parser.cpp executor.cpp main.cpp table.cpp table.h) add_executable(${PROJECT_NAME} ${SOURCE}) diff --git a/executor.cpp b/executor.cpp index 5626482..c0356dc 100644 --- a/executor.cpp +++ b/executor.cpp @@ -1,18 +1,33 @@ #include "executor.h" #include "exception.h" +#include Executor::Executor() { - // TODO init database + m_tables.clear(); } +Table* Executor::find_table(const std::string name) { + auto name_cmp = [name](Table t){ return t.m_name == name; }; + auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp ); + if (table_def != std::end(m_tables)) { + return table_def.operator->(); + } else { + // TODO throw exception + } +} + + bool Executor::execute(Node& node) { + // TODO optimize node here switch (node.node_type) { case NodeType::create_table: return execute_create_table(static_cast(node)); + case NodeType::insert_into: + return execute_insert_into_table(static_cast(node)); case NodeType::select_from: - return execute_select(node); + return execute_select(static_cast(node)); default: // TODO error message return false; @@ -21,9 +36,85 @@ bool Executor::execute(Node& node) { } bool Executor::execute_create_table(CreateTableNode& node) { - return false; + // TODO check table does not exists + Table table{node.table_name, node.cols_defs}; + m_tables.push_back(table); + + return true; } -bool Executor::execute_select(Node& node) { - return false; +bool Executor::execute_insert_into_table(InsertIntoTableNode& node) { + // TODO check column names.size = values.size + + // find table + Table* table_def = find_table(node.table_name); + + // prepare empty new_row + std::vector new_row; + new_row.reserve(table_def->columns_count()); + for(size_t i=0; icolumns_count(); i++) { + new_row.push_back(std::string {""}); + } + + // copy values + for(size_t i=0; iget_column_def(colNameNode.name); + + // TODO validate + new_row[col_def.order] = node.cols_values[i].value; + } + + // TODO check not null columns + + // append new_row + table_def->m_rows.push_back(new_row); + + return true; } + +bool Executor::execute_select(SelectFromTableNode& node) { + // TODO create plan for accessing rows + + // find source table + Table* table = find_table(node.table_name); + + // create result table + std::vector result_tbl_col_defs{}; + std::vector source_table_col_index{}; + int i = 0; // new column order + for(ColNameNode rc : node.cols_names) { + ColDefNode cdef = table->get_column_def(rc.name); + source_table_col_index.push_back(cdef.order); + + auto col = ColDefNode(rc.name, cdef.type, i, cdef.length, cdef.null); + result_tbl_col_defs.push_back(col); + + i++; + } + Table result {"result", result_tbl_col_defs}; + + + // execute access plan + + + for (auto row = begin (table->m_rows); row != end (table->m_rows); ++row) { + // eval there for row + bool where_true = true; + + if (where_true) { + // prepare empty row + std::vector new_row; + new_row.reserve(result.columns_count()); + for(auto i=0; iat(source_table_col_index[i])); + } + result.m_rows.push_back(new_row); + } + } + + result.print(); + + return true; +} + diff --git a/executor.h b/executor.h index 03ee30a..b77fcd7 100644 --- a/executor.h +++ b/executor.h @@ -1,6 +1,7 @@ #pragma once #include "parser.h" +#include "table.h" #include @@ -14,8 +15,11 @@ public: private: bool execute_create_table(CreateTableNode& node); - bool execute_select(Node& node); + bool execute_insert_into_table(InsertIntoTableNode& node); + bool execute_select(SelectFromTableNode& node); + Table* find_table(const std::string name); private: + std::vector m_tables; }; diff --git a/lexer.cpp b/lexer.cpp index 4bb69cd..e95e847 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -11,54 +11,53 @@ Token::Token(const std::string &token_str, TokenType typ) { void Lexer::parse(const std::string &code) { // TODO handle empty code - tokens.clear(); + m_tokens.clear(); // PERF something like this to prealocate ?? if (code.size() > 100) { - tokens.reserve(code.size() / 10); + m_tokens.reserve(code.size() / 10); } - code_str = code; - if (!code_str.empty() && code_str.back() != '\n') { - code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment + m_code_str = code; + if (!m_code_str.empty() && m_code_str.back() != '\n') { + m_code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment } // TODO make it constant std::regex words_regex("[0-9]+\\.[0-9]+|[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" ",;:\?]|==|>=|<=|~=|>|<|=|;|~|\\|\\||&&|\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"); - auto words_begin = std::sregex_iterator(code_str.begin(), code_str.end(), words_regex); + auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), words_regex); auto words_end = std::sregex_iterator(); for (std::sregex_iterator i = words_begin; i != words_end; ++i) { std::smatch match = *i; std::string match_str = match.str(); TokenType token_type = type(match_str); - if (token_type == TokenType::string_literal) { - match_str = stringLiteral(match_str); - } else { - tokens.push_back(Token{match_str, token_type}); - } + if (token_type == TokenType::string_literal) + match_str = stringLiteral(match_str); + + m_tokens.push_back(Token{match_str, token_type}); } // DEBUG IT // debugTokens(); - index = 0; + m_index = 0; } void Lexer::debugTokens() { int i = 0; - for (std::vector::iterator it = tokens.begin(); it != tokens.end(); ++it) { + for (std::vector::iterator it = m_tokens.begin(); it != m_tokens.end(); ++it) { std::cerr << i << "\t" << it->token_string << std::endl; i++; } } -Token Lexer::currentToken() { return tokens[index]; } +Token Lexer::currentToken() { return m_tokens[m_index]; } void Lexer::nextToken() { - if (index < tokens.size()) { - index++; + if (m_index < m_tokens.size()) { + m_index++; } } @@ -76,11 +75,11 @@ void Lexer::skipTokenOptional(TokenType type) { } } -TokenType Lexer::tokenType() { return index < tokens.size() ? currentToken().type : TokenType::eof; } +TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } -TokenType Lexer::nextTokenType() { return index < tokens.size() - 1 ? tokens[index + 1].type : TokenType::eof; } +TokenType Lexer::nextTokenType() { return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; } -TokenType Lexer::prevTokenType() { return index > 0 ? tokens[index - 1].type : TokenType::undef; } +TokenType Lexer::prevTokenType() { return m_index > 0 ? m_tokens[m_index - 1].type : TokenType::undef; } bool Lexer::isRelationalOperator(TokenType token_type) { return (token_type == TokenType::equal || token_type == TokenType::not_equal || token_type == TokenType::greater || token_type == TokenType::greater_equal || @@ -211,7 +210,7 @@ TokenType Lexer::type(const std::string &token) { if (std::regex_match(token, identifier_regex)) return TokenType::identifier; - if (index + 1 >= tokens.size()) + if (m_index + 1 >= m_tokens.size()) return TokenType::eof; return TokenType::undef; diff --git a/lexer.h b/lexer.h index f6122a1..16fbe39 100644 --- a/lexer.h +++ b/lexer.h @@ -54,12 +54,6 @@ struct Token { }; class Lexer { -private: - std::string code_str; - std::vector tokens; - int index = 0; - bool eof = false; - public: Lexer() {}; @@ -84,4 +78,10 @@ private: TokenType type(const std::string &token); std::string stringLiteral(std::string token); static std::string typeToString(TokenType token_type); + + +private: + std::string m_code_str; + std::vector m_tokens; + int m_index = 0; }; diff --git a/main.cpp b/main.cpp index 4c7f33b..2c2c328 100644 --- a/main.cpp +++ b/main.cpp @@ -11,16 +11,22 @@ int main(int argc, char *argv[]) { Parser parser{}; Executor executor{}; - std::string sql_create = "create table a (i integer not null, s varchar(64), f float)"; -// std::string sql_insert = "insert into a (i, s) values(1, 'one')"; -// std::string sql_inser2 = "insert into a (i, s) values(2, 'two')"; -// std::string sql_inser3 = "insert into a (i, s) values(3, 'two')"; -// std::string sql_update = "update a set s = 'three' where i = 3"; -// std::string sql_select = "select i, s from a where i > 0"; -// std::string sql_delete = "delete from a where i = 3"; + std::vector sql_commands { + "create table a (i integer not null, s varchar(64), f float null)", + "insert into a (i, s) values(1, 'one')", + "insert into a (i, s) values(2, 'two')", + "insert into a (i, s) values(3, 'two')", + "select i, s from a where i > 0" +// "update a set s = 'three' where i = 3" +// "delete from a where i = 3" +// "select i, s from a where i > 0" + }; - auto node = parser.parse(sql_create); - executor.execute(*node.get()); + for(auto command : sql_commands) { + auto node = parser.parse(command); + executor.execute(*node.get()); + } + return 0; } diff --git a/parser.cpp b/parser.cpp index e8473d7..a3cf028 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1,7 +1,7 @@ #include "parser.h" #include "exception.h" - +// TOOD handle premature eof Parser::Parser() { lexer = Lexer{}; @@ -9,12 +9,14 @@ Parser::Parser() { std::unique_ptr Parser::parse(const std::string &code) { lexer.parse(code); - lexer.debugTokens(); + // lexer.debugTokens(); if (lexer.tokenType() == TokenType::keyword_create && lexer.nextTokenType() == TokenType::keyword_table) { return parse_create_table(); + } if (lexer.tokenType() == TokenType::keyword_insert) { + return parse_insert_into_table(); } if (lexer.tokenType() == TokenType::keyword_select) { - return parse_select(); + return parse_select_from_table(); } return std::make_unique(NodeType::error); @@ -31,10 +33,11 @@ std::unique_ptr Parser::parse_create_table() { lexer.nextToken(); lexer.skipToken(TokenType::open_paren); + int column_order = 0; do { std::string column_name; ColumnType column_type; - int column_len {1}; + int column_len {1}; bool column_nullable {true}; // column name @@ -68,10 +71,10 @@ std::unique_ptr Parser::parse_create_table() { lexer.nextToken(); } - cols_def.push_back(ColDefNode(column_name, column_type, column_len, column_nullable)); + cols_def.push_back(ColDefNode(column_name, column_type, column_order++, column_len, column_nullable)); + + lexer.skipTokenOptional(TokenType::comma); - if (lexer.tokenType() == TokenType::comma) lexer.nextToken(); - // TODO in future constraints } while (lexer.tokenType() != TokenType::close_paren); @@ -80,8 +83,66 @@ std::unique_ptr Parser::parse_create_table() { return std::make_unique(table_name, cols_def); } -std::unique_ptr Parser::parse_select() { - std::vector exec_code {}; - return std::make_unique(NodeType::not_implemented_yet); +std::unique_ptr Parser::parse_insert_into_table() { + std::vector exec_code {}; + std::vector cols_names {}; + std::vector cols_values {}; + + lexer.skipToken(TokenType::keyword_insert); + lexer.skipToken(TokenType::keyword_into); + + // table name + if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + std::string table_name = lexer.currentToken().token_string; + lexer.nextToken(); + + // column names + lexer.skipToken(TokenType::open_paren); + do { + if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + cols_names.push_back(lexer.currentToken().token_string); + lexer.nextToken(); + + lexer.skipTokenOptional(TokenType::comma); + } while (lexer.tokenType() != TokenType::close_paren); + lexer.skipToken(TokenType::close_paren); + + lexer.skipToken(TokenType::keyword_values); + + // column values + lexer.skipToken(TokenType::open_paren); + do { + cols_values.push_back(lexer.currentToken().token_string); + lexer.nextToken(); + + lexer.skipTokenOptional(TokenType::comma); + } while (lexer.tokenType() != TokenType::close_paren); + lexer.skipToken(TokenType::close_paren); + + return std::make_unique(table_name, cols_names, cols_values); +} + +std::unique_ptr Parser::parse_select_from_table() { + std::vector where {}; + std::vector cols_names {}; + + lexer.skipToken(TokenType::keyword_select); + // TODO support also numbers and expressions + while (lexer.tokenType() != TokenType::keyword_from) { + // TODO add consumeToken() which returns token and advances to next token + cols_names.push_back(lexer.currentToken().token_string); + lexer.nextToken(); + lexer.skipTokenOptional(TokenType::comma); + } + lexer.skipToken(TokenType::keyword_from); + std::string table_name = lexer.currentToken().token_string; + lexer.nextToken(); + + if (lexer.tokenType() == TokenType::keyword_where) {} +// if (lexer.tokenType() == TokenType::keyword_order_by) {} +// if (lexer.tokenType() == TokenType::keyword_offset) {} +// if (lexer.tokenType() == TokenType::keyword_limit) {} + + return std::make_unique(table_name, cols_names, where); } \ No newline at end of file diff --git a/parser.h b/parser.h index 0cbca9e..535a928 100644 --- a/parser.h +++ b/parser.h @@ -15,7 +15,10 @@ enum class ColumnType { enum class NodeType { create_table, + insert_into, select_from, + column_name, + column_value, column_def, not_implemented_yet, error @@ -27,14 +30,30 @@ struct Node { Node(const NodeType type) : node_type(type) {} }; +struct ColNameNode : Node { + std::string name; + + ColNameNode(const std::string col_name) : + Node(NodeType::column_name), name(col_name) {} +}; + +struct ColValueNode : Node { + std::string value; + + ColValueNode(const std::string col_value) : + Node(NodeType::column_value), value(col_value) {} +}; + +// TODO add order in row struct ColDefNode : Node { std::string name; ColumnType type; - int length; + int order; + int length; bool null; - ColDefNode(const std::string col_name, const ColumnType col_type, int col_len, bool nullable) : - Node(NodeType::column_def), name(col_name), type(col_type), length(col_len), null(nullable) {} + ColDefNode(const std::string col_name, const ColumnType col_type, int col_order, int col_len, bool nullable) : + Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len), null(nullable) {} }; struct CreateTableNode : Node { @@ -45,6 +64,29 @@ struct CreateTableNode : Node { Node(NodeType::create_table), table_name(name), cols_defs(defs) {} }; +struct InsertIntoTableNode : Node { + std::string table_name; + std::vector cols_names; + std::vector cols_values; + + InsertIntoTableNode(const std::string name, std::vector names, std::vector values) : + Node(NodeType::insert_into), table_name(name), cols_names(names), cols_values(values) {} +}; + +struct SelectFromTableNode : Node { + std::string table_name; + std::vector cols_names; + std::vector where; + + SelectFromTableNode(const std::string name, std::vector names, std::vector where_clause) : + Node(NodeType::select_from), table_name(name), cols_names(names), where(where_clause) {} +}; + +struct UpdateTableNode : Node { }; +struct DeleteFromTableNode : Node { }; + + + class Parser { private: @@ -56,7 +98,8 @@ public: private: std::unique_ptr parse_create_table(); - std::unique_ptr parse_select(); + std::unique_ptr parse_insert_into_table(); + std::unique_ptr parse_select_from_table(); private: Lexer lexer; diff --git a/table.cpp b/table.cpp new file mode 100644 index 0000000..0c4b984 --- /dev/null +++ b/table.cpp @@ -0,0 +1,28 @@ + +#include "table.h" + +Table::Table(const std::string name, const std::vector columns) { + m_name = name; + m_col_defs = columns; + m_rows.clear(); +} + +ColDefNode Table::get_column_def(const std::string col_name) { + auto name_cmp = [col_name](ColDefNode cd){ return cd.name == col_name; }; + auto col_def = std::find_if(begin(m_col_defs), end(m_col_defs), name_cmp ); + if (col_def != std::end(m_col_defs)) { + return *col_def; + } else { + // TODO throw exception + } +} + +void Table::print() { + std::cout << "** " << m_name << " **" << std::endl; + for(auto row : m_rows) { + for( auto col : row) { + std::cout << col << ","; + } + std::cout << std::endl; + } +} \ No newline at end of file diff --git a/table.h b/table.h new file mode 100644 index 0000000..d2415f0 --- /dev/null +++ b/table.h @@ -0,0 +1,21 @@ +#pragma once + +#include "parser.h" + +#include + +// TODO make it a class +struct Table { + +// public: + Table(const std::string name, const std::vector columns); + + ColDefNode get_column_def(const std::string col_name); + int columns_count() { return m_col_defs.size(); }; + void print(); + +// private: + std::string m_name; + std::vector m_col_defs; + std::vector> m_rows; +};