a bit of further work

This commit is contained in:
VaclavT 2021-06-30 23:29:09 +02:00
parent 5c7908ac4b
commit b55115f7c3
10 changed files with 309 additions and 56 deletions

View File

@ -12,7 +12,7 @@ project(msql)
set(PROJECT_NAME msql)
set(SOURCE
exception.cpp lexer.cpp parser.cpp executor.cpp main.cpp)
exception.cpp lexer.cpp parser.cpp executor.cpp main.cpp table.cpp table.h)
add_executable(${PROJECT_NAME} ${SOURCE})

View File

@ -1,18 +1,33 @@
#include "executor.h"
#include "exception.h"
#include <algorithm>
// Creates an executor whose table list starts out empty.
Executor::Executor() : m_tables() {
    // TODO init database
}
// Looks up a table by exact name among the tables held by this executor.
//
// name    - table name, compared for equality with Table::m_name
// returns - pointer to the matching element of m_tables (never null); the
//           pointer refers into the vector, so it stays valid only until
//           m_tables is modified (e.g. by the next create table)
// throws  - std::runtime_error when no table with that name exists
Table* Executor::find_table(const std::string name) {
    // Compare through a const reference; taking Table by value copied the
    // whole table (rows included) for every element inspected.
    auto name_cmp = [&name](const Table& t) { return t.m_name == name; };

    auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp);
    if (table_def == std::end(m_tables)) {
        // Flowing off the end of a non-void function is undefined behaviour,
        // so report the missing table explicitly.
        throw std::runtime_error("table not found: " + name);
    }
    return &*table_def;
}
bool Executor::execute(Node& node) {
// TODO optimize node here
switch (node.node_type) {
case NodeType::create_table:
return execute_create_table(static_cast<CreateTableNode &>(node));
case NodeType::insert_into:
return execute_insert_into_table(static_cast<InsertIntoTableNode &>(node));
case NodeType::select_from:
return execute_select(node);
return execute_select(static_cast<SelectFromTableNode &>(node));
default:
// TODO error message
return false;
@ -21,9 +36,85 @@ bool Executor::execute(Node& node) {
}
bool Executor::execute_create_table(CreateTableNode& node) {
return false;
// TODO check table does not exists
Table table{node.table_name, node.cols_defs};
m_tables.push_back(table);
return true;
}
bool Executor::execute_select(Node& node) {
return false;
// Appends one row to the table named by the insert statement.
//
// The new row has one string cell per table column; cells for columns that the
// statement does not mention stay empty. node.cols_names[i] is paired with
// node.cols_values[i].
//
// returns - true when the row was appended; false when the statement cannot be
//           applied (name/value count mismatch, unknown table pointer, unknown
//           column, or a column order outside the row). Nothing is appended in
//           the failure cases.
bool Executor::execute_insert_into_table(InsertIntoTableNode& node) {
    // Every named column needs exactly one value, otherwise cols_values[i]
    // below would be read out of range.
    if (node.cols_names.size() != node.cols_values.size()) {
        return false;
    }

    // find table
    Table* table_def = find_table(node.table_name);
    if (table_def == nullptr) {
        return false;
    }
    const std::vector<ColDefNode>& col_defs = table_def->m_col_defs;

    // prepare empty new_row: one empty string per table column
    std::vector<std::string> new_row(col_defs.size());

    // copy values into the slot given by each column's order
    for (size_t i = 0; i < node.cols_names.size(); i++) {
        const std::string& col_name = node.cols_names[i].name;
        auto col_def = std::find_if(col_defs.begin(), col_defs.end(),
                                    [&col_name](const ColDefNode& cd) { return cd.name == col_name; });
        if (col_def == col_defs.end()) {
            return false; // statement names a column the table does not have
        }

        const size_t slot = static_cast<size_t>(col_def->order);
        if (col_def->order < 0 || slot >= new_row.size()) {
            return false; // column order does not address a cell of this row
        }

        // TODO validate value against the column type and length
        new_row[slot] = node.cols_values[i].value;
    }

    // TODO check not null columns

    // append new_row
    table_def->m_rows.push_back(new_row);
    return true;
}
// Runs a select statement: projects the requested columns of every row of the
// source table into a temporary table named "result" and prints it.
//
// The where clause carried by the node is not evaluated yet, so all rows are
// returned.
//
// returns - true after the result has been printed; false when the table
//           pointer is null or a requested column does not exist in the table
bool Executor::execute_select(SelectFromTableNode& node) {
    // TODO create plan for accessing rows

    // find source table
    Table* table = find_table(node.table_name);
    if (table == nullptr) {
        return false;
    }
    const std::vector<ColDefNode>& source_col_defs = table->m_col_defs;

    // create result table: column definitions renumbered in select-list order,
    // plus the index of the matching cell in a source row
    std::vector<ColDefNode> result_tbl_col_defs{};
    std::vector<int> source_table_col_index{};
    result_tbl_col_defs.reserve(node.cols_names.size());
    source_table_col_index.reserve(node.cols_names.size());

    for (const ColNameNode& rc : node.cols_names) {
        auto cdef = std::find_if(source_col_defs.begin(), source_col_defs.end(),
                                 [&rc](const ColDefNode& cd) { return cd.name == rc.name; });
        if (cdef == source_col_defs.end()) {
            return false; // select list names a column the table does not have
        }

        source_table_col_index.push_back(cdef->order);
        const int result_order = static_cast<int>(result_tbl_col_defs.size());
        result_tbl_col_defs.push_back(ColDefNode(rc.name, cdef->type, result_order, cdef->length, cdef->null));
    }

    Table result {"result", result_tbl_col_defs};

    // execute access plan
    for (const auto& row : table->m_rows) {
        // TODO evaluate node.where for this row; until then every row matches

        std::vector<std::string> new_row;
        new_row.reserve(source_table_col_index.size());
        for (int source_index : source_table_col_index) {
            // at() keeps the bounds check the lookup had before
            new_row.push_back(row.at(source_index));
        }
        result.m_rows.push_back(new_row);
    }

    result.print();

    return true;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "parser.h"
#include "table.h"
#include <string>
@ -14,8 +15,11 @@ public:
private:
bool execute_create_table(CreateTableNode& node);
bool execute_select(Node& node);
bool execute_insert_into_table(InsertIntoTableNode& node);
bool execute_select(SelectFromTableNode& node);
Table* find_table(const std::string name);
private:
std::vector<Table> m_tables;
};

View File

@ -11,54 +11,53 @@ Token::Token(const std::string &token_str, TokenType typ) {
void Lexer::parse(const std::string &code) {
// TODO handle empty code
tokens.clear();
m_tokens.clear();
// PERF something like this to preallocate ??
if (code.size() > 100) {
tokens.reserve(code.size() / 10);
m_tokens.reserve(code.size() / 10);
}
code_str = code;
if (!code_str.empty() && code_str.back() != '\n') {
code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment
m_code_str = code;
if (!m_code_str.empty() && m_code_str.back() != '\n') {
m_code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment
}
// TODO make it constant
std::regex words_regex("[0-9]+\\.[0-9]+|[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/"
",;:\?]|==|>=|<=|~=|>|<|=|;|~|\\|\\||&&|\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n");
auto words_begin = std::sregex_iterator(code_str.begin(), code_str.end(), words_regex);
auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), words_regex);
auto words_end = std::sregex_iterator();
for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
std::smatch match = *i;
std::string match_str = match.str();
TokenType token_type = type(match_str);
if (token_type == TokenType::string_literal) {
match_str = stringLiteral(match_str);
} else {
tokens.push_back(Token{match_str, token_type});
}
if (token_type == TokenType::string_literal)
match_str = stringLiteral(match_str);
m_tokens.push_back(Token{match_str, token_type});
}
// DEBUG IT
// debugTokens();
index = 0;
m_index = 0;
}
void Lexer::debugTokens() {
int i = 0;
for (std::vector<Token>::iterator it = tokens.begin(); it != tokens.end(); ++it) {
for (std::vector<Token>::iterator it = m_tokens.begin(); it != m_tokens.end(); ++it) {
std::cerr << i << "\t" << it->token_string << std::endl;
i++;
}
}
Token Lexer::currentToken() { return tokens[index]; }
Token Lexer::currentToken() { return m_tokens[m_index]; }
void Lexer::nextToken() {
if (index < tokens.size()) {
index++;
if (m_index < m_tokens.size()) {
m_index++;
}
}
@ -76,11 +75,11 @@ void Lexer::skipTokenOptional(TokenType type) {
}
}
TokenType Lexer::tokenType() { return index < tokens.size() ? currentToken().type : TokenType::eof; }
TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; }
TokenType Lexer::nextTokenType() { return index < tokens.size() - 1 ? tokens[index + 1].type : TokenType::eof; }
TokenType Lexer::nextTokenType() { return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; }
TokenType Lexer::prevTokenType() { return index > 0 ? tokens[index - 1].type : TokenType::undef; }
TokenType Lexer::prevTokenType() { return m_index > 0 ? m_tokens[m_index - 1].type : TokenType::undef; }
bool Lexer::isRelationalOperator(TokenType token_type) {
return (token_type == TokenType::equal || token_type == TokenType::not_equal || token_type == TokenType::greater || token_type == TokenType::greater_equal ||
@ -211,7 +210,7 @@ TokenType Lexer::type(const std::string &token) {
if (std::regex_match(token, identifier_regex))
return TokenType::identifier;
if (index + 1 >= tokens.size())
if (m_index + 1 >= m_tokens.size())
return TokenType::eof;
return TokenType::undef;

12
lexer.h
View File

@ -54,12 +54,6 @@ struct Token {
};
class Lexer {
private:
std::string code_str;
std::vector<Token> tokens;
int index = 0;
bool eof = false;
public:
Lexer() {};
@ -84,4 +78,10 @@ private:
TokenType type(const std::string &token);
std::string stringLiteral(std::string token);
static std::string typeToString(TokenType token_type);
private:
std::string m_code_str;
std::vector<Token> m_tokens;
int m_index = 0;
};

View File

@ -11,16 +11,22 @@ int main(int argc, char *argv[]) {
Parser parser{};
Executor executor{};
std::string sql_create = "create table a (i integer not null, s varchar(64), f float)";
// std::string sql_insert = "insert into a (i, s) values(1, 'one')";
// std::string sql_inser2 = "insert into a (i, s) values(2, 'two')";
// std::string sql_inser3 = "insert into a (i, s) values(3, 'two')";
// std::string sql_update = "update a set s = 'three' where i = 3";
// std::string sql_select = "select i, s from a where i > 0";
// std::string sql_delete = "delete from a where i = 3";
std::vector<std::string> sql_commands {
"create table a (i integer not null, s varchar(64), f float null)",
"insert into a (i, s) values(1, 'one')",
"insert into a (i, s) values(2, 'two')",
"insert into a (i, s) values(3, 'two')",
"select i, s from a where i > 0"
// "update a set s = 'three' where i = 3"
// "delete from a where i = 3"
// "select i, s from a where i > 0"
};
auto node = parser.parse(sql_create);
executor.execute(*node.get());
for(auto command : sql_commands) {
auto node = parser.parse(command);
executor.execute(*node.get());
}
return 0;
}

View File

@ -1,7 +1,7 @@
#include "parser.h"
#include "exception.h"
// TODO handle premature eof
Parser::Parser() {
lexer = Lexer{};
@ -9,12 +9,14 @@ Parser::Parser() {
std::unique_ptr<Node> Parser::parse(const std::string &code) {
lexer.parse(code);
lexer.debugTokens();
// lexer.debugTokens();
if (lexer.tokenType() == TokenType::keyword_create && lexer.nextTokenType() == TokenType::keyword_table) {
return parse_create_table();
} if (lexer.tokenType() == TokenType::keyword_insert) {
return parse_insert_into_table();
} if (lexer.tokenType() == TokenType::keyword_select) {
return parse_select();
return parse_select_from_table();
}
return std::make_unique<Node>(NodeType::error);
@ -31,10 +33,11 @@ std::unique_ptr<Node> Parser::parse_create_table() {
lexer.nextToken();
lexer.skipToken(TokenType::open_paren);
int column_order = 0;
do {
std::string column_name;
ColumnType column_type;
int column_len {1};
int column_len {1};
bool column_nullable {true};
// column name
@ -68,9 +71,9 @@ std::unique_ptr<Node> Parser::parse_create_table() {
lexer.nextToken();
}
cols_def.push_back(ColDefNode(column_name, column_type, column_len, column_nullable));
cols_def.push_back(ColDefNode(column_name, column_type, column_order++, column_len, column_nullable));
if (lexer.tokenType() == TokenType::comma) lexer.nextToken();
lexer.skipTokenOptional(TokenType::comma);
// TODO in future constraints
@ -80,8 +83,66 @@ std::unique_ptr<Node> Parser::parse_create_table() {
return std::make_unique<CreateTableNode>(table_name, cols_def);
}
std::unique_ptr<Node> Parser::parse_select() {
std::vector<Node> exec_code {};
return std::make_unique<Node>(NodeType::not_implemented_yet);
std::unique_ptr<Node> Parser::parse_insert_into_table() {
std::vector<Node> exec_code {};
std::vector<ColNameNode> cols_names {};
std::vector<ColValueNode> cols_values {};
lexer.skipToken(TokenType::keyword_insert);
lexer.skipToken(TokenType::keyword_into);
// table name
if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = lexer.currentToken().token_string;
lexer.nextToken();
// column names
lexer.skipToken(TokenType::open_paren);
do {
if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
cols_names.push_back(lexer.currentToken().token_string);
lexer.nextToken();
lexer.skipTokenOptional(TokenType::comma);
} while (lexer.tokenType() != TokenType::close_paren);
lexer.skipToken(TokenType::close_paren);
lexer.skipToken(TokenType::keyword_values);
// column values
lexer.skipToken(TokenType::open_paren);
do {
cols_values.push_back(lexer.currentToken().token_string);
lexer.nextToken();
lexer.skipTokenOptional(TokenType::comma);
} while (lexer.tokenType() != TokenType::close_paren);
lexer.skipToken(TokenType::close_paren);
return std::make_unique<InsertIntoTableNode>(table_name, cols_names, cols_values);
}
std::unique_ptr<Node> Parser::parse_select_from_table() {
std::vector<Node> where {};
std::vector<ColNameNode> cols_names {};
lexer.skipToken(TokenType::keyword_select);
// TODO support also numbers and expressions
while (lexer.tokenType() != TokenType::keyword_from) {
// TODO add consumeToken() which returns token and advances to next token
cols_names.push_back(lexer.currentToken().token_string);
lexer.nextToken();
lexer.skipTokenOptional(TokenType::comma);
}
lexer.skipToken(TokenType::keyword_from);
std::string table_name = lexer.currentToken().token_string;
lexer.nextToken();
if (lexer.tokenType() == TokenType::keyword_where) {}
// if (lexer.tokenType() == TokenType::keyword_order_by) {}
// if (lexer.tokenType() == TokenType::keyword_offset) {}
// if (lexer.tokenType() == TokenType::keyword_limit) {}
return std::make_unique<SelectFromTableNode>(table_name, cols_names, where);
}

View File

@ -15,7 +15,10 @@ enum class ColumnType {
enum class NodeType {
create_table,
insert_into,
select_from,
column_name,
column_value,
column_def,
not_implemented_yet,
error
@ -27,14 +30,30 @@ struct Node {
Node(const NodeType type) : node_type(type) {}
};
// Syntax-tree node carrying the name of a column referenced by a statement.
struct ColNameNode : Node {
    std::string name;

    // Deliberately not explicit: the parser pushes token strings straight into
    // std::vector<ColNameNode> and relies on this converting constructor.
    // Taken by const reference so the string is copied once, into `name`.
    ColNameNode(const std::string& col_name) :
        Node(NodeType::column_name), name(col_name) {}
};
// Syntax-tree node carrying one value of an insert statement, kept as the
// text of the token it was read from.
struct ColValueNode : Node {
    std::string value;

    // Deliberately not explicit: the parser pushes token strings straight into
    // std::vector<ColValueNode> and relies on this converting constructor.
    // Taken by const reference so the string is copied once, into `value`.
    ColValueNode(const std::string& col_value) :
        Node(NodeType::column_value), value(col_value) {}
};
// TODO add order in row
struct ColDefNode : Node {
std::string name;
ColumnType type;
int length;
int order;
int length;
bool null;
ColDefNode(const std::string col_name, const ColumnType col_type, int col_len, bool nullable) :
Node(NodeType::column_def), name(col_name), type(col_type), length(col_len), null(nullable) {}
ColDefNode(const std::string col_name, const ColumnType col_type, int col_order, int col_len, bool nullable) :
Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len), null(nullable) {}
};
struct CreateTableNode : Node {
@ -45,6 +64,29 @@ struct CreateTableNode : Node {
Node(NodeType::create_table), table_name(name), cols_defs(defs) {}
};
// Syntax-tree node for an insert statement.
struct InsertIntoTableNode : Node {
    std::string table_name;               // target table
    std::vector<ColNameNode> cols_names;  // columns listed in the statement
    std::vector<ColValueNode> cols_values; // values; cols_values[i] belongs to cols_names[i]

    // Arguments are taken by const reference so each one is copied once (into
    // the member) rather than once into the parameter and again into the member.
    InsertIntoTableNode(const std::string& name, const std::vector<ColNameNode>& names, const std::vector<ColValueNode>& values) :
        Node(NodeType::insert_into), table_name(name), cols_names(names), cols_values(values) {}
};
// Syntax-tree node for a select statement.
struct SelectFromTableNode : Node {
    std::string table_name;              // source table
    std::vector<ColNameNode> cols_names; // selected columns, in select-list order
    std::vector<Node> where;             // where clause nodes

    // Arguments are taken by const reference so each one is copied once (into
    // the member) rather than once into the parameter and again into the member.
    SelectFromTableNode(const std::string& name, const std::vector<ColNameNode>& names, const std::vector<Node>& where_clause) :
        Node(NodeType::select_from), table_name(name), cols_names(names), where(where_clause) {}
};
struct UpdateTableNode : Node { };
struct DeleteFromTableNode : Node { };
class Parser {
private:
@ -56,7 +98,8 @@ public:
private:
std::unique_ptr<Node> parse_create_table();
std::unique_ptr<Node> parse_select();
std::unique_ptr<Node> parse_insert_into_table();
std::unique_ptr<Node> parse_select_from_table();
private:
Lexer lexer;

28
table.cpp Normal file
View File

@ -0,0 +1,28 @@
#include "table.h"
// Builds a table with the given name and column definitions and no rows.
Table::Table(const std::string name, const std::vector<ColDefNode> columns)
    : m_name(name), m_col_defs(columns), m_rows() {}
// Returns a copy of the definition of the column with the given name.
//
// col_name - column name, compared for exact equality with ColDefNode::name
// throws   - std::runtime_error when the table has no column of that name
ColDefNode Table::get_column_def(const std::string col_name) {
    // Compare through a const reference instead of copying each definition.
    auto name_cmp = [&col_name](const ColDefNode& cd) { return cd.name == col_name; };

    auto col_def = std::find_if(begin(m_col_defs), end(m_col_defs), name_cmp);
    if (col_def == std::end(m_col_defs)) {
        // Flowing off the end of a non-void function is undefined behaviour,
        // so report the unknown column explicitly.
        throw std::runtime_error("column not found: " + col_name + " in table " + m_name);
    }
    return *col_def;
}
// Writes the table to std::cout: a "** name **" header line, then one line
// per row with every cell followed by a comma.
void Table::print() {
    std::cout << "** " << m_name << " **" << std::endl;

    // Iterate by const reference; the by-value loops copied every row vector
    // and every cell string just to print them.
    for (const auto& row : m_rows) {
        for (const auto& col : row) {
            std::cout << col << ",";
        }
        std::cout << std::endl;
    }
}

21
table.h Normal file
View File

@ -0,0 +1,21 @@
#pragma once
#include "parser.h"

#include <stdexcept>
#include <string>
#include <vector>
// TODO make it a class
struct Table {
// public:
Table(const std::string name, const std::vector<ColDefNode> columns);
ColDefNode get_column_def(const std::string col_name);
int columns_count() { return m_col_defs.size(); };
void print();
// private:
std::string m_name;
std::vector<ColDefNode> m_col_defs;
std::vector<std::vector<std::string>> m_rows;
};