diff --git a/.gitignore b/.gitignore index 6be3b8f..b88c28b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ build cmake-build-debug +.DS_Store diff --git a/Readme.md b/Readme.md index 3da7ab6..6bacc45 100644 --- a/Readme.md +++ b/Readme.md @@ -1,11 +1,11 @@ ### TODO -- save table command +- drop table command - move csv generation from usql(save_table) to table class -- add exceptions +- add exceptions and rename it to UsqlException - class members should have prefix m_ +- maybe to create iterator on table - add pipe | token -- add to_date a to_string functions -- add min and max functions, eg aggregate functions +- add count min and max functions, eg aggregate functions - add logging -- add const wherever should be \ No newline at end of file +- add const wherever should be diff --git a/exception.cpp b/exception.cpp index 2f6f388..cee6b18 100644 --- a/exception.cpp +++ b/exception.cpp @@ -2,7 +2,7 @@ namespace usql { -Exception::Exception(const std::string &msg) { +Exception::Exception(const std::string &msg) : std::runtime_error(msg) { cause = msg; } diff --git a/exception.h b/exception.h index 312f8d3..aab3ec3 100644 --- a/exception.h +++ b/exception.h @@ -6,7 +6,7 @@ namespace usql { -class Exception : public std::exception { +class Exception : public std::runtime_error { private: std::string cause; diff --git a/lexer.cpp b/lexer.cpp index af39c4b..e86787e 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -13,12 +13,12 @@ namespace usql { Lexer::Lexer() { k_words_regex = - "[0-9]+\\.[0-9]+|[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" + "[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" ",;:\?]|==|>=|<=|~=|>|<|=|;|~|\\||or|and|\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"; - k_int_regex = "[0-9]+"; - k_int_underscored_regex = "[0-9][0-9_]+[0-9]"; - k_double_regex = "[0-9]+\\.[0-9]+"; - k_identifier_regex = "[A-Za-z]+[A-Za-z0-9_#]*"; + k_int_regex = "[-+]?[0-9]+"; + k_int_underscored_regex = "[-+]?[0-9][0-9_]+[0-9]"; + k_double_regex = "[-+]?[0-9]+\\.[0-9]+"; + k_identifier_regex = "[A-Za-z]+[A-Za-z0-9_#]*"; } void Lexer::parse(const std::string &code) { @@ -208,7 +208,7 @@ namespace usql { return TokenType::keyword_null; if (token == "integer") - return TokenType::keyword_int; + return TokenType::keyword_integer; if (token == "float") return TokenType::keyword_float; @@ -374,7 +374,7 @@ namespace usql { case TokenType::keyword_null: txt = "null"; break; - case TokenType::keyword_int: + case TokenType::keyword_integer: txt = "integer"; break; case TokenType::keyword_float: diff --git a/lexer.h b/lexer.h index fe6e5f7..30fdad9 100644 --- a/lexer.h +++ b/lexer.h @@ -37,7 +37,7 @@ namespace usql { keyword_copy, keyword_not, keyword_null, - keyword_int, + keyword_integer, keyword_float, keyword_varchar, int_number, diff --git a/main.cpp b/main.cpp index 4f080f7..68f9bbd 100644 --- a/main.cpp +++ b/main.cpp @@ -11,6 +11,8 @@ int main(int argc, char *argv[]) { std::vector sql_commands{ "create table a (i integer not null, s varchar(64), f float null)", "insert into a (i, s) values(1, upper('one'))", + "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'", +// "update table a set i = null", "insert into a (i, s) values(2, 'two')", "insert into a (i, s) values(3, 'two')", "insert into a (i, s) values(4, lower('FOUR'))", @@ -35,7 +37,10 @@ int main(int argc, char *argv[]) { "create table x as select i, s, f from a where i < 300", "select i, s, f from x where i < 300", "select i, s, f from a where i > 300", - "select i, to_string(i, '%d.%m.%Y'), s, f from a where i > 300" + "select i, to_string(i, '%d.%m.%Y'), s, f from a where i > 300", + "create table prices (datetime integer, symbol varchar(8), prev_close float, open float, price float, change float, change_prct varchar(16))", + "insert into prices (datetime, symbol, prev_close, open, price, change, change_prct) values (1626979443, 'MPC', 54.08, 53.82, 53.63, -0.832101, '-0.83 %')", + "select to_string(datetime, '%d.%m.%Y %H:%M:%S'), symbol, prev_close, open, price, change, change_prct from prices" }; diff --git a/parser.cpp b/parser.cpp index 6d0ac7c..82bb3b7 100644 --- a/parser.cpp +++ b/parser.cpp @@ -65,11 +65,13 @@ namespace usql { bool column_nullable{true}; // column name - if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ } + if (m_lexer.tokenType() != TokenType::identifier) { + throw Exception("syntax error, expected identifier"); + } column_name = m_lexer.consumeCurrentToken().token_string; // column type and optionally len - if (m_lexer.tokenType() == TokenType::keyword_int) { + if (m_lexer.tokenType() == TokenType::keyword_integer) { column_type = ColumnType::integer_type; m_lexer.nextToken(); } else if (m_lexer.tokenType() == TokenType::keyword_float) { @@ -81,9 +83,13 @@ namespace usql { m_lexer.skipToken(TokenType::open_paren); if (m_lexer.tokenType() == TokenType::int_number) { column_len = std::stoi(m_lexer.consumeCurrentToken().token_string); - } else { /* TODO handle error */ } + } else { + throw Exception("syntax error, expected int number"); + } m_lexer.skipToken(TokenType::close_paren); - } else { /* TODO handle error */ } + } else { + throw Exception("syntax error, column type expected"); + } if (m_lexer.tokenType() == TokenType::keyword_not) { m_lexer.nextToken(); @@ -169,7 +175,7 @@ std::unique_ptr Parser::parse_value() { return std::make_unique(name); } - throw Exception("Syntax error"); + throw Exception("Syntax error, current token: " + m_lexer.currentToken().token_string); } std::unique_ptr Parser::parse_select_from_table() { @@ -326,6 +332,8 @@ std::unique_ptr Parser::parse_select_from_table() { return std::make_unique(tokenString); case TokenType::identifier: return std::make_unique(tokenString); + case TokenType::keyword_null: + return std::make_unique(); default:; throw Exception("Unknown operand node"); } diff --git a/parser.h b/parser.h index 5cd0cae..7682b48 100644 --- a/parser.h +++ b/parser.h @@ -1,7 +1,7 @@ #pragma once #include "lexer.h" -#include +#include "exception.h" #include #include @@ -16,6 +16,7 @@ namespace usql { enum class NodeType { true_node, + null_value, int_value, float_value, string_value, @@ -65,7 +66,7 @@ namespace usql { int length; bool null; - ColDefNode(const std::string col_name, const ColumnType col_type, int col_order, int col_len, bool nullable) : + ColDefNode(const std::string col_name, ColumnType col_type, int col_order, int col_len, bool nullable) : Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len), null(nullable) {} }; @@ -93,6 +94,17 @@ namespace usql { virtual ~ValueNode() {}; }; + struct NullValueNode : ValueNode { + + NullValueNode() : ValueNode(NodeType::null_value) {} + + bool isNull() override { return true; } + + long getIntValue() override { throw Exception("not supported on null value"); }; + double getDoubleValue() override { throw Exception("not supported on null value"); }; + std::string getStringValue() override { throw Exception("not supported on null value"); }; + }; + struct IntValueNode : ValueNode { long value; diff --git a/row.cpp b/row.cpp index 61c9537..9e77b33 100644 --- a/row.cpp +++ b/row.cpp @@ -6,7 +6,7 @@ namespace usql { Row::Row(int cols_count) { m_columns.reserve(cols_count); for (int i = 0; i < cols_count; i++) { - m_columns.push_back(std::make_unique()); + m_columns.push_back(std::make_unique()); } } diff --git a/row.h b/row.h index be8d0ff..a94d797 100644 --- a/row.h +++ b/row.h @@ -82,7 +82,7 @@ namespace usql { return *m_columns[i]; } - ColValue * ithColumn(int i) const { + ColValue * ith_column(int i) const { return m_columns[i].get(); } diff --git a/table.cpp b/table.cpp index 9370bd1..2e4d3b4 100644 --- a/table.cpp +++ b/table.cpp @@ -20,10 +20,34 @@ ColDefNode Table::get_column_def(const std::string &col_name) { } -Row Table::createEmptyRow() { +Row Table::create_empty_row() { return Row(columns_count()); } +std::string Table::csv_string() { + // header + std::string out_string; + for(int i = 0; i < m_col_defs.size(); i++) { + if (i > 0) out_string += ","; + out_string += m_col_defs[i].name; + } + + // rows + for (auto it = m_rows.begin(); it != m_rows.end(); ++it) { + std::string csv_line{"\n"}; + for(int i = 0; i < m_col_defs.size(); i++) { + if (i > 0) csv_line += ","; + + auto col = it->ith_column(i); + if (!col->isNull()) { + csv_line += col->getStringValue(); // TODO handle enclosing commas etc + } + } + out_string += csv_line; + } + + return out_string; +} void Table::print() { std::cout << "** " << m_name << " **" << std::endl; @@ -36,38 +60,63 @@ Table::Table(const Table &other) { m_name = other.m_name; m_col_defs = other.m_col_defs; for(const Row& orig_row : other.m_rows) { - addCopyOfRow(orig_row); + add_copy_of_row(orig_row); } } -void Table::addRow(const Row &row) { - // TODO validate for not null values - // todo validate for length etc +void Table::add_row(const Row &row) { + validate_row(row); m_rows.push_back(row); } -void Table::addCopyOfRow(const Row &row) { - // TODO validate for not null values - // todo validate for length etc - - Row new_row = createEmptyRow(); +void Table::add_copy_of_row(const Row &row) { + Row new_row = create_empty_row(); for(int i = 0; i < m_col_defs.size(); i++) { - ColValue *ct = row.ithColumn(i); + ColValue *ct = row.ith_column(i); if (ct->isNull()) { new_row.setColumnNull(i); } else { if (m_col_defs[i].type == ColumnType::integer_type) { - new_row.setColumnValue(i, row.ithColumn(i)->getIntValue()); + new_row.setColumnValue(i, row.ith_column(i)->getIntValue()); } else if (m_col_defs[i].type == ColumnType::float_type) { - new_row.setColumnValue(i, row.ithColumn(i)->getDoubleValue()); + new_row.setColumnValue(i, row.ith_column(i)->getDoubleValue()); } else if (m_col_defs[i].type == ColumnType::varchar_type) { - new_row.setColumnValue(i, row.ithColumn(i)->getStringValue()); + new_row.setColumnValue(i, row.ith_column(i)->getStringValue()); } } } - m_rows.push_back(row); + + validate_row(new_row); + m_rows.push_back(new_row); +} + +void Table::validate_column(const ColDefNode *col_def, ValueNode *col_val) { + if (col_def->null == false && col_val->isNull()) { + throw Exception("Column " + col_def->name + " cannot be null"); + } + if (col_def->type == ColumnType::varchar_type && !col_val->isNull() && col_val->getStringValue().size() > col_def->length) { + throw Exception("Column value of " + col_def->name + " is too long (" + col_val->getStringValue() + ")"); + } +} + +void Table::validate_column(const ColDefNode *col_def, ColValue *col_val) { + if (col_def->null == false && col_val->isNull()) { + throw Exception("Column " + col_def->name + " cannot be null"); + } + if (col_def->type == ColumnType::varchar_type && !col_val->isNull() && col_val->getStringValue().size() > col_def->length) { + throw Exception("Column value of " + col_def->name + " is too long (" + col_val->getStringValue() + ")"); + } +} + +void Table::validate_row(const Row &row) { + for(int i = 0; i < m_col_defs.size(); i++) { + ColDefNode col_def = m_col_defs[i]; + ColValue *col_val = row.ith_column(i); + + validate_column(&col_def, col_val); + } } } // namespace \ No newline at end of file diff --git a/table.h b/table.h index 2bfcb23..48f18f7 100644 --- a/table.h +++ b/table.h @@ -4,6 +4,7 @@ #include "row.h" #include +#include namespace usql { @@ -16,15 +17,21 @@ namespace usql { int columns_count() const { return m_col_defs.size(); }; - Row createEmptyRow(); // TODO this means unnecessary copying - void addRow(const Row &row); - void addCopyOfRow(const Row &row); + Row create_empty_row(); // TODO this means unnecessary copying + void add_row(const Row &row); + void add_copy_of_row(const Row &row); + + void validate_column(const ColDefNode *col_def, ValueNode *col_val); + void validate_column(const ColDefNode *col_def, ColValue *col_val); + void validate_row(const Row &row); + + std::string csv_string(); void print(); std::string m_name; std::vector m_col_defs; - std::vector m_rows; + std::list m_rows; }; } \ No newline at end of file diff --git a/usql.cpp b/usql.cpp index 2303da0..26b6489 100644 --- a/usql.cpp +++ b/usql.cpp @@ -9,8 +9,14 @@ namespace usql { std::unique_ptr USql::execute(const std::string &command) { - std::unique_ptr node = m_parser.parse(command); - return execute(*node); + try { + std::unique_ptr node = m_parser.parse(command); + return execute(*node); + + } catch (std::exception &e) { + return create_stmt_result_table(-1, e.what()); + } + } std::unique_ptr
USql::execute(Node &node) { @@ -39,7 +45,8 @@ std::unique_ptr
USql::execute(Node &node) { std::unique_ptr
USql::execute_create_table(CreateTableNode &node) { - // TODO check table does not exists + check_table_not_exists(node.table_name); + Table table{node.table_name, node.cols_defs}; m_tables.push_back(table); @@ -48,7 +55,7 @@ std::unique_ptr
USql::execute_create_table(CreateTableNode &node) { std::unique_ptr
USql::execute_create_table_as_table(CreateTableAsSelectNode &node) { - // TODO check table does not exists + check_table_not_exists(node.table_name); auto select = execute_select((SelectFromTableNode &) *node.select_table); @@ -60,7 +67,7 @@ std::unique_ptr
USql::execute_create_table_as_table(CreateTableAsSelectNo // must be here, if rows are put into new_table, they are lost during m_tables.push_table Table *table = find_table(node.table_name); for( Row& orig_row : select->m_rows) { - table->addCopyOfRow(orig_row); + table->add_copy_of_row(orig_row); } select.release(); // is it correct? hoping not to release select table here and then when releasing CreateTableAsSelectNode @@ -76,7 +83,7 @@ std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node Table *table_def = find_table(node.table_name); // prepare empty new_row - Row new_row = table_def->createEmptyRow(); + Row new_row = table_def->create_empty_row(); // copy values for (size_t i = 0; i < node.cols_names.size(); i++) { @@ -87,7 +94,7 @@ std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node } // append new_row - table_def->addRow(new_row); + table_def->add_row(new_row); return create_stmt_result_table(0, "insert succeeded"); } @@ -103,7 +110,8 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { std::vector source_table_col_index{}; for (int i = 0; i < node.cols_names->size(); i++) { - auto [ src_tbl_col_index, rst_tbl_col_def ] = getColumnDefinition(table, &node.cols_names->operator[](i), i); + auto [ src_tbl_col_index, rst_tbl_col_def ] = get_column_definition(table, + &node.cols_names->operator[](i), i); source_table_col_index.push_back(src_tbl_col_index); result_tbl_col_defs.push_back(rst_tbl_col_def); @@ -116,7 +124,7 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { // eval where for row if (evalWhere(node.where.get(), table, *row)) { // prepare empty row - Row new_row = result->createEmptyRow(); + Row new_row = result->create_empty_row(); // copy column values for (auto idx = 0; idx < result->columns_count(); idx++) { @@ -128,7 +136,7 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { new_row.setColumnValue(&result_tbl_col_defs[idx], col_value); } else { - ColValue *col_value = row->ithColumn(row_col_index); + ColValue *col_value = row->ith_column(row_col_index); new_row.setColumnValue(&result_tbl_col_defs[idx], col_value); } } @@ -141,7 +149,7 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { return std::move(result); } -std::tuple USql::getColumnDefinition(Table *table, SelectColNode *select_col_node, int col_order ) { +std::tuple USql::get_column_definition(Table *table, SelectColNode *select_col_node, int col_order ) { std::string new_col_name = select_col_node->name; if (select_col_node->value->node_type == NodeType::column_name) { @@ -173,7 +181,6 @@ std::unique_ptr
USql::execute_delete(DeleteFromTableNode &node) { auto it = table->m_rows.begin(); for (; it != table->m_rows.end();) { if (evalWhere(node.where.get(), table, *it)) { - // TODO this can be really expensive operation it = table->m_rows.erase(it); } else { ++it; @@ -194,24 +201,15 @@ std::unique_ptr
USql::execute_update(UpdateTableNode &node) { if (evalWhere(node.where.get(), table, *row)) { int i = 0; for (const auto& col : node.cols_names) { - // TODO cache it like in select - ColDefNode cdef = table->get_column_def(col.name); + ColDefNode col_def = table->get_column_def(col.name); // TODO cache it like in select + std::unique_ptr new_val = evalArithmeticOperator(col_def.type, + static_cast(*node.values[i]), table, *row); - std::unique_ptr new_val = evalArithmeticOperator(cdef.type, - static_cast(*node.values[i]), - table, *row); - - if (cdef.type == ColumnType::integer_type) { - row->setColumnValue(cdef.order, new_val->getIntValue()); - } else if (cdef.type == ColumnType::float_type) { - row->setColumnValue(cdef.order, new_val->getDoubleValue()); - } else if (cdef.type == ColumnType::varchar_type) { - row->setColumnValue(cdef.order, new_val->getStringValue()); - } else { - throw Exception("Implement me!"); - } + table->validate_column(&col_def, new_val.get()); + row->setColumnValue(&col_def, new_val.get()); i++; } + // TODO tady je problem, ze kdyz to zfajluje na jednom radku ostatni by se nemely provest } } @@ -237,7 +235,7 @@ std::unique_ptr
USql::execute_load(LoadIntoTableNode &node) { std::vector csv_line = *it; // prepare empty new_row - Row new_row = table_def->createEmptyRow(); + Row new_row = table_def->create_empty_row(); // copy values for (size_t i = 0; i < table_def->columns_count(); i++) { @@ -254,7 +252,7 @@ std::unique_ptr
USql::execute_load(LoadIntoTableNode &node) { } // append new_row - table_def->addRow(new_row); + table_def->add_row(new_row); } return create_stmt_result_table(0, "load succeeded"); @@ -265,31 +263,11 @@ std::unique_ptr
USql::execute_save(SaveTableNode &node) { // find source table Table *table_def = find_table(node.table_name); - // header - std::string out_string; - for(int i = 0; i < table_def->m_col_defs.size(); i++) { - if (i > 0) out_string += ","; - out_string += table_def->m_col_defs[i].name; - } - - // rows - for (auto it = table_def->m_rows.begin() + 1; it != table_def->m_rows.end(); ++it) { - std::string csv_line; - for(int i = 0; i < table_def->m_col_defs.size(); i++) { - if (i > 0) csv_line += ","; - - auto col = it->ithColumn(i); - if (!col->isNull()) { - csv_line += col->getStringValue(); // TODO handle enclosing commas etc - } - } - out_string += "\n"; - out_string += csv_line; - } + std::string csv_string = table_def->csv_string(); // save data std::ofstream file(node.filename); - file << out_string; + file << csv_string; file.close(); return create_stmt_result_table(0, "save succeeded"); @@ -358,6 +336,8 @@ std::unique_ptr USql::evalValueNode(Table *table, Row &row, Node *nod } else if (node->node_type == NodeType::function) { return evalFunctionValueNode(table, row, node); + } else if (node->node_type == NodeType::null_value) { + return std::make_unique(); } throw Exception("unsupported node type"); } @@ -366,16 +346,16 @@ std::unique_ptr USql::evalValueNode(Table *table, Row &row, Node *nod std::unique_ptr USql::evalDatabaseValueNode(Table *table, Row &row, Node *node) { auto *dvl = static_cast(node); ColDefNode col_def = table->get_column_def( dvl->col_name); // TODO optimize it to just get this def once - auto db_value = row.ithColumn(col_def.order); + auto db_value = row.ith_column(col_def.order); if (col_def.type == ColumnType::integer_type) { - return std::__1::make_unique(db_value->getIntValue()); + return std::make_unique(db_value->getIntValue()); } if (col_def.type == ColumnType::float_type) { - return std::__1::make_unique(db_value->getDoubleValue()); + return std::make_unique(db_value->getDoubleValue()); } if (col_def.type == ColumnType::varchar_type) { - return std::__1::make_unique(db_value->getStringValue()); + return std::make_unique(db_value->getStringValue()); } throw Exception("unknown database value type"); } @@ -471,6 +451,7 @@ std::unique_ptr USql::evalArithmeticOperator(ColumnType outType, Arit default: throw Exception("implement me!!"); } + } else if (outType == ColumnType::integer_type) { long l = ((ValueNode *) left.get())->getIntValue(); long r = ((ValueNode *) right.get())->getIntValue(); @@ -509,10 +490,10 @@ std::unique_ptr
USql::create_stmt_result_table(long code, const std::stri auto table_def = std::make_unique
("result", result_tbl_col_defs); - Row new_row = table_def->createEmptyRow(); + Row new_row = table_def->create_empty_row(); new_row.setColumnValue(0, code); new_row.setColumnValue(1, text); - table_def->addRow(new_row); + table_def->add_row(new_row); return std::move(table_def); } @@ -529,4 +510,12 @@ Table *USql::find_table(const std::string &name) { } } +void USql::check_table_not_exists(const std::string &name) { + auto name_cmp = [name](const Table& t) { return t.m_name == name; }; + auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp); + if (table_def != std::end(m_tables)) { + throw Exception("table already exists"); + } +} + } // namespace \ No newline at end of file diff --git a/usql.h b/usql.h index 50ffaf2..fd1feba 100644 --- a/usql.h +++ b/usql.h @@ -44,9 +44,10 @@ private: static std::unique_ptr
create_stmt_result_table(long code, const std::string& text); - static std::tuple getColumnDefinition(Table *table, SelectColNode *select_col_node, int col_order) ; + static std::tuple get_column_definition(Table *table, SelectColNode *select_col_node, int col_order) ; Table *find_table(const std::string &name); + void check_table_not_exists(const std::string &name); private: Parser m_parser;