diff --git a/Readme.md b/Readme.md index 1d34a6b..7cd3f99 100644 --- a/Readme.md +++ b/Readme.md @@ -1,7 +1,9 @@ ### TODO -- support for * - support for order by, offset, limit +- support for btree indexes +- support for parenthesis +- support for * - add pipe | token - add count min and max functions, eg aggregate functions - maybe to create iterator on table diff --git a/lexer.cpp b/lexer.cpp index adac66c..2ef84e9 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -119,115 +119,86 @@ namespace usql { // TODO, FIXME 'one is evaluated as identifier if (token == ";") return TokenType::semicolon; - if (token == "+") return TokenType::plus; - if (token == "-") return TokenType::minus; - if (token == "*") return TokenType::multiply; - if (token == "/") return TokenType::divide; - if (token == "(") return TokenType::open_paren; - if (token == ")") return TokenType::close_paren; - if (token == "=") return TokenType::equal; - if (token == "!=") return TokenType::not_equal; - if (token == ">") return TokenType::greater; - if (token == ">=") return TokenType::greater_equal; - if (token == "<") return TokenType::lesser; - if (token == "<=") return TokenType::lesser_equal; - if (token == "as") return TokenType::keyword_as; - if (token == "create") return TokenType::keyword_create; - if (token == "drop") return TokenType::keyword_drop; - if (token == "where") return TokenType::keyword_where; - + if (token == "order") + return TokenType::keyword_order; + if (token == "by") + return TokenType::keyword_by; + if (token == "asc") + return TokenType::keyword_asc; + if (token == "desc") + return TokenType::keyword_desc; if (token == "from") return TokenType::keyword_from; - if (token == "delete") return TokenType::keyword_delete; - if (token == "table") return TokenType::keyword_table; - if (token == "insert") return TokenType::keyword_insert; - if (token == "into") return TokenType::keyword_into; - if (token == "values") return TokenType::keyword_values; - if (token == "select") return TokenType::keyword_select; - if (token == "set") return TokenType::keyword_set; - if (token == "copy") return TokenType::keyword_copy; - if (token == "update") return TokenType::keyword_update; - if (token == "load") return TokenType::keyword_load; - if (token == "save") return TokenType::keyword_save; - if (token == "not") return TokenType::keyword_not; - if (token == "null") return TokenType::keyword_null; - if (token == "integer") return TokenType::keyword_integer; - if (token == "float") return TokenType::keyword_float; - if (token == "varchar") return TokenType::keyword_varchar; - if (token == "or") return TokenType::logical_or; - if (token == "and") return TokenType::logical_and; - if (token == ",") return TokenType::comma; - if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline; @@ -347,6 +318,18 @@ namespace usql { case TokenType::keyword_where: txt = "where"; break; + case TokenType::keyword_order: + txt = "order"; + break; + case TokenType::keyword_by: + txt = "by"; + break; + case TokenType::keyword_asc: + txt = "asc"; + break; + case TokenType::keyword_desc: + txt = "desc"; + break; case TokenType::keyword_table: txt = "table"; break; diff --git a/lexer.h b/lexer.h index 2fbabb9..2dd517a 100644 --- a/lexer.h +++ b/lexer.h @@ -25,6 +25,10 @@ namespace usql { keyword_drop, keyword_table, keyword_where, + keyword_order, + keyword_by, + keyword_asc, + keyword_desc, keyword_delete, keyword_update, keyword_load, diff --git a/main.cpp b/main.cpp index c3db0ad..14ac02c 100644 --- a/main.cpp +++ b/main.cpp @@ -9,7 +9,7 @@ int main(int argc, char *argv[]) { std::vector sql_commands{ "create table a (i integer not null, s varchar(64), f float null)", "insert into a (i, s) values(1, upper('one'))", - "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'", +// "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'", // "update table a set i = null", "insert into a (i, s) values(2, 'two')", "insert into a (i, s) values(3, 'two')", @@ -17,30 +17,30 @@ int main(int argc, char *argv[]) { "insert into a (i, s) values(5, 'five')", "insert into a (i, s) values(to_date('20.12.1973', '%d.%m.%Y'), 'six')", "save table a into '/tmp/a.csv'", - "select i, s from a where i > 2", - "select i, s from a where i = 1", - "select i, s from a where s = 'two'", - "select i, s from a where i <= 3 and s = 'one'", - "select i, s from a where i > 0", - "delete from a where i = 4", - "select i, s from a where i > 0", - "update a set f = 9.99 where i = 3", - "select i, s, f from a where i = 3", - "update a set s = 'three', f = f + 0.01 where i = 3", - "select i, s, f from a where i = 3", - "create table data (ticker varchar(8), price float null)", - "load data from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/data.csv')", - "select ticker, price from data", - "select i, s, f from a where i < 300", - "create table x as select i, s, f from a where i < 300", - "select i, s, f from x where i < 300", - "drop table x", - "select i, s, f from a where i > 300", - "select i, to_string(i, '%d.%m.%Y'), s, f from a where i > 300", - "create table prices (datetime integer, symbol varchar(8), prev_close float, open float, price float, change float, change_prct varchar(16))", - "load prices from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/prices.csv'", - "insert into prices (datetime, symbol, prev_close, open, price, change, change_prct) values (1626979443, 'MPC', 54.08, 53.82, 53.63, -0.832101, '-0.83 %')", - "select to_string(datetime, '%d.%m.%Y %H:%M:%S'), symbol, prev_close, open, price, change, change_prct from prices where symbol = 'SYF'" + "select i, s from a where i > 2 order by 1 desc" +// "select i, s from a where i = 1", +// "select i, s from a where s = 'two'", +// "select i, s from a where i <= 3 and s = 'one'", +// "select i, s from a where i > 0", +// "delete from a where i = 4", +// "select i, s from a where i > 0", +// "update a set f = 9.99 where i = 3", +// "select i, s, f from a where i = 3", +// "update a set s = 'three', f = f + 0.01 where i = 3", +// "select i, s, f from a where i = 3", +// "create table data (ticker varchar(8), price float null)", +// "load data from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/data.csv')", +// "select ticker, price from data", +// "select i, s, f from a where i < 300", +// "create table x as select i, s, f from a where i < 300", +// "select i, s, f from x where i < 300", +// "drop table x", +// "select i, s, f from a where i > 300", +// "select i, to_string(i, '%d.%m.%Y'), s, f from a where i > 300", +// "create table prices (datetime integer, symbol varchar(8), prev_close float, open float, price float, change float, change_prct varchar(16))", +// "load prices from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/prices.csv'", +// "insert into prices (datetime, symbol, prev_close, open, price, change, change_prct) values (1626979443, 'MPC', 54.08, 53.82, 53.63, -0.832101, '-0.83 %')", +// "select to_string(datetime, '%d.%m.%Y %H:%M:%S'), symbol, prev_close, open, price, change, change_prct from prices where symbol = 'SYF'" }; diff --git a/parser.cpp b/parser.cpp index 2f926dc..3eb45b8 100644 --- a/parser.cpp +++ b/parser.cpp @@ -176,7 +176,7 @@ namespace usql { return std::make_unique(table_name, column_names, std::move(column_values)); } -std::unique_ptr Parser::parse_value() { + std::unique_ptr Parser::parse_value() { if (m_lexer.tokenType() == TokenType::int_number) { return std::make_unique(std::stoi(m_lexer.consumeCurrentToken().token_string)); } @@ -205,9 +205,9 @@ std::unique_ptr Parser::parse_value() { } throw Exception("Syntax error, current token: " + m_lexer.currentToken().token_string); -} + } -std::unique_ptr Parser::parse_select_from_table() { + std::unique_ptr Parser::parse_select_from_table() { auto cols = std::make_unique>(); m_lexer.skipToken(TokenType::keyword_select); @@ -235,12 +235,13 @@ std::unique_ptr Parser::parse_select_from_table() { std::unique_ptr where_node = parse_where_clause(); -// if (m_lexer.tokenType() == TokenType::keyword_order_by) {} + std::vector orderby_node = parse_orderby_clause(); + // if (m_lexer.tokenType() == TokenType::keyword_offset) {} // if (m_lexer.tokenType() == TokenType::keyword_limit) {} - return std::make_unique(table_name, std::move(cols), std::move(where_node)); -} + return std::make_unique(table_name, std::move(cols), std::move(where_node), orderby_node); + } std::unique_ptr Parser::parse_delete_from_table() { m_lexer.skipToken(TokenType::keyword_delete); @@ -291,26 +292,62 @@ std::unique_ptr Parser::parse_select_from_table() { } std::unique_ptr Parser::parse_where_clause() { - // TODO add support for multiple filters - // TODO add support for parenthesis + if (m_lexer.tokenType() != TokenType::keyword_where) { + return std::make_unique(); + } - if (m_lexer.tokenType() != TokenType::keyword_where) { - return std::make_unique(); - } + std::unique_ptr node; + m_lexer.skipToken(TokenType::keyword_where); + do { + node = parse_relational_expression(); - std::unique_ptr node; - m_lexer.skipToken(TokenType::keyword_where); - do { - node = parse_relational_expression(); + if (Lexer::isLogicalOperator(m_lexer.tokenType())) { + auto operation = parse_logical_operator(); + std::unique_ptr node2 = parse_relational_expression(); + node = std::make_unique(operation, std::move(node), std::move(node2)); + } + } while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order); - if (Lexer::isLogicalOperator(m_lexer.tokenType())) { - auto operation = parse_logical_operator(); - std::unique_ptr node2 = parse_relational_expression(); - node = std::make_unique(operation, std::move(node), std::move(node2)); - } - } while (m_lexer.tokenType() != TokenType::eof); // until whole where clause parsed + return node; + } - return node; + + std::vector Parser::parse_orderby_clause() { + std::vector order_cols; + + if (m_lexer.tokenType() == TokenType::keyword_order) { + m_lexer.skipToken(TokenType::keyword_order); + m_lexer.skipToken(TokenType::keyword_by); + + do { + int col_index = -1; + bool asc = true; + + auto token_type = m_lexer.tokenType(); + std::string tokenString = m_lexer.consumeCurrentToken().token_string; + switch (token_type) { + case TokenType::int_number: + col_index = std::stoi(tokenString); + break; + default: + throw Exception("column index alloved in order by clause at this moment"); + } + + if (m_lexer.tokenType() == TokenType::keyword_asc) { + m_lexer.skipToken(TokenType::keyword_asc); + } else if (m_lexer.tokenType() == TokenType::keyword_desc) { + m_lexer.skipToken(TokenType::keyword_desc); + asc = false; + } + + order_cols.push_back(ColOrderNode{col_index, asc}); + + m_lexer.skipTokenOptional(TokenType::comma); + + } while (m_lexer.tokenType() != TokenType::eof); // && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit); + } + + return order_cols; } std::unique_ptr Parser::parse_relational_expression() { @@ -336,7 +373,7 @@ std::unique_ptr Parser::parse_select_from_table() { return std::make_unique(tokenString); case TokenType::keyword_null: return std::make_unique(); - default:; + default: throw Exception("Unknown operand node"); } } diff --git a/parser.h b/parser.h index ead5e65..5996ef4 100644 --- a/parser.h +++ b/parser.h @@ -34,6 +34,7 @@ namespace usql { save_table, drop_table, column_name, + column_order, column_value, function, column_def, @@ -52,6 +53,15 @@ namespace usql { ColNameNode(const std::string col_name) : Node(NodeType::column_name), name(col_name) {} }; + struct ColOrderNode : Node { + std::string col_name; + int col_index; + bool ascending; + + ColOrderNode(const std::string name, bool asc) : Node(NodeType::column_order), col_name(name), col_index(-1), ascending(asc) {} + ColOrderNode(int index, bool asc) : Node(NodeType::column_name), col_name(""), col_index(index), ascending(asc) {} + }; + struct SelectColNode : Node { std::unique_ptr value; std::string name; @@ -216,9 +226,10 @@ namespace usql { std::string table_name; std::unique_ptr> cols_names; std::unique_ptr where; + std::vector order_by; - SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause) : - Node(NodeType::select_from), table_name(name), cols_names(std::move(names)), where(std::move(where_clause)) {} + SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause, std::vector orderby) : + Node(NodeType::select_from), table_name(name), cols_names(std::move(names)), where(std::move(where_clause)), order_by(orderby) {} }; struct CreateTableAsSelectNode : Node { @@ -292,6 +303,8 @@ namespace usql { std::unique_ptr parse_update_table(); std::unique_ptr parse_where_clause(); + std::vector parse_orderby_clause(); + std::unique_ptr parse_operand_node(); std::unique_ptr parse_value(); RelationalOperatorType parse_relational_operator(); diff --git a/table.cpp b/table.cpp index db175d1..fcc4bdd 100644 --- a/table.cpp +++ b/table.cpp @@ -21,6 +21,13 @@ ColDefNode Table::get_column_def(const std::string &col_name) { } } +ColDefNode Table::get_column_def(int col_index) { + if (col_index >= 0 && col_index < columns_count()) { + return m_col_defs[col_index]; + } else { + throw Exception("column with this index does not exists (" + std::to_string(col_index) + ")"); + } +} Row Table::create_empty_row() { return Row(columns_count()); diff --git a/table.h b/table.h index b7b20ff..cb16022 100644 --- a/table.h +++ b/table.h @@ -14,6 +14,7 @@ namespace usql { Table(const std::string name, const std::vector columns); ColDefNode get_column_def(const std::string &col_name); + ColDefNode get_column_def(int col_index); int columns_count() const { return m_col_defs.size(); }; diff --git a/usql.cpp b/usql.cpp index 7258ae9..f66c0da 100644 --- a/usql.cpp +++ b/usql.cpp @@ -155,8 +155,7 @@ std::unique_ptr USql::execute_select(SelectFromTableNode &node) { std::vector source_table_col_index{}; for (int i = 0; i < node.cols_names->size(); i++) { - auto [ src_tbl_col_index, rst_tbl_col_def ] = get_column_definition(table, - &node.cols_names->operator[](i), i); + auto [ src_tbl_col_index, rst_tbl_col_def ] = get_column_definition(table, &node.cols_names->operator[](i), i); source_table_col_index.push_back(src_tbl_col_index); result_tbl_col_defs.push_back(rst_tbl_col_def); @@ -176,8 +175,7 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { auto row_col_index = source_table_col_index[idx]; if (row_col_index == -1) { // TODO introduce constant here - auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[]( - idx).value.get()); + auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[](idx).value.get()); ValueNode *col_value = evaluated_value.get(); new_row.setColumnValue(&result_tbl_col_defs[idx], col_value); @@ -192,9 +190,51 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { } } + // order by + execute_order_by(node, table, result); + + return std::move(result); } +void USql::execute_order_by(SelectFromTableNode &node, Table *table, std::__unique_if
::__unique_single &result) const { + if (node.order_by.size() == 0) return; + + auto compare_rows = [&node, &table, this](const Row &a, const Row &b) { + for(auto order_by_col_def : node.order_by) { + ColDefNode col_def = table->get_column_def(order_by_col_def.col_index - 1); // TODO validate index + ColValue *a_val = a.ith_column(col_def.order); + ColValue *b_val = b.ith_column(col_def.order); + + if (a_val->isNull() && b_val->isNull()) return true; // both is null so a goes to end + if (!a_val->isNull() && b_val->isNull()) return true; // b is null so goes to end + if (a_val->isNull() && !b_val->isNull()) return false; // a is null so goes to end + + int compare = compare_col_values(col_def, a_val, b_val); + if (compare < 0) return order_by_col_def.ascending ? true : false; + if (compare > 0) return order_by_col_def.ascending ? false : true; + } + return false; + }; + + result->m_rows.sort(compare_rows); +} + +int USql::compare_col_values(const ColDefNode &col_def, ColValue *a_val, ColValue *b_val) const { + double c; + switch (col_def.type) { + case (ColumnType::integer_type): + return a_val->getIntValue() - b_val->getIntValue(); + case (ColumnType::float_type): + c = a_val->getDoubleValue() - b_val->getDoubleValue(); + return c < 0 ? -1 : c==0.0 ? 0 : 1; + case (ColumnType::varchar_type): + return a_val->getStringValue().compare(b_val->getStringValue()); + default: + throw Exception("Unsupported data type"); + } +} + std::tuple USql::get_column_definition(Table *table, SelectColNode *select_col_node, int col_order ) { std::string new_col_name = select_col_node->name; diff --git a/usql.h b/usql.h index 3007014..0c100fd 100644 --- a/usql.h +++ b/usql.h @@ -54,6 +54,11 @@ private: private: Parser m_parser; std::list
m_tables; + + int compare_col_values(const ColDefNode &col_def, ColValue *a_val, ColValue *b_val) const; + + void + execute_order_by(SelectFromTableNode &node, Table *table, std::__unique_if
::__unique_single &result) const; }; } // namespace \ No newline at end of file