From 710531c455ed75f0e8614aecb3450d5e7d607f14 Mon Sep 17 00:00:00 2001 From: VaclavT Date: Mon, 9 Aug 2021 14:15:42 +0200 Subject: [PATCH] work on settings (set and show), perf improvement when adding row into table --- Readme.md | 9 ++++----- lexer.cpp | 5 +++++ lexer.h | 1 + main.cpp | 6 ++++-- parser.cpp | 29 +++++++++++++++++++++++++++-- parser.h | 24 +++++++++++++++++++++--- row.cpp | 1 + settings.cpp | 13 +++++++++++-- settings.h | 6 +++--- table.cpp | 34 +++++++++++++++++----------------- table.h | 6 +++--- usql.cpp | 37 +++++++++++++++++++++++++------------ usql.h | 2 ++ 13 files changed, 124 insertions(+), 49 deletions(-) diff --git a/Readme.md b/Readme.md index d7bc3d5..d60c060 100644 --- a/Readme.md +++ b/Readme.md @@ -1,15 +1,14 @@ ### TODO -- add support for set setting value - command line interface - date functions - now, add_date... +- add pipe | token +- string functions rtrim, ltrim, rpad, lpad - support for order by, offset, limit (allow column name in order by, validate) -- add count min and max functions, eg aggregate functions - support for uniqueue indexes (primary key) - support for btree indexes +- add count min and max functions, eg aggregate functions - support for parenthesis -- functions rtrim, ltrim, rpad, lpad -- add pipe | token -- maybe to create iterator on table - class members should have prefix m_ - add const wherever should be +- PERF in Row::Row(const Row &other), could be more efficient (memory and cpu) diff --git a/lexer.cpp b/lexer.cpp index 97e2126..c21eee6 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -203,6 +203,8 @@ namespace usql { return TokenType::keyword_bool; if (token == "distinct") return TokenType::keyword_distinct; + if (token == "show") + return TokenType::keyword_show; if (token == "or") return TokenType::logical_or; if (token == "and") @@ -397,6 +399,9 @@ namespace usql { case TokenType::keyword_distinct: txt = "distinct"; break; + case TokenType::keyword_show: + txt = "show"; + break; case TokenType::int_number: txt = "int number"; break; diff --git a/lexer.h b/lexer.h index 33e542b..c63a859 100644 --- a/lexer.h +++ b/lexer.h @@ -50,6 +50,7 @@ namespace usql { keyword_date, keyword_bool, keyword_distinct, + keyword_show, int_number, double_number, string_literal, diff --git a/main.cpp b/main.cpp index fea8fdc..a4b9277 100644 --- a/main.cpp +++ b/main.cpp @@ -129,10 +129,12 @@ int main(int argc, char *argv[]) { // "create table ticker ( tablee varchar(5) not null, permaticker integer, ticker varchar(10) not null, name varchar(256) not null, exchange varchar(32), isdelisted boolean, category varchar(32), cusips varchar(256), siccode integer, sicsector varchar(256), sicindustry varchar(256), famasector varchar(256), famaindustry varchar(256), sector varchar(128), industry varchar(128), scalemarketcap varchar(64), scalerevenue varchar(64), relatedtickers varchar(128), currency varchar(3), location varchar(64), lastupdated date, firstadded date, firstpricedate date, lastpricedate date, firstquarter date, lastquarter date, secfilings varchar(256), companysite varchar(256))", // "load ticker from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/tickers.csv')", // "select * from ticker where ticker = 'WFC' and tablee = 'SF1'", + "set 'DATE_FORMAT' = '%Y-%m-%d'", + "show 'DATE_FORMAT'", "create table sf1 ( ticker varchar(8), dimension varchar(3), calendar_date date, date_key date, report_period date, last_updated date, accoci float, assets float, assetsavg float, assetsc float, assetsnc float, assetturnover float, bvps float, capex float, cashneq float, cashnequsd float, cor float, consolinc float, currentratio float, de float, debt float, debtc float, debtnc float, debtusd float, deferredrev float, depamor float, deposits float, divyield float, dps float, ebit float, ebitda float, ebitdamargin float, ebitdausd float, ebitusd float, ebt float, eps float, epsdil float, epsusd float, equity float, equityavg float, equityusd float, ev float, evebit float, evebitda float, fcf float, fcfps float, fxusd float, gp float, grossmargin float, intangibles float, intexp float, invcap float, invcapavg float, inventory float, investments float, investmentsc float, investmentsnc float, liabilities float, liabilitiesc float, liabilitiesnc float, marketcap float, ncf float, ncfbus float, ncfcommon float, ncfdebt float, ncfdiv float, ncff float, ncfi float, ncfinv float, ncfo float, ncfx float, netinc float, netinccmn float, netinccmnusd float, netincdis float, netincnci float, netmargin float, opex float, opinc float, payables float, payoutratio float, pb float, pe float, pe1 float, ppnenet float, prefdivis float, price float, ps float, ps1 float, receivables float, retearn float, revenue float, revenueusd float, rnd float, roa float, roe float, roic float, ros float, sbcomp float, sgna float, sharefactor float, sharesbas float, shareswa float, shareswadil float, sps float, tangibles float, taxassets float, taxexp float, taxliabilities float, tbvps float, workingcapital float)", "load sf1 from '/tmp/sf1.csv')", -// "select * from sf1 where ticker = 'WFC'" - "select * from sf1 limit 10" + "select * from sf1 where ticker = 'WFC'" + // "select * from sf1 limit 10" // "create table a (i integer not null, s varchar(64), f float null, d date null, b boolean)", // "insert into a (i, s, b) values(1, upper('one'), 'Y')", // "select i, s, b from a where i >=1 order by 1 desc offset 0 limit 1", diff --git a/parser.cpp b/parser.cpp index 5cd5f50..8f7b4e0 100644 --- a/parser.cpp +++ b/parser.cpp @@ -15,6 +15,9 @@ namespace usql { if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_table) return parse_create_table(); + if (m_lexer.tokenType() == TokenType::keyword_drop) + return parse_drop_table(); + if (m_lexer.tokenType() == TokenType::keyword_insert) return parse_insert_into_table(); if (m_lexer.tokenType() == TokenType::keyword_select) @@ -23,12 +26,16 @@ namespace usql { return parse_delete_from_table(); if (m_lexer.tokenType() == TokenType::keyword_update) return parse_update_table(); + if (m_lexer.tokenType() == TokenType::keyword_load) return parse_load_table(); if (m_lexer.tokenType() == TokenType::keyword_save) return parse_save_table(); - if (m_lexer.tokenType() == TokenType::keyword_drop) - return parse_drop_table(); + + if (m_lexer.tokenType() == TokenType::keyword_set) + return parse_set(); + if (m_lexer.tokenType() == TokenType::keyword_show) + return parse_show(); std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl; return std::make_unique(NodeType::error); @@ -145,6 +152,24 @@ namespace usql { return std::make_unique(table_name); } + + std::unique_ptr Parser::parse_set() { + m_lexer.skipToken(TokenType::keyword_set); + // TODO check these are string literals + std::string name = m_lexer.consumeCurrentToken().token_string; + m_lexer.skipTokenOptional(TokenType::equal); + std::string value = m_lexer.consumeCurrentToken().token_string; + + return std::make_unique(name, value); + } + + std::unique_ptr Parser::parse_show() { + m_lexer.skipToken(TokenType::keyword_show); + // TODO check these are string literals + std::string name = m_lexer.consumeCurrentToken().token_string; + + return std::make_unique(name); + } std::unique_ptr Parser::parse_insert_into_table() { std::vector column_names{}; diff --git a/parser.h b/parser.h index 8c6dba0..e9cf4f6 100644 --- a/parser.h +++ b/parser.h @@ -38,6 +38,8 @@ namespace usql { load_table, save_table, drop_table, + set, + show, column_name, offset_limit, column_order, @@ -323,6 +325,21 @@ namespace usql { Node(NodeType::delete_from), table_name(name), where(std::move(where_clause)) {} }; + struct SetNode : Node { + std::string name; + std::string value; + + SetNode(const std::string& name_, const std::string& value_) : + Node(NodeType::set), name(name_), value(value_) {} + }; + + struct ShowNode : Node { + std::string name; + + ShowNode(const std::string& name_) : Node(NodeType::show), name(name_) {} + }; + + class Parser { private: @@ -334,9 +351,11 @@ namespace usql { private: std::unique_ptr parse_create_table(); + std::unique_ptr parse_drop_table(); std::unique_ptr parse_load_table(); std::unique_ptr parse_save_table(); - std::unique_ptr parse_drop_table(); + std::unique_ptr parse_set(); + std::unique_ptr parse_show(); std::unique_ptr parse_insert_into_table(); std::unique_ptr parse_select_from_table(); @@ -352,11 +371,10 @@ namespace usql { RelationalOperatorType parse_relational_operator(); LogicalOperatorType parse_logical_operator(); ArithmeticalOperatorType parse_arithmetical_operator(); + std::unique_ptr parse_relational_expression(); private: Lexer m_lexer; - - std::unique_ptr parse_relational_expression(); }; } \ No newline at end of file diff --git a/row.cpp b/row.cpp index 0244f28..35267ac 100644 --- a/row.cpp +++ b/row.cpp @@ -66,6 +66,7 @@ namespace usql { } } + Row &Row::operator=(Row other) { std::swap(m_columns, other.m_columns); return *this; diff --git a/settings.cpp b/settings.cpp index 3471da9..689780e 100644 --- a/settings.cpp +++ b/settings.cpp @@ -20,11 +20,20 @@ std::string Settings::date_to_string(long date) { return ::date_to_string(date, get_setting("DATE_FORMAT")); } -std::basic_string Settings::get_setting(const std::string &name) { +std::string Settings::get_setting(const std::string &name) { for(const auto& pair : m_settings) { if (pair.first == name) return pair.second; } - // TODO exception + throw Exception("unsupported setting name: " + name); +} + +void Settings::set_setting(const std::string &name, const std::string &value) { + for (auto it = begin(m_settings); it != end(m_settings); ++it) { + if (it->first == name) { + *it = std::make_pair(name, value); + return; + } + } throw Exception("unsupported setting name: " + name); } diff --git a/settings.h b/settings.h index e2c809e..9c975a7 100644 --- a/settings.h +++ b/settings.h @@ -8,12 +8,12 @@ namespace usql { class Settings { public: + static void set_setting(const std::string &name, const std::string &value); + static std::string get_setting(const std::string &name); + static long string_to_date(const std::string &datestr); static std::string date_to_string(long date); -private: - static std::basic_string get_setting(const std::string &name); - private: static std::vector> m_settings; }; diff --git a/table.cpp b/table.cpp index 8bb63b0..220299d 100644 --- a/table.cpp +++ b/table.cpp @@ -17,7 +17,7 @@ Table::Table(const Table &other) { m_col_defs = other.m_col_defs; m_rows.reserve(other.m_rows.size()); for(const Row& orig_row : other.m_rows) { - add_copy_of_row(orig_row); + commit_copy_of_row(orig_row); } } @@ -39,8 +39,9 @@ ColDefNode Table::get_column_def(int col_index) { } } -Row Table::create_empty_row() const { - return {columns_count()}; +Row& Table::create_empty_row() { + m_rows.emplace_back(columns_count()); + return m_rows.back(); } std::string Table::csv_string() { @@ -76,10 +77,10 @@ int Table::load_csv_string(const std::string &content) { std::vector &colDefs = m_col_defs; for (auto it = csv.begin() + 1; it != csv.end(); ++it) { - std::vector csv_line = *it; + std::vector &csv_line = *it; // prepare empty new_row - Row new_row = create_empty_row(); + Row& new_row = create_empty_row(); // copy values for (size_t i = 0; i < columns_count(); i++) { @@ -102,7 +103,7 @@ int Table::load_csv_string(const std::string &content) { } // append new_row - add_row(new_row); + commit_row(new_row); row_cnt++; } @@ -144,23 +145,23 @@ void Table::print() { std::cout << out << std::endl; std::cout << out2 << std::endl; - for(auto row : m_rows) { + for(auto& row : m_rows) { row.print(col_char_sizes); } std::cout << std::endl; } -void Table::add_row(const Row &row) { - // PERF, here it is performance botleneck, because - // m_rows.push_back(row) calls Row::Row(const Row &other) constructor - // it would be much more performant to add row directly to m_rows in create_new_row - // and here in case of failed validation only remove it - validate_row(row); - m_rows.push_back(row); +void Table::commit_row(const Row &row) { + try { + validate_row(row); + } catch (Exception &e) { + m_rows.erase(m_rows.end() - 1); + throw e; + } } -void Table::add_copy_of_row(const Row &row) { - Row new_row = create_empty_row(); +void Table::commit_copy_of_row(const Row &row) { + Row& new_row = create_empty_row(); for(int i = 0; i < m_col_defs.size(); i++) { ColValue *ct = row.ith_column(i); @@ -184,7 +185,6 @@ void Table::add_copy_of_row(const Row &row) { } validate_row(new_row); - m_rows.push_back(new_row); } void Table::validate_column(const ColDefNode *col_def, ValueNode *col_val) { diff --git a/table.h b/table.h index 31ee39e..d663b69 100644 --- a/table.h +++ b/table.h @@ -18,9 +18,9 @@ namespace usql { int columns_count() const { return m_col_defs.size(); }; int rows_count() const { return m_rows.size(); }; - Row create_empty_row() const; // TODO this means unnecessary copying - void add_row(const Row &row); - void add_copy_of_row(const Row &row); + Row& create_empty_row(); + void commit_row(const Row &row); + void commit_copy_of_row(const Row &row); static void validate_column(const ColDefNode *col_def, ValueNode *col_val); static void validate_column(const ColDefNode *col_def, ColValue *col_val); diff --git a/usql.cpp b/usql.cpp index e0b6029..1535d73 100644 --- a/usql.cpp +++ b/usql.cpp @@ -25,6 +25,8 @@ std::unique_ptr USql::execute(Node &node) { return execute_create_table(static_cast(node)); case NodeType::create_table_as_select: return execute_create_table_as_table(static_cast(node)); + case NodeType::drop_table: + return execute_drop(static_cast(node)); case NodeType::insert_into: return execute_insert_into_table(static_cast(node)); case NodeType::select_from: @@ -37,8 +39,10 @@ std::unique_ptr
USql::execute(Node &node) { return execute_load(static_cast(node)); case NodeType::save_table: return execute_save(static_cast(node)); - case NodeType::drop_table: - return execute_drop(static_cast(node)); + case NodeType::set: + return execute_set(static_cast(node)); + case NodeType::show: + return execute_show(static_cast(node)); default: return create_stmt_result_table(-1, "unknown statement", 0); } @@ -68,7 +72,7 @@ std::unique_ptr
USql::execute_create_table_as_table(CreateTableAsSelectNo // must be here, if rows are put into new_table, they are lost during m_tables.push_table Table *table = find_table(node.table_name); for( Row& orig_row : select->m_rows) { - table->add_copy_of_row(orig_row); + table->commit_copy_of_row(orig_row); } select.release(); // is it correct? hoping not to release select table here and then when releasing CreateTableAsSelectNode @@ -121,6 +125,16 @@ std::unique_ptr
USql::execute_drop(DropTableNode &node) { throw Exception("table not found (" + node.table_name + ")"); } +std::unique_ptr
USql::execute_set(SetNode &node) { + Settings::set_setting(node.name, node.value); + return create_stmt_result_table(0, "set succeeded", 1); +} + +std::unique_ptr
USql::execute_show(ShowNode &node) { + std::string value = Settings::get_setting(node.name); + return create_stmt_result_table(0, "show succeeded: " + value, 1); +} + std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node) { // TODO check column names.size = values.size @@ -128,7 +142,7 @@ std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node Table *table_def = find_table(node.table_name); // prepare empty new_row - Row new_row = table_def->create_empty_row(); + Row& new_row = table_def->create_empty_row(); // copy values for (size_t i = 0; i < node.cols_names.size(); i++) { @@ -139,7 +153,7 @@ std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node } // append new_row - table_def->add_row(new_row); + table_def->commit_row(new_row); return create_stmt_result_table(0, "insert succeeded", 1); } @@ -175,10 +189,9 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) { // eval where for row if (eval_where(node.where.get(), table, *row)) { - // prepare empty row - Row new_row = result->create_empty_row(); + // prepare empty row and copy column values + Row& new_row = result->create_empty_row(); - // copy column values for (auto idx = 0; idx < result->columns_count(); idx++) { auto row_col_index = source_table_col_index[idx]; @@ -194,7 +207,7 @@ std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { } // add row to result - result->m_rows.push_back(new_row); + result->commit_row(new_row); } } @@ -317,7 +330,7 @@ std::unique_ptr
USql::execute_update(UpdateTableNode &node) { bool USql::eval_where(Node *where, Table *table, Row &row) { - switch (where->node_type) { // no where clause + switch (where->node_type) { case NodeType::true_node: return true; case NodeType::relational_operator: // just one condition @@ -544,11 +557,11 @@ std::unique_ptr
USql::create_stmt_result_table(long code, const std::stri auto table_def = std::make_unique
("result", result_tbl_col_defs); - Row new_row = table_def->create_empty_row(); + Row& new_row = table_def->create_empty_row(); new_row.setIntColumnValue(0, code); new_row.setStringColumnValue(1, text); new_row.setIntColumnValue(2, affected_rows); - table_def->add_row(new_row); + table_def->commit_row(new_row); return std::move(table_def); } diff --git a/usql.h b/usql.h index 4530c58..17110e7 100644 --- a/usql.h +++ b/usql.h @@ -23,6 +23,8 @@ private: std::unique_ptr
execute_load(LoadIntoTableNode &node); std::unique_ptr
execute_save(SaveTableNode &node); std::unique_ptr
execute_drop(DropTableNode &node); + std::unique_ptr
execute_set(SetNode &node); + std::unique_ptr
execute_show(ShowNode &node); std::unique_ptr
execute_insert_into_table(InsertIntoTableNode &node); std::unique_ptr
execute_select(SelectFromTableNode &node);