usql update

This commit is contained in:
VaclavT 2021-09-20 18:25:17 +02:00
parent 6900c5bdba
commit b4fb6e13d3
7 changed files with 78 additions and 58 deletions

View File

@ -1,15 +1,15 @@
### TODO ### TODO
- set xxx - without value to reset to default value
- escape " in save csv - escape " in save csv
- is null | is not null - is null | is not null
- coalesce, date functions now, add_date; string functions rtrim, ltrim, rpad, lpad; math function round - coalesce, date functions now, add_date; string functions rtrim, ltrim, rpad, lpad; math function round
- add pipe | concatenation - add pipe | concatenation
- add support for 1_000_000 numbers - add support for 1_000_000 numbers
- expand_asterix_char should support multiple and everywhere *
- support for order by, offset, limit (allow column name in order by, validate)
- support for unique indexes (primary key) - support for unique indexes (primary key)
- support for btree indexes - support for btree indexes
- support for joining - support for joining
- add count, min and max functions, i.e. aggregate functions
- use string_to_double and string_to_long (from Table) everywhere - use string_to_double and string_to_long (from Table) everywhere
- add const wherever should be - add const wherever should be

View File

@ -126,11 +126,11 @@ void repl() {
} }
void debug() { void debug() {
std::vector<std::string> sql_commands{ std::vector<std::string> sql_commands {
//"create table history_dividends (symbol varchar(8), ex_date date, pay_date date, div_rate float)", // "create table history_dividends (symbol varchar(8), ex_date date, pay_date date, div_rate float)",
//"set 'DATE_FORMAT' = '%m/%d/%Y' ", // "set 'DATE_FORMAT' = '%m/%d/%Y' ",
//"insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', 'ex-date', 'pay-date', 0.1)", // "insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', 'ex-date', 'pay-date', 0.1)",
//zpusobi crash "insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', ex-date, pay-date)" // "insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', ex-date, pay-date)"
// "create table ticker ( tablee varchar(5) not null, permaticker integer, ticker varchar(10) not null, name varchar(256) not null, exchange varchar(32), isdelisted boolean, category varchar(32), cusips varchar(256), siccode integer, sicsector varchar(256), sicindustry varchar(256), famasector varchar(256), famaindustry varchar(256), sector varchar(128), industry varchar(128), scalemarketcap varchar(64), scalerevenue varchar(64), relatedtickers varchar(128), currency varchar(3), location varchar(64), lastupdated date, firstadded date, firstpricedate date, lastpricedate date, firstquarter date, lastquarter date, secfilings varchar(256), companysite varchar(256))", // "create table ticker ( tablee varchar(5) not null, permaticker integer, ticker varchar(10) not null, name varchar(256) not null, exchange varchar(32), isdelisted boolean, category varchar(32), cusips varchar(256), siccode integer, sicsector varchar(256), sicindustry varchar(256), famasector varchar(256), famaindustry varchar(256), sector varchar(128), industry varchar(128), scalemarketcap varchar(64), scalerevenue varchar(64), relatedtickers varchar(128), currency varchar(3), location varchar(64), lastupdated date, firstadded date, firstpricedate date, lastpricedate date, firstquarter date, lastquarter date, secfilings varchar(256), companysite varchar(256))",
// "load ticker from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/tickers.csv')", // "load ticker from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/tickers.csv')",
// "select * from ticker where ticker = 'WFC' and tablee = 'SF1'", // "select * from ticker where ticker = 'WFC' and tablee = 'SF1'",
@ -147,28 +147,28 @@ void debug() {
"create table a (i integer not null, s varchar(64), f float null, d date null, b boolean)", "create table a (i integer not null, s varchar(64), f float null, d date null, b boolean)",
"insert into a (i, s, b) values(1, upper('zero'), 'Y')", "insert into a (i, s, b) values(1, upper('zero'), 'Y')",
"insert into a (i, s, b, f) values(1 + 10000, upper('one'), 'N', 3.1415)", "insert into a (i, s, b, f) values(1 + 10000, upper('one'), 'N', 3.1415)",
"insert into a (i, s, f) values(2 + 10000, upper('two'), 3.1415)", "insert into a (i, s, f) values(2 + 10000, upper('two'), 9.1415)",
"select min(i), max(i), count(*) from a where b is not null", "select * from a order by i",
"select * from a where b is null", // "select min(i), max(f), count(*) from a where b is not null",
"select * from a where b is not null", // "select * from a where b is null",
"select * from a where b='N'", // "select * from a where b is not null",
"update a set i = i * 100, f = f + 0.01 where i > 1", // "select * from a where b='N'",
"select to_string(i, '%d.%m.%Y %H:%M:%S'), i, s from a where i < to_date('20.12.2019', '%d.%m.%Y')", // "update a set i = i * 100, f = f + 0.01 where i > 1",
"select i + 2 as first, i, s, b, f from a where i >=1 order by 1 desc offset 0 limit 1", // "select to_string(i, '%d.%m.%Y %H:%M:%S'), i, s from a where i < to_date('20.12.2019', '%d.%m.%Y')",
// "select i + 2 as first, i, s, b, f from a where i >=1 order by 1 desc offset 0 limit 1",
// "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'",
"update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'", // "update table a set i = null",
"update table a set i = null", // "insert into a (i, s) values(2, 'two')",
"insert into a (i, s) values(2, 'two')", // "insert into a (i, s) values(3, 'two')",
"insert into a (i, s) values(3, 'two')", // "insert into a (i, s) values(4, lower('FOUR'))",
"insert into a (i, s) values(4, lower('FOUR'))", // "insert into a (i, s) values(5, 'five')",
"insert into a (i, s) values(5, 'five')", // "insert into a (i, s) values(to_date('20.12.1973', '%d.%m.%Y'), 'six')",
"insert into a (i, s) values(to_date('20.12.1973', '%d.%m.%Y'), 'six')",
// tohle zpusobi kresh "insert into a (i, d) values(6', '2006-10-04')", // tohle zpusobi kresh "insert into a (i, d) values(6', '2006-10-04')",
"insert into a (i, d) values(6, '2006-10-04')", // "insert into a (i, d) values(6, '2006-10-04')",
"save table a into '/tmp/a.csv'", // "save table a into '/tmp/a.csv'",
"select i, s from a where i > 2 order by 1 desc offset 1 limit 1", // "select i, s from a where i > 2 order by 1 desc offset 1 limit 1",
"select distinct s, d from a", // "select distinct s, d from a",
// "select i, s from a where i = 1", // "select i, s from a where i = 1",
// "select i, s from a where s = 'two'", // "select i, s from a where s = 'two'",
// "select i, s from a where i <= 3 and s = 'one'", // "select i, s from a where i <= 3 and s = 'one'",
@ -211,10 +211,14 @@ void debug() {
std::cout << std::endl << std::endl; std::cout << std::endl << std::endl;
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
#ifdef NDEBUG
repl();
#else
debug(); debug();
// repl(); #endif
return 0; return 0;
} }

View File

@ -191,8 +191,6 @@ namespace usql {
// column values // column values
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
do { do {
// TODO here it is problem when exception from parse_expression<-parse_value is thrown
// it makes double free
auto value = parse_expression(); auto value = parse_expression();
column_values.emplace_back(std::move(value)); column_values.emplace_back(std::move(value));
@ -313,18 +311,10 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_by); m_lexer.skipToken(TokenType::keyword_by);
do { do {
int col_index = FUNCTION_CALL;
bool asc = true; bool asc = true;
auto token_type = m_lexer.tokenType(); auto cspec_token_type = m_lexer.tokenType();
std::string tokenString = m_lexer.consumeToken().token_string; std::string cspec_token = m_lexer.consumeToken().token_string;
switch (token_type) {
case TokenType::int_number:
col_index = std::stoi(tokenString);
break;
default:
throw Exception("column index allowed in order by clause at this moment");
}
if (m_lexer.tokenType() == TokenType::keyword_asc) { if (m_lexer.tokenType() == TokenType::keyword_asc) {
m_lexer.skipToken(TokenType::keyword_asc); m_lexer.skipToken(TokenType::keyword_asc);
@ -333,10 +323,18 @@ namespace usql {
asc = false; asc = false;
} }
order_cols.emplace_back(col_index, asc); switch (cspec_token_type) {
case TokenType::int_number:
order_cols.emplace_back(std::stoi(cspec_token), asc);
break;
case TokenType::identifier:
order_cols.emplace_back(cspec_token, asc);
break;
default:
throw Exception("order by column can be either column index or identifier");
}
m_lexer.skipTokenOptional(TokenType::comma); m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit); } while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
} }
@ -405,10 +403,10 @@ namespace usql {
} }
std::unique_ptr<Node> Parser::parse_value() { std::unique_ptr<Node> Parser::parse_value() {
auto token_type = m_lexer.tokenType(); auto token_typcol = m_lexer.tokenType();
// parenthesised expression // parenthesised expression
if (token_type == TokenType::open_paren) { if (token_typcol == TokenType::open_paren) {
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
auto left = parse_expression(); auto left = parse_expression();
do { do {
@ -420,7 +418,7 @@ namespace usql {
} }
// function call // function call
if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) { if (token_typcol == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string; std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::vector<std::unique_ptr<Node>> pars; std::vector<std::unique_ptr<Node>> pars;
@ -436,27 +434,27 @@ namespace usql {
// numbers and strings // numbers and strings
std::string tokenString = m_lexer.consumeToken().token_string; std::string tokenString = m_lexer.consumeToken().token_string;
if (token_type == TokenType::int_number) if (token_typcol == TokenType::int_number)
return std::make_unique<IntValueNode>(std::stoi(tokenString)); return std::make_unique<IntValueNode>(std::stoi(tokenString));
if (token_type == TokenType::double_number) if (token_typcol == TokenType::double_number)
return std::make_unique<DoubleValueNode>(std::stod(tokenString)); return std::make_unique<DoubleValueNode>(std::stod(tokenString));
if (token_type == TokenType::string_literal) if (token_typcol == TokenType::string_literal)
return std::make_unique<StringValueNode>(tokenString); return std::make_unique<StringValueNode>(tokenString);
// db column // db column
if (token_type == TokenType::identifier) if (token_typcol == TokenType::identifier)
return std::make_unique<DatabaseValueNode>(tokenString); return std::make_unique<DatabaseValueNode>(tokenString);
// null // null
if (token_type == TokenType::keyword_null) if (token_typcol == TokenType::keyword_null)
return std::make_unique<NullValueNode>(); return std::make_unique<NullValueNode>();
// true / false // true / false
if (token_type == TokenType::keyword_true || token_type == TokenType::keyword_false) if (token_typcol == TokenType::keyword_true || token_typcol == TokenType::keyword_false)
return std::make_unique<BooleanValueNode>(token_type == TokenType::keyword_true); return std::make_unique<BooleanValueNode>(token_typcol == TokenType::keyword_true);
// token * for count(*) // token * for count(*)
if (token_type == TokenType::multiply) if (token_typcol == TokenType::multiply)
return std::make_unique<StringValueNode>(tokenString); return std::make_unique<StringValueNode>(tokenString);
throw Exception("Unknown operand node " + tokenString); throw Exception("Unknown operand node " + tokenString);

View File

@ -54,6 +54,7 @@ namespace usql {
NodeType node_type; NodeType node_type;
explicit Node(const NodeType type) : node_type(type) {} explicit Node(const NodeType type) : node_type(type) {}
virtual ~Node() = default;
}; };

View File

@ -304,7 +304,7 @@ std::unique_ptr<ValueNode> USql::pp_function(const std::vector<std::unique_ptr<V
if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) { if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) {
std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : ""; std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
char buf[16] {0}; char buf[20] {0}; // TODO constant here
double value = parsed_value->getDoubleValue(); double value = parsed_value->getDoubleValue();
if (format == "100%") if (format == "100%")

View File

@ -63,6 +63,7 @@ private:
static void execute_offset_limit(OffsetLimitNode &node, Table *result) ; static void execute_offset_limit(OffsetLimitNode &node, Table *result) ;
void expand_asterix_char(SelectFromTableNode &node, Table *table) const; void expand_asterix_char(SelectFromTableNode &node, Table *table) const;
void setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const;
bool check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const; bool check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const;

View File

@ -30,8 +30,11 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
// check for aggregate function // check for aggregate function
bool aggregate_funcs = check_for_aggregate_only_functions(node, result_tbl_col_defs.size()); bool aggregate_funcs = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
// prepare result table structure
auto result = std::make_unique<Table>("result", result_tbl_col_defs); auto result = std::make_unique<Table>("result", result_tbl_col_defs);
// replace possible order by col names to col indexes and validate
setup_order_columns(node.order_by, result.get());
// execute access plan // execute access plan
Row* new_row = nullptr; Row* new_row = nullptr;
@ -101,11 +104,25 @@ void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const {
node.cols_names->clear(); node.cols_names->clear();
node.cols_names->reserve(table->columns_count()); node.cols_names->reserve(table->columns_count());
for(const auto& col : table->m_col_defs) { for(const auto& col : table->m_col_defs) {
node.cols_names->emplace_back(SelectColNode{std::__1::make_unique<DatabaseValueNode>(col.name), col.name}); node.cols_names->emplace_back(SelectColNode{std::make_unique<DatabaseValueNode>(col.name), col.name});
} }
} }
} }
void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const {
for (auto& order_node : node) {
if (!order_node.col_name.empty()) {
ColDefNode col_def = table->get_column_def(order_node.col_name);
order_node.col_index = col_def.order;
} else {
order_node.col_index = order_node.col_index - 1; // user counts from 1
}
if (order_node.col_index < 0 || order_node.col_index >= table->columns_count())
throw Exception("unknown column in order by clause (" + order_node.col_name + ")");
}
}
void USql::execute_distinct(SelectFromTableNode &node, Table *result) { void USql::execute_distinct(SelectFromTableNode &node, Table *result) {
if (!node.distinct) return; if (!node.distinct) return;
@ -120,8 +137,7 @@ void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *resu
auto compare_rows = [&node, &result](const Row &a, const Row &b) { auto compare_rows = [&node, &result](const Row &a, const Row &b) {
for(const auto& order_by_col_def : node.order_by) { for(const auto& order_by_col_def : node.order_by) {
// TODO validate index ColDefNode col_def = result->get_column_def(order_by_col_def.col_index);
ColDefNode col_def = result->get_column_def(order_by_col_def.col_index - 1);
ColValue &a_val = a[col_def.order]; ColValue &a_val = a[col_def.order];
ColValue &b_val = b[col_def.order]; ColValue &b_val = b[col_def.order];