From b4fb6e13d39c1be080094bbbb9bb62f2a9898c8b Mon Sep 17 00:00:00 2001
From: VaclavT <vaclavt@gmail.com>
Date: Mon, 20 Sep 2021 18:25:17 +0200
Subject: [PATCH] usql: order by column name, virtual Node destructor, repl in NDEBUG builds

---
 usql/Readme.md    |  4 ++--
 usql/main.cpp     | 56 +++++++++++++++++++++++++----------------------
 usql/parser.cpp   | 50 ++++++++++++++++++++----------------------
 usql/parser.h     |  1 +
 usql/usql.cpp     |  2 +-
 usql/usql.h       |  1 +
 usql/usql_dml.cpp | 22 ++++++++++++++++---
 7 files changed, 78 insertions(+), 58 deletions(-)

diff --git a/usql/Readme.md b/usql/Readme.md
index 08a5e9d..3d6cd3f 100644
--- a/usql/Readme.md
+++ b/usql/Readme.md
@@ -1,15 +1,15 @@
 ### TODO
+- `set xxx` without a value should reset the setting to its default value
 - escape " in save csv
 - is null | is not null
 - coalesce, date functions now, add_date; string functions rtrim, ltrim, rpad, lpad; math function round
 - add pipe | concatenation
 - add support for 1_000_000 numbers
+- expand_asterix_char should support `*` at any position and multiple times
 
-- support for order by, offset, limit (allow column name in order by, validate)
 - support for uniqueue indexes (primary key)
 - support for btree indexes
 - support for joining
-- add count min and max functions, eg aggregate functions
 
 - use string_to_double and string_to_long (from Table) everywhere
 - add const wherever should be
diff --git a/usql/main.cpp b/usql/main.cpp
index 49c4257..f128100 100644
--- a/usql/main.cpp
+++ b/usql/main.cpp
@@ -126,11 +126,11 @@ void repl() {
 }
 
 void debug() {
-	std::vector<std::string> sql_commands{
-//"create table history_dividends (symbol varchar(8), ex_date date, pay_date date, div_rate float)",
-//"set 'DATE_FORMAT' = '%m/%d/%Y' ",
-//"insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', 'ex-date', 'pay-date', 0.1)",
-//zpusobi crash "insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', ex-date, pay-date)"
+	std::vector<std::string> sql_commands {
+//		"create table history_dividends (symbol varchar(8), ex_date date, pay_date date, div_rate float)",
+//		"set 'DATE_FORMAT' = '%m/%d/%Y' ",
+//		"insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', 'ex-date', 'pay-date', 0.1)",
+//		"insert into history_dividends (symbol,ex_date,pay_date,div_rate) values ('symbol', ex-date, pay-date)"
 //		    "create table ticker ( tablee varchar(5) not null, permaticker integer, ticker varchar(10) not null, name varchar(256) not null, exchange varchar(32), isdelisted boolean, category varchar(32), cusips varchar(256), siccode integer, sicsector varchar(256), sicindustry varchar(256), famasector varchar(256), famaindustry varchar(256), sector varchar(128), industry varchar(128), scalemarketcap varchar(64), scalerevenue varchar(64), relatedtickers varchar(128), currency varchar(3), location varchar(64), lastupdated date, firstadded date, firstpricedate date, lastpricedate date, firstquarter date, lastquarter date, secfilings varchar(256), companysite varchar(256))",
 //		    "load ticker from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/tickers.csv')",
 //		    "select * from ticker where ticker = 'WFC' and tablee = 'SF1'",
@@ -147,28 +147,28 @@ void debug() {
 		    "create table a (i integer not null, s varchar(64), f float null, d date null,  b boolean)",
 		    "insert into a (i, s, b) values(1, upper('zero'), 'Y')",
 		    "insert into a (i, s, b, f) values(1 + 10000, upper('one'), 'N', 3.1415)",
-		    "insert into a (i, s, f) values(2 + 10000, upper('two'), 3.1415)",
-		    "select min(i), max(i), count(*) from a where b is not null",
-		    "select * from a where b is null",
-		    "select * from a where b is not null",
-		    "select * from a where b='N'",
-		    "update a set i = i * 100, f = f + 0.01 where i > 1",
-		    "select to_string(i, '%d.%m.%Y %H:%M:%S'), i, s from a where i < to_date('20.12.2019', '%d.%m.%Y')",
-		    "select i + 2 as first, i, s, b, f from a where i >=1 order by 1 desc offset 0 limit 1",
+		    "insert into a (i, s, f) values(2 + 10000, upper('two'), 9.1415)",
+		    "select * from a order by i",
+		//     "select min(i), max(f), count(*) from a where b is not null",
+		//     "select * from a where b is null",
+		//     "select * from a where b is not null",
+		//     "select * from a where b='N'",
+		//     "update a set i = i * 100, f = f + 0.01 where i > 1",
+		//     "select to_string(i, '%d.%m.%Y %H:%M:%S'), i, s from a where i < to_date('20.12.2019', '%d.%m.%Y')",
+		//     "select i + 2 as first, i, s, b, f from a where i >=1 order by 1 desc offset 0 limit 1",
 
-
-		    "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'",
-		    "update table a set i = null",
-		    "insert into a (i, s) values(2, 'two')",
-		    "insert into a (i, s) values(3, 'two')",
-		    "insert into a (i, s) values(4, lower('FOUR'))",
-		    "insert into a (i, s) values(5, 'five')",
-		    "insert into a (i, s) values(to_date('20.12.1973', '%d.%m.%Y'), 'six')",
+		//     "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'",
+		//     "update table a set i = null",
+		//     "insert into a (i, s) values(2, 'two')",
+		//     "insert into a (i, s) values(3, 'two')",
+		//     "insert into a (i, s) values(4, lower('FOUR'))",
+		//     "insert into a (i, s) values(5, 'five')",
+		//     "insert into a (i, s) values(to_date('20.12.1973', '%d.%m.%Y'), 'six')",
 		    // tohle zpusobi kresh		    "insert into a (i, d) values(6', '2006-10-04')",
-		    "insert into a (i, d) values(6, '2006-10-04')",
-		    "save table a into '/tmp/a.csv'",
-		    "select i, s from a where i > 2 order by 1 desc offset 1 limit 1",
-		    "select distinct s, d from a",
+		//     "insert into a (i, d) values(6, '2006-10-04')",
+		//     "save table a into '/tmp/a.csv'",
+		//     "select i, s from a where i > 2 order by 1 desc offset 1 limit 1",
+		//     "select distinct s, d from a",
 //		    "select i, s from a where i = 1",
 //		    "select i, s from a where s = 'two'",
 //		    "select i, s from a where i <= 3 and s = 'one'",
@@ -211,10 +211,14 @@ void debug() {
 	std::cout << std::endl << std::endl;
 }
 
+
 int main(int argc, char *argv[]) {
 
+#ifdef NDEBUG
+	repl();
+#else
 	debug();
-	// repl();
+#endif
 
 	return 0;
 }
diff --git a/usql/parser.cpp b/usql/parser.cpp
index e7f5c36..4889147 100644
--- a/usql/parser.cpp
+++ b/usql/parser.cpp
@@ -191,8 +191,6 @@ namespace usql {
 	    // column values
 	    m_lexer.skipToken(TokenType::open_paren);
 	    do {
-		    // TODO here it is problem when exception from parse_expression<-parse_value is thrown
-		    // it makes double free
 		    auto value = parse_expression();
 		    column_values.emplace_back(std::move(value));
 
@@ -313,18 +311,10 @@ namespace usql {
 		m_lexer.skipToken(TokenType::keyword_by);
 
 		do {
-			int col_index  = FUNCTION_CALL;
 			bool asc = true;
 			
-			auto token_type = m_lexer.tokenType();
-			std::string tokenString = m_lexer.consumeToken().token_string;
-			switch (token_type) {
-				case TokenType::int_number:
-					col_index = std::stoi(tokenString);
-					break;
-			default:
-				throw Exception("column index allowed in order by clause at this moment");
-			}
+			auto cspec_token_type = m_lexer.tokenType();
+			std::string cspec_token = m_lexer.consumeToken().token_string;
 
 			if (m_lexer.tokenType() == TokenType::keyword_asc) {
 				m_lexer.skipToken(TokenType::keyword_asc);
@@ -332,11 +322,19 @@ namespace usql {
 				m_lexer.skipToken(TokenType::keyword_desc);
 				asc = false;
 			}
-
-			order_cols.emplace_back(col_index, asc);
+	
+			switch (cspec_token_type) {
+				case TokenType::int_number:
+					order_cols.emplace_back(std::stoi(cspec_token), asc);
+					break;
+				case TokenType::identifier:
+					order_cols.emplace_back(cspec_token, asc);
+					break;
+				default:
+					throw Exception("order by column can be either column index or identifier");
+			}
 
 			m_lexer.skipTokenOptional(TokenType::comma);
-
 		} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
     	}
 	    
@@ -405,10 +403,10 @@ namespace usql {
     }
 
     std::unique_ptr<Node> Parser::parse_value() {
-	    auto token_type = m_lexer.tokenType();
+	    auto token_typcol = m_lexer.tokenType();
 
 	    // parenthesised expression
-	    if (token_type == TokenType::open_paren) {
+	    if (token_typcol == TokenType::open_paren) {
             m_lexer.skipToken(TokenType::open_paren);
 		    auto left = parse_expression();
 		    do {
@@ -420,7 +418,7 @@ namespace usql {
 	    }
 
 	    // function call
-	    if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
+	    if (token_typcol == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
 		std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
 		std::vector<std::unique_ptr<Node>> pars;
 
@@ -436,27 +434,27 @@ namespace usql {
 	    // numbers and strings
 	    std::string tokenString = m_lexer.consumeToken().token_string;
 
-	    if (token_type == TokenType::int_number)
+	    if (token_typcol == TokenType::int_number)
 		return std::make_unique<IntValueNode>(std::stoi(tokenString));
-	    if (token_type == TokenType::double_number)
+	    if (token_typcol == TokenType::double_number)
 		return std::make_unique<DoubleValueNode>(std::stod(tokenString));
-	    if (token_type == TokenType::string_literal)
+	    if (token_typcol == TokenType::string_literal)
 		return std::make_unique<StringValueNode>(tokenString);
 		
 	    // db column
-	    if (token_type == TokenType::identifier)
+	    if (token_typcol == TokenType::identifier)
 		return std::make_unique<DatabaseValueNode>(tokenString);
 
 	    // null
-	    if (token_type == TokenType::keyword_null)
+	    if (token_typcol == TokenType::keyword_null)
 		return std::make_unique<NullValueNode>();
 
 	    // true / false
-	    if (token_type == TokenType::keyword_true || token_type == TokenType::keyword_false)
-		return std::make_unique<BooleanValueNode>(token_type == TokenType::keyword_true);
+	    if (token_typcol == TokenType::keyword_true || token_typcol == TokenType::keyword_false)
+		return std::make_unique<BooleanValueNode>(token_typcol == TokenType::keyword_true);
 
 	    // token * for count(*)
-	    if (token_type == TokenType::multiply)
+	    if (token_typcol == TokenType::multiply)
 		    return std::make_unique<StringValueNode>(tokenString);
 
 	    throw Exception("Unknown operand node " + tokenString);
diff --git a/usql/parser.h b/usql/parser.h
index e64096e..15b8d94 100644
--- a/usql/parser.h
+++ b/usql/parser.h
@@ -54,6 +54,7 @@ namespace usql {
 	NodeType node_type;
 
 	explicit Node(const NodeType type) : node_type(type) {}
+	virtual ~Node() = default;
     };
 
 
diff --git a/usql/usql.cpp b/usql/usql.cpp
index 518c622..7ac76d3 100644
--- a/usql/usql.cpp
+++ b/usql/usql.cpp
@@ -304,7 +304,7 @@ std::unique_ptr<ValueNode> USql::pp_function(const std::vector<std::unique_ptr<V
 
 	if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) {
 		std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
-		char buf[16] {0};
+		char buf[20] {0};	// TODO constant here
 		double value = parsed_value->getDoubleValue();
 
 		if (format == "100%")
diff --git a/usql/usql.h b/usql/usql.h
index 0f84de0..0d910d0 100644
--- a/usql/usql.h
+++ b/usql/usql.h
@@ -63,6 +63,7 @@ private:
     static void execute_offset_limit(OffsetLimitNode &node, Table *result) ;
 
     void expand_asterix_char(SelectFromTableNode &node, Table *table) const;
+    void setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const;
 
     bool check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const;
 
diff --git a/usql/usql_dml.cpp b/usql/usql_dml.cpp
index 56cd32c..54a297f 100644
--- a/usql/usql_dml.cpp
+++ b/usql/usql_dml.cpp
@@ -30,8 +30,11 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
 	// check for aggregate function
 	bool aggregate_funcs = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
 
+	// prepare result table structure
 	auto result = std::make_unique<Table>("result", result_tbl_col_defs);
 
+	// replace possible order by col names to col indexes and validate
+	setup_order_columns(node.order_by, result.get());
 
 	// execute access plan
 	Row* new_row = nullptr;
@@ -101,11 +104,25 @@ void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const {
 		node.cols_names->clear();
 		node.cols_names->reserve(table->columns_count());
 		for(const auto& col : table->m_col_defs) {
-			node.cols_names->emplace_back(SelectColNode{std::__1::make_unique<DatabaseValueNode>(col.name), col.name});
+			node.cols_names->emplace_back(SelectColNode{std::make_unique<DatabaseValueNode>(col.name), col.name});
 		}
 	}
 }
 
+// Resolves each order-by entry (column name or 1-based position) to a 0-based column index of `table`; throws Exception when it is out of range.
+void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const {
+	for (auto& order_node : node) {
+		const std::string label = order_node.col_name.empty() ? std::to_string(order_node.col_index) : order_node.col_name;	// what the user typed, kept for the error message
+		if (!order_node.col_name.empty()) {
+			order_node.col_index = table->get_column_def(order_node.col_name).order;	// column name -> column order in table
+		} else {
+			order_node.col_index = order_node.col_index - 1;	// user counts from 1
+		}
+		if (order_node.col_index < 0 || order_node.col_index >= table->columns_count())
+			throw Exception("unknown column in order by clause (" + label + ")");
+	}
+}
+
 void USql::execute_distinct(SelectFromTableNode &node, Table *result) {
 	if (!node.distinct) return;
 	
@@ -120,8 +137,7 @@ void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *resu
 
 	auto compare_rows = [&node, &result](const Row &a, const Row &b) {
 		for(const auto& order_by_col_def : node.order_by) {
-			// TODO validate index
-			ColDefNode col_def = result->get_column_def(order_by_col_def.col_index - 1);
+			ColDefNode col_def = result->get_column_def(order_by_col_def.col_index);
 			ColValue &a_val = a[col_def.order];
 			ColValue &b_val = b[col_def.order];