usql update

2021-08-31 19:00:03 +02:00
parent 85bc6c9363
commit dfa7c1c15b
9 changed files with 1344 additions and 70 deletions
--- a/Readme.md
+++ b/Readme.md
@@ -56,6 +56,7 @@ utils/local_install.sh
 (read-url "https://api.nasdaq.com/api/calendar/dividends/") ; hangs in sslclient.cpp line 132
 ### TODO
 - in ml_date throw exception when invalid date string or format
 - add debug support, at least call stack
 - multiline editing (see kilocpp editor)
 - execute system command should capture stderr
--- a/clib/fast_double_parser.h
+++ b/clib/fast_double_parser.h
--- a/usql/lexer.cpp
+++ b/usql/lexer.cpp
@@ -63,12 +63,18 @@ namespace usql {
    Token Lexer::currentToken() { return m_tokens[m_index]; }
-    Token Lexer::consumeCurrentToken() {
+    Token Lexer::consumeToken() {
 	    int i = m_index;
 	    nextToken();
 	    return m_tokens[i];
    }
+    Token Lexer::consumeToken(TokenType type) {
+    	int i = m_index;
+    	skipToken(type);
+    	return m_tokens[i];
+    }
    void Lexer::nextToken() {
 	    if (m_index < m_tokens.size()) {
 		    m_index++;
@@ -79,8 +85,7 @@ namespace usql {
 	    if (tokenType() == type) {
 		    nextToken();
 	    } else {
-		    throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " +
+		    throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type));
-				    typeToString(type));
 	    }
    }
@@ -215,8 +220,8 @@ namespace usql {
 		(token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
 		    return TokenType::comment;
-	    // if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
+	    if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
-	    //	return TokenType::string_literal;
+	    	return TokenType::string_literal;
 	    if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'')
 		    return TokenType::string_literal;
@@ -233,9 +238,6 @@ namespace usql {
 	    if (std::regex_match(token, k_identifier_regex))
 		    return TokenType::identifier;
 	    if (m_index + 1 >= m_tokens.size())
 		    return TokenType::eof;
 	    return TokenType::undef;
    }
--- a/usql/lexer.h
+++ b/usql/lexer.h
@@ -83,7 +83,9 @@ namespace usql {
 	Token currentToken();
-	Token consumeCurrentToken();
+	Token consumeToken();
+	Token consumeToken(TokenType type);
 	void nextToken();
--- a/usql/parser.cpp
+++ b/usql/parser.cpp
@@ -47,10 +47,7 @@ namespace usql {
 	    m_lexer.skipToken(TokenType::keyword_create);
 	    m_lexer.skipToken(TokenType::keyword_table);
-	    if (m_lexer.tokenType() != TokenType::identifier)
+	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
-	    	throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
-	    std::string table_name = m_lexer.consumeCurrentToken().token_string;
 	    // create as select
 	    if (m_lexer.tokenType() == TokenType::keyword_as) {
@@ -72,7 +69,7 @@ namespace usql {
 	    		if (m_lexer.tokenType() != TokenType::identifier) {
 				throw Exception("syntax error, expected identifier");
 			}
-	    		database_value = m_lexer.consumeCurrentToken().token_string;
+	    		database_value = m_lexer.consumeToken().token_string;
 	    		// column type and optionally len
 	    		if (m_lexer.tokenType() == TokenType::keyword_integer) {
@@ -85,11 +82,7 @@ namespace usql {
 	    			column_type = ColumnType::varchar_type;
 	    			m_lexer.nextToken();
 	    			m_lexer.skipToken(TokenType::open_paren);
-	    			if (m_lexer.tokenType() == TokenType::int_number) {
+	    			column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
-	    				column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
-	    			} else {
-					throw Exception("syntax error, expected int number");
-				}
 	    			m_lexer.skipToken(TokenType::close_paren);
 			} else if (m_lexer.tokenType() == TokenType::keyword_date) {
 				column_type = ColumnType::date_type;
@@ -125,11 +118,11 @@ namespace usql {
 	    m_lexer.skipToken(TokenType::keyword_load);
 	    m_lexer.skipTokenOptional(TokenType::keyword_into);
-	    std::string table_name = m_lexer.consumeCurrentToken().token_string;
+	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
 	    m_lexer.skipTokenOptional(TokenType::keyword_from);
-	    std::string file_name = m_lexer.consumeCurrentToken().token_string;
+	    std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
 	    return std::make_unique<LoadIntoTableNode>(table_name, file_name);
    }
@@ -138,11 +131,11 @@ namespace usql {
    	m_lexer.skipToken(TokenType::keyword_save);
    	m_lexer.skipTokenOptional(TokenType::keyword_table);
-    	std::string table_name = m_lexer.consumeCurrentToken().token_string;
+    	std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
    	m_lexer.skipTokenOptional(TokenType::keyword_into);
-    	std::string file_name = m_lexer.consumeCurrentToken().token_string;
+    	std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
    	return std::make_unique<SaveTableNode>(table_name, file_name);
    }
@@ -151,7 +144,7 @@ namespace usql {
    	m_lexer.skipToken(TokenType::keyword_drop);
    	m_lexer.skipTokenOptional(TokenType::keyword_table);
-    	std::string table_name = m_lexer.consumeCurrentToken().token_string;
+    	std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
    	return std::make_unique<DropTableNode>(table_name);
    }
@@ -159,13 +152,9 @@ namespace usql {
    std::unique_ptr<Node> Parser::parse_set() {
    	m_lexer.skipToken(TokenType::keyword_set);
-	if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set name");
+    	std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
-    	std::string name = m_lexer.consumeCurrentToken().token_string;
    	m_lexer.skipTokenOptional(TokenType::equal);
-
+    	std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string;
-	if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set value");
-    	std::string value = m_lexer.consumeCurrentToken().token_string;
    	return std::make_unique<SetNode>(name, value);
    }
@@ -173,8 +162,7 @@ namespace usql {
    std::unique_ptr<Node> Parser::parse_show() {
    	m_lexer.skipToken(TokenType::keyword_show);
-	if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal on show parameter name");
+    	std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
-    	std::string name = m_lexer.consumeCurrentToken().token_string;
    	return std::make_unique<ShowNode>(name);
    }
@@ -187,18 +175,12 @@ namespace usql {
 	    m_lexer.skipToken(TokenType::keyword_into);
 	    // table name
-	    if (m_lexer.tokenType() != TokenType::identifier)
+	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
-	    	throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
-	    std::string table_name = m_lexer.consumeCurrentToken().token_string;
 	    // column names
 	    m_lexer.skipToken(TokenType::open_paren);
 	    do {
-		    if (m_lexer.tokenType() != TokenType::identifier)
+		    database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
-			    throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
-		    database_values.emplace_back(m_lexer.consumeCurrentToken().token_string);
 		    m_lexer.skipTokenOptional(TokenType::comma);
 	    } while (m_lexer.tokenType() != TokenType::close_paren);
@@ -233,7 +215,7 @@ namespace usql {
 	int i = 1;
 	while (m_lexer.tokenType() != TokenType::keyword_from) {
 		if (m_lexer.tokenType()==TokenType::multiply) {
-			std::string name = m_lexer.consumeCurrentToken().token_string;
+			std::string name = m_lexer.consumeToken().token_string;
 			auto multiply_char = std::make_unique<DatabaseValueNode>(name);
 			cols->push_back(SelectColNode{std::move(multiply_char), "*"});
@@ -243,7 +225,7 @@ namespace usql {
 			if (m_lexer.tokenType() == TokenType::keyword_as) {
 				m_lexer.skipToken(TokenType::keyword_as);
-				column_alias = m_lexer.consumeCurrentToken().token_string;
+				column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
 			} else {
 				if (column_value->node_type == NodeType::database_value) {
 					column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
@@ -262,7 +244,7 @@ namespace usql {
 	m_lexer.skipToken(TokenType::keyword_from);
-	std::string table_name = m_lexer.consumeCurrentToken().token_string;
+	std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
 	std::unique_ptr<Node> where_node = parse_where_clause();
@@ -278,7 +260,7 @@ namespace usql {
 	    m_lexer.skipToken(TokenType::keyword_delete);
 	    m_lexer.skipToken(TokenType::keyword_from);
-	    std::string table_name = m_lexer.consumeCurrentToken().token_string;
+	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
 	    std::unique_ptr<Node> where_node = parse_where_clause();
@@ -289,7 +271,7 @@ namespace usql {
 	    m_lexer.skipToken(TokenType::keyword_update);
 	    m_lexer.skipTokenOptional(TokenType::keyword_table);
-	    std::string table_name = m_lexer.consumeCurrentToken().token_string;
+	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
 	    m_lexer.skipToken(TokenType::keyword_set);
@@ -297,7 +279,7 @@ namespace usql {
 	    std::vector<std::unique_ptr<Node>> values;
 	    do {
-		    cols_names.emplace_back(m_lexer.consumeCurrentToken().token_string);
+		    cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
 		    m_lexer.skipToken(TokenType::equal);
 		    std::unique_ptr<Node> left = Parser::parse_value();
@@ -333,7 +315,7 @@ namespace usql {
 			bool asc = true;
 			auto token_type = m_lexer.tokenType();
-			std::string tokenString = m_lexer.consumeCurrentToken().token_string;
+			std::string tokenString = m_lexer.consumeToken().token_string;
 			switch (token_type) {
 				case TokenType::int_number:
 					col_index = std::stoi(tokenString);
@@ -365,20 +347,12 @@ namespace usql {
 	if (m_lexer.tokenType() == TokenType::keyword_offset) {
 		m_lexer.skipToken(TokenType::keyword_offset);
-
+		offset = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
-		if (m_lexer.tokenType() != TokenType::int_number)
-			throw Exception("expecting integer in offset clause");
-		offset = std::stoi(m_lexer.consumeCurrentToken().token_string);
 	}
 	if (m_lexer.tokenType() == TokenType::keyword_limit) {
 		m_lexer.skipToken(TokenType::keyword_limit);
-
+		limit = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
-		if (m_lexer.tokenType() != TokenType::int_number)
-			throw Exception("expecting integer in limit clause");
-		limit = std::stoi(m_lexer.consumeCurrentToken().token_string);
 	}
 	return OffsetLimitNode{offset, limit};
@@ -446,7 +420,7 @@ namespace usql {
 	    // function call
 	    if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
-		std::string function_name = m_lexer.consumeCurrentToken().token_string;
+		std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
 		std::vector<std::unique_ptr<Node>> pars;
 		m_lexer.skipToken(TokenType::open_paren);
@@ -459,7 +433,7 @@ namespace usql {
 	    }
 	    // numbers and strings
-	    std::string tokenString = m_lexer.consumeCurrentToken().token_string;
+	    std::string tokenString = m_lexer.consumeToken().token_string;
 	    if (token_type == TokenType::int_number)
 		return std::make_unique<IntValueNode>(std::stoi(tokenString));
@@ -480,7 +454,7 @@ namespace usql {
    }
    RelationalOperatorType Parser::parse_relational_operator() {
-	    auto op = m_lexer.consumeCurrentToken();
+	    auto op = m_lexer.consumeToken();
 	    switch (op.type) {
 		    case TokenType::equal:
 			    return RelationalOperatorType::equal;
@@ -500,7 +474,7 @@ namespace usql {
    }
    LogicalOperatorType Parser::parse_logical_operator() {
-	    auto op = m_lexer.consumeCurrentToken();
+	    auto op = m_lexer.consumeToken();
 	    switch (op.type) {
 		    case TokenType::logical_and:
 			    return LogicalOperatorType::and_operator;
@@ -512,7 +486,7 @@ namespace usql {
    }
    ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
-	    auto op = m_lexer.consumeCurrentToken();
+	    auto op = m_lexer.consumeToken();
 	    switch (op.type) {
 		    case TokenType::plus:
 			    return ArithmeticalOperatorType::plus_operator;
--- a/usql/row.cpp
+++ b/usql/row.cpp
@@ -133,7 +133,7 @@ void Row::setColumnValue(ColDefNode *col_def, ValueNode *col_value) {
 		else if (col_def->type == ColumnType::varchar_type)
 			setStringColumnValue(col_def->order, col_value->getStringValue());
 		else if (col_def->type == ColumnType::date_type)
-			setIntColumnValue(col_def->order, col_value->getDateValue());
+			setDateColumnValue(col_def->order, col_value->getDateValue());
 		else if (col_def->type == ColumnType::bool_type)
 			setBoolColumnValue(col_def->order, col_value->getBooleanValue());
 		else
@@ -169,9 +169,20 @@ void Row::print(const std::vector<ColDefNode> &col_defs) {
 int Row::print_get_column_size(const ColDefNode &col_def) {
-	int col_size = col_def.type == ColumnType::varchar_type ? col_def.length :
+	switch (col_def.type) {
-		    col_def.type == ColumnType::float_type ? 16 : 10;
+		case ColumnType::varchar_type:
-	return col_size;
+			return col_def.length;
+			break;
+		case ColumnType::date_type:
+			return 19;
+			break;
+		case ColumnType::float_type:
+			return 16;
+			break;
+	default:
+		return 10;
+	}
 }
 }	// namespace
--- a/usql/row.h
+++ b/usql/row.h
@@ -35,6 +35,8 @@ namespace usql {
 	bool getBoolValue() override { throw Exception("getDateValue not supported on ColNullValue"); };
 	int compare(ColValue &other) override;
 	virtual ~ColNullValue() = default;
    };
@@ -52,6 +54,8 @@ namespace usql {
 	int compare(ColValue &other) override;
 	long m_integer;
 	virtual ~ColIntegerValue() = default;
    };
@@ -68,6 +72,8 @@ namespace usql {
 	int compare(ColValue &other) override;
 	virtual ~ColDoubleValue() = default;
 	double m_double;
    };
@@ -103,6 +109,8 @@ namespace usql {
        int compare(ColValue &other) override;
 	virtual ~ColDateValue() = default;
        long m_date;	// seconds since epoch for now
    };
@@ -119,6 +127,8 @@ namespace usql {
        int compare(ColValue &other) override;
 	virtual ~ColBooleanValue() = default;
        bool m_bool;
    };
--- a/usql/settings.cpp
+++ b/usql/settings.cpp
@@ -6,7 +6,7 @@
 namespace usql {
 std::vector<std::pair<std::string, std::string>> Settings::m_settings =
-	    { std::make_pair("DATE_FORMAT", "%Y-%m-%d"),
+	    { std::make_pair("DATE_FORMAT", "%Y-%m-%d %H:%M:%S"),
 	      std::make_pair("BOOL_TRUE_LITERAL", "Y"),
 	      std::make_pair("BOOL_FALSE_LITERAL", "N"),
 	      std::make_pair("DOUBLE_FORMAT", "%.2f") };
@@ -21,6 +21,7 @@ std::string Settings::date_to_string(long date) {
 	return ::date_to_string(date, get_setting("DATE_FORMAT"));
 }
 std::string Settings::double_to_string(double d) {
 	char buffer[32];
 	int r, buf_size = 32;
--- a/usql/table.cpp
+++ b/usql/table.cpp
@@ -1,7 +1,9 @@
 #include "table.h"
 #include "csvreader.h"
 #include "ml_string.h"
+#include "fast_double_parser.h"
 #include <charconv>
 #include <fstream>
 #include <algorithm>
@@ -138,11 +140,12 @@ void Table::create_row_from_vector(const std::vector<ColDefNode> &colDefs, const
 }
 double Table::string_to_double(const std::string &s) {
-	try {
+	double result;
-		return std::stod(s);
+	const char * endptr = fast_double_parser::parse_number(s.c_str(), &result);
-	} catch (std::invalid_argument &e) {
+	if (endptr == nullptr) {
 		throw Exception("error parsing as double: " + s);
 	}
+	return result;
 }
 long Table::string_to_long(const std::string &s) {