usql update

This commit is contained in:
VaclavT 2021-08-31 19:00:03 +02:00
parent 85bc6c9363
commit dfa7c1c15b
9 changed files with 1344 additions and 70 deletions

View File

@ -56,6 +56,7 @@ utils/local_install.sh
(read-url "https://api.nasdaq.com/api/calendar/dividends/") ; hangs in sslclient.cpp line 132 (read-url "https://api.nasdaq.com/api/calendar/dividends/") ; hangs in sslclient.cpp line 132
### TODO ### TODO
- in ml_date throw exception when invalid date string or format
- add debug support, at least call stack - add debug support, at least call stack
- multiline editing (see kilocpp editor) - multiline editing (see kilocpp editor)
- execute system command should capture stderr - execute system command should capture stderr

1270
clib/fast_double_parser.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -63,12 +63,18 @@ namespace usql {
Token Lexer::currentToken() { return m_tokens[m_index]; } Token Lexer::currentToken() { return m_tokens[m_index]; }
Token Lexer::consumeCurrentToken() { Token Lexer::consumeToken() {
int i = m_index; int i = m_index;
nextToken(); nextToken();
return m_tokens[i]; return m_tokens[i];
} }
Token Lexer::consumeToken(TokenType type) {
int i = m_index;
skipToken(type);
return m_tokens[i];
}
void Lexer::nextToken() { void Lexer::nextToken() {
if (m_index < m_tokens.size()) { if (m_index < m_tokens.size()) {
m_index++; m_index++;
@ -79,8 +85,7 @@ namespace usql {
if (tokenType() == type) { if (tokenType() == type) {
nextToken(); nextToken();
} else { } else {
throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " + throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type));
typeToString(type));
} }
} }
@ -215,8 +220,8 @@ namespace usql {
(token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
return TokenType::comment; return TokenType::comment;
// if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
// return TokenType::string_literal; return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'')
return TokenType::string_literal; return TokenType::string_literal;
@ -233,9 +238,6 @@ namespace usql {
if (std::regex_match(token, k_identifier_regex)) if (std::regex_match(token, k_identifier_regex))
return TokenType::identifier; return TokenType::identifier;
if (m_index + 1 >= m_tokens.size())
return TokenType::eof;
return TokenType::undef; return TokenType::undef;
} }

View File

@ -83,7 +83,9 @@ namespace usql {
Token currentToken(); Token currentToken();
Token consumeCurrentToken(); Token consumeToken();
Token consumeToken(TokenType type);
void nextToken(); void nextToken();

View File

@ -47,10 +47,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_create); m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_table); m_lexer.skipToken(TokenType::keyword_table);
if (m_lexer.tokenType() != TokenType::identifier) std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// create as select // create as select
if (m_lexer.tokenType() == TokenType::keyword_as) { if (m_lexer.tokenType() == TokenType::keyword_as) {
@ -72,7 +69,7 @@ namespace usql {
if (m_lexer.tokenType() != TokenType::identifier) { if (m_lexer.tokenType() != TokenType::identifier) {
throw Exception("syntax error, expected identifier"); throw Exception("syntax error, expected identifier");
} }
database_value = m_lexer.consumeCurrentToken().token_string; database_value = m_lexer.consumeToken().token_string;
// column type and optionally len // column type and optionally len
if (m_lexer.tokenType() == TokenType::keyword_integer) { if (m_lexer.tokenType() == TokenType::keyword_integer) {
@ -85,11 +82,7 @@ namespace usql {
column_type = ColumnType::varchar_type; column_type = ColumnType::varchar_type;
m_lexer.nextToken(); m_lexer.nextToken();
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
if (m_lexer.tokenType() == TokenType::int_number) { column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
} else {
throw Exception("syntax error, expected int number");
}
m_lexer.skipToken(TokenType::close_paren); m_lexer.skipToken(TokenType::close_paren);
} else if (m_lexer.tokenType() == TokenType::keyword_date) { } else if (m_lexer.tokenType() == TokenType::keyword_date) {
column_type = ColumnType::date_type; column_type = ColumnType::date_type;
@ -125,11 +118,11 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_load); m_lexer.skipToken(TokenType::keyword_load);
m_lexer.skipTokenOptional(TokenType::keyword_into); m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipTokenOptional(TokenType::keyword_from); m_lexer.skipTokenOptional(TokenType::keyword_from);
std::string file_name = m_lexer.consumeCurrentToken().token_string; std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<LoadIntoTableNode>(table_name, file_name); return std::make_unique<LoadIntoTableNode>(table_name, file_name);
} }
@ -138,11 +131,11 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_save); m_lexer.skipToken(TokenType::keyword_save);
m_lexer.skipTokenOptional(TokenType::keyword_table); m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipTokenOptional(TokenType::keyword_into); m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string file_name = m_lexer.consumeCurrentToken().token_string; std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<SaveTableNode>(table_name, file_name); return std::make_unique<SaveTableNode>(table_name, file_name);
} }
@ -151,7 +144,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_drop); m_lexer.skipToken(TokenType::keyword_drop);
m_lexer.skipTokenOptional(TokenType::keyword_table); m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
return std::make_unique<DropTableNode>(table_name); return std::make_unique<DropTableNode>(table_name);
} }
@ -159,13 +152,9 @@ namespace usql {
std::unique_ptr<Node> Parser::parse_set() { std::unique_ptr<Node> Parser::parse_set() {
m_lexer.skipToken(TokenType::keyword_set); m_lexer.skipToken(TokenType::keyword_set);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set name"); std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
std::string name = m_lexer.consumeCurrentToken().token_string;
m_lexer.skipTokenOptional(TokenType::equal); m_lexer.skipTokenOptional(TokenType::equal);
std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string;
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set value");
std::string value = m_lexer.consumeCurrentToken().token_string;
return std::make_unique<SetNode>(name, value); return std::make_unique<SetNode>(name, value);
} }
@ -173,8 +162,7 @@ namespace usql {
std::unique_ptr<Node> Parser::parse_show() { std::unique_ptr<Node> Parser::parse_show() {
m_lexer.skipToken(TokenType::keyword_show); m_lexer.skipToken(TokenType::keyword_show);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal on show parameter name"); std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
std::string name = m_lexer.consumeCurrentToken().token_string;
return std::make_unique<ShowNode>(name); return std::make_unique<ShowNode>(name);
} }
@ -187,18 +175,12 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_into); m_lexer.skipToken(TokenType::keyword_into);
// table name // table name
if (m_lexer.tokenType() != TokenType::identifier) std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// column names // column names
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
do { do {
if (m_lexer.tokenType() != TokenType::identifier) database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
database_values.emplace_back(m_lexer.consumeCurrentToken().token_string);
m_lexer.skipTokenOptional(TokenType::comma); m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren); } while (m_lexer.tokenType() != TokenType::close_paren);
@ -233,7 +215,7 @@ namespace usql {
int i = 1; int i = 1;
while (m_lexer.tokenType() != TokenType::keyword_from) { while (m_lexer.tokenType() != TokenType::keyword_from) {
if (m_lexer.tokenType()==TokenType::multiply) { if (m_lexer.tokenType()==TokenType::multiply) {
std::string name = m_lexer.consumeCurrentToken().token_string; std::string name = m_lexer.consumeToken().token_string;
auto multiply_char = std::make_unique<DatabaseValueNode>(name); auto multiply_char = std::make_unique<DatabaseValueNode>(name);
cols->push_back(SelectColNode{std::move(multiply_char), "*"}); cols->push_back(SelectColNode{std::move(multiply_char), "*"});
@ -243,7 +225,7 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_as) { if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as); m_lexer.skipToken(TokenType::keyword_as);
column_alias = m_lexer.consumeCurrentToken().token_string; column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
} else { } else {
if (column_value->node_type == NodeType::database_value) { if (column_value->node_type == NodeType::database_value) {
column_alias = ((DatabaseValueNode*) column_value.get())->col_name; column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
@ -262,7 +244,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_from); m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause(); std::unique_ptr<Node> where_node = parse_where_clause();
@ -278,7 +260,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_delete); m_lexer.skipToken(TokenType::keyword_delete);
m_lexer.skipToken(TokenType::keyword_from); m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause(); std::unique_ptr<Node> where_node = parse_where_clause();
@ -289,7 +271,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_update); m_lexer.skipToken(TokenType::keyword_update);
m_lexer.skipTokenOptional(TokenType::keyword_table); m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::keyword_set); m_lexer.skipToken(TokenType::keyword_set);
@ -297,7 +279,7 @@ namespace usql {
std::vector<std::unique_ptr<Node>> values; std::vector<std::unique_ptr<Node>> values;
do { do {
cols_names.emplace_back(m_lexer.consumeCurrentToken().token_string); cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
m_lexer.skipToken(TokenType::equal); m_lexer.skipToken(TokenType::equal);
std::unique_ptr<Node> left = Parser::parse_value(); std::unique_ptr<Node> left = Parser::parse_value();
@ -333,7 +315,7 @@ namespace usql {
bool asc = true; bool asc = true;
auto token_type = m_lexer.tokenType(); auto token_type = m_lexer.tokenType();
std::string tokenString = m_lexer.consumeCurrentToken().token_string; std::string tokenString = m_lexer.consumeToken().token_string;
switch (token_type) { switch (token_type) {
case TokenType::int_number: case TokenType::int_number:
col_index = std::stoi(tokenString); col_index = std::stoi(tokenString);
@ -365,20 +347,12 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_offset) { if (m_lexer.tokenType() == TokenType::keyword_offset) {
m_lexer.skipToken(TokenType::keyword_offset); m_lexer.skipToken(TokenType::keyword_offset);
offset = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
if (m_lexer.tokenType() != TokenType::int_number)
throw Exception("expecting integer in offset clause");
offset = std::stoi(m_lexer.consumeCurrentToken().token_string);
} }
if (m_lexer.tokenType() == TokenType::keyword_limit) { if (m_lexer.tokenType() == TokenType::keyword_limit) {
m_lexer.skipToken(TokenType::keyword_limit); m_lexer.skipToken(TokenType::keyword_limit);
limit = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
if (m_lexer.tokenType() != TokenType::int_number)
throw Exception("expecting integer in limit clause");
limit = std::stoi(m_lexer.consumeCurrentToken().token_string);
} }
return OffsetLimitNode{offset, limit}; return OffsetLimitNode{offset, limit};
@ -446,7 +420,7 @@ namespace usql {
// function call // function call
if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) { if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
std::string function_name = m_lexer.consumeCurrentToken().token_string; std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::vector<std::unique_ptr<Node>> pars; std::vector<std::unique_ptr<Node>> pars;
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
@ -459,7 +433,7 @@ namespace usql {
} }
// numbers and strings // numbers and strings
std::string tokenString = m_lexer.consumeCurrentToken().token_string; std::string tokenString = m_lexer.consumeToken().token_string;
if (token_type == TokenType::int_number) if (token_type == TokenType::int_number)
return std::make_unique<IntValueNode>(std::stoi(tokenString)); return std::make_unique<IntValueNode>(std::stoi(tokenString));
@ -480,7 +454,7 @@ namespace usql {
} }
RelationalOperatorType Parser::parse_relational_operator() { RelationalOperatorType Parser::parse_relational_operator() {
auto op = m_lexer.consumeCurrentToken(); auto op = m_lexer.consumeToken();
switch (op.type) { switch (op.type) {
case TokenType::equal: case TokenType::equal:
return RelationalOperatorType::equal; return RelationalOperatorType::equal;
@ -500,7 +474,7 @@ namespace usql {
} }
LogicalOperatorType Parser::parse_logical_operator() { LogicalOperatorType Parser::parse_logical_operator() {
auto op = m_lexer.consumeCurrentToken(); auto op = m_lexer.consumeToken();
switch (op.type) { switch (op.type) {
case TokenType::logical_and: case TokenType::logical_and:
return LogicalOperatorType::and_operator; return LogicalOperatorType::and_operator;
@ -512,7 +486,7 @@ namespace usql {
} }
ArithmeticalOperatorType Parser::parse_arithmetical_operator() { ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
auto op = m_lexer.consumeCurrentToken(); auto op = m_lexer.consumeToken();
switch (op.type) { switch (op.type) {
case TokenType::plus: case TokenType::plus:
return ArithmeticalOperatorType::plus_operator; return ArithmeticalOperatorType::plus_operator;

View File

@ -133,7 +133,7 @@ void Row::setColumnValue(ColDefNode *col_def, ValueNode *col_value) {
else if (col_def->type == ColumnType::varchar_type) else if (col_def->type == ColumnType::varchar_type)
setStringColumnValue(col_def->order, col_value->getStringValue()); setStringColumnValue(col_def->order, col_value->getStringValue());
else if (col_def->type == ColumnType::date_type) else if (col_def->type == ColumnType::date_type)
setIntColumnValue(col_def->order, col_value->getDateValue()); setDateColumnValue(col_def->order, col_value->getDateValue());
else if (col_def->type == ColumnType::bool_type) else if (col_def->type == ColumnType::bool_type)
setBoolColumnValue(col_def->order, col_value->getBooleanValue()); setBoolColumnValue(col_def->order, col_value->getBooleanValue());
else else
@ -169,9 +169,20 @@ void Row::print(const std::vector<ColDefNode> &col_defs) {
int Row::print_get_column_size(const ColDefNode &col_def) { int Row::print_get_column_size(const ColDefNode &col_def) {
int col_size = col_def.type == ColumnType::varchar_type ? col_def.length : switch (col_def.type) {
col_def.type == ColumnType::float_type ? 16 : 10; case ColumnType::varchar_type:
return col_size; return col_def.length;
break;
case ColumnType::date_type:
return 19;
break;
case ColumnType::float_type:
return 16;
break;
default:
return 10;
}
} }
} // namespace } // namespace

View File

@ -35,6 +35,8 @@ namespace usql {
bool getBoolValue() override { throw Exception("getDateValue not supported on ColNullValue"); }; bool getBoolValue() override { throw Exception("getDateValue not supported on ColNullValue"); };
int compare(ColValue &other) override; int compare(ColValue &other) override;
virtual ~ColNullValue() = default;
}; };
@ -52,6 +54,8 @@ namespace usql {
int compare(ColValue &other) override; int compare(ColValue &other) override;
long m_integer; long m_integer;
virtual ~ColIntegerValue() = default;
}; };
@ -68,6 +72,8 @@ namespace usql {
int compare(ColValue &other) override; int compare(ColValue &other) override;
virtual ~ColDoubleValue() = default;
double m_double; double m_double;
}; };
@ -103,6 +109,8 @@ namespace usql {
int compare(ColValue &other) override; int compare(ColValue &other) override;
virtual ~ColDateValue() = default;
long m_date; // seconds since epoch for now long m_date; // seconds since epoch for now
}; };
@ -119,6 +127,8 @@ namespace usql {
int compare(ColValue &other) override; int compare(ColValue &other) override;
virtual ~ColBooleanValue() = default;
bool m_bool; bool m_bool;
}; };

View File

@ -6,7 +6,7 @@
namespace usql { namespace usql {
std::vector<std::pair<std::string, std::string>> Settings::m_settings = std::vector<std::pair<std::string, std::string>> Settings::m_settings =
{ std::make_pair("DATE_FORMAT", "%Y-%m-%d"), { std::make_pair("DATE_FORMAT", "%Y-%m-%d %H:%M:%S"),
std::make_pair("BOOL_TRUE_LITERAL", "Y"), std::make_pair("BOOL_TRUE_LITERAL", "Y"),
std::make_pair("BOOL_FALSE_LITERAL", "N"), std::make_pair("BOOL_FALSE_LITERAL", "N"),
std::make_pair("DOUBLE_FORMAT", "%.2f") }; std::make_pair("DOUBLE_FORMAT", "%.2f") };
@ -21,6 +21,7 @@ std::string Settings::date_to_string(long date) {
return ::date_to_string(date, get_setting("DATE_FORMAT")); return ::date_to_string(date, get_setting("DATE_FORMAT"));
} }
std::string Settings::double_to_string(double d) { std::string Settings::double_to_string(double d) {
char buffer[32]; char buffer[32];
int r, buf_size = 32; int r, buf_size = 32;

View File

@ -1,7 +1,9 @@
#include "table.h" #include "table.h"
#include "csvreader.h" #include "csvreader.h"
#include "ml_string.h" #include "ml_string.h"
#include "fast_double_parser.h"
#include <charconv>
#include <fstream> #include <fstream>
#include <algorithm> #include <algorithm>
@ -138,11 +140,12 @@ void Table::create_row_from_vector(const std::vector<ColDefNode> &colDefs, const
} }
double Table::string_to_double(const std::string &s) { double Table::string_to_double(const std::string &s) {
try { double result;
return std::stod(s); const char * endptr = fast_double_parser::parse_number(s.c_str(), &result);
} catch (std::invalid_argument &e) { if (endptr == nullptr) {
throw Exception("error parsing as double: " + s); throw Exception("error parsing as double: " + s);
} }
return result;
} }
long Table::string_to_long(const std::string &s) { long Table::string_to_long(const std::string &s) {