usql update

This commit is contained in:
VaclavT 2021-08-31 19:00:03 +02:00
parent 85bc6c9363
commit dfa7c1c15b
9 changed files with 1344 additions and 70 deletions

View File

@ -56,6 +56,7 @@ utils/local_install.sh
(read-url "https://api.nasdaq.com/api/calendar/dividends/") ; hangs in sslclient.cpp line 132
### TODO
- in ml_date, throw an exception when the date string or the format is invalid
- add debug support, at least call stack
- multiline editing (see kilocpp editor)
- execute system command should capture stderr

1270
clib/fast_double_parser.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -63,12 +63,18 @@ namespace usql {
Token Lexer::currentToken() { return m_tokens[m_index]; }
Token Lexer::consumeCurrentToken() {
Token Lexer::consumeToken() {
int i = m_index;
nextToken();
return m_tokens[i];
}
// Returns the token at the current position after requiring it to be of `type`.
// The position is remembered before calling skipToken(), because skipToken()
// is what moves the lexer forward (NOTE(review): it appears to throw an
// Exception on a type mismatch — confirm against its full definition).
Token Lexer::consumeToken(TokenType type) {
    int consumed_at = m_index;
    skipToken(type);
    return m_tokens[consumed_at];
}
void Lexer::nextToken() {
if (m_index < m_tokens.size()) {
m_index++;
@ -79,8 +85,7 @@ namespace usql {
if (tokenType() == type) {
nextToken();
} else {
throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " +
typeToString(type));
throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type));
}
}
@ -215,8 +220,8 @@ namespace usql {
(token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
return TokenType::comment;
// if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
// return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'')
return TokenType::string_literal;
@ -233,9 +238,6 @@ namespace usql {
if (std::regex_match(token, k_identifier_regex))
return TokenType::identifier;
if (m_index + 1 >= m_tokens.size())
return TokenType::eof;
return TokenType::undef;
}

View File

@ -83,7 +83,9 @@ namespace usql {
Token currentToken();
Token consumeCurrentToken();
Token consumeToken();
Token consumeToken(TokenType type);
void nextToken();

View File

@ -47,10 +47,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_table);
if (m_lexer.tokenType() != TokenType::identifier)
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
// create as select
if (m_lexer.tokenType() == TokenType::keyword_as) {
@ -72,7 +69,7 @@ namespace usql {
if (m_lexer.tokenType() != TokenType::identifier) {
throw Exception("syntax error, expected identifier");
}
database_value = m_lexer.consumeCurrentToken().token_string;
database_value = m_lexer.consumeToken().token_string;
// column type and optionally len
if (m_lexer.tokenType() == TokenType::keyword_integer) {
@ -85,11 +82,7 @@ namespace usql {
column_type = ColumnType::varchar_type;
m_lexer.nextToken();
m_lexer.skipToken(TokenType::open_paren);
if (m_lexer.tokenType() == TokenType::int_number) {
column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
} else {
throw Exception("syntax error, expected int number");
}
column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
m_lexer.skipToken(TokenType::close_paren);
} else if (m_lexer.tokenType() == TokenType::keyword_date) {
column_type = ColumnType::date_type;
@ -125,11 +118,11 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_load);
m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipTokenOptional(TokenType::keyword_from);
std::string file_name = m_lexer.consumeCurrentToken().token_string;
std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<LoadIntoTableNode>(table_name, file_name);
}
@ -138,11 +131,11 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_save);
m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string file_name = m_lexer.consumeCurrentToken().token_string;
std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<SaveTableNode>(table_name, file_name);
}
@ -151,7 +144,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_drop);
m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
return std::make_unique<DropTableNode>(table_name);
}
@ -159,13 +152,9 @@ namespace usql {
std::unique_ptr<Node> Parser::parse_set() {
m_lexer.skipToken(TokenType::keyword_set);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set name");
std::string name = m_lexer.consumeCurrentToken().token_string;
std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
m_lexer.skipTokenOptional(TokenType::equal);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set value");
std::string value = m_lexer.consumeCurrentToken().token_string;
std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<SetNode>(name, value);
}
@ -173,8 +162,7 @@ namespace usql {
std::unique_ptr<Node> Parser::parse_show() {
m_lexer.skipToken(TokenType::keyword_show);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal on show parameter name");
std::string name = m_lexer.consumeCurrentToken().token_string;
std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<ShowNode>(name);
}
@ -187,18 +175,12 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_into);
// table name
if (m_lexer.tokenType() != TokenType::identifier)
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
// column names
m_lexer.skipToken(TokenType::open_paren);
do {
if (m_lexer.tokenType() != TokenType::identifier)
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
database_values.emplace_back(m_lexer.consumeCurrentToken().token_string);
database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
@ -233,7 +215,7 @@ namespace usql {
int i = 1;
while (m_lexer.tokenType() != TokenType::keyword_from) {
if (m_lexer.tokenType()==TokenType::multiply) {
std::string name = m_lexer.consumeCurrentToken().token_string;
std::string name = m_lexer.consumeToken().token_string;
auto multiply_char = std::make_unique<DatabaseValueNode>(name);
cols->push_back(SelectColNode{std::move(multiply_char), "*"});
@ -243,7 +225,7 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as);
column_alias = m_lexer.consumeCurrentToken().token_string;
column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
} else {
if (column_value->node_type == NodeType::database_value) {
column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
@ -262,7 +244,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause();
@ -278,7 +260,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_delete);
m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause();
@ -289,7 +271,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_update);
m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::keyword_set);
@ -297,7 +279,7 @@ namespace usql {
std::vector<std::unique_ptr<Node>> values;
do {
cols_names.emplace_back(m_lexer.consumeCurrentToken().token_string);
cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
m_lexer.skipToken(TokenType::equal);
std::unique_ptr<Node> left = Parser::parse_value();
@ -333,7 +315,7 @@ namespace usql {
bool asc = true;
auto token_type = m_lexer.tokenType();
std::string tokenString = m_lexer.consumeCurrentToken().token_string;
std::string tokenString = m_lexer.consumeToken().token_string;
switch (token_type) {
case TokenType::int_number:
col_index = std::stoi(tokenString);
@ -365,20 +347,12 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_offset) {
m_lexer.skipToken(TokenType::keyword_offset);
if (m_lexer.tokenType() != TokenType::int_number)
throw Exception("expecting integer in offset clause");
offset = std::stoi(m_lexer.consumeCurrentToken().token_string);
offset = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
}
if (m_lexer.tokenType() == TokenType::keyword_limit) {
m_lexer.skipToken(TokenType::keyword_limit);
if (m_lexer.tokenType() != TokenType::int_number)
throw Exception("expecting integer in limit clause");
limit = std::stoi(m_lexer.consumeCurrentToken().token_string);
limit = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
}
return OffsetLimitNode{offset, limit};
@ -446,7 +420,7 @@ namespace usql {
// function call
if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
std::string function_name = m_lexer.consumeCurrentToken().token_string;
std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::vector<std::unique_ptr<Node>> pars;
m_lexer.skipToken(TokenType::open_paren);
@ -459,7 +433,7 @@ namespace usql {
}
// numbers and strings
std::string tokenString = m_lexer.consumeCurrentToken().token_string;
std::string tokenString = m_lexer.consumeToken().token_string;
if (token_type == TokenType::int_number)
return std::make_unique<IntValueNode>(std::stoi(tokenString));
@ -480,7 +454,7 @@ namespace usql {
}
RelationalOperatorType Parser::parse_relational_operator() {
auto op = m_lexer.consumeCurrentToken();
auto op = m_lexer.consumeToken();
switch (op.type) {
case TokenType::equal:
return RelationalOperatorType::equal;
@ -500,7 +474,7 @@ namespace usql {
}
LogicalOperatorType Parser::parse_logical_operator() {
auto op = m_lexer.consumeCurrentToken();
auto op = m_lexer.consumeToken();
switch (op.type) {
case TokenType::logical_and:
return LogicalOperatorType::and_operator;
@ -512,7 +486,7 @@ namespace usql {
}
ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
auto op = m_lexer.consumeCurrentToken();
auto op = m_lexer.consumeToken();
switch (op.type) {
case TokenType::plus:
return ArithmeticalOperatorType::plus_operator;

View File

@ -133,7 +133,7 @@ void Row::setColumnValue(ColDefNode *col_def, ValueNode *col_value) {
else if (col_def->type == ColumnType::varchar_type)
setStringColumnValue(col_def->order, col_value->getStringValue());
else if (col_def->type == ColumnType::date_type)
setIntColumnValue(col_def->order, col_value->getDateValue());
setDateColumnValue(col_def->order, col_value->getDateValue());
else if (col_def->type == ColumnType::bool_type)
setBoolColumnValue(col_def->order, col_value->getBooleanValue());
else
@ -169,9 +169,20 @@ void Row::print(const std::vector<ColDefNode> &col_defs) {
// Returns the number of characters reserved for a column when printing a row.
//   varchar -> the declared column length
//   date    -> 19 (the length of a value formatted as "%Y-%m-%d %H:%M:%S")
//   float   -> 16
//   other   -> 10
// The former ternary-based computation returned before the switch was reached,
// which made the date width unreachable; the switch is now the only code path.
int Row::print_get_column_size(const ColDefNode &col_def) {
    switch (col_def.type) {
        case ColumnType::varchar_type:
            return col_def.length;
        case ColumnType::date_type:
            return 19;
        case ColumnType::float_type:
            return 16;
        default:
            return 10;
    }
}
} // namespace

View File

@ -35,6 +35,8 @@ namespace usql {
// A null column value has no boolean representation, so this always throws.
// The message names this accessor (it previously said "getDateValue").
bool getBoolValue() override { throw Exception("getBoolValue not supported on ColNullValue"); }
int compare(ColValue &other) override;
virtual ~ColNullValue() = default;
};
@ -52,6 +54,8 @@ namespace usql {
int compare(ColValue &other) override;
long m_integer;
virtual ~ColIntegerValue() = default;
};
@ -68,6 +72,8 @@ namespace usql {
int compare(ColValue &other) override;
virtual ~ColDoubleValue() = default;
double m_double;
};
@ -103,6 +109,8 @@ namespace usql {
int compare(ColValue &other) override;
virtual ~ColDateValue() = default;
long m_date; // seconds since epoch for now
};
@ -119,6 +127,8 @@ namespace usql {
int compare(ColValue &other) override;
virtual ~ColBooleanValue() = default;
bool m_bool;
};

View File

@ -6,7 +6,7 @@
namespace usql {
std::vector<std::pair<std::string, std::string>> Settings::m_settings =
{ std::make_pair("DATE_FORMAT", "%Y-%m-%d"),
{ std::make_pair("DATE_FORMAT", "%Y-%m-%d %H:%M:%S"),
std::make_pair("BOOL_TRUE_LITERAL", "Y"),
std::make_pair("BOOL_FALSE_LITERAL", "N"),
std::make_pair("DOUBLE_FORMAT", "%.2f") };
@ -21,6 +21,7 @@ std::string Settings::date_to_string(long date) {
return ::date_to_string(date, get_setting("DATE_FORMAT"));
}
std::string Settings::double_to_string(double d) {
char buffer[32];
int r, buf_size = 32;

View File

@ -1,7 +1,9 @@
#include "table.h"
#include "csvreader.h"
#include "ml_string.h"
#include "fast_double_parser.h"
#include <charconv>
#include <fstream>
#include <algorithm>
@ -138,11 +140,12 @@ void Table::create_row_from_vector(const std::vector<ColDefNode> &colDefs, const
}
double Table::string_to_double(const std::string &s) {
try {
return std::stod(s);
} catch (std::invalid_argument &e) {
double result;
const char * endptr = fast_double_parser::parse_number(s.c_str(), &result);
if (endptr == nullptr) {
throw Exception("error parsing as double: " + s);
}
return result;
}
long Table::string_to_long(const std::string &s) {