int is now long; a select column can be a function; some fixes.

Just enough to get it working; needs improvement.
This commit is contained in:
2021-07-19 19:44:46 +02:00
parent 9afbe6435e
commit dec99b823a
14 changed files with 8697 additions and 196 deletions

View File

@@ -6,54 +6,57 @@ namespace usql {
// TODO handle premature EOF
// Constructs a parser whose lexer is reset to a value-initialized Lexer.
// NOTE(review): the `lexer` and `m_lexer` assignments look like the removed and
// added lines of a member rename shown together — confirm only `m_lexer` is live.
Parser::Parser() {
lexer = Lexer{};
m_lexer = Lexer{};
}
// Entry point: tokenizes `code` and dispatches on the current token to the
// statement-specific parse routine (create table, insert, select, delete,
// update, load, save).
//
// @param code  statement text handed to the lexer
// @return      the node produced by the matching parse_* routine, or a
//              Node of NodeType::error when no statement keyword matches
//
// NOTE(review): every changed statement appears twice, first with `lexer` and
// then with `m_lexer`; this looks like removed/added diff lines rendered
// together — confirm only the `m_lexer` variant belongs to the current file.
std::unique_ptr<Node> Parser::parse(const std::string &code) {
lexer.parse(code);
// lexer.debugTokens();
m_lexer.parse(code);
// m_lexer.debugTokens();
// CREATE is only accepted when the following token is TABLE.
if (lexer.tokenType() == TokenType::keyword_create && lexer.nextTokenType() == TokenType::keyword_table) {
if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_table) {
return parse_create_table();
}
if (lexer.tokenType() == TokenType::keyword_insert) {
if (m_lexer.tokenType() == TokenType::keyword_insert) {
return parse_insert_into_table();
}
if (lexer.tokenType() == TokenType::keyword_select) {
if (m_lexer.tokenType() == TokenType::keyword_select) {
return parse_select_from_table();
}
if (lexer.tokenType() == TokenType::keyword_delete) {
if (m_lexer.tokenType() == TokenType::keyword_delete) {
return parse_delete_from_table();
}
if (lexer.tokenType() == TokenType::keyword_update) {
if (m_lexer.tokenType() == TokenType::keyword_update) {
return parse_update_table();
}
if (lexer.tokenType() == TokenType::keyword_load) {
if (m_lexer.tokenType() == TokenType::keyword_load) {
return parse_load_table();
}
// SAVE dispatch exists only in the `m_lexer` form.
if (m_lexer.tokenType() == TokenType::keyword_save) {
return parse_save_table();
}
// No statement keyword matched: report the offending token on stdout and
// hand back an error node instead of throwing.
std::cout << "ERROR, token:" << lexer.currentToken().token_string << std::endl;
std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
return std::make_unique<Node>(NodeType::error);
}
std::unique_ptr<Node> Parser::parse_create_table() {
std::vector<ColDefNode> cols_def{};
lexer.skipToken(TokenType::keyword_create);
lexer.skipToken(TokenType::keyword_table);
m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_table);
if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = lexer.consumeCurrentToken().token_string;
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// create as select
if (lexer.tokenType() == TokenType::keyword_as) {
lexer.skipToken(TokenType::keyword_as);
if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as);
std::unique_ptr<Node> select = parse_select_from_table();
return std::make_unique<CreateTableAsSelectNode>(table_name, std::move(select));
} else {
lexer.skipToken(TokenType::open_paren);
m_lexer.skipToken(TokenType::open_paren);
int column_order = 0;
do {
std::string column_name;
@@ -62,40 +65,40 @@ namespace usql {
bool column_nullable{true};
// column name
if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
column_name = lexer.consumeCurrentToken().token_string;
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
column_name = m_lexer.consumeCurrentToken().token_string;
// column type and optionally len
if (lexer.tokenType() == TokenType::keyword_int) {
if (m_lexer.tokenType() == TokenType::keyword_int) {
column_type = ColumnType::integer_type;
lexer.nextToken();
} else if (lexer.tokenType() == TokenType::keyword_float) {
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_float) {
column_type = ColumnType::float_type;
lexer.nextToken();
} else if (lexer.tokenType() == TokenType::keyword_varchar) {
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
column_type = ColumnType::varchar_type;
lexer.nextToken();
lexer.skipToken(TokenType::open_paren);
if (lexer.tokenType() == TokenType::int_number) {
column_len = std::stoi(lexer.consumeCurrentToken().token_string);
m_lexer.nextToken();
m_lexer.skipToken(TokenType::open_paren);
if (m_lexer.tokenType() == TokenType::int_number) {
column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
} else { /* TODO handle error */ }
lexer.skipToken(TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
} else { /* TODO handle error */ }
if (lexer.tokenType() == TokenType::keyword_not) {
lexer.nextToken();
lexer.skipToken(TokenType::keyword_null);
if (m_lexer.tokenType() == TokenType::keyword_not) {
m_lexer.nextToken();
m_lexer.skipToken(TokenType::keyword_null);
column_nullable = false;
} else if (lexer.tokenType() == TokenType::keyword_null) {
lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_null) {
m_lexer.nextToken();
}
cols_def.push_back( ColDefNode(column_name, column_type, column_order++, column_len, column_nullable));
lexer.skipTokenOptional(TokenType::comma);
m_lexer.skipTokenOptional(TokenType::comma);
// TODO in future constraints
} while (lexer.tokenType() != TokenType::close_paren);
} while (m_lexer.tokenType() != TokenType::close_paren);
return std::make_unique<CreateTableNode>(table_name, cols_def);
}
@@ -107,90 +110,107 @@ namespace usql {
std::vector<ColNameNode> cols_names{};
std::vector<std::unique_ptr<Node>> cols_values{};
lexer.skipToken(TokenType::keyword_insert);
lexer.skipToken(TokenType::keyword_into);
m_lexer.skipToken(TokenType::keyword_insert);
m_lexer.skipToken(TokenType::keyword_into);
// table name
if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = lexer.consumeCurrentToken().token_string;
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// column names
lexer.skipToken(TokenType::open_paren);
m_lexer.skipToken(TokenType::open_paren);
do {
if (lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
cols_names.push_back(lexer.consumeCurrentToken().token_string);
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
cols_names.push_back(m_lexer.consumeCurrentToken().token_string);
lexer.skipTokenOptional(TokenType::comma);
} while (lexer.tokenType() != TokenType::close_paren);
lexer.skipToken(TokenType::close_paren);
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
lexer.skipToken(TokenType::keyword_values);
m_lexer.skipToken(TokenType::keyword_values);
// column values
lexer.skipToken(TokenType::open_paren);
m_lexer.skipToken(TokenType::open_paren);
do {
// cols_values.push_back(lexer.consumeCurrentToken().token_string);
// cols_values.push_back(m_lexer.consumeCurrentToken().token_string);
auto col_value = parse_value();
cols_values.push_back(std::move(col_value));
lexer.skipTokenOptional(TokenType::comma);
} while (lexer.tokenType() != TokenType::close_paren);
lexer.skipToken(TokenType::close_paren);
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
return std::make_unique<InsertIntoTableNode>(table_name, cols_names, std::move(cols_values));
}
// Parses one value starting at the current token and consumes it.
//
// Accepted forms:
//   - int_number      -> IntValueNode   (converted with std::stoi)
//   - double_number   -> FloatValueNode (converted with std::stof)
//   - string_literal  -> StringValueNode
//   - identifier      -> in the `m_lexer` variant: FunctionNode when the next
//                        token is an opening parenthesis (arguments are parsed
//                        recursively with parse_value), otherwise ColNameNode;
//                        in the `lexer` variant: always a FunctionNode
//
// @return the node for the parsed value
// @throws Exception("Syntax error") when the current token is none of the above
//
// NOTE(review): `lexer` and `m_lexer` lines are interleaved here, including two
// overlapping identifier branches; this looks like removed/added diff lines
// shown together — confirm only the `m_lexer` variant is live.
std::unique_ptr<Node> Parser::parse_value() {
if (lexer.tokenType() == TokenType::int_number) {
return std::make_unique<IntValueNode>(std::stoi(lexer.consumeCurrentToken().token_string));
if (m_lexer.tokenType() == TokenType::int_number) {
return std::make_unique<IntValueNode>(std::stoi(m_lexer.consumeCurrentToken().token_string));
}
if (lexer.tokenType() == TokenType::double_number) {
return std::make_unique<FloatValueNode>(std::stof(lexer.consumeCurrentToken().token_string));
if (m_lexer.tokenType() == TokenType::double_number) {
return std::make_unique<FloatValueNode>(std::stof(m_lexer.consumeCurrentToken().token_string));
}
if (lexer.tokenType() == TokenType::string_literal) {
return std::make_unique<StringValueNode>(lexer.consumeCurrentToken().token_string);
if (m_lexer.tokenType() == TokenType::string_literal) {
return std::make_unique<StringValueNode>(m_lexer.consumeCurrentToken().token_string);
}
if (lexer.tokenType() == TokenType::identifier) {
std::string func_name = lexer.consumeCurrentToken().token_string;
std::vector<std::unique_ptr<Node>> pars;
if (m_lexer.tokenType() == TokenType::identifier) {
std::string name = m_lexer.consumeCurrentToken().token_string;
lexer.skipToken(TokenType::open_paren);
while (lexer.tokenType() != TokenType::close_paren) { // TODO handle errors
pars.push_back(parse_value());
lexer.skipTokenOptional(TokenType::comma);
// function
if (m_lexer.tokenType() == TokenType::open_paren) {
std::vector<std::unique_ptr<Node>> pars;
m_lexer.skipToken(TokenType::open_paren);
// Commas between arguments are optional; the loop only stops at the
// closing parenthesis.
while (m_lexer.tokenType() != TokenType::close_paren) { // TODO handle errors
pars.push_back(parse_value());
m_lexer.skipTokenOptional(TokenType::comma);
}
m_lexer.skipToken(TokenType::close_paren);
return std::make_unique<FunctionNode>(name, std::move(pars));
} else {
// A bare identifier is treated as a column reference.
return std::make_unique<ColNameNode>(name);
}
lexer.skipToken(TokenType::close_paren);
return std::make_unique<FunctionNode>(func_name, std::move(pars));
}
throw Exception("Syntax error");
}
// Parses `SELECT <columns> FROM <table> [WHERE ...]` into a SelectFromTableNode.
//
// In the `m_lexer` variant each select column is parsed with parse_value(), so
// a column may be any value that routine accepts. A column that is a plain
// column name uses that name as its alias; any other column gets a generated
// alias "c<i>", where i starts at 1 and advances only when an alias is generated.
// In the `lexer` variant the columns are collected as raw token strings.
//
// @return a SelectFromTableNode holding the table name, the columns and the
//         node returned by parse_where_clause()
//
// NOTE(review): `lexer` and `m_lexer` lines (including two function headers and
// two return statements) are interleaved here; this looks like removed/added
// diff lines shown together — confirm only the `m_lexer` variant is live.
std::unique_ptr<Node> Parser::parse_select_from_table() {
std::vector<ColNameNode> cols_names{};
std::unique_ptr<Node> Parser::parse_select_from_table() {
auto cols = std::make_unique<std::vector<SelectColNode>>();
lexer.skipToken(TokenType::keyword_select);
while (lexer.tokenType() != TokenType::keyword_from) {
cols_names.push_back(lexer.consumeCurrentToken().token_string);
lexer.skipTokenOptional(TokenType::comma);
}
m_lexer.skipToken(TokenType::keyword_select);
int i = 1;
// Columns are read until the FROM keyword; commas between them are optional.
while (m_lexer.tokenType() != TokenType::keyword_from) {
auto col_value = parse_value();
std::string alias;
if (col_value->node_type == NodeType::column_name) {
// node_type was just checked, so the node is read as a ColNameNode.
alias = ((ColNameNode*) col_value.get())->name;
} else {
alias = "c" + std::to_string(i);
i++;
}
lexer.skipToken(TokenType::keyword_from);
std::string table_name = lexer.consumeCurrentToken().token_string;
cols->push_back(SelectColNode{std::move(col_value), alias});
std::unique_ptr<Node> where_node = parse_where_clause();
m_lexer.skipTokenOptional(TokenType::comma);
}
// if (lexer.tokenType() == TokenType::keyword_order_by) {}
// if (lexer.tokenType() == TokenType::keyword_offset) {}
// if (lexer.tokenType() == TokenType::keyword_limit) {}
m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
return std::make_unique<SelectFromTableNode>(table_name, cols_names, std::move(where_node));
}
std::unique_ptr<Node> where_node = parse_where_clause();
// if (m_lexer.tokenType() == TokenType::keyword_order_by) {}
// if (m_lexer.tokenType() == TokenType::keyword_offset) {}
// if (m_lexer.tokenType() == TokenType::keyword_limit) {}
return std::make_unique<SelectFromTableNode>(table_name, std::move(cols), std::move(where_node));
}
std::unique_ptr<Node> Parser::parse_delete_from_table() {
lexer.skipToken(TokenType::keyword_delete);
lexer.skipToken(TokenType::keyword_from);
m_lexer.skipToken(TokenType::keyword_delete);
m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::unique_ptr<Node> where_node = parse_where_clause();
@@ -198,22 +218,22 @@ std::unique_ptr<Node> Parser::parse_value() {
}
std::unique_ptr<Node> Parser::parse_update_table() {
lexer.skipToken(TokenType::keyword_update);
lexer.skipTokenOptional(TokenType::keyword_table);
m_lexer.skipToken(TokenType::keyword_update);
m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeCurrentToken().token_string;
lexer.skipToken(TokenType::keyword_set);
m_lexer.skipToken(TokenType::keyword_set);
std::vector<ColNameNode> cols_names;
std::vector<std::unique_ptr<Node>> values;
do {
cols_names.push_back(lexer.consumeCurrentToken().token_string);
lexer.skipToken(TokenType::equal);
cols_names.push_back(m_lexer.consumeCurrentToken().token_string);
m_lexer.skipToken(TokenType::equal);
std::unique_ptr<Node> left = Parser::parse_operand_node();
if (Lexer::isArithmeticalOperator(lexer.tokenType())) {
if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
ArithmeticalOperatorType op = parse_arithmetical_operator();
std::unique_ptr<Node> right = Parser::parse_operand_node();
@@ -225,9 +245,9 @@ std::unique_ptr<Node> Parser::parse_value() {
std::make_unique<ArithmeticalOperatorNode>(ArithmeticalOperatorType::copy_value,
std::move(left), std::move(right)));
}
lexer.skipTokenOptional(TokenType::comma);
m_lexer.skipTokenOptional(TokenType::comma);
} while (lexer.tokenType() != TokenType::keyword_where && lexer.tokenType() != TokenType::eof);
} while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);
std::unique_ptr<Node> where_node = parse_where_clause();
@@ -235,37 +255,51 @@ std::unique_ptr<Node> Parser::parse_value() {
}
// Parses `LOAD [INTO] <table> [FROM] <file>` into a LoadIntoTableNode.
// LOAD is mandatory; INTO and FROM are skipped only when present. The table
// name and file name are taken from whatever tokens follow, without checking
// their token types.
//
// @return a LoadIntoTableNode built from the table name and file name
//
// NOTE(review): each statement appears twice (`lexer`, then `m_lexer`); this
// looks like removed/added diff lines shown together — confirm only the
// `m_lexer` variant is live.
std::unique_ptr<Node> Parser::parse_load_table() {
lexer.skipToken(TokenType::keyword_load);
lexer.skipTokenOptional(TokenType::keyword_into);
m_lexer.skipToken(TokenType::keyword_load);
m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string table_name = lexer.consumeCurrentToken().token_string;
std::string table_name = m_lexer.consumeCurrentToken().token_string;
lexer.skipTokenOptional(TokenType::keyword_from);
m_lexer.skipTokenOptional(TokenType::keyword_from);
std::string file_name = lexer.consumeCurrentToken().token_string;
std::string file_name = m_lexer.consumeCurrentToken().token_string;
return std::make_unique<LoadIntoTableNode>(table_name, file_name);
}
// Parses `SAVE [TABLE] <table> [INTO] <file>` into a SaveTableNode.
// SAVE is mandatory; TABLE and INTO are skipped only when present. The two
// names are taken from whatever tokens follow, without checking their types.
std::unique_ptr<Node> Parser::parse_save_table() {
    // Leading keywords.
    m_lexer.skipToken(TokenType::keyword_save);
    m_lexer.skipTokenOptional(TokenType::keyword_table);

    // Table to be written out.
    auto source_token = m_lexer.consumeCurrentToken();
    std::string source_table = source_token.token_string;

    // Destination file.
    m_lexer.skipTokenOptional(TokenType::keyword_into);
    auto target_token = m_lexer.consumeCurrentToken();
    std::string target_file = target_token.token_string;

    return std::make_unique<SaveTableNode>(source_table, target_file);
}
// Parses an optional WHERE clause.
//
// When the current token is not WHERE, a TrueNode is returned and no token is
// consumed. Otherwise WHERE is skipped and relational expressions are parsed
// until the lexer reports eof; a relational expression followed by a logical
// operator is combined with the next relational expression into a
// LogicalOperatorNode.
//
// @return the TrueNode, or the node built during the last loop iteration
//
// NOTE(review): `node` is reassigned at the start of every iteration, so a
// result from a previous iteration is discarded — consistent with the
// "multiple filters" TODO below; confirm this is intended.
// NOTE(review): each changed statement appears twice (`lexer`, then `m_lexer`);
// this looks like removed/added diff lines shown together — confirm only the
// `m_lexer` variant is live.
std::unique_ptr<Node> Parser::parse_where_clause() {
// TODO add support for multiple filters
// TODO add support for parentheses
if (lexer.tokenType() != TokenType::keyword_where) {
if (m_lexer.tokenType() != TokenType::keyword_where) {
return std::make_unique<TrueNode>();
}
std::unique_ptr<Node> node;
lexer.skipToken(TokenType::keyword_where);
m_lexer.skipToken(TokenType::keyword_where);
do {
node = parse_relational_expression();
if (Lexer::isLogicalOperator(lexer.tokenType())) {
if (Lexer::isLogicalOperator(m_lexer.tokenType())) {
auto operation = parse_logical_operator();
std::unique_ptr<Node> node2 = parse_relational_expression();
node = std::make_unique<LogicalOperatorNode>(operation, std::move(node), std::move(node2));
}
} while (lexer.tokenType() != TokenType::eof); // until whole where clause parsed
} while (m_lexer.tokenType() != TokenType::eof); // until whole where clause parsed
return node;
}
@@ -280,8 +314,8 @@ std::unique_ptr<Node> Parser::parse_value() {
std::unique_ptr<Node> Parser::parse_operand_node() {
// while not end or order or limit
auto token_type = lexer.tokenType();
std::string tokenString = lexer.consumeCurrentToken().token_string;
auto token_type = m_lexer.tokenType();
std::string tokenString = m_lexer.consumeCurrentToken().token_string;
switch (token_type) {
case TokenType::int_number:
return std::make_unique<IntValueNode>(std::stoi(tokenString));
@@ -297,7 +331,7 @@ std::unique_ptr<Node> Parser::parse_value() {
}
RelationalOperatorType Parser::parse_relational_operator() {
auto op = lexer.consumeCurrentToken();
auto op = m_lexer.consumeCurrentToken();
switch (op.type) {
case TokenType::equal:
return RelationalOperatorType::equal;
@@ -317,7 +351,7 @@ std::unique_ptr<Node> Parser::parse_value() {
}
LogicalOperatorType Parser::parse_logical_operator() {
auto op = lexer.consumeCurrentToken();
auto op = m_lexer.consumeCurrentToken();
switch (op.type) {
case TokenType::logical_and:
return LogicalOperatorType::and_operator;
@@ -329,7 +363,7 @@ std::unique_ptr<Node> Parser::parse_value() {
}
ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
auto op = lexer.consumeCurrentToken();
auto op = m_lexer.consumeCurrentToken();
switch (op.type) {
case TokenType::plus:
return ArithmeticalOperatorType::plus_operator;