usql/parser.cpp

429 lines
15 KiB
C++

#include "parser.h"
#include "exception.h"
namespace usql {
// TODO handle premature eof
Parser::Parser() {
m_lexer = Lexer{};
}
// Tokenizes `code` and dispatches on the leading token to the matching
// statement parser.
//
// Returns the node built by the selected statement parser. When the leading
// token starts no handled statement, the current token is printed to
// std::cout and a plain Node of type NodeType::error is returned instead.
// The statement parsers themselves may throw Exception on syntax errors.
std::unique_ptr<Node> Parser::parse(const std::string &code) {
    m_lexer.parse(code);
    // m_lexer.debugTokens();

    switch (m_lexer.tokenType()) {
        case TokenType::keyword_create:
            // only "create table" is handled; the look-ahead is evaluated
            // solely for a leading create keyword
            if (m_lexer.nextTokenType() == TokenType::keyword_table) {
                return parse_create_table();
            }
            break;
        case TokenType::keyword_insert:
            return parse_insert_into_table();
        case TokenType::keyword_select:
            return parse_select_from_table();
        case TokenType::keyword_delete:
            return parse_delete_from_table();
        case TokenType::keyword_update:
            return parse_update_table();
        case TokenType::keyword_load:
            return parse_load_table();
        case TokenType::keyword_save:
            return parse_save_table();
        case TokenType::keyword_drop:
            return parse_drop_table();
        default:
            break;
    }

    // nothing matched: report the offending token and hand back an error node
    std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
    return std::make_unique<Node>(NodeType::error);
}
std::unique_ptr<Node> Parser::parse_create_table() {
std::vector<ColDefNode> cols_def{};
m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_table);
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// create as select
if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as);
std::unique_ptr<Node> select = parse_select_from_table();
return std::make_unique<CreateTableAsSelectNode>(table_name, std::move(select));
} else {
m_lexer.skipToken(TokenType::open_paren);
int column_order = 0;
do {
std::string column_name;
ColumnType column_type;
int column_len{1};
bool column_nullable{true};
// column name
if (m_lexer.tokenType() != TokenType::identifier) {
throw Exception("syntax error, expected identifier");
}
column_name = m_lexer.consumeCurrentToken().token_string;
// column type and optionally len
if (m_lexer.tokenType() == TokenType::keyword_integer) {
column_type = ColumnType::integer_type;
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_float) {
column_type = ColumnType::float_type;
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
column_type = ColumnType::varchar_type;
m_lexer.nextToken();
m_lexer.skipToken(TokenType::open_paren);
if (m_lexer.tokenType() == TokenType::int_number) {
column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
} else {
throw Exception("syntax error, expected int number");
}
m_lexer.skipToken(TokenType::close_paren);
} else {
throw Exception("syntax error, column type expected");
}
if (m_lexer.tokenType() == TokenType::keyword_not) {
m_lexer.nextToken();
m_lexer.skipToken(TokenType::keyword_null);
column_nullable = false;
} else if (m_lexer.tokenType() == TokenType::keyword_null) {
m_lexer.nextToken();
}
cols_def.push_back( ColDefNode(column_name, column_type, column_order++, column_len, column_nullable));
m_lexer.skipTokenOptional(TokenType::comma);
// TODO in future constraints
} while (m_lexer.tokenType() != TokenType::close_paren);
return std::make_unique<CreateTableNode>(table_name, cols_def);
}
}
// Parses a LOAD statement: keyword_load, an optional keyword_into, the table
// name token, an optional keyword_from and finally the file name token.
// The two name tokens are taken as they come; their token type is not checked.
// Returns a LoadIntoTableNode built from (table name, file name).
std::unique_ptr<Node> Parser::parse_load_table() {
    m_lexer.skipToken(TokenType::keyword_load);

    m_lexer.skipTokenOptional(TokenType::keyword_into);
    auto target_table = m_lexer.consumeCurrentToken().token_string;

    m_lexer.skipTokenOptional(TokenType::keyword_from);
    auto source_file = m_lexer.consumeCurrentToken().token_string;

    return std::make_unique<LoadIntoTableNode>(target_table, source_file);
}
// Parses a SAVE statement: keyword_save, an optional keyword_table, the table
// name token, an optional keyword_into and finally the file name token.
// The two name tokens are taken as they come; their token type is not checked.
// Returns a SaveTableNode built from (table name, file name).
std::unique_ptr<Node> Parser::parse_save_table() {
    m_lexer.skipToken(TokenType::keyword_save);

    m_lexer.skipTokenOptional(TokenType::keyword_table);
    auto source_table = m_lexer.consumeCurrentToken().token_string;

    m_lexer.skipTokenOptional(TokenType::keyword_into);
    auto target_file = m_lexer.consumeCurrentToken().token_string;

    return std::make_unique<SaveTableNode>(source_table, target_file);
}
// Parses a DROP statement: keyword_drop, an optional keyword_table and the
// table name token (its token type is not checked).
// Returns a DropTableNode for that table name.
std::unique_ptr<Node> Parser::parse_drop_table() {
    m_lexer.skipToken(TokenType::keyword_drop);
    m_lexer.skipTokenOptional(TokenType::keyword_table);

    auto doomed_table = m_lexer.consumeCurrentToken().token_string;
    return std::make_unique<DropTableNode>(doomed_table);
}
std::unique_ptr<Node> Parser::parse_insert_into_table() {
std::vector<ColNameNode> column_names{};
std::vector<std::unique_ptr<Node>> column_values{};
m_lexer.skipToken(TokenType::keyword_insert);
m_lexer.skipToken(TokenType::keyword_into);
// table name
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// column names
m_lexer.skipToken(TokenType::open_paren);
do {
if (m_lexer.tokenType() != TokenType::identifier) { /* TODO handle error */ }
column_names.push_back(m_lexer.consumeCurrentToken().token_string);
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
m_lexer.skipToken(TokenType::keyword_values);
// column values
m_lexer.skipToken(TokenType::open_paren);
do {
auto col_value = parse_value();
column_values.push_back(std::move(col_value));
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
return std::make_unique<InsertIntoTableNode>(table_name, column_names, std::move(column_values));
}
// Parses a single value expression starting at the current token:
//   int_number      -> IntValueNode
//   double_number   -> DoubleValueNode
//   string_literal  -> StringValueNode
//   identifier "("  -> FunctionNode with recursively parsed arguments
//   identifier      -> ColNameNode
//
// Throws Exception for any other token. The numeric conversions (std::stoi /
// std::stod) may additionally throw the standard library's own exceptions for
// literals that do not fit the target type.
std::unique_ptr<Node> Parser::parse_value() {
    if (m_lexer.tokenType() == TokenType::int_number) {
        return std::make_unique<IntValueNode>(std::stoi(m_lexer.consumeCurrentToken().token_string));
    }
    if (m_lexer.tokenType() == TokenType::double_number) {
        // std::stod, not std::stof: parsing through float would truncate the
        // literal to single precision (parse_operand_node also uses std::stod)
        return std::make_unique<DoubleValueNode>(std::stod(m_lexer.consumeCurrentToken().token_string));
    }
    if (m_lexer.tokenType() == TokenType::string_literal) {
        return std::make_unique<StringValueNode>(m_lexer.consumeCurrentToken().token_string);
    }
    if (m_lexer.tokenType() == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
        // function call: name "(" [value [, value ...]] ")"
        std::string function_name = m_lexer.consumeCurrentToken().token_string;
        std::vector<std::unique_ptr<Node>> pars;
        m_lexer.skipToken(TokenType::open_paren);
        // an unterminated argument list ends with the Exception thrown by the
        // recursive parse_value() call below
        while (m_lexer.tokenType() != TokenType::close_paren) {
            pars.push_back(parse_value());
            m_lexer.skipTokenOptional(TokenType::comma);
        }
        m_lexer.skipToken(TokenType::close_paren);
        return std::make_unique<FunctionNode>(function_name, std::move(pars));
    }
    if (m_lexer.tokenType() == TokenType::identifier) {
        std::string name = m_lexer.consumeCurrentToken().token_string;
        return std::make_unique<ColNameNode>(name);
    }
    throw Exception("Syntax error, current token: " + m_lexer.currentToken().token_string);
}
std::unique_ptr<Node> Parser::parse_select_from_table() {
auto cols = std::make_unique<std::vector<SelectColNode>>();
m_lexer.skipToken(TokenType::keyword_select);
int i = 1;
while (m_lexer.tokenType() != TokenType::keyword_from) {
auto column_value = parse_value();
std::string column_alias;
if (column_value->node_type == NodeType::column_name) {
column_alias = ((ColNameNode*) column_value.get())->name;
} else {
column_alias = "c" + std::to_string(i);
i++;
}
cols->push_back(SelectColNode{std::move(column_value), column_alias});
m_lexer.skipTokenOptional(TokenType::comma);
}
m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
std::unique_ptr<Node> where_node = parse_where_clause();
std::vector<ColOrderNode> orderby_node = parse_orderby_clause();
// if (m_lexer.tokenType() == TokenType::keyword_offset) {}
// if (m_lexer.tokenType() == TokenType::keyword_limit) {}
return std::make_unique<SelectFromTableNode>(table_name, std::move(cols), std::move(where_node), orderby_node);
}
// Parses a DELETE statement: keyword_delete, keyword_from, the table name
// token (its token type is not checked) and an optional where clause.
// Returns a DeleteFromTableNode built from the table name and the node
// produced by parse_where_clause().
std::unique_ptr<Node> Parser::parse_delete_from_table() {
    m_lexer.skipToken(TokenType::keyword_delete);
    m_lexer.skipToken(TokenType::keyword_from);

    auto target_table = m_lexer.consumeCurrentToken().token_string;
    auto filter = parse_where_clause();

    return std::make_unique<DeleteFromTableNode>(target_table, std::move(filter));
}
std::unique_ptr<Node> Parser::parse_update_table() {
m_lexer.skipToken(TokenType::keyword_update);
m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
m_lexer.skipToken(TokenType::keyword_set);
std::vector<ColNameNode> cols_names;
std::vector<std::unique_ptr<Node>> values;
do {
cols_names.push_back(m_lexer.consumeCurrentToken().token_string);
m_lexer.skipToken(TokenType::equal);
std::unique_ptr<Node> left = Parser::parse_operand_node();
if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
ArithmeticalOperatorType op = parse_arithmetical_operator();
std::unique_ptr<Node> right = Parser::parse_operand_node();
values.push_back(std::make_unique<ArithmeticalOperatorNode>(op, std::move(left),
std::move(right)));
} else {
std::unique_ptr<Node> right = std::make_unique<IntValueNode>(0);
values.push_back(
std::make_unique<ArithmeticalOperatorNode>(ArithmeticalOperatorType::copy_value,
std::move(left), std::move(right)));
}
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);
std::unique_ptr<Node> where_node = parse_where_clause();
return std::make_unique<UpdateTableNode>(table_name, cols_names, std::move(values), std::move(where_node));
}
std::unique_ptr<Node> Parser::parse_where_clause() {
if (m_lexer.tokenType() != TokenType::keyword_where) {
return std::make_unique<TrueNode>();
}
std::unique_ptr<Node> node;
m_lexer.skipToken(TokenType::keyword_where);
do {
node = parse_relational_expression();
if (Lexer::isLogicalOperator(m_lexer.tokenType())) {
auto operation = parse_logical_operator();
std::unique_ptr<Node> node2 = parse_relational_expression();
node = std::make_unique<LogicalOperatorNode>(operation, std::move(node), std::move(node2));
}
} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order);
return node;
}
std::vector<ColOrderNode> Parser::parse_orderby_clause() {
std::vector<ColOrderNode> order_cols;
if (m_lexer.tokenType() == TokenType::keyword_order) {
m_lexer.skipToken(TokenType::keyword_order);
m_lexer.skipToken(TokenType::keyword_by);
do {
int col_index = -1;
bool asc = true;
auto token_type = m_lexer.tokenType();
std::string tokenString = m_lexer.consumeCurrentToken().token_string;
switch (token_type) {
case TokenType::int_number:
col_index = std::stoi(tokenString);
break;
default:
throw Exception("column index alloved in order by clause at this moment");
}
if (m_lexer.tokenType() == TokenType::keyword_asc) {
m_lexer.skipToken(TokenType::keyword_asc);
} else if (m_lexer.tokenType() == TokenType::keyword_desc) {
m_lexer.skipToken(TokenType::keyword_desc);
asc = false;
}
order_cols.push_back(ColOrderNode{col_index, asc});
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::eof); // && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
}
return order_cols;
}
std::unique_ptr<Node> Parser::parse_relational_expression() {
auto left = parse_operand_node();
auto operation = parse_relational_operator();
auto right = parse_operand_node();
return std::make_unique<RelationalOperatorNode>(operation, std::move(left), std::move(right));
}
// Consumes the current token and turns it into an operand node:
//   int_number     -> IntValueNode
//   double_number  -> DoubleValueNode
//   string_literal -> StringValueNode
//   identifier     -> DatabaseValueNode
//   keyword_null   -> NullValueNode
//
// Throws Exception for any other token type; the token is consumed before the
// type is inspected, so it is gone even when the exception is thrown.
std::unique_ptr<Node> Parser::parse_operand_node() {
    // while not end or order or limit
    const auto kind = m_lexer.tokenType();
    std::string text = m_lexer.consumeCurrentToken().token_string;

    if (kind == TokenType::int_number) {
        return std::make_unique<IntValueNode>(std::stoi(text));
    }
    if (kind == TokenType::double_number) {
        return std::make_unique<DoubleValueNode>(std::stod(text));
    }
    if (kind == TokenType::string_literal) {
        return std::make_unique<StringValueNode>(text);
    }
    if (kind == TokenType::identifier) {
        return std::make_unique<DatabaseValueNode>(text);
    }
    if (kind == TokenType::keyword_null) {
        return std::make_unique<NullValueNode>();
    }
    throw Exception("Unknown operand node");
}
// Consumes the current token and maps it onto the RelationalOperatorType of
// the same name (equal, not_equal, greater, greater_equal, lesser,
// lesser_equal).
// Throws Exception when the consumed token is none of those.
RelationalOperatorType Parser::parse_relational_operator() {
    switch (m_lexer.consumeCurrentToken().type) {
        case TokenType::equal:         return RelationalOperatorType::equal;
        case TokenType::not_equal:     return RelationalOperatorType::not_equal;
        case TokenType::greater:       return RelationalOperatorType::greater;
        case TokenType::greater_equal: return RelationalOperatorType::greater_equal;
        case TokenType::lesser:        return RelationalOperatorType::lesser;
        case TokenType::lesser_equal:  return RelationalOperatorType::lesser_equal;
        default:
            throw Exception("Unknown relational operator");
    }
}
// Consumes the current token and maps logical_and / logical_or onto
// LogicalOperatorType::and_operator / or_operator.
// Throws Exception when the consumed token is neither.
LogicalOperatorType Parser::parse_logical_operator() {
    const auto kind = m_lexer.consumeCurrentToken().type;

    if (kind == TokenType::logical_and) {
        return LogicalOperatorType::and_operator;
    }
    if (kind == TokenType::logical_or) {
        return LogicalOperatorType::or_operator;
    }
    throw Exception("Unknown logical operator");
}
// Consumes the current token and maps plus / minus / multiply / divide onto
// the matching ArithmeticalOperatorType value.
// Throws Exception when the consumed token is none of those.
ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
    switch (m_lexer.consumeCurrentToken().type) {
        case TokenType::plus:     return ArithmeticalOperatorType::plus_operator;
        case TokenType::minus:    return ArithmeticalOperatorType::minus_operator;
        case TokenType::multiply: return ArithmeticalOperatorType::multiply_operator;
        case TokenType::divide:   return ArithmeticalOperatorType::divide_operator;
        default:
            throw Exception("Unknown arithmetical operator");
    }
}
}