usql/parser.cpp

542 lines
20 KiB
C++

#include "parser.h"
#include "exception.h"
namespace usql {
// TODO handle premature eof
// Maps a column type to its textual name (the enumerator spelling).
// Throws Exception carrying the numeric value when the type is not one of
// the enumerators handled below.
std::string column_type_name(const ColumnType type) {
    switch (type) {
        case ColumnType::integer_type: return "integer_type";
        case ColumnType::float_type:   return "float_type";
        case ColumnType::varchar_type: return "varchar_type";
        case ColumnType::date_type:    return "date_type";
        case ColumnType::bool_type:    return "bool_type";
        default:                       break;
    }
    throw Exception("invalid column type: " + std::to_string(static_cast<int>(type)));
}
Parser::Parser() {
m_lexer = Lexer{};
}
// Entry point: tokenizes `code` and dispatches on the leading token(s) to the
// matching statement parser. When no statement parser applies, the current
// token is printed to stdout and a node of type NodeType::error is returned.
std::unique_ptr<Node> Parser::parse(const std::string &code) {
    m_lexer.parse(code);
    // m_lexer.debugTokens();

    switch (m_lexer.tokenType()) {
        case TokenType::keyword_create: {
            // CREATE needs one token of lookahead to tell TABLE from INDEX
            const auto following = m_lexer.nextTokenType();
            if (following == TokenType::keyword_table)
                return parse_create_table();
            if (following == TokenType::keyword_index)
                return parse_create_index();
            break;
        }
        case TokenType::keyword_drop:
            return parse_drop_table();
        case TokenType::keyword_insert:
            return parse_insert_into_table();
        case TokenType::keyword_select:
            return parse_select_from_table();
        case TokenType::keyword_delete:
            return parse_delete_from_table();
        case TokenType::keyword_update:
            return parse_update_table();
        case TokenType::keyword_load:
            return parse_load_table();
        case TokenType::keyword_save:
            return parse_save_table();
        case TokenType::keyword_set:
            return parse_set();
        case TokenType::keyword_show:
            return parse_show();
        default:
            break;
    }

    // nothing matched: report the token and hand back an error node
    std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
    return std::make_unique<Node>(NodeType::error);
}
std::unique_ptr<Node> Parser::parse_create_table() {
std::vector<ColDefNode> cols_def{};
m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_table);
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
// create as select
if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as);
std::unique_ptr<Node> select = parse_select_from_table();
return std::make_unique<CreateTableAsSelectNode>(table_name, std::move(select));
} else {
m_lexer.skipToken(TokenType::open_paren);
int column_order = 0;
do {
std::string database_value;
ColumnType column_type;
int column_len = 1;
bool column_nullable = true;
// column name
if (m_lexer.tokenType() != TokenType::identifier) {
throw Exception("syntax error, expected identifier");
}
database_value = m_lexer.consumeToken().token_string;
// column type and optionally len
if (m_lexer.tokenType() == TokenType::keyword_integer) {
column_type = ColumnType::integer_type;
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_float) {
column_type = ColumnType::float_type;
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
column_type = ColumnType::varchar_type;
m_lexer.nextToken();
m_lexer.skipToken(TokenType::open_paren);
column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
m_lexer.skipToken(TokenType::close_paren);
} else if (m_lexer.tokenType() == TokenType::keyword_date) {
column_type = ColumnType::date_type;
m_lexer.nextToken();
} else if (m_lexer.tokenType() == TokenType::keyword_bool) {
column_type = ColumnType::bool_type;
m_lexer.nextToken();
} else {
throw Exception("syntax error, column type expected, found " + m_lexer.currentToken().token_string);
}
if (m_lexer.tokenType() == TokenType::keyword_not) {
m_lexer.nextToken();
m_lexer.skipToken(TokenType::keyword_null);
column_nullable = false;
} else if (m_lexer.tokenType() == TokenType::keyword_null) {
m_lexer.nextToken();
}
cols_def.emplace_back(database_value, column_type, column_order++, column_len, column_nullable);
m_lexer.skipTokenOptional(TokenType::comma);
//constraints
//defaults
} while (m_lexer.tokenType() != TokenType::close_paren);
return std::make_unique<CreateTableNode>(table_name, cols_def);
}
}
// Parses: LOAD [INTO] <table> [FROM] '<file>'
// Returns a LoadIntoTableNode holding the table name and the file name.
std::unique_ptr<Node> Parser::parse_load_table() {
    m_lexer.skipToken(TokenType::keyword_load);

    // INTO and FROM are both optional
    m_lexer.skipTokenOptional(TokenType::keyword_into);
    std::string target_table = m_lexer.consumeToken(TokenType::identifier).token_string;

    m_lexer.skipTokenOptional(TokenType::keyword_from);
    std::string source_file = m_lexer.consumeToken(TokenType::string_literal).token_string;

    return std::make_unique<LoadIntoTableNode>(target_table, source_file);
}
// Parses: SAVE [TABLE] <table> [INTO] '<file>'
// Returns a SaveTableNode holding the table name and the file name.
std::unique_ptr<Node> Parser::parse_save_table() {
    m_lexer.skipToken(TokenType::keyword_save);

    // TABLE and INTO are both optional
    m_lexer.skipTokenOptional(TokenType::keyword_table);
    std::string source_table = m_lexer.consumeToken(TokenType::identifier).token_string;

    m_lexer.skipTokenOptional(TokenType::keyword_into);
    std::string target_file = m_lexer.consumeToken(TokenType::string_literal).token_string;

    return std::make_unique<SaveTableNode>(source_table, target_file);
}
// Parses: DROP [TABLE] <table>
// Returns a DropTableNode for the named table.
std::unique_ptr<Node> Parser::parse_drop_table() {
    m_lexer.skipToken(TokenType::keyword_drop);
    m_lexer.skipTokenOptional(TokenType::keyword_table);  // TABLE keyword may be omitted

    std::string doomed_table = m_lexer.consumeToken(TokenType::identifier).token_string;
    return std::make_unique<DropTableNode>(doomed_table);
}
// Parses: SET '<name>' [=] '<value>'
// Both the setting name and its value are string literals; the '=' is optional.
std::unique_ptr<Node> Parser::parse_set() {
    m_lexer.skipToken(TokenType::keyword_set);

    std::string setting = m_lexer.consumeToken(TokenType::string_literal).token_string;
    m_lexer.skipTokenOptional(TokenType::equal);
    std::string setting_value = m_lexer.consumeToken(TokenType::string_literal).token_string;

    return std::make_unique<SetNode>(setting, setting_value);
}
// Parses: SHOW '<name>'
// The name is a string literal; returns a ShowNode for it.
std::unique_ptr<Node> Parser::parse_show() {
    m_lexer.skipToken(TokenType::keyword_show);

    std::string setting = m_lexer.consumeToken(TokenType::string_literal).token_string;
    return std::make_unique<ShowNode>(setting);
}
std::unique_ptr<Node> Parser::parse_insert_into_table() {
std::vector<DatabaseValueNode> database_values{};
std::vector<std::unique_ptr<Node>> column_values{};
m_lexer.skipToken(TokenType::keyword_insert);
m_lexer.skipToken(TokenType::keyword_into);
// table name
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
// column names
m_lexer.skipToken(TokenType::open_paren);
do {
database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
m_lexer.skipToken(TokenType::keyword_values);
// column values
m_lexer.skipToken(TokenType::open_paren);
do {
auto value = parse_expression();
column_values.emplace_back(std::move(value));
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren);
m_lexer.skipToken(TokenType::close_paren);
return std::make_unique<InsertIntoTableNode>(table_name, database_values, std::move(column_values));
}
std::unique_ptr<Node> Parser::parse_select_from_table() {
bool distinct = false;
auto cols = std::make_unique<std::vector<SelectColNode>>();
m_lexer.skipToken(TokenType::keyword_select);
if (m_lexer.tokenType() == TokenType::keyword_distinct) {
distinct = true;
m_lexer.skipToken(TokenType::keyword_distinct);
}
int i = 1;
while (m_lexer.tokenType() != TokenType::keyword_from) {
if (m_lexer.tokenType()==TokenType::multiply) {
std::string name = m_lexer.consumeToken().token_string;
auto multiply_char = std::make_unique<DatabaseValueNode>(name);
cols->push_back(SelectColNode{std::move(multiply_char), "*"});
} else {
auto column_value = parse_expression();
std::string column_alias;
if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as);
column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
} else {
if (column_value->node_type == NodeType::database_value) {
column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
} else {
column_alias = "c" + std::to_string(i);
i++;
}
}
cols->push_back(SelectColNode{std::move(column_value), column_alias});
}
m_lexer.skipTokenOptional(TokenType::comma);
}
m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause();
std::vector<ColOrderNode> orderby_node = parse_order_by_clause();
OffsetLimitNode offsetlimit_node = parse_offset_limit_clause();
return std::make_unique<SelectFromTableNode>(table_name, std::move(cols), std::move(where_node), orderby_node, offsetlimit_node, distinct);
}
// Parses: DELETE FROM <table> [WHERE ...]
// Returns a DeleteFromTableNode with the table name and the condition
// produced by parse_where_clause().
std::unique_ptr<Node> Parser::parse_delete_from_table() {
    m_lexer.skipToken(TokenType::keyword_delete);
    m_lexer.skipToken(TokenType::keyword_from);

    std::string target_table = m_lexer.consumeToken(TokenType::identifier).token_string;
    std::unique_ptr<Node> condition = parse_where_clause();

    return std::make_unique<DeleteFromTableNode>(target_table, std::move(condition));
}
std::unique_ptr<Node> Parser::parse_update_table() {
m_lexer.skipToken(TokenType::keyword_update);
m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::keyword_set);
std::vector<DatabaseValueNode> cols_names;
std::vector<std::unique_ptr<Node>> values;
do {
cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
m_lexer.skipToken(TokenType::equal);
std::unique_ptr<Node> left = Parser::parse_value();
if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
ArithmeticalOperatorType op = parse_arithmetical_operator();
std::unique_ptr<Node> right = Parser::parse_value();
values.push_back(std::make_unique<ArithmeticalOperatorNode>(op, std::move(left), std::move(right)));
} else {
std::unique_ptr<Node> right = std::make_unique<IntValueNode>(0);
values.push_back(std::make_unique<ArithmeticalOperatorNode>(ArithmeticalOperatorType::copy_value,
std::move(left), std::move(right)));
}
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);
std::unique_ptr<Node> where_node = parse_where_clause();
return std::make_unique<UpdateTableNode>(table_name, cols_names, std::move(values), std::move(where_node));
}
// Parses: CREATE INDEX <index> ON <table> ( <column> )
// Exactly one column is accepted inside the parentheses.
std::unique_ptr<Node> Parser::parse_create_index() {
    m_lexer.skipToken(TokenType::keyword_create);
    m_lexer.skipToken(TokenType::keyword_index);
    std::string index = m_lexer.consumeToken(TokenType::identifier).token_string;

    m_lexer.skipToken(TokenType::keyword_on);
    std::string table = m_lexer.consumeToken(TokenType::identifier).token_string;

    m_lexer.skipToken(TokenType::open_paren);
    std::string column = m_lexer.consumeToken(TokenType::identifier).token_string;
    m_lexer.skipToken(TokenType::close_paren);

    return std::make_unique<CreateIndexNode>(index, table, column);
}
std::vector<ColOrderNode> Parser::parse_order_by_clause() {
std::vector<ColOrderNode> order_cols;
if (m_lexer.tokenType() == TokenType::keyword_order) {
m_lexer.skipToken(TokenType::keyword_order);
m_lexer.skipToken(TokenType::keyword_by);
do {
bool asc = true;
auto cspec_token_type = m_lexer.tokenType();
std::string cspec_token = m_lexer.consumeToken().token_string;
if (m_lexer.tokenType() == TokenType::keyword_asc) {
m_lexer.skipToken(TokenType::keyword_asc);
} else if (m_lexer.tokenType() == TokenType::keyword_desc) {
m_lexer.skipToken(TokenType::keyword_desc);
asc = false;
}
switch (cspec_token_type) {
case TokenType::int_number:
order_cols.emplace_back(std::stoi(cspec_token), asc);
break;
case TokenType::identifier:
order_cols.emplace_back(cspec_token, asc);
break;
default:
throw Exception("order by column can be either column m_index or identifier");
}
m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
}
return order_cols;
}
// Parses an optional: [OFFSET <n>] [LIMIT <n>]   (in this order)
// Defaults are offset 0 and limit SIZE_MAX when the respective keyword is absent.
// The numbers are parsed with std::stoull so that values beyond the range of
// `int` are accepted instead of raising std::out_of_range.
OffsetLimitNode Parser::parse_offset_limit_clause() {
    size_t offset = 0;
    size_t limit = SIZE_MAX;

    if (m_lexer.tokenType() == TokenType::keyword_offset) {
        m_lexer.skipToken(TokenType::keyword_offset);
        offset = static_cast<size_t>(std::stoull(m_lexer.consumeToken(TokenType::int_number).token_string));
    }

    if (m_lexer.tokenType() == TokenType::keyword_limit) {
        m_lexer.skipToken(TokenType::keyword_limit);
        limit = static_cast<size_t>(std::stoull(m_lexer.consumeToken(TokenType::int_number).token_string));
    }

    return OffsetLimitNode{offset, limit};
}
std::unique_ptr<Node> Parser::parse_where_clause() {
if (m_lexer.tokenType() != TokenType::keyword_where) {
return std::make_unique<TrueNode>();
}
m_lexer.skipToken(TokenType::keyword_where);
std::unique_ptr<Node> left = parse_expression();
do {
left = parse_expression(std::move(left));
} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit && m_lexer.tokenType() != TokenType::semicolon);
return left;
}
// Parses one value and then lets the single-argument overload extend it
// with whatever operator follows.
std::unique_ptr<Node> Parser::parse_expression() {
    return parse_expression(parse_value());
}
// Extends an already parsed left operand according to the current token:
//  - relational operator:   <left> <op> <value>
//  - logical operator:      <left> <op> <expression>
//  - arithmetical operator: <left> <op> <value>
//  - start of a value:      the value is parsed and returned; `left` is dropped
//  - anything else:         `left` is returned unchanged, no token is consumed
std::unique_ptr<Node> Parser::parse_expression(std::unique_ptr<Node> left) {
    const auto current = m_lexer.tokenType();

    if (Lexer::isRelationalOperator(current)) {
        auto relation = parse_relational_operator();
        auto rhs = parse_value();
        return std::make_unique<RelationalOperatorNode>(relation, std::move(left), std::move(rhs));
    }

    if (Lexer::isLogicalOperator(current)) {
        auto connective = parse_logical_operator();
        auto rhs = parse_expression();
        return std::make_unique<LogicalOperatorNode>(connective, std::move(left), std::move(rhs));
    }

    if (Lexer::isArithmeticalOperator(current)) {
        auto arithmetic = parse_arithmetical_operator();
        auto rhs = parse_value();
        return std::make_unique<ArithmeticalOperatorNode>(arithmetic, std::move(left), std::move(rhs));
    }

    switch (current) {
        case TokenType::int_number:
        case TokenType::double_number:
        case TokenType::string_literal:
        case TokenType::identifier:
        case TokenType::keyword_null:
        case TokenType::open_paren:
            return parse_value();
        default:
            return left;
    }
}
std::unique_ptr<Node> Parser::parse_value() {
auto token_typcol = m_lexer.tokenType();
// parenthesised expression
if (token_typcol == TokenType::open_paren) {
m_lexer.skipToken(TokenType::open_paren);
auto left = parse_expression();
do {
left = parse_expression(std::move(left));
} while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof);
m_lexer.skipToken(TokenType::close_paren);
return left;
}
// function call
if (token_typcol == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::vector<std::unique_ptr<Node>> pars;
m_lexer.skipToken(TokenType::open_paren);
while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof) {
pars.push_back(parse_expression());
m_lexer.skipTokenOptional(TokenType::comma);
}
m_lexer.skipToken(TokenType::close_paren);
return std::make_unique<FunctionNode>(function_name, std::move(pars));
}
// numbers and strings
std::string tokenString = m_lexer.consumeToken().token_string;
if (token_typcol == TokenType::int_number)
return std::make_unique<IntValueNode>(std::stoi(tokenString));
if (token_typcol == TokenType::double_number)
return std::make_unique<DoubleValueNode>(std::stod(tokenString));
if (token_typcol == TokenType::string_literal)
return std::make_unique<StringValueNode>(tokenString);
// db column
if (token_typcol == TokenType::identifier)
return std::make_unique<DatabaseValueNode>(tokenString);
// null
if (token_typcol == TokenType::keyword_null)
return std::make_unique<NullValueNode>();
// true / false
if (token_typcol == TokenType::keyword_true || token_typcol == TokenType::keyword_false)
return std::make_unique<BooleanValueNode>(token_typcol == TokenType::keyword_true);
// token * for count(*)
if (token_typcol == TokenType::multiply)
return std::make_unique<StringValueNode>(tokenString);
throw Exception("Unknown operand node " + tokenString);
}
// Consumes the current token and translates it into a relational operator.
// "IS" followed by "NOT" is folded into a single is_not operator.
// Throws Exception naming the token when it is not a relational operator.
RelationalOperatorType Parser::parse_relational_operator() {
    const auto token = m_lexer.consumeToken();

    switch (token.type) {
        case TokenType::equal:         return RelationalOperatorType::equal;
        case TokenType::not_equal:     return RelationalOperatorType::not_equal;
        case TokenType::greater:       return RelationalOperatorType::greater;
        case TokenType::greater_equal: return RelationalOperatorType::greater_equal;
        case TokenType::lesser:        return RelationalOperatorType::lesser;
        case TokenType::lesser_equal:  return RelationalOperatorType::lesser_equal;
        case TokenType::is:
            // plain IS unless NOT follows
            if (m_lexer.tokenType() != TokenType::keyword_not)
                return RelationalOperatorType::is;
            m_lexer.skipToken(TokenType::keyword_not);
            return RelationalOperatorType::is_not;
        default:
            break;
    }
    throw Exception("Unknown relational operator " + token.token_string);
}
// Consumes the current token and translates it into a logical operator.
// Throws Exception when the token is neither logical AND nor logical OR.
LogicalOperatorType Parser::parse_logical_operator() {
    const auto kind = m_lexer.consumeToken().type;

    if (kind == TokenType::logical_and)
        return LogicalOperatorType::and_operator;
    if (kind == TokenType::logical_or)
        return LogicalOperatorType::or_operator;

    throw Exception("Unknown logical operator");
}
// Consumes the current token and translates it into an arithmetical operator
// (plus, minus, multiply or divide).
// Throws Exception for any other token.
ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
    const auto kind = m_lexer.consumeToken().type;

    if (kind == TokenType::plus)
        return ArithmeticalOperatorType::plus_operator;
    if (kind == TokenType::minus)
        return ArithmeticalOperatorType::minus_operator;
    if (kind == TokenType::multiply)
        return ArithmeticalOperatorType::multiply_operator;
    if (kind == TokenType::divide)
        return ArithmeticalOperatorType::divide_operator;

    throw Exception("Unknown arithmetical operator");
}
} // namespace