542 lines
20 KiB
C++
542 lines
20 KiB
C++
#include "parser.h"
|
|
#include "exception.h"
|
|
|
|
namespace usql {
|
|
|
|
// TODO: handle premature EOF
|
|
|
|
std::string column_type_name(const ColumnType type) {
|
|
if (type == ColumnType::integer_type) return "integer_type";
|
|
if (type == ColumnType::float_type) return "float_type";
|
|
if (type == ColumnType::varchar_type) return "varchar_type";
|
|
if (type == ColumnType::date_type) return "date_type";
|
|
if (type == ColumnType::bool_type) return "bool_type";
|
|
|
|
throw Exception("invalid column type: " + std::to_string((int)type));
|
|
};
|
|
|
|
|
|
Parser::Parser() {
|
|
m_lexer = Lexer{};
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse(const std::string &code) {
|
|
m_lexer.parse(code);
|
|
// m_lexer.debugTokens();
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_table)
|
|
return parse_create_table();
|
|
if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_index)
|
|
return parse_create_index();
|
|
if (m_lexer.tokenType() == TokenType::keyword_drop)
|
|
return parse_drop_table();
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_insert)
|
|
return parse_insert_into_table();
|
|
if (m_lexer.tokenType() == TokenType::keyword_select)
|
|
return parse_select_from_table();
|
|
if (m_lexer.tokenType() == TokenType::keyword_delete)
|
|
return parse_delete_from_table();
|
|
if (m_lexer.tokenType() == TokenType::keyword_update)
|
|
return parse_update_table();
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_load)
|
|
return parse_load_table();
|
|
if (m_lexer.tokenType() == TokenType::keyword_save)
|
|
return parse_save_table();
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_set)
|
|
return parse_set();
|
|
if (m_lexer.tokenType() == TokenType::keyword_show)
|
|
return parse_show();
|
|
|
|
std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
|
|
return std::make_unique<Node>(NodeType::error);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_create_table() {
|
|
std::vector<ColDefNode> cols_def{};
|
|
|
|
m_lexer.skipToken(TokenType::keyword_create);
|
|
m_lexer.skipToken(TokenType::keyword_table);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
// create as select
|
|
if (m_lexer.tokenType() == TokenType::keyword_as) {
|
|
m_lexer.skipToken(TokenType::keyword_as);
|
|
|
|
std::unique_ptr<Node> select = parse_select_from_table();
|
|
|
|
return std::make_unique<CreateTableAsSelectNode>(table_name, std::move(select));
|
|
} else {
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
int column_order = 0;
|
|
do {
|
|
std::string database_value;
|
|
ColumnType column_type;
|
|
int column_len = 1;
|
|
bool column_nullable = true;
|
|
|
|
// column name
|
|
if (m_lexer.tokenType() != TokenType::identifier) {
|
|
throw Exception("syntax error, expected identifier");
|
|
}
|
|
database_value = m_lexer.consumeToken().token_string;
|
|
|
|
// column type and optionally len
|
|
if (m_lexer.tokenType() == TokenType::keyword_integer) {
|
|
column_type = ColumnType::integer_type;
|
|
m_lexer.nextToken();
|
|
} else if (m_lexer.tokenType() == TokenType::keyword_float) {
|
|
column_type = ColumnType::float_type;
|
|
m_lexer.nextToken();
|
|
} else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
|
|
column_type = ColumnType::varchar_type;
|
|
m_lexer.nextToken();
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
|
|
m_lexer.skipToken(TokenType::close_paren);
|
|
} else if (m_lexer.tokenType() == TokenType::keyword_date) {
|
|
column_type = ColumnType::date_type;
|
|
m_lexer.nextToken();
|
|
} else if (m_lexer.tokenType() == TokenType::keyword_bool) {
|
|
column_type = ColumnType::bool_type;
|
|
m_lexer.nextToken();
|
|
} else {
|
|
throw Exception("syntax error, column type expected, found " + m_lexer.currentToken().token_string);
|
|
}
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_not) {
|
|
m_lexer.nextToken();
|
|
m_lexer.skipToken(TokenType::keyword_null);
|
|
column_nullable = false;
|
|
} else if (m_lexer.tokenType() == TokenType::keyword_null) {
|
|
m_lexer.nextToken();
|
|
}
|
|
|
|
cols_def.emplace_back(database_value, column_type, column_order++, column_len, column_nullable);
|
|
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
|
|
//constraints
|
|
//defaults
|
|
} while (m_lexer.tokenType() != TokenType::close_paren);
|
|
|
|
return std::make_unique<CreateTableNode>(table_name, cols_def);
|
|
}
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_load_table() {
|
|
m_lexer.skipToken(TokenType::keyword_load);
|
|
m_lexer.skipTokenOptional(TokenType::keyword_into);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
m_lexer.skipTokenOptional(TokenType::keyword_from);
|
|
|
|
std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
|
|
|
|
return std::make_unique<LoadIntoTableNode>(table_name, file_name);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_save_table() {
|
|
m_lexer.skipToken(TokenType::keyword_save);
|
|
m_lexer.skipTokenOptional(TokenType::keyword_table);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
m_lexer.skipTokenOptional(TokenType::keyword_into);
|
|
|
|
std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
|
|
|
|
return std::make_unique<SaveTableNode>(table_name, file_name);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_drop_table() {
|
|
m_lexer.skipToken(TokenType::keyword_drop);
|
|
m_lexer.skipTokenOptional(TokenType::keyword_table);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
return std::make_unique<DropTableNode>(table_name);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_set() {
|
|
m_lexer.skipToken(TokenType::keyword_set);
|
|
|
|
std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
|
|
m_lexer.skipTokenOptional(TokenType::equal);
|
|
std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string;
|
|
|
|
return std::make_unique<SetNode>(name, value);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_show() {
|
|
m_lexer.skipToken(TokenType::keyword_show);
|
|
|
|
std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
|
|
|
|
return std::make_unique<ShowNode>(name);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_insert_into_table() {
|
|
std::vector<DatabaseValueNode> database_values{};
|
|
std::vector<std::unique_ptr<Node>> column_values{};
|
|
|
|
m_lexer.skipToken(TokenType::keyword_insert);
|
|
m_lexer.skipToken(TokenType::keyword_into);
|
|
|
|
// table name
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
// column names
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
do {
|
|
database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
|
|
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
} while (m_lexer.tokenType() != TokenType::close_paren);
|
|
m_lexer.skipToken(TokenType::close_paren);
|
|
|
|
m_lexer.skipToken(TokenType::keyword_values);
|
|
|
|
// column values
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
do {
|
|
auto value = parse_expression();
|
|
column_values.emplace_back(std::move(value));
|
|
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
} while (m_lexer.tokenType() != TokenType::close_paren);
|
|
m_lexer.skipToken(TokenType::close_paren);
|
|
|
|
return std::make_unique<InsertIntoTableNode>(table_name, database_values, std::move(column_values));
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_select_from_table() {
|
|
bool distinct = false;
|
|
auto cols = std::make_unique<std::vector<SelectColNode>>();
|
|
|
|
m_lexer.skipToken(TokenType::keyword_select);
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_distinct) {
|
|
distinct = true;
|
|
m_lexer.skipToken(TokenType::keyword_distinct);
|
|
}
|
|
|
|
int i = 1;
|
|
while (m_lexer.tokenType() != TokenType::keyword_from) {
|
|
if (m_lexer.tokenType()==TokenType::multiply) {
|
|
std::string name = m_lexer.consumeToken().token_string;
|
|
auto multiply_char = std::make_unique<DatabaseValueNode>(name);
|
|
|
|
cols->push_back(SelectColNode{std::move(multiply_char), "*"});
|
|
} else {
|
|
auto column_value = parse_expression();
|
|
std::string column_alias;
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_as) {
|
|
m_lexer.skipToken(TokenType::keyword_as);
|
|
column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
} else {
|
|
if (column_value->node_type == NodeType::database_value) {
|
|
column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
|
|
} else {
|
|
column_alias = "c" + std::to_string(i);
|
|
i++;
|
|
}
|
|
}
|
|
|
|
cols->push_back(SelectColNode{std::move(column_value), column_alias});
|
|
}
|
|
|
|
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
}
|
|
|
|
m_lexer.skipToken(TokenType::keyword_from);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
std::unique_ptr<Node> where_node = parse_where_clause();
|
|
|
|
std::vector<ColOrderNode> orderby_node = parse_order_by_clause();
|
|
|
|
OffsetLimitNode offsetlimit_node = parse_offset_limit_clause();
|
|
|
|
|
|
return std::make_unique<SelectFromTableNode>(table_name, std::move(cols), std::move(where_node), orderby_node, offsetlimit_node, distinct);
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_delete_from_table() {
|
|
m_lexer.skipToken(TokenType::keyword_delete);
|
|
m_lexer.skipToken(TokenType::keyword_from);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
std::unique_ptr<Node> where_node = parse_where_clause();
|
|
|
|
return std::make_unique<DeleteFromTableNode>(table_name, std::move(where_node));
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_update_table() {
|
|
m_lexer.skipToken(TokenType::keyword_update);
|
|
m_lexer.skipTokenOptional(TokenType::keyword_table);
|
|
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
|
|
m_lexer.skipToken(TokenType::keyword_set);
|
|
|
|
std::vector<DatabaseValueNode> cols_names;
|
|
std::vector<std::unique_ptr<Node>> values;
|
|
|
|
do {
|
|
cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
|
|
m_lexer.skipToken(TokenType::equal);
|
|
|
|
std::unique_ptr<Node> left = Parser::parse_value();
|
|
if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
|
|
ArithmeticalOperatorType op = parse_arithmetical_operator();
|
|
std::unique_ptr<Node> right = Parser::parse_value();
|
|
|
|
values.push_back(std::make_unique<ArithmeticalOperatorNode>(op, std::move(left), std::move(right)));
|
|
} else {
|
|
std::unique_ptr<Node> right = std::make_unique<IntValueNode>(0);
|
|
values.push_back(std::make_unique<ArithmeticalOperatorNode>(ArithmeticalOperatorType::copy_value,
|
|
std::move(left), std::move(right)));
|
|
}
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
|
|
} while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);
|
|
|
|
std::unique_ptr<Node> where_node = parse_where_clause();
|
|
|
|
return std::make_unique<UpdateTableNode>(table_name, cols_names, std::move(values), std::move(where_node));
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_create_index() {
|
|
m_lexer.skipToken(TokenType::keyword_create);
|
|
m_lexer.skipToken(TokenType::keyword_index);
|
|
std::string index_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
m_lexer.skipToken(TokenType::keyword_on);
|
|
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
std::string column_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
m_lexer.skipToken(TokenType::close_paren);
|
|
|
|
return std::make_unique<CreateIndexNode>(index_name, table_name, column_name);
|
|
}
|
|
|
|
std::vector<ColOrderNode> Parser::parse_order_by_clause() {
|
|
std::vector<ColOrderNode> order_cols;
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_order) {
|
|
m_lexer.skipToken(TokenType::keyword_order);
|
|
m_lexer.skipToken(TokenType::keyword_by);
|
|
|
|
do {
|
|
bool asc = true;
|
|
|
|
auto cspec_token_type = m_lexer.tokenType();
|
|
std::string cspec_token = m_lexer.consumeToken().token_string;
|
|
|
|
if (m_lexer.tokenType() == TokenType::keyword_asc) {
|
|
m_lexer.skipToken(TokenType::keyword_asc);
|
|
} else if (m_lexer.tokenType() == TokenType::keyword_desc) {
|
|
m_lexer.skipToken(TokenType::keyword_desc);
|
|
asc = false;
|
|
}
|
|
|
|
switch (cspec_token_type) {
|
|
case TokenType::int_number:
|
|
order_cols.emplace_back(std::stoi(cspec_token), asc);
|
|
break;
|
|
case TokenType::identifier:
|
|
order_cols.emplace_back(cspec_token, asc);
|
|
break;
|
|
default:
|
|
throw Exception("order by column can be either column m_index or identifier");
|
|
}
|
|
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
|
|
}
|
|
|
|
return order_cols;
|
|
}
|
|
|
|
// Parses the optional `OFFSET <n>` and `LIMIT <n>` clauses, in that order.
//
// Defaults when a clause is absent: offset 0, limit SIZE_MAX.
// The numbers are converted with std::stoull rather than std::stoi: the
// targets are size_t, and going through int made every value above INT_MAX
// fail with std::out_of_range.
// Returns an OffsetLimitNode{offset, limit}.
OffsetLimitNode Parser::parse_offset_limit_clause() {
    size_t offset = 0;
    size_t limit = SIZE_MAX;

    if (m_lexer.tokenType() == TokenType::keyword_offset) {
        m_lexer.skipToken(TokenType::keyword_offset);
        offset = static_cast<size_t>(std::stoull(m_lexer.consumeToken(TokenType::int_number).token_string));
    }

    if (m_lexer.tokenType() == TokenType::keyword_limit) {
        m_lexer.skipToken(TokenType::keyword_limit);
        limit = static_cast<size_t>(std::stoull(m_lexer.consumeToken(TokenType::int_number).token_string));
    }

    return OffsetLimitNode{offset, limit};
}
|
|
|
|
|
|
std::unique_ptr<Node> Parser::parse_where_clause() {
|
|
if (m_lexer.tokenType() != TokenType::keyword_where) {
|
|
return std::make_unique<TrueNode>();
|
|
}
|
|
|
|
m_lexer.skipToken(TokenType::keyword_where);
|
|
|
|
std::unique_ptr<Node> left = parse_expression();
|
|
do {
|
|
left = parse_expression(std::move(left));
|
|
} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit && m_lexer.tokenType() != TokenType::semicolon);
|
|
|
|
return left;
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_expression() {
|
|
std::unique_ptr<Node> left = parse_value();
|
|
|
|
return parse_expression(std::move(left));
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_expression(std::unique_ptr<Node> left) {
|
|
if (Lexer::isRelationalOperator(m_lexer.tokenType())) {
|
|
auto operation = parse_relational_operator();
|
|
auto right = parse_value();
|
|
return std::make_unique<RelationalOperatorNode>(operation, std::move(left), std::move(right));
|
|
} else if (Lexer::isLogicalOperator(m_lexer.tokenType())) {
|
|
auto operation = parse_logical_operator();
|
|
auto right = parse_expression();
|
|
return std::make_unique<LogicalOperatorNode>(operation, std::move(left), std::move(right));
|
|
} else if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
|
|
auto operation = parse_arithmetical_operator();
|
|
auto right = parse_value();
|
|
|
|
return std::make_unique<ArithmeticalOperatorNode>(operation, std::move(left), std::move(right));
|
|
} else if (m_lexer.tokenType() == TokenType::int_number || m_lexer.tokenType() == TokenType::double_number ||m_lexer.tokenType() == TokenType::string_literal ||m_lexer.tokenType() == TokenType::identifier || m_lexer.tokenType() == TokenType::keyword_null || m_lexer.tokenType() == TokenType::open_paren) {
|
|
return parse_value();
|
|
}
|
|
|
|
return left;
|
|
}
|
|
|
|
std::unique_ptr<Node> Parser::parse_value() {
|
|
auto token_typcol = m_lexer.tokenType();
|
|
|
|
// parenthesised expression
|
|
if (token_typcol == TokenType::open_paren) {
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
auto left = parse_expression();
|
|
do {
|
|
left = parse_expression(std::move(left));
|
|
} while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof);
|
|
|
|
m_lexer.skipToken(TokenType::close_paren);
|
|
return left;
|
|
}
|
|
|
|
// function call
|
|
if (token_typcol == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
|
|
std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
|
|
std::vector<std::unique_ptr<Node>> pars;
|
|
|
|
m_lexer.skipToken(TokenType::open_paren);
|
|
while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof) {
|
|
pars.push_back(parse_expression());
|
|
m_lexer.skipTokenOptional(TokenType::comma);
|
|
}
|
|
m_lexer.skipToken(TokenType::close_paren);
|
|
return std::make_unique<FunctionNode>(function_name, std::move(pars));
|
|
}
|
|
|
|
// numbers and strings
|
|
std::string tokenString = m_lexer.consumeToken().token_string;
|
|
|
|
if (token_typcol == TokenType::int_number)
|
|
return std::make_unique<IntValueNode>(std::stoi(tokenString));
|
|
if (token_typcol == TokenType::double_number)
|
|
return std::make_unique<DoubleValueNode>(std::stod(tokenString));
|
|
if (token_typcol == TokenType::string_literal)
|
|
return std::make_unique<StringValueNode>(tokenString);
|
|
|
|
// db column
|
|
if (token_typcol == TokenType::identifier)
|
|
return std::make_unique<DatabaseValueNode>(tokenString);
|
|
|
|
// null
|
|
if (token_typcol == TokenType::keyword_null)
|
|
return std::make_unique<NullValueNode>();
|
|
|
|
// true / false
|
|
if (token_typcol == TokenType::keyword_true || token_typcol == TokenType::keyword_false)
|
|
return std::make_unique<BooleanValueNode>(token_typcol == TokenType::keyword_true);
|
|
|
|
// token * for count(*)
|
|
if (token_typcol == TokenType::multiply)
|
|
return std::make_unique<StringValueNode>(tokenString);
|
|
|
|
throw Exception("Unknown operand node " + tokenString);
|
|
}
|
|
|
|
// Consumes the current token and translates it into a RelationalOperatorType.
// `is` followed by `not` consumes the `not` as well and yields is_not.
// Throws Exception, naming the token, when it is not a relational operator.
RelationalOperatorType Parser::parse_relational_operator() {
    const auto token = m_lexer.consumeToken();

    switch (token.type) {
        case TokenType::equal:         return RelationalOperatorType::equal;
        case TokenType::not_equal:     return RelationalOperatorType::not_equal;
        case TokenType::greater:       return RelationalOperatorType::greater;
        case TokenType::greater_equal: return RelationalOperatorType::greater_equal;
        case TokenType::lesser:        return RelationalOperatorType::lesser;
        case TokenType::lesser_equal:  return RelationalOperatorType::lesser_equal;

        case TokenType::is: {
            if (m_lexer.tokenType() != TokenType::keyword_not) {
                return RelationalOperatorType::is;
            }
            m_lexer.skipToken(TokenType::keyword_not);
            return RelationalOperatorType::is_not;
        }

        default:
            throw Exception("Unknown relational operator " + token.token_string);
    }
}
|
|
|
|
// Consumes the current token and translates it into a LogicalOperatorType
// (and / or). Throws Exception for any other token.
LogicalOperatorType Parser::parse_logical_operator() {
    switch (m_lexer.consumeToken().type) {
        case TokenType::logical_and: return LogicalOperatorType::and_operator;
        case TokenType::logical_or:  return LogicalOperatorType::or_operator;
        default:
            throw Exception("Unknown logical operator");
    }
}
|
|
|
|
// Consumes the current token and translates it into an
// ArithmeticalOperatorType (+, -, *, /). Throws Exception for any other token.
ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
    switch (m_lexer.consumeToken().type) {
        case TokenType::plus:     return ArithmeticalOperatorType::plus_operator;
        case TokenType::minus:    return ArithmeticalOperatorType::minus_operator;
        case TokenType::multiply: return ArithmeticalOperatorType::multiply_operator;
        case TokenType::divide:   return ArithmeticalOperatorType::divide_operator;
        default:
            throw Exception("Unknown arithmetical operator");
    }
}
|
|
|
|
} // namespace
|