#include "parser.h"
#include "exception.h"

// NOTE(review): the reviewed copy of this file had lost every template argument
// list (e.g. `std::unique_ptr Parser::parse`, `std::vector> pars`,
// `std::make_unique(NodeType::error)`). They are restored below. Vector element
// types follow from how the elements are used in this file; the concrete node
// class names given to std::make_unique are reconstructed and are NOT visible
// in the reviewed text - confirm every one of them against parser.h.

namespace usql {

    // TODO handle premature eof

    // Creates a parser with a freshly constructed lexer.
    Parser::Parser() {
        m_lexer = Lexer{};
    }

    // Tokenizes `code` and dispatches on the first token to the matching
    // statement parser.
    //
    // Returns the node produced by that statement parser. When the first token
    // starts no known statement (or is CREATE not followed by TABLE), the
    // offending token is printed to std::cout and a node carrying
    // NodeType::error is returned.
    std::unique_ptr<Node> Parser::parse(const std::string &code) {
        m_lexer.parse(code);
        // m_lexer.debugTokens();

        switch (m_lexer.tokenType()) {
            case TokenType::keyword_create:
                // only CREATE TABLE is supported; anything else falls through to the error path
                if (m_lexer.nextTokenType() == TokenType::keyword_table) {
                    return parse_create_table();
                }
                break;
            case TokenType::keyword_insert:
                return parse_insert_into_table();
            case TokenType::keyword_select:
                return parse_select_from_table();
            case TokenType::keyword_delete:
                return parse_delete_from_table();
            case TokenType::keyword_update:
                return parse_update_table();
            case TokenType::keyword_load:
                return parse_load_table();
            case TokenType::keyword_save:
                return parse_save_table();
            case TokenType::keyword_drop:
                return parse_drop_table();
            default:
                break;
        }

        std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
        return std::make_unique<Node>(NodeType::error);
    }

    // Parses either
    //   CREATE TABLE name AS <select statement>
    // or
    //   CREATE TABLE name ( col type [NOT NULL | NULL] [,] ... )
    // where type is the integer keyword, the float keyword, or the varchar
    // keyword followed by ( int_number ).
    //
    // Throws Exception when the table name or a column name is not an
    // identifier, when the column type is missing, or when the varchar length
    // is not an int number.
    std::unique_ptr<Node> Parser::parse_create_table() {
        m_lexer.skipToken(TokenType::keyword_create);
        m_lexer.skipToken(TokenType::keyword_table);

        // previously an empty TODO branch: a non-identifier was silently used as the table name
        if (m_lexer.tokenType() != TokenType::identifier) {
            throw Exception("syntax error, expected identifier");
        }
        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        // create as select
        if (m_lexer.tokenType() == TokenType::keyword_as) {
            m_lexer.skipToken(TokenType::keyword_as);

            std::unique_ptr<Node> select = parse_select_from_table();
            return std::make_unique<CreateTableAsSelectNode>(table_name, std::move(select));
        }

        std::vector<ColDefNode> cols_def{};
        m_lexer.skipToken(TokenType::open_paren);

        int column_order = 0;
        do {
            ColumnType column_type;
            int column_len{1};
            bool column_nullable{true};

            // column name
            if (m_lexer.tokenType() != TokenType::identifier) {
                throw Exception("syntax error, expected identifier");
            }
            std::string column_name = m_lexer.consumeCurrentToken().token_string;

            // column type and optionally len
            if (m_lexer.tokenType() == TokenType::keyword_integer) {
                column_type = ColumnType::integer_type;
                m_lexer.nextToken();
            } else if (m_lexer.tokenType() == TokenType::keyword_float) {
                column_type = ColumnType::float_type;
                m_lexer.nextToken();
            } else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
                column_type = ColumnType::varchar_type;
                m_lexer.nextToken();
                m_lexer.skipToken(TokenType::open_paren);
                if (m_lexer.tokenType() != TokenType::int_number) {
                    throw Exception("syntax error, expected int number");
                }
                column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
                m_lexer.skipToken(TokenType::close_paren);
            } else {
                throw Exception("syntax error, column type expected");
            }

            // optional nullability: NOT NULL clears the flag, a bare NULL keeps the default
            if (m_lexer.tokenType() == TokenType::keyword_not) {
                m_lexer.nextToken();
                m_lexer.skipToken(TokenType::keyword_null);
                column_nullable = false;
            } else if (m_lexer.tokenType() == TokenType::keyword_null) {
                m_lexer.nextToken();
            }

            cols_def.emplace_back(column_name, column_type, column_order++, column_len, column_nullable);

            m_lexer.skipTokenOptional(TokenType::comma);

            // TODO in future constraints
        } while (m_lexer.tokenType() != TokenType::close_paren);

        return std::make_unique<CreateTableNode>(table_name, cols_def);
    }

    // Parses LOAD [INTO] table [FROM] file. Both names are taken from whatever
    // token is current; no token-type check is made here.
    std::unique_ptr<Node> Parser::parse_load_table() {
        m_lexer.skipToken(TokenType::keyword_load);
        m_lexer.skipTokenOptional(TokenType::keyword_into);

        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        m_lexer.skipTokenOptional(TokenType::keyword_from);
        std::string file_name = m_lexer.consumeCurrentToken().token_string;

        return std::make_unique<LoadIntoTableNode>(table_name, file_name);
    }

    // Parses SAVE [TABLE] table [INTO] file. Both names are taken from whatever
    // token is current; no token-type check is made here.
    std::unique_ptr<Node> Parser::parse_save_table() {
        m_lexer.skipToken(TokenType::keyword_save);
        m_lexer.skipTokenOptional(TokenType::keyword_table);

        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        m_lexer.skipTokenOptional(TokenType::keyword_into);
        std::string file_name = m_lexer.consumeCurrentToken().token_string;

        return std::make_unique<SaveTableNode>(table_name, file_name);
    }

    // Parses DROP [TABLE] table.
    std::unique_ptr<Node> Parser::parse_drop_table() {
        m_lexer.skipToken(TokenType::keyword_drop);
        m_lexer.skipTokenOptional(TokenType::keyword_table);

        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        return std::make_unique<DropTableNode>(table_name);
    }

    // Parses INSERT INTO table ( col [,] ... ) VALUES ( value [,] ... ).
    // Each value is parsed with parse_value().
    //
    // Throws Exception when the table name or any column name is not an
    // identifier. The number of columns is not compared with the number of
    // values here.
    std::unique_ptr<Node> Parser::parse_insert_into_table() {
        // NOTE(review): element type inferred from the std::string pushed below - confirm against parser.h
        std::vector<std::string> column_names{};
        std::vector<std::unique_ptr<Node>> column_values{};

        m_lexer.skipToken(TokenType::keyword_insert);
        m_lexer.skipToken(TokenType::keyword_into);

        // table name (previously an empty TODO branch)
        if (m_lexer.tokenType() != TokenType::identifier) {
            throw Exception("syntax error, expected identifier");
        }
        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        // column names
        m_lexer.skipToken(TokenType::open_paren);
        do {
            // previously an empty TODO branch: any token, including end of input,
            // was accepted as a column name and the loop only stopped at ')'
            if (m_lexer.tokenType() != TokenType::identifier) {
                throw Exception("syntax error, expected identifier");
            }
            column_names.emplace_back(m_lexer.consumeCurrentToken().token_string);
            m_lexer.skipTokenOptional(TokenType::comma);
        } while (m_lexer.tokenType() != TokenType::close_paren);
        m_lexer.skipToken(TokenType::close_paren);

        m_lexer.skipToken(TokenType::keyword_values);

        // column values
        m_lexer.skipToken(TokenType::open_paren);
        do {
            column_values.push_back(parse_value());
            m_lexer.skipTokenOptional(TokenType::comma);
        } while (m_lexer.tokenType() != TokenType::close_paren);
        m_lexer.skipToken(TokenType::close_paren);

        return std::make_unique<InsertIntoTableNode>(table_name, column_names, std::move(column_values));
    }

    // Parses a single value: an int number, a double number, a string literal,
    // a function call `name ( value [,] ... )` or a plain identifier.
    //
    // Throws Exception("Syntax error, current token: ...") for any other token.
    std::unique_ptr<Node> Parser::parse_value() {
        const auto token_type = m_lexer.tokenType();

        if (token_type == TokenType::int_number) {
            return std::make_unique<IntValueNode>(std::stoi(m_lexer.consumeCurrentToken().token_string));
        }
        if (token_type == TokenType::double_number) {
            // std::stod, matching parse_operand_node(); std::stof rounded the literal to float precision first
            return std::make_unique<FloatValueNode>(std::stod(m_lexer.consumeCurrentToken().token_string));
        }
        if (token_type == TokenType::string_literal) {
            return std::make_unique<StringValueNode>(m_lexer.consumeCurrentToken().token_string);
        }

        if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
            // function
            std::string function_name = m_lexer.consumeCurrentToken().token_string;
            std::vector<std::unique_ptr<Node>> pars;

            m_lexer.skipToken(TokenType::open_paren);
            while (m_lexer.tokenType() != TokenType::close_paren) { // TODO handle errors
                pars.push_back(parse_value());
                m_lexer.skipTokenOptional(TokenType::comma);
            }
            m_lexer.skipToken(TokenType::close_paren);
            return std::make_unique<FunctionNode>(function_name, std::move(pars));
        }

        if (token_type == TokenType::identifier) {
            std::string name = m_lexer.consumeCurrentToken().token_string;
            return std::make_unique<ColNameNode>(name);
        }

        throw Exception("Syntax error, current token: " + m_lexer.currentToken().token_string);
    }

    // Parses SELECT value [,] ... FROM table [where clause] [order by clause].
    //
    // A selected value that is a plain column name is aliased by that name;
    // every other value gets a generated alias "c1", "c2", ... numbered in
    // order of appearance among the non-column values.
    std::unique_ptr<Node> Parser::parse_select_from_table() {
        auto cols = std::make_unique<std::vector<SelectColNode>>();

        m_lexer.skipToken(TokenType::keyword_select);

        int generated_alias_no = 1;
        while (m_lexer.tokenType() != TokenType::keyword_from) {
            auto column_value = parse_value();

            std::string column_alias;
            if (column_value->node_type == NodeType::column_name) {
                // node_type was just checked, so the downcast is to the dynamic type
                column_alias = static_cast<ColNameNode *>(column_value.get())->name;
            } else {
                column_alias = "c" + std::to_string(generated_alias_no);
                generated_alias_no++;
            }

            cols->push_back(SelectColNode{std::move(column_value), column_alias});

            m_lexer.skipTokenOptional(TokenType::comma);
        }

        m_lexer.skipToken(TokenType::keyword_from);
        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        std::unique_ptr<Node> where_node = parse_where_clause();

        std::vector<ColOrderNode> orderby_node = parse_orderby_clause();

        // if (m_lexer.tokenType() == TokenType::keyword_offset) {}
        // if (m_lexer.tokenType() == TokenType::keyword_limit) {}

        return std::make_unique<SelectFromTableNode>(table_name, std::move(cols), std::move(where_node), orderby_node);
    }

    // Parses DELETE FROM table [where clause].
    std::unique_ptr<Node> Parser::parse_delete_from_table() {
        m_lexer.skipToken(TokenType::keyword_delete);
        m_lexer.skipToken(TokenType::keyword_from);

        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        std::unique_ptr<Node> where_node = parse_where_clause();

        return std::make_unique<DeleteFromTableNode>(table_name, std::move(where_node));
    }

    // Parses UPDATE [TABLE] table SET col = operand [op operand] [,] ... [where clause].
    //
    // Every assignment is stored as an arithmetical node. A plain
    // `col = operand` uses ArithmeticalOperatorType::copy_value with an integer
    // 0 as the (placeholder) right-hand side.
    std::unique_ptr<Node> Parser::parse_update_table() {
        m_lexer.skipToken(TokenType::keyword_update);
        m_lexer.skipTokenOptional(TokenType::keyword_table);

        std::string table_name = m_lexer.consumeCurrentToken().token_string;

        m_lexer.skipToken(TokenType::keyword_set);

        // NOTE(review): element type inferred from the std::string pushed below - confirm against parser.h
        std::vector<std::string> cols_names;
        std::vector<std::unique_ptr<Node>> values;

        do {
            cols_names.emplace_back(m_lexer.consumeCurrentToken().token_string);
            m_lexer.skipToken(TokenType::equal);

            std::unique_ptr<Node> left = parse_operand_node();
            if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
                ArithmeticalOperatorType op = parse_arithmetical_operator();
                std::unique_ptr<Node> right = parse_operand_node();

                values.push_back(std::make_unique<ArithmeticalOperatorNode>(op, std::move(left), std::move(right)));
            } else {
                std::unique_ptr<Node> right = std::make_unique<IntValueNode>(0);
                values.push_back(
                    std::make_unique<ArithmeticalOperatorNode>(ArithmeticalOperatorType::copy_value,
                                                               std::move(left), std::move(right)));
            }
            m_lexer.skipTokenOptional(TokenType::comma);

        } while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);

        std::unique_ptr<Node> where_node = parse_where_clause();

        return std::make_unique<UpdateTableNode>(table_name, cols_names, std::move(values), std::move(where_node));
    }

    // Parses an optional WHERE clause:
    //   WHERE relational_expr { logical_operator relational_expr }
    //
    // Returns a node that needs no arguments when the current token is not
    // WHERE; otherwise the relational expressions combined left to right, so
    // `a AND b OR c` becomes `((a AND b) OR c)`. No precedence is applied
    // between the logical operators.
    //
    // Throws Exception when the clause is followed by anything other than end
    // of input or ORDER.
    //
    // The previous loop re-assigned the result on every pass: a third condition
    // made it parse the logical operator as an operand and throw, and two
    // conditions written without an operator silently dropped the first one.
    std::unique_ptr<Node> Parser::parse_where_clause() {
        if (m_lexer.tokenType() != TokenType::keyword_where) {
            return std::make_unique<TrueNode>();
        }

        m_lexer.skipToken(TokenType::keyword_where);

        std::unique_ptr<Node> node = parse_relational_expression();
        while (Lexer::isLogicalOperator(m_lexer.tokenType())) {
            auto operation = parse_logical_operator();
            std::unique_ptr<Node> rhs = parse_relational_expression();
            node = std::make_unique<LogicalOperatorNode>(operation, std::move(node), std::move(rhs));
        }

        if (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order) {
            throw Exception("syntax error, unexpected token in where clause: " + m_lexer.currentToken().token_string);
        }

        return node;
    }

    // Parses an optional ORDER BY index [ASC | DESC] [,] ... clause that runs
    // to the end of input. Only integer column indexes are accepted; direction
    // defaults to ascending.
    //
    // Returns an empty vector when the current token is not ORDER.
    // Throws Exception when an ordering item is not an int number.
    std::vector<ColOrderNode> Parser::parse_orderby_clause() {
        std::vector<ColOrderNode> order_cols;

        if (m_lexer.tokenType() != TokenType::keyword_order) {
            return order_cols;
        }

        m_lexer.skipToken(TokenType::keyword_order);
        m_lexer.skipToken(TokenType::keyword_by);

        do {
            // the token is consumed before its type is validated, as before
            auto token_type = m_lexer.tokenType();
            std::string tokenString = m_lexer.consumeCurrentToken().token_string;
            if (token_type != TokenType::int_number) {
                throw Exception("column index alloved in order by clause at this moment");
            }
            int col_index = std::stoi(tokenString);

            bool asc = true;
            if (m_lexer.tokenType() == TokenType::keyword_asc) {
                m_lexer.skipToken(TokenType::keyword_asc);
            } else if (m_lexer.tokenType() == TokenType::keyword_desc) {
                m_lexer.skipToken(TokenType::keyword_desc);
                asc = false;
            }

            order_cols.push_back(ColOrderNode{col_index, asc});

            m_lexer.skipTokenOptional(TokenType::comma);

        } while (m_lexer.tokenType() != TokenType::eof); // && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);

        return order_cols;
    }

    // Parses `operand relational_operator operand`.
    std::unique_ptr<Node> Parser::parse_relational_expression() {
        auto left = parse_operand_node();
        auto operation = parse_relational_operator();
        auto right = parse_operand_node();

        return std::make_unique<RelationalOperatorNode>(operation, std::move(left), std::move(right));
    }

    // Consumes the current token and turns it into an operand node: int number,
    // double number, string literal, identifier or the NULL keyword.
    //
    // Throws Exception("Unknown operand node") for any other token; the token
    // has already been consumed at that point.
    std::unique_ptr<Node> Parser::parse_operand_node() {
        // while not end or order or limit
        auto token_type = m_lexer.tokenType();
        std::string tokenString = m_lexer.consumeCurrentToken().token_string;
        switch (token_type) {
            case TokenType::int_number:
                return std::make_unique<IntValueNode>(std::stoi(tokenString));
            case TokenType::double_number:
                return std::make_unique<FloatValueNode>(std::stod(tokenString));
            case TokenType::string_literal:
                return std::make_unique<StringValueNode>(tokenString);
            case TokenType::identifier:
                return std::make_unique<DatabaseValueNode>(tokenString);
            case TokenType::keyword_null:
                return std::make_unique<NullValueNode>();
            default:
                throw Exception("Unknown operand node");
        }
    }

    // Consumes the current token and maps it to a RelationalOperatorType.
    // Throws Exception("Unknown relational operator") for any other token.
    RelationalOperatorType Parser::parse_relational_operator() {
        auto op = m_lexer.consumeCurrentToken();
        switch (op.type) {
            case TokenType::equal:
                return RelationalOperatorType::equal;
            case TokenType::not_equal:
                return RelationalOperatorType::not_equal;
            case TokenType::greater:
                return RelationalOperatorType::greater;
            case TokenType::greater_equal:
                return RelationalOperatorType::greater_equal;
            case TokenType::lesser:
                return RelationalOperatorType::lesser;
            case TokenType::lesser_equal:
                return RelationalOperatorType::lesser_equal;
            default:
                throw Exception("Unknown relational operator");
        }
    }

    // Consumes the current token and maps it to a LogicalOperatorType.
    // Throws Exception("Unknown logical operator") for any other token.
    LogicalOperatorType Parser::parse_logical_operator() {
        auto op = m_lexer.consumeCurrentToken();
        switch (op.type) {
            case TokenType::logical_and:
                return LogicalOperatorType::and_operator;
            case TokenType::logical_or:
                return LogicalOperatorType::or_operator;
            default:
                throw Exception("Unknown logical operator");
        }
    }

    // Consumes the current token and maps it to an ArithmeticalOperatorType.
    // Throws Exception("Unknown arithmetical operator") for any other token.
    ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
        auto op = m_lexer.consumeCurrentToken();
        switch (op.type) {
            case TokenType::plus:
                return ArithmeticalOperatorType::plus_operator;
            case TokenType::minus:
                return ArithmeticalOperatorType::minus_operator;
            case TokenType::multiply:
                return ArithmeticalOperatorType::multiply_operator;
            case TokenType::divide:
                return ArithmeticalOperatorType::divide_operator;
            default:
                throw Exception("Unknown arithmetical operator");
        }
    }

}