#include "parser.h"
#include "exception.h"

// NOTE(review): throughout this file template argument lists appear to be
// missing (e.g. `std::unique_ptr` / `std::make_unique` with no `<...>`, and
// stray `>` after `std::vector`). They look stripped by a tool; restore them
// from the declarations in parser.h before building — TODO confirm.

namespace usql {

// TODO handle premature eof

// Maps a ColumnType enumerator to its name as a string.
// Throws Exception("invalid column type: <n>") for a value that matches none
// of the five enumerators checked below.
std::string column_type_name(const ColumnType type) {
  if (type == ColumnType::integer_type) return "integer_type";
  if (type == ColumnType::float_type) return "float_type";
  if (type == ColumnType::varchar_type) return "varchar_type";
  if (type == ColumnType::date_type) return "date_type";
  if (type == ColumnType::bool_type) return "bool_type";
  throw Exception("invalid column type: " + std::to_string((int)type));
};

// Constructs a parser with a freshly constructed Lexer.
Parser::Parser() { m_lexer = Lexer{}; }

// Entry point: hands `code` to the lexer, then dispatches on the current
// token type (and, for CREATE, on the following token type) to the matching
// statement parser.
// If no branch matches, writes "ERROR, token:<token>" to std::cout and
// returns a node constructed from NodeType::error instead of throwing.
std::unique_ptr Parser::parse(const std::string &code) {
  m_lexer.parse(code);
  // m_lexer.debugTokens();

  if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_table)
    return parse_create_table();
  if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_index)
    return parse_create_index();
  if (m_lexer.tokenType() == TokenType::keyword_drop)
    return parse_drop_table();
  if (m_lexer.tokenType() == TokenType::keyword_insert)
    return parse_insert_into_table();
  if (m_lexer.tokenType() == TokenType::keyword_select)
    return parse_select_from_table();
  if (m_lexer.tokenType() == TokenType::keyword_delete)
    return parse_delete_from_table();
  if (m_lexer.tokenType() == TokenType::keyword_update)
    return parse_update_table();
  if (m_lexer.tokenType() == TokenType::keyword_load)
    return parse_load_table();
  if (m_lexer.tokenType() == TokenType::keyword_save)
    return parse_save_table();
  if (m_lexer.tokenType() == TokenType::keyword_set)
    return parse_set();
  if (m_lexer.tokenType() == TokenType::keyword_show)
    return parse_show();

  std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
  return std::make_unique(NodeType::error);
}

// Parses either
//   CREATE TABLE <name> AS <select statement>
// or
//   CREATE TABLE <name> ( <col> <type> [NOT NULL | NULL] [,] ... )
// Column types accepted: integer, float, varchar(<int>), date, bool.
// Throws Exception when a column name is not an identifier or the column
// type keyword is not one of the above.
std::unique_ptr Parser::parse_create_table() {
  std::vector cols_def{};

  m_lexer.skipToken(TokenType::keyword_create);
  m_lexer.skipToken(TokenType::keyword_table);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  // create as select
  if (m_lexer.tokenType() == TokenType::keyword_as) {
    m_lexer.skipToken(TokenType::keyword_as);

    std::unique_ptr select = parse_select_from_table();
    return std::make_unique(table_name, std::move(select));
  } else {
    m_lexer.skipToken(TokenType::open_paren);
    // zero-based position of the column within the definition list
    int column_order = 0;
    do {
      std::string database_value;
      ColumnType column_type;
      // length defaults to 1; only varchar overrides it
      int column_len = 1;
      // columns are nullable unless NOT NULL is given
      bool column_nullable = true;

      // column name
      if (m_lexer.tokenType() != TokenType::identifier) {
        throw Exception("syntax error, expected identifier");
      }
      database_value = m_lexer.consumeToken().token_string;

      // column type and optionally len
      if (m_lexer.tokenType() == TokenType::keyword_integer) {
        column_type = ColumnType::integer_type;
        m_lexer.nextToken();
      } else if (m_lexer.tokenType() == TokenType::keyword_float) {
        column_type = ColumnType::float_type;
        m_lexer.nextToken();
      } else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
        column_type = ColumnType::varchar_type;
        m_lexer.nextToken();
        // varchar requires an explicit length: varchar(<int>)
        m_lexer.skipToken(TokenType::open_paren);
        column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
        m_lexer.skipToken(TokenType::close_paren);
      } else if (m_lexer.tokenType() == TokenType::keyword_date) {
        column_type = ColumnType::date_type;
        m_lexer.nextToken();
      } else if (m_lexer.tokenType() == TokenType::keyword_bool) {
        column_type = ColumnType::bool_type;
        m_lexer.nextToken();
      } else {
        throw Exception("syntax error, column type expected, found " + m_lexer.currentToken().token_string);
      }

      // optional nullability: NOT NULL clears the flag, a bare NULL is
      // accepted and leaves the default (nullable)
      if (m_lexer.tokenType() == TokenType::keyword_not) {
        m_lexer.nextToken();
        m_lexer.skipToken(TokenType::keyword_null);
        column_nullable = false;
      } else if (m_lexer.tokenType() == TokenType::keyword_null) {
        m_lexer.nextToken();
      }

      cols_def.emplace_back(database_value, column_type, column_order++, column_len, column_nullable);

      m_lexer.skipTokenOptional(TokenType::comma);

      //constraints
      //defaults
    } while (m_lexer.tokenType() != TokenType::close_paren);

    // NOTE(review): the closing ')' is not consumed before returning, unlike
    // the loops in parse_insert_into_table — confirm this is intended.
    return std::make_unique(table_name, cols_def);
  }
}

// Parses: LOAD [INTO] <table identifier> [FROM] <string literal file name>
std::unique_ptr Parser::parse_load_table() {
  m_lexer.skipToken(TokenType::keyword_load);
  m_lexer.skipTokenOptional(TokenType::keyword_into);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  m_lexer.skipTokenOptional(TokenType::keyword_from);

  std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;

  return std::make_unique(table_name, file_name);
}

// Parses: SAVE [TABLE] <table identifier> [INTO] <string literal file name>
std::unique_ptr Parser::parse_save_table() {
  m_lexer.skipToken(TokenType::keyword_save);
  m_lexer.skipTokenOptional(TokenType::keyword_table);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  m_lexer.skipTokenOptional(TokenType::keyword_into);

  std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;

  return std::make_unique(table_name, file_name);
}

// Parses: DROP [TABLE] <table identifier>
std::unique_ptr Parser::parse_drop_table() {
  m_lexer.skipToken(TokenType::keyword_drop);
  m_lexer.skipTokenOptional(TokenType::keyword_table);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  return std::make_unique(table_name);
}

// Parses: SET <string literal name> [=] <string literal value>
// Both the name and the value are read as string-literal tokens.
std::unique_ptr Parser::parse_set() {
  m_lexer.skipToken(TokenType::keyword_set);

  std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
  m_lexer.skipTokenOptional(TokenType::equal);
  std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string;

  return std::make_unique(name, value);
}

// Parses: SHOW <string literal name>
std::unique_ptr Parser::parse_show() {
  m_lexer.skipToken(TokenType::keyword_show);

  std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;

  return std::make_unique(name);
}

// Parses: INSERT INTO <table> ( <col> [,] ... ) VALUES ( <expr> [,] ... )
// Commas between items are optional in both lists. The number of column
// names is not checked against the number of values here.
std::unique_ptr Parser::parse_insert_into_table() {
  std::vector database_values{};
  std::vector> column_values{};

  m_lexer.skipToken(TokenType::keyword_insert);
  m_lexer.skipToken(TokenType::keyword_into);

  // table name
  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  // column names
  m_lexer.skipToken(TokenType::open_paren);
  do {
    database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
    m_lexer.skipTokenOptional(TokenType::comma);
  } while (m_lexer.tokenType() != TokenType::close_paren);
  m_lexer.skipToken(TokenType::close_paren);

  m_lexer.skipToken(TokenType::keyword_values);

  // column values
  m_lexer.skipToken(TokenType::open_paren);
  do {
    auto value = parse_expression();
    column_values.emplace_back(std::move(value));

    m_lexer.skipTokenOptional(TokenType::comma);
  } while (m_lexer.tokenType() != TokenType::close_paren);
  m_lexer.skipToken(TokenType::close_paren);

  return std::make_unique(table_name, database_values, std::move(column_values));
}

// Parses: SELECT [DISTINCT] <col list> FROM <table> [WHERE ...]
//         [ORDER BY ...] [OFFSET n] [LIMIT n]
// Each entry of the column list is either `*` or an expression with an
// optional `AS <alias>`. Without an alias, a plain column reference uses its
// own column name; any other expression gets a generated alias "c1", "c2", ...
std::unique_ptr Parser::parse_select_from_table() {
  bool distinct = false;
  auto cols = std::make_unique>();

  m_lexer.skipToken(TokenType::keyword_select);

  if (m_lexer.tokenType() == TokenType::keyword_distinct) {
    distinct = true;
    m_lexer.skipToken(TokenType::keyword_distinct);
  }

  // counter for generated aliases; advanced only when a generated alias is used
  int i = 1;
  while (m_lexer.tokenType() != TokenType::keyword_from) {
    if (m_lexer.tokenType()==TokenType::multiply) {
      // `*` column: stored with the literal alias "*"
      std::string name = m_lexer.consumeToken().token_string;
      auto multiply_char = std::make_unique(name);

      cols->push_back(SelectColNode{std::move(multiply_char), "*"});
    } else {
      auto column_value = parse_expression();
      std::string column_alias;

      if (m_lexer.tokenType() == TokenType::keyword_as) {
        m_lexer.skipToken(TokenType::keyword_as);
        column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
      } else {
        if (column_value->node_type == NodeType::database_value) {
          // downcast relies solely on the node_type tag checked above
          column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
        } else {
          column_alias = "c" + std::to_string(i);
          i++;
        }
      }

      cols->push_back(SelectColNode{std::move(column_value), column_alias});
    }

    m_lexer.skipTokenOptional(TokenType::comma);
  }

  m_lexer.skipToken(TokenType::keyword_from);
  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  std::unique_ptr where_node = parse_where_clause();

  std::vector orderby_node = parse_order_by_clause();

  OffsetLimitNode offsetlimit_node = parse_offset_limit_clause();

  return std::make_unique(table_name, std::move(cols), std::move(where_node), orderby_node, offsetlimit_node, distinct);
}

// Parses: DELETE FROM <table> [WHERE ...]
std::unique_ptr Parser::parse_delete_from_table() {
  m_lexer.skipToken(TokenType::keyword_delete);
  m_lexer.skipToken(TokenType::keyword_from);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  std::unique_ptr where_node = parse_where_clause();

  return std::make_unique(table_name, std::move(where_node));
}

// Parses: UPDATE [TABLE] <table> SET <col> = <value> [<arith op> <value>] [,] ...
//         [WHERE ...]
// Every assignment is stored as an arithmetical-operator node. A bare value
// (no operator following it) is wrapped with
// ArithmeticalOperatorType::copy_value and a right operand built from 0.
std::unique_ptr Parser::parse_update_table() {
  m_lexer.skipToken(TokenType::keyword_update);
  m_lexer.skipTokenOptional(TokenType::keyword_table);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  m_lexer.skipToken(TokenType::keyword_set);

  std::vector cols_names;
  std::vector> values;

  do {
    cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
    m_lexer.skipToken(TokenType::equal);

    std::unique_ptr left = Parser::parse_value();
    if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
      // at most one binary operator per assignment is parsed here
      ArithmeticalOperatorType op = parse_arithmetical_operator();
      std::unique_ptr right = Parser::parse_value();

      values.push_back(std::make_unique(op, std::move(left), std::move(right)));
    } else {
      // placeholder right operand for the copy_value case
      std::unique_ptr right = std::make_unique(0);
      values.push_back(std::make_unique(ArithmeticalOperatorType::copy_value, std::move(left), std::move(right)));
    }
    m_lexer.skipTokenOptional(TokenType::comma);

  } while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);

  std::unique_ptr where_node = parse_where_clause();

  return std::make_unique(table_name, cols_names, std::move(values), std::move(where_node));
}

// Parses: CREATE INDEX <index name> ON <table> ( <column> )
// Exactly one column identifier is read between the parentheses.
std::unique_ptr Parser::parse_create_index() {
  m_lexer.skipToken(TokenType::keyword_create);
  m_lexer.skipToken(TokenType::keyword_index);

  std::string index_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  m_lexer.skipToken(TokenType::keyword_on);

  std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

  m_lexer.skipToken(TokenType::open_paren);
  std::string column_name = m_lexer.consumeToken(TokenType::identifier).token_string;
  m_lexer.skipToken(TokenType::close_paren);

  return std::make_unique(index_name, table_name, column_name);
}

// Parses an optional: ORDER BY <col spec> [ASC | DESC] [,] ...
// A col spec is either an integer (converted with std::stoi) or an
// identifier; anything else throws Exception. Ascending is the default.
// Returns an empty vector when the current token is not ORDER.
std::vector Parser::parse_order_by_clause() {
  std::vector order_cols;

  if (m_lexer.tokenType() == TokenType::keyword_order) {
    m_lexer.skipToken(TokenType::keyword_order);
    m_lexer.skipToken(TokenType::keyword_by);

    do {
      bool asc = true;

      // remember the type before consuming; it is validated only after the
      // optional ASC/DESC has been read
      auto cspec_token_type = m_lexer.tokenType();
      std::string cspec_token = m_lexer.consumeToken().token_string;

      if (m_lexer.tokenType() == TokenType::keyword_asc) {
        m_lexer.skipToken(TokenType::keyword_asc);
      } else if (m_lexer.tokenType() == TokenType::keyword_desc) {
        m_lexer.skipToken(TokenType::keyword_desc);
        asc = false;
      }

      switch (cspec_token_type) {
        case TokenType::int_number:
          order_cols.emplace_back(std::stoi(cspec_token), asc);
          break;
        case TokenType::identifier:
          order_cols.emplace_back(cspec_token, asc);
          break;
        default:
          throw Exception("order by column can be either column m_index or identifier");
      }

      m_lexer.skipTokenOptional(TokenType::comma);

    } while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
  }

  return order_cols;
}

// Parses an optional OFFSET <int> followed by an optional LIMIT <int>, in
// that order. Defaults: offset 0, limit SIZE_MAX.
// NOTE(review): the numbers are converted with std::stoi (int) and then
// stored in size_t, so values above INT_MAX make std::stoi throw
// std::out_of_range rather than Exception — confirm whether a wider
// conversion is wanted.
OffsetLimitNode Parser::parse_offset_limit_clause() {
  size_t offset = 0;
  size_t limit = SIZE_MAX;

  if (m_lexer.tokenType() == TokenType::keyword_offset) {
    m_lexer.skipToken(TokenType::keyword_offset);
    offset = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
  }

  if (m_lexer.tokenType() == TokenType::keyword_limit) {
    m_lexer.skipToken(TokenType::keyword_limit);
    limit = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
  }

  return OffsetLimitNode{offset, limit};
}

// Parses an optional WHERE clause. When the current token is not WHERE a
// default-constructed node is returned (its concrete type is given by the
// missing template argument of std::make_unique — see the note at the top).
// Otherwise expressions are folded left-to-right until eof, ORDER, OFFSET or
// LIMIT is reached.
std::unique_ptr Parser::parse_where_clause() {
  if (m_lexer.tokenType() != TokenType::keyword_where) {
    return std::make_unique();
  }

  m_lexer.skipToken(TokenType::keyword_where);

  std::unique_ptr left = parse_expression();
  do {
    left = parse_expression(std::move(left));

  } while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);

  return left;
}

// Parses one value and then tries to extend it with a following operator.
std::unique_ptr Parser::parse_expression() {
  std::unique_ptr left = parse_value();

  return parse_expression(std::move(left));
}

// Extends an already parsed left operand according to the current token:
//  - relational operator:   left <op> value
//  - logical operator:      left <op> expression (right side parsed recursively)
//  - arithmetical operator: left <op> value
//  - start of a value (number, string, identifier, NULL, '('): parses and
//    returns that value
//  - anything else: returns `left` unchanged
// NOTE(review): in the "start of a value" branch `left` is not used and is
// destroyed on return — confirm this is intended.
std::unique_ptr Parser::parse_expression(std::unique_ptr left) {
  if (Lexer::isRelationalOperator(m_lexer.tokenType())) {
    auto operation = parse_relational_operator();
    auto right = parse_value();
    return std::make_unique(operation, std::move(left), std::move(right));

  } else if (Lexer::isLogicalOperator(m_lexer.tokenType())) {
    auto operation = parse_logical_operator();
    auto right = parse_expression();
    return std::make_unique(operation, std::move(left), std::move(right));

  } else if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
    auto operation = parse_arithmetical_operator();
    auto right = parse_value();

    return std::make_unique(operation, std::move(left), std::move(right));

  } else if (m_lexer.tokenType() == TokenType::int_number || m_lexer.tokenType() == TokenType::double_number ||m_lexer.tokenType() == TokenType::string_literal ||m_lexer.tokenType() == TokenType::identifier || m_lexer.tokenType() == TokenType::keyword_null || m_lexer.tokenType() == TokenType::open_paren) {
    return parse_value();
  }

  return left;
}

// Parses a single operand:
//  - a parenthesised expression,
//  - a function call `<identifier> ( <expr> [,] ... )`,
//  - an int / double / string literal,
//  - a column identifier, NULL, TRUE / FALSE,
//  - or `*` (used for count(*)).
// Throws Exception("Unknown operand node <token>") for any other token; note
// that the token has already been consumed at that point.
std::unique_ptr Parser::parse_value() {
  // type of the token this value starts with, captured before consuming
  auto token_typcol = m_lexer.tokenType();

  // parenthesised expression
  if (token_typcol == TokenType::open_paren) {
    m_lexer.skipToken(TokenType::open_paren);

    auto left = parse_expression();
    do {
      left = parse_expression(std::move(left));
    } while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof);

    m_lexer.skipToken(TokenType::close_paren);
    return left;
  }

  // function call
  if (token_typcol == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
    std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
    std::vector> pars;

    m_lexer.skipToken(TokenType::open_paren);
    while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof) {
      pars.push_back(parse_expression());
      m_lexer.skipTokenOptional(TokenType::comma);
    }
    m_lexer.skipToken(TokenType::close_paren);
    return std::make_unique(function_name, std::move(pars));
  }

  // numbers and strings
  std::string tokenString = m_lexer.consumeToken().token_string;

  if (token_typcol == TokenType::int_number)
    return std::make_unique(std::stoi(tokenString));
  if (token_typcol == TokenType::double_number)
    return std::make_unique(std::stod(tokenString));
  if (token_typcol == TokenType::string_literal)
    return std::make_unique(tokenString);

  // db column
  if (token_typcol == TokenType::identifier)
    return std::make_unique(tokenString);

  // null
  if (token_typcol == TokenType::keyword_null)
    return std::make_unique();

  // true / false
  if (token_typcol == TokenType::keyword_true || token_typcol == TokenType::keyword_false)
    return std::make_unique(token_typcol == TokenType::keyword_true);

  // token * for count(*)
  if (token_typcol == TokenType::multiply)
    return std::make_unique(tokenString);

  throw Exception("Unknown operand node " + tokenString);
}

// Consumes the current token and maps it to a RelationalOperatorType.
// `is` followed by `not` is combined into is_not (the `not` is consumed too).
// Throws Exception for any other token type.
RelationalOperatorType Parser::parse_relational_operator() {
  auto op = m_lexer.consumeToken();
  switch (op.type) {
    case TokenType::equal:
      return RelationalOperatorType::equal;
    case TokenType::not_equal:
      return RelationalOperatorType::not_equal;
    case TokenType::greater:
      return RelationalOperatorType::greater;
    case TokenType::greater_equal:
      return RelationalOperatorType::greater_equal;
    case TokenType::lesser:
      return RelationalOperatorType::lesser;
    case TokenType::lesser_equal:
      return RelationalOperatorType::lesser_equal;
    case TokenType::is:
      if (m_lexer.tokenType() == TokenType::keyword_not) {
        m_lexer.skipToken(TokenType::keyword_not);
        return RelationalOperatorType::is_not;
      }
      return RelationalOperatorType::is;
    default:
      throw Exception("Unknown relational operator " + op.token_string);
  }
}

// Consumes the current token and maps logical_and / logical_or to the
// corresponding LogicalOperatorType; throws Exception otherwise.
LogicalOperatorType Parser::parse_logical_operator() {
  auto op = m_lexer.consumeToken();
  switch (op.type) {
    case TokenType::logical_and:
      return LogicalOperatorType::and_operator;
    case TokenType::logical_or:
      return LogicalOperatorType::or_operator;
    default:
      throw Exception("Unknown logical operator");
  }
}

// Consumes the current token and maps plus / minus / multiply / divide to the
// corresponding ArithmeticalOperatorType; throws Exception otherwise.
ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
  auto op = m_lexer.consumeToken();
  switch (op.type) {
    case TokenType::plus:
      return ArithmeticalOperatorType::plus_operator;
    case TokenType::minus:
      return ArithmeticalOperatorType::minus_operator;
    case TokenType::multiply:
      return ArithmeticalOperatorType::multiply_operator;
    case TokenType::divide:
      return ArithmeticalOperatorType::divide_operator;
    default:
      throw Exception("Unknown arithmetical operator");
  }
}

} // namespace