// usql/parser.cpp

#include "parser.h"
#include "exception.h"

namespace usql {

	// TODO: handle premature eof

	// Maps a column type to the name of its enumerator, e.g.
	// ColumnType::integer_type -> "integer_type".
	//
	// Throws Exception (message carries the numeric value) for any type
	// other than the five handled below.
	std::string column_type_name(const ColumnType type) {
		switch (type) {
			case ColumnType::integer_type:
				return "integer_type";
			case ColumnType::float_type:
				return "float_type";
			case ColumnType::varchar_type:
				return "varchar_type";
			case ColumnType::date_type:
				return "date_type";
			case ColumnType::bool_type:
				return "bool_type";
			default:
				break;
		}

		throw Exception("invalid column type: " + std::to_string(static_cast<int>(type)));
	}


    Parser::Parser() {
	    m_lexer = Lexer{};
    }

    // Hands `code` to the lexer and dispatches on the first token to the
    // matching statement parser.
    //
    // Returns the node built by that statement parser. When the first token
    // (or, for `create`, the first two tokens) does not start a known
    // statement, the offending token is printed to std::cout and a Node of
    // type NodeType::error is returned.
    std::unique_ptr<Node> Parser::parse(const std::string &code) {
        m_lexer.parse(code);
        // m_lexer.debugTokens();

        switch (m_lexer.tokenType()) {
            case TokenType::keyword_create: {
                // `create` needs one token of lookahead to pick the object kind
                const auto object_kind = m_lexer.nextTokenType();
                if (object_kind == TokenType::keyword_table)
                    return parse_create_table();
                if (object_kind == TokenType::keyword_index)
                    return parse_create_index();
                break;
            }
            case TokenType::keyword_drop:
                return parse_drop_table();

            case TokenType::keyword_insert:
                return parse_insert_into_table();
            case TokenType::keyword_select:
                return parse_select_from_table();
            case TokenType::keyword_delete:
                return parse_delete_from_table();
            case TokenType::keyword_update:
                return parse_update_table();

            case TokenType::keyword_load:
                return parse_load_table();
            case TokenType::keyword_save:
                return parse_save_table();

            case TokenType::keyword_set:
                return parse_set();
            case TokenType::keyword_show:
                return parse_show();

            default:
                break;
        }

        // nothing matched, report the token and hand back an error node
        std::cout << "ERROR, token:" << m_lexer.currentToken().token_string << std::endl;
        return std::make_unique<Node>(NodeType::error);
    }

    std::unique_ptr<Node> Parser::parse_create_table() {
	    std::vector<ColDefNode> cols_def{};

	    m_lexer.skipToken(TokenType::keyword_create);
	    m_lexer.skipToken(TokenType::keyword_table);

	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

	    // create as select
	    if (m_lexer.tokenType() == TokenType::keyword_as) {
			m_lexer.skipToken(TokenType::keyword_as);

			std::unique_ptr<Node> select = parse_select_from_table();

			return  std::make_unique<CreateTableAsSelectNode>(table_name, std::move(select));
	    } else {
	    	m_lexer.skipToken(TokenType::open_paren);
	    	int column_order = 0;
	    	do {
	    		std::string database_value;
	    		ColumnType column_type;
	    		int column_len = 1;
	    		bool column_nullable = true;

	    		// column name
	    		if (m_lexer.tokenType() != TokenType::identifier) {
					throw Exception("syntax error, expected identifier");
			}
    		database_value = m_lexer.consumeToken().token_string;

    		// column type and optionally len
    		if (m_lexer.tokenType() == TokenType::keyword_integer) {
    			column_type = ColumnType::integer_type;
    			m_lexer.nextToken();
    		} else if (m_lexer.tokenType() == TokenType::keyword_float) {
    			column_type = ColumnType::float_type;
    			m_lexer.nextToken();
    		} else if (m_lexer.tokenType() == TokenType::keyword_varchar) {
    			column_type = ColumnType::varchar_type;
    			m_lexer.nextToken();
    			m_lexer.skipToken(TokenType::open_paren);
    			column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
    			m_lexer.skipToken(TokenType::close_paren);
			} else if (m_lexer.tokenType() == TokenType::keyword_date) {
				column_type = ColumnType::date_type;
				m_lexer.nextToken();
	    	} else if (m_lexer.tokenType() == TokenType::keyword_bool) {
    			column_type = ColumnType::bool_type;
    			m_lexer.nextToken();
	    	} else {
				throw Exception("syntax error, column type expected, found " + m_lexer.currentToken().token_string);
			}

	    		if (m_lexer.tokenType() == TokenType::keyword_not) {
	    			m_lexer.nextToken();
	    			m_lexer.skipToken(TokenType::keyword_null);
	    			column_nullable = false;
	    		} else if (m_lexer.tokenType() == TokenType::keyword_null) {
	    			m_lexer.nextToken();
	    		}

	    		cols_def.emplace_back(database_value, column_type, column_order++, column_len, column_nullable);

	    		m_lexer.skipTokenOptional(TokenType::comma);

			//constraints
	    	//defaults
	    	} while (m_lexer.tokenType() != TokenType::close_paren);

	    	return std::make_unique<CreateTableNode>(table_name, cols_def);
	    }
    }

    // Parses `load [into] <table identifier> [from] <file string literal>`
    // and returns a LoadIntoTableNode holding the table and file names.
    std::unique_ptr<Node> Parser::parse_load_table() {
        m_lexer.skipToken(TokenType::keyword_load);

        // target table, the `into` keyword may be left out
        m_lexer.skipTokenOptional(TokenType::keyword_into);
        std::string target_table = m_lexer.consumeToken(TokenType::identifier).token_string;

        // source file, the `from` keyword may be left out
        m_lexer.skipTokenOptional(TokenType::keyword_from);
        std::string source_file = m_lexer.consumeToken(TokenType::string_literal).token_string;

        return std::make_unique<LoadIntoTableNode>(target_table, source_file);
    }

    // Parses `save [table] <table identifier> [into] <file string literal>`
    // and returns a SaveTableNode holding the table and file names.
    std::unique_ptr<Node> Parser::parse_save_table() {
        m_lexer.skipToken(TokenType::keyword_save);

        // source table, the `table` keyword may be left out
        m_lexer.skipTokenOptional(TokenType::keyword_table);
        std::string source_table = m_lexer.consumeToken(TokenType::identifier).token_string;

        // destination file, the `into` keyword may be left out
        m_lexer.skipTokenOptional(TokenType::keyword_into);
        std::string target_file = m_lexer.consumeToken(TokenType::string_literal).token_string;

        return std::make_unique<SaveTableNode>(source_table, target_file);
    }

    // Parses `drop [table] <table identifier>` and returns a DropTableNode
    // for the named table.
    std::unique_ptr<Node> Parser::parse_drop_table() {
        m_lexer.skipToken(TokenType::keyword_drop);
        m_lexer.skipTokenOptional(TokenType::keyword_table);	// `table` keyword may be left out

        std::string dropped_table = m_lexer.consumeToken(TokenType::identifier).token_string;
        return std::make_unique<DropTableNode>(dropped_table);
    }

    // Parses `set <name string literal> [=] <value string literal>` and
    // returns a SetNode carrying both strings. Name and value are both read
    // as string literal tokens; the equal sign between them is optional.
    std::unique_ptr<Node> Parser::parse_set() {
        m_lexer.skipToken(TokenType::keyword_set);

        std::string setting_name = m_lexer.consumeToken(TokenType::string_literal).token_string;

        m_lexer.skipTokenOptional(TokenType::equal);

        std::string setting_value = m_lexer.consumeToken(TokenType::string_literal).token_string;

        return std::make_unique<SetNode>(setting_name, setting_value);
    }

    // Parses `show <name string literal>` and returns a ShowNode for that name.
    std::unique_ptr<Node> Parser::parse_show() {
        m_lexer.skipToken(TokenType::keyword_show);

        std::string setting_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
        return std::make_unique<ShowNode>(setting_name);
    }

    std::unique_ptr<Node> Parser::parse_insert_into_table() {
	    std::vector<DatabaseValueNode> database_values{};
	    std::vector<std::unique_ptr<Node>> column_values{};

	    m_lexer.skipToken(TokenType::keyword_insert);
	    m_lexer.skipToken(TokenType::keyword_into);

	    // table name
	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

	    // column names
	    m_lexer.skipToken(TokenType::open_paren);
	    do {
		    database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);

		    m_lexer.skipTokenOptional(TokenType::comma);
	    } while (m_lexer.tokenType() != TokenType::close_paren);
	    m_lexer.skipToken(TokenType::close_paren);

	    m_lexer.skipToken(TokenType::keyword_values);

	    // column values
	    m_lexer.skipToken(TokenType::open_paren);
	    do {
		    auto value = parse_expression();
		    column_values.emplace_back(std::move(value));

		    m_lexer.skipTokenOptional(TokenType::comma);
	    } while (m_lexer.tokenType() != TokenType::close_paren);
	    m_lexer.skipToken(TokenType::close_paren);

	    return std::make_unique<InsertIntoTableNode>(table_name, database_values, std::move(column_values));
    }

    std::unique_ptr<Node> Parser::parse_select_from_table() {
	bool distinct = false;
	auto cols = std::make_unique<std::vector<SelectColNode>>();

	m_lexer.skipToken(TokenType::keyword_select);

	if (m_lexer.tokenType() == TokenType::keyword_distinct) {
		distinct = true;
		m_lexer.skipToken(TokenType::keyword_distinct);
	}

	int i = 1;
	while (m_lexer.tokenType() != TokenType::keyword_from) {
		if (m_lexer.tokenType()==TokenType::multiply) {
			std::string name = m_lexer.consumeToken().token_string;
			auto multiply_char = std::make_unique<DatabaseValueNode>(name);

			cols->push_back(SelectColNode{std::move(multiply_char), "*"});
		} else {
			auto column_value = parse_expression();
			std::string column_alias;

			if (m_lexer.tokenType() == TokenType::keyword_as) {
				m_lexer.skipToken(TokenType::keyword_as);
				column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
			} else {
				if (column_value->node_type == NodeType::database_value) {
					column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
				} else {
					column_alias = "c" + std::to_string(i);
					i++;
				}
			}

			cols->push_back(SelectColNode{std::move(column_value), column_alias});
		}


		m_lexer.skipTokenOptional(TokenType::comma);
	}

	m_lexer.skipToken(TokenType::keyword_from);

	std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

	std::unique_ptr<Node> where_node = parse_where_clause();

	std::vector<ColOrderNode> orderby_node = parse_order_by_clause();

	OffsetLimitNode offsetlimit_node = parse_offset_limit_clause();


	return std::make_unique<SelectFromTableNode>(table_name, std::move(cols), std::move(where_node), orderby_node, offsetlimit_node, distinct);
    }

    // Parses `delete from <table identifier> [where ...]` and returns a
    // DeleteFromTableNode holding the table name and the where condition
    // (a TrueNode when no where clause is given).
    std::unique_ptr<Node> Parser::parse_delete_from_table() {
        m_lexer.skipToken(TokenType::keyword_delete);
        m_lexer.skipToken(TokenType::keyword_from);

        std::string target_table = m_lexer.consumeToken(TokenType::identifier).token_string;
        std::unique_ptr<Node> condition = parse_where_clause();

        return std::make_unique<DeleteFromTableNode>(target_table, std::move(condition));
    }

    std::unique_ptr<Node> Parser::parse_update_table() {
	    m_lexer.skipToken(TokenType::keyword_update);
	    m_lexer.skipTokenOptional(TokenType::keyword_table);

	    std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;

	    m_lexer.skipToken(TokenType::keyword_set);

	    std::vector<DatabaseValueNode> cols_names;
	    std::vector<std::unique_ptr<Node>> values;

	    do {
		    cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
		    m_lexer.skipToken(TokenType::equal);

		    std::unique_ptr<Node> left = Parser::parse_value();
		    if (Lexer::isArithmeticalOperator(m_lexer.tokenType())) {
			    ArithmeticalOperatorType op = parse_arithmetical_operator();
			    std::unique_ptr<Node> right = Parser::parse_value();

			    values.push_back(std::make_unique<ArithmeticalOperatorNode>(op, std::move(left), std::move(right)));
		    } else {
			    std::unique_ptr<Node> right = std::make_unique<IntValueNode>(0);
			    values.push_back(std::make_unique<ArithmeticalOperatorNode>(ArithmeticalOperatorType::copy_value,
										   std::move(left), std::move(right)));
		    }
		    m_lexer.skipTokenOptional(TokenType::comma);

	    } while (m_lexer.tokenType() != TokenType::keyword_where && m_lexer.tokenType() != TokenType::eof);

	    std::unique_ptr<Node> where_node = parse_where_clause();

	    return std::make_unique<UpdateTableNode>(table_name, cols_names, std::move(values), std::move(where_node));
    }

    // Parses `create index <index name> on <table> ( <column> )` and returns
    // a CreateIndexNode. Exactly one column identifier is read between the
    // parentheses.
    std::unique_ptr<Node> Parser::parse_create_index() {
        m_lexer.skipToken(TokenType::keyword_create);
        m_lexer.skipToken(TokenType::keyword_index);

        // index name
        std::string idx_name = m_lexer.consumeToken(TokenType::identifier).token_string;

        // indexed table
        m_lexer.skipToken(TokenType::keyword_on);
        std::string idx_table = m_lexer.consumeToken(TokenType::identifier).token_string;

        // indexed column
        m_lexer.skipToken(TokenType::open_paren);
        std::string idx_column = m_lexer.consumeToken(TokenType::identifier).token_string;
        m_lexer.skipToken(TokenType::close_paren);

        return std::make_unique<CreateIndexNode>(idx_name, idx_table, idx_column);
    }

    std::vector<ColOrderNode>  Parser::parse_order_by_clause() {
		std::vector<ColOrderNode> order_cols;

		if (m_lexer.tokenType() == TokenType::keyword_order) {
			m_lexer.skipToken(TokenType::keyword_order);
			m_lexer.skipToken(TokenType::keyword_by);

			do {
				bool asc = true;

				auto cspec_token_type = m_lexer.tokenType();
				std::string cspec_token = m_lexer.consumeToken().token_string;

				if (m_lexer.tokenType() == TokenType::keyword_asc) {
					m_lexer.skipToken(TokenType::keyword_asc);
				} else if (m_lexer.tokenType() == TokenType::keyword_desc) {
					m_lexer.skipToken(TokenType::keyword_desc);
					asc = false;
				}

				switch (cspec_token_type) {
					case TokenType::int_number:
						order_cols.emplace_back(std::stoi(cspec_token), asc);
						break;
					case TokenType::identifier:
						order_cols.emplace_back(cspec_token, asc);
						break;
					default:
						throw Exception("order by column can be either column m_index or identifier");
				}

				m_lexer.skipTokenOptional(TokenType::comma);
			} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit);
		}

		return order_cols;
    }

    // Parses the optional `offset <int>` and `limit <int>` clauses, in that
    // order, and returns them as an OffsetLimitNode.
    //
    // Defaults are offset 0 and limit SIZE_MAX when a clause is absent.
    // The numbers are converted with std::stoull rather than std::stoi, so
    // values above INT_MAX no longer fail with std::out_of_range before being
    // widened to size_t.
    OffsetLimitNode Parser::parse_offset_limit_clause() {
        // reads the integer token that follows an `offset` / `limit` keyword
        auto read_row_count = [this]() -> size_t {
            return static_cast<size_t>(std::stoull(m_lexer.consumeToken(TokenType::int_number).token_string));
        };

        size_t offset = 0;
        size_t limit = SIZE_MAX;

        if (m_lexer.tokenType() == TokenType::keyword_offset) {
            m_lexer.skipToken(TokenType::keyword_offset);
            offset = read_row_count();
        }

        if (m_lexer.tokenType() == TokenType::keyword_limit) {
            m_lexer.skipToken(TokenType::keyword_limit);
            limit = read_row_count();
        }

        return OffsetLimitNode{offset, limit};
    }


    std::unique_ptr<Node> Parser::parse_where_clause() {
		if (m_lexer.tokenType() != TokenType::keyword_where) {
			return std::make_unique<TrueNode>();
		}

		m_lexer.skipToken(TokenType::keyword_where);

		std::unique_ptr<Node> left = parse_expression();
		do {
			left = parse_expression(std::move(left));
		} while (m_lexer.tokenType() != TokenType::eof && m_lexer.tokenType() != TokenType::keyword_order && m_lexer.tokenType() != TokenType::keyword_offset && m_lexer.tokenType() != TokenType::keyword_limit && m_lexer.tokenType() != TokenType::semicolon);

		return left;
    }

    // Parses one operand with parse_value() and lets the one-argument
    // overload combine it with whatever operator follows.
    std::unique_ptr<Node> Parser::parse_expression() {
        return parse_expression(parse_value());
    }

    // Extends the already parsed operand `left` by at most one operation,
    // chosen by the current token:
    //   relational operator  -> RelationalOperatorNode(left, <value>)
    //   logical operator     -> LogicalOperatorNode(left, <expression>)
    //   arithmetic operator  -> ArithmeticalOperatorNode(left, <value>)
    //   start of an operand  -> the newly parsed value (`left` is dropped)
    //   anything else        -> `left` unchanged, no token consumed
    //
    // NOTE(review): in the "start of an operand" case the previous `left` is
    // discarded - confirm this is intended.
    std::unique_ptr<Node> Parser::parse_expression(std::unique_ptr<Node> left) {
        const auto type = m_lexer.tokenType();

        if (Lexer::isRelationalOperator(type)) {
            const auto relation = parse_relational_operator();
            auto rhs = parse_value();
            return std::make_unique<RelationalOperatorNode>(relation, std::move(left), std::move(rhs));
        }

        if (Lexer::isLogicalOperator(type)) {
            const auto connective = parse_logical_operator();
            // the right side of and/or is a whole expression, not just a value
            auto rhs = parse_expression();
            return std::make_unique<LogicalOperatorNode>(connective, std::move(left), std::move(rhs));
        }

        if (Lexer::isArithmeticalOperator(type)) {
            const auto arithmetic = parse_arithmetical_operator();
            auto rhs = parse_value();
            return std::make_unique<ArithmeticalOperatorNode>(arithmetic, std::move(left), std::move(rhs));
        }

        switch (type) {
            case TokenType::int_number:
            case TokenType::double_number:
            case TokenType::string_literal:
            case TokenType::identifier:
            case TokenType::keyword_null:
            case TokenType::open_paren:
                return parse_value();
            default:
                return left;
        }
    }

    std::unique_ptr<Node> Parser::parse_value() {
	    auto token_typcol = m_lexer.tokenType();

	    // parenthesised expression
	    if (token_typcol == TokenType::open_paren) {
            m_lexer.skipToken(TokenType::open_paren);
		    auto left = parse_expression();
		    do {
			left = parse_expression(std::move(left));
		    } while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof);

		    m_lexer.skipToken(TokenType::close_paren);
		    return left;
	    }

	    // function call
	    if (token_typcol == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
			std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
			std::vector<std::unique_ptr<Node>> pars;

			m_lexer.skipToken(TokenType::open_paren);
			while (m_lexer.tokenType() != TokenType::close_paren && m_lexer.tokenType() != TokenType::eof) {
				pars.push_back(parse_expression());
				m_lexer.skipTokenOptional(TokenType::comma);
			}
			m_lexer.skipToken(TokenType::close_paren);
			return std::make_unique<FunctionNode>(function_name, std::move(pars));
	    }

	    // numbers and strings
	    std::string tokenString = m_lexer.consumeToken().token_string;

	    if (token_typcol == TokenType::int_number)
			return std::make_unique<IntValueNode>(std::stoi(tokenString));
	    if (token_typcol == TokenType::double_number)
			return std::make_unique<DoubleValueNode>(std::stod(tokenString));
	    if (token_typcol == TokenType::string_literal)
			return std::make_unique<StringValueNode>(tokenString);

	    // db column
	    if (token_typcol == TokenType::identifier)
			return std::make_unique<DatabaseValueNode>(tokenString);

	    // null
	    if (token_typcol == TokenType::keyword_null)
			return std::make_unique<NullValueNode>();

	    // true / false
	    if (token_typcol == TokenType::keyword_true || token_typcol == TokenType::keyword_false)
			return std::make_unique<BooleanValueNode>(token_typcol == TokenType::keyword_true);

	    // token * for count(*)
	    if (token_typcol == TokenType::multiply)
		    return std::make_unique<StringValueNode>(tokenString);

	    throw Exception("Unknown operand node " + tokenString);
    }

    // Consumes the current token and translates it into a
    // RelationalOperatorType. `is` directly followed by `not` is consumed as
    // a pair and yields is_not.
    //
    // Throws Exception (message names the token) when the consumed token is
    // not a relational operator.
    RelationalOperatorType Parser::parse_relational_operator() {
        const auto token = m_lexer.consumeToken();

        switch (token.type) {
            case TokenType::equal:         return RelationalOperatorType::equal;
            case TokenType::not_equal:     return RelationalOperatorType::not_equal;
            case TokenType::greater:       return RelationalOperatorType::greater;
            case TokenType::greater_equal: return RelationalOperatorType::greater_equal;
            case TokenType::lesser:        return RelationalOperatorType::lesser;
            case TokenType::lesser_equal:  return RelationalOperatorType::lesser_equal;

            case TokenType::is:
                // plain `is` unless the next token is `not`
                if (m_lexer.tokenType() != TokenType::keyword_not)
                    return RelationalOperatorType::is;
                m_lexer.skipToken(TokenType::keyword_not);
                return RelationalOperatorType::is_not;

            default:
                throw Exception("Unknown relational operator " + token.token_string);
        }
    }

    // Consumes the current token and translates it into a LogicalOperatorType.
    // Throws Exception when the consumed token is neither logical_and nor
    // logical_or.
    LogicalOperatorType Parser::parse_logical_operator() {
        const auto token = m_lexer.consumeToken();

        if (token.type == TokenType::logical_and)
            return LogicalOperatorType::and_operator;
        if (token.type == TokenType::logical_or)
            return LogicalOperatorType::or_operator;

        throw Exception("Unknown logical operator");
    }

    // Consumes the current token and translates it into an
    // ArithmeticalOperatorType (plus, minus, multiply or divide).
    // Throws Exception when the consumed token is none of these.
    ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
        const auto token = m_lexer.consumeToken();

        if (token.type == TokenType::plus)
            return ArithmeticalOperatorType::plus_operator;
        if (token.type == TokenType::minus)
            return ArithmeticalOperatorType::minus_operator;
        if (token.type == TokenType::multiply)
            return ArithmeticalOperatorType::multiply_operator;
        if (token.type == TokenType::divide)
            return ArithmeticalOperatorType::divide_operator;

        throw Exception("Unknown arithmetical operator");
    }

}	// namespace