#pragma once

// NOTE(review): the original had four #include directives whose header names
// were lost. The three below are the ones the declarations in this header
// require (std::regex, std::string, std::vector); confirm the fourth against
// the implementation file.
#include <regex>
#include <string>
#include <vector>

namespace usql {

/// Kinds of token the lexer can produce.
///
/// The enumerator order is preserved exactly; code outside this header may
/// depend on the underlying values.
enum class TokenType {
    undef,
    identifier,

    // arithmetic operator symbols
    plus,
    minus,
    multiply,
    divide,

    // comparison operator symbols
    equal,
    not_equal,
    greater,
    greater_equal,
    lesser,
    lesser_equal,

    // keywords
    keyword_as,
    keyword_create,
    keyword_drop,
    keyword_table,
    keyword_where,
    keyword_order,
    keyword_by,
    keyword_offset,
    keyword_limit,
    keyword_asc,
    keyword_desc,
    keyword_delete,
    keyword_update,
    keyword_load,
    keyword_save,
    keyword_from,
    keyword_insert,
    keyword_into,
    keyword_values,
    keyword_select,
    keyword_set,
    keyword_copy,
    keyword_not,
    keyword_null,
    keyword_integer,
    keyword_float,
    keyword_varchar,
    keyword_date,
    keyword_bool,
    keyword_distinct,
    keyword_show,

    // literals
    int_number,
    double_number,
    string_literal,

    // punctuation and logical connectives
    open_paren,
    close_paren,
    logical_and,
    logical_or,
    pipe,
    semicolon,
    comma,

    // layout / bookkeeping
    newline,
    comment,
    eof
};

/// A single lexed token: its text paired with its classification.
struct Token {
    std::string token_string;  ///< Text associated with the token.
    TokenType type;            ///< Classification of the token.

    /// Builds a token from its text and type (defined in the implementation file).
    Token(const std::string &token_str, TokenType typ);
};

/// Tokenizer interface: holds a token sequence plus a cursor (`m_index`) into it.
///
/// Only declarations are visible in this header; the notes below marked
/// "presumably" are inferred from names and must be confirmed against the
/// implementation file.
class Lexer {
public:
    Lexer();

    /// Presumably tokenizes `code` into the internal token list — confirm.
    void parse(const std::string &code);

    /// Presumably dumps the token list for debugging — confirm output target.
    void debugTokens();

    // Cursor-based access to the token list.
    // NOTE(review): behaviour at end of input and on a type mismatch in
    // skipToken() is not visible from this header.
    Token currentToken();
    Token consumeCurrentToken();
    void nextToken();
    void skipToken(TokenType type);
    void skipTokenOptional(TokenType type);
    TokenType tokenType();
    TokenType nextTokenType();

    // Stateless classification helpers over TokenType.
    static bool isRelationalOperator(TokenType token_type);
    static bool isLogicalOperator(TokenType token_type);
    static bool isArithmeticalOperator(TokenType token_type);

private:
    /// Maps a raw token string to its TokenType.
    TokenType type(const std::string &token);

    static std::string stringLiteral(std::string token);
    static std::string typeToString(TokenType token_type);

private:
    std::string m_code_str;
    // Element type restored: deduction is not permitted for a non-static data
    // member, and the accessors above return Token by value.
    std::vector<Token> m_tokens;
    int m_index = 0;

    // Patterns are assigned outside this header (presumably in the constructor).
    std::regex k_words_regex;
    std::regex k_int_regex;
    std::regex k_int_underscored_regex;
    std::regex k_double_regex;
    std::regex k_identifier_regex;
};

}  // namespace usql