// usql/lexer.h
//
// 131 lines
// 2.0 KiB
// C++

#pragma once
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
namespace usql {
// Kinds of token used by the Token struct and the Lexer interface declared in
// this header.
//
// No enumerator has an explicit value, so they are numbered consecutively in
// declaration order starting with undef == 0. Inserting or reordering entries
// therefore changes the underlying values.
//
// NOTE(review): the group comments below are inferred from the enumerator
// names only. The lexemes that map to each value are defined outside this
// header (presumably in the lexer implementation) -- confirm there.
enum class TokenType {
// Presumably the "not recognised / not classified" value -- TODO confirm.
undef,
identifier,
// Arithmetic operators (by name).
plus,
minus,
multiply,
divide,
// Comparison operators (by name).
equal,
not_equal,
greater,
greater_equal,
lesser,
lesser_equal,
is,
// Keywords (by name; the exact spelling and case rules are not visible here).
keyword_as,
keyword_create,
keyword_drop,
keyword_table,
keyword_index,
keyword_on,
keyword_where,
keyword_order,
keyword_by,
keyword_offset,
keyword_limit,
keyword_asc,
keyword_desc,
keyword_delete,
keyword_update,
keyword_load,
keyword_save,
keyword_from,
keyword_insert,
keyword_into,
keyword_values,
keyword_select,
keyword_set,
keyword_copy,
keyword_not,
keyword_null,
keyword_integer,
keyword_float,
keyword_varchar,
keyword_date,
keyword_bool,
keyword_true,
keyword_false,
keyword_distinct,
keyword_show,
// Literals (by name).
int_number,
double_number,
string_literal,
// Grouping, logical operators and punctuation (by name).
open_paren,
close_paren,
logical_and,
logical_or,
pipe,
semicolon,
comma,
newline,
comment,
// Presumably marks the end of the token stream -- TODO confirm.
eof
};
// A single token: a text string paired with the TokenType that classifies it.
struct Token {
// Text carried by the token.
// NOTE(review): whether this is the raw lexeme or a processed form (e.g. a
// string literal with its quotes removed) is decided by the implementation,
// which is not visible in this header -- confirm.
std::string token_string;
// Kind of this token.
TokenType type;
// Builds a token from a text string and a type. Only declared here; the
// definition lives elsewhere and presumably stores both arguments in the
// members above.
Token(const std::string &token_str, TokenType typ);
};
// Lexer interface: holds a source string, a vector of Token objects and an
// index into that vector (see the data members at the bottom of the class).
//
// Every member function is only declared here; the definitions live in
// another translation unit. The per-method notes below that go beyond the
// signature are therefore assumptions drawn from the names and are marked as
// such -- confirm against the implementation.
class Lexer {
public:
// Default constructor. NOTE(review): presumably initialises the std::regex
// members declared below -- confirm.
Lexer();
// Takes the source text by const reference. Presumably tokenizes it into
// m_tokens -- TODO confirm, including whether earlier state is reset.
void parse(const std::string &code);
// Presumably prints the token list for debugging -- TODO confirm the output
// stream and format.
void debugTokens();
// Returns a Token by value (a copy). Presumably the token at m_index.
Token currentToken();
// Returns a Token by value. Presumably returns the current token and then
// advances -- TODO confirm.
Token consumeToken();
// Overload taking a TokenType. NOTE(review): presumably checks the current
// token against `type` before consuming; how a mismatch is reported is not
// visible here -- confirm.
Token consumeToken(TokenType type);
// Presumably advances to the following token -- TODO confirm.
void nextToken();
// Presumably skips a token that must be of kind `type` -- TODO confirm what
// happens on a mismatch.
void skipToken(TokenType type);
// Presumably skips the current token only when it is of kind `type` --
// TODO confirm.
void skipTokenOptional(TokenType type);
// Returns a TokenType; presumably that of the current token.
TokenType tokenType();
// Returns a TokenType; presumably that of the token after the current one,
// without advancing -- TODO confirm end-of-input behaviour.
TokenType nextTokenType();
// Static classification predicates over a TokenType. Which enumerators each
// one accepts is defined in the implementation.
static bool isRelationalOperator(TokenType token_type);
static bool isLogicalOperator(TokenType token_type);
static bool isArithmeticalOperator(TokenType token_type);
private:
// Maps a token string to a TokenType. Non-static, so it can read instance
// state (presumably the regex members below).
TokenType type(const std::string &token);
// Takes its argument by value and returns a std::string. NOTE(review):
// presumably converts a quoted string-literal lexeme into its value (quote
// removal / escape handling) -- confirm.
static std::string stringLiteral(std::string token);
// Returns a textual name for a TokenType (presumably for diagnostics).
static std::string typeToString(TokenType token_type);
private:
// Source text. Presumably the string last passed to parse().
std::string m_code_str;
// Token storage.
std::vector<Token> m_tokens;
// Index into m_tokens; default-initialised to 0 here.
size_t m_index = 0;
// Regular expressions held per instance. Despite the k_ prefix these are
// ordinary non-const, non-static data members; their patterns are assigned
// outside this header.
std::regex k_words_regex;
std::regex k_int_regex;
std::regex k_int_underscored_regex;
std::regex k_double_regex;
std::regex k_identifier_regex;
};
} // namespace