a bit of further work
This commit is contained in:
39
lexer.cpp
39
lexer.cpp
@@ -11,54 +11,53 @@ Token::Token(const std::string &token_str, TokenType typ) {
|
||||
|
||||
void Lexer::parse(const std::string &code) {
|
||||
// TODO handle empty code
|
||||
tokens.clear();
|
||||
m_tokens.clear();
|
||||
|
||||
// PERF something like this to preallocate ??
|
||||
if (code.size() > 100) {
|
||||
tokens.reserve(code.size() / 10);
|
||||
m_tokens.reserve(code.size() / 10);
|
||||
}
|
||||
code_str = code;
|
||||
if (!code_str.empty() && code_str.back() != '\n') {
|
||||
code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment
|
||||
m_code_str = code;
|
||||
if (!m_code_str.empty() && m_code_str.back() != '\n') {
|
||||
m_code_str.append("\n"); // TODO tempo solution to prevent possible situation when last line is a comment
|
||||
}
|
||||
|
||||
// TODO make it constant
|
||||
std::regex words_regex("[0-9]+\\.[0-9]+|[0-9][0-9_]+[0-9]|[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/"
|
||||
",;:\?]|==|>=|<=|~=|>|<|=|;|~|\\|\\||&&|\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n");
|
||||
|
||||
auto words_begin = std::sregex_iterator(code_str.begin(), code_str.end(), words_regex);
|
||||
auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), words_regex);
|
||||
auto words_end = std::sregex_iterator();
|
||||
|
||||
for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
|
||||
std::smatch match = *i;
|
||||
std::string match_str = match.str();
|
||||
TokenType token_type = type(match_str);
|
||||
if (token_type == TokenType::string_literal) {
|
||||
match_str = stringLiteral(match_str);
|
||||
} else {
|
||||
tokens.push_back(Token{match_str, token_type});
|
||||
}
|
||||
if (token_type == TokenType::string_literal)
|
||||
match_str = stringLiteral(match_str);
|
||||
|
||||
m_tokens.push_back(Token{match_str, token_type});
|
||||
}
|
||||
|
||||
// DEBUG IT
|
||||
// debugTokens();
|
||||
|
||||
index = 0;
|
||||
m_index = 0;
|
||||
}
|
||||
|
||||
void Lexer::debugTokens() {
|
||||
int i = 0;
|
||||
for (std::vector<Token>::iterator it = tokens.begin(); it != tokens.end(); ++it) {
|
||||
for (std::vector<Token>::iterator it = m_tokens.begin(); it != m_tokens.end(); ++it) {
|
||||
std::cerr << i << "\t" << it->token_string << std::endl;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
Token Lexer::currentToken() { return tokens[index]; }
|
||||
// Returns a copy of the token stored at the cursor position (m_index).
// The index is not range-checked here.
Token Lexer::currentToken() {
    const Token &at_cursor = m_tokens[m_index];
    return at_cursor;
}
|
||||
|
||||
void Lexer::nextToken() {
|
||||
if (index < tokens.size()) {
|
||||
index++;
|
||||
if (m_index < m_tokens.size()) {
|
||||
m_index++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,11 +75,11 @@ void Lexer::skipTokenOptional(TokenType type) {
|
||||
}
|
||||
}
|
||||
|
||||
TokenType Lexer::tokenType() { return index < tokens.size() ? currentToken().type : TokenType::eof; }
|
||||
// Type of the token under the cursor, or TokenType::eof once the cursor
// is no longer below the number of stored tokens.
TokenType Lexer::tokenType() {
    if (m_index >= m_tokens.size())
        return TokenType::eof;
    return currentToken().type;
}
|
||||
|
||||
TokenType Lexer::nextTokenType() { return index < tokens.size() - 1 ? tokens[index + 1].type : TokenType::eof; }
|
||||
// Type of the token immediately after the cursor, or TokenType::eof when there is none.
// Written as `m_index + 1 < size()` because size() is unsigned: `size() - 1` wraps
// around on an empty token list, which would let the lookup index out of bounds.
TokenType Lexer::nextTokenType() { return m_index + 1 < m_tokens.size() ? m_tokens[m_index + 1].type : TokenType::eof; }
|
||||
|
||||
TokenType Lexer::prevTokenType() { return index > 0 ? tokens[index - 1].type : TokenType::undef; }
|
||||
// Type of the token just before the cursor, or TokenType::undef when
// m_index is not greater than zero (nothing precedes the cursor).
TokenType Lexer::prevTokenType() {
    if (m_index > 0)
        return m_tokens[m_index - 1].type;
    return TokenType::undef;
}
|
||||
|
||||
bool Lexer::isRelationalOperator(TokenType token_type) {
|
||||
return (token_type == TokenType::equal || token_type == TokenType::not_equal || token_type == TokenType::greater || token_type == TokenType::greater_equal ||
|
||||
@@ -211,7 +210,7 @@ TokenType Lexer::type(const std::string &token) {
|
||||
if (std::regex_match(token, identifier_regex))
|
||||
return TokenType::identifier;
|
||||
|
||||
if (index + 1 >= tokens.size())
|
||||
if (m_index + 1 >= m_tokens.size())
|
||||
return TokenType::eof;
|
||||
|
||||
return TokenType::undef;
|
||||
|
||||
Reference in New Issue
Block a user