From 4e54c6d1348c12a4b726224d624cff291645ac38 Mon Sep 17 00:00:00 2001 From: VaclavT Date: Tue, 31 Aug 2021 18:53:49 +0200 Subject: [PATCH] more strict parsing --- lexer.cpp | 18 ++++++------ lexer.h | 4 ++- parser.cpp | 80 ++++++++++++++++++------------------------------------ 3 files changed, 40 insertions(+), 62 deletions(-) diff --git a/lexer.cpp b/lexer.cpp index 0316c15..66e4bc8 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -63,12 +63,18 @@ namespace usql { Token Lexer::currentToken() { return m_tokens[m_index]; } - Token Lexer::consumeCurrentToken() { + Token Lexer::consumeToken() { int i = m_index; nextToken(); return m_tokens[i]; } + Token Lexer::consumeToken(TokenType type) { + int i = m_index; + skipToken(type); + return m_tokens[i]; + } + void Lexer::nextToken() { if (m_index < m_tokens.size()) { m_index++; @@ -79,8 +85,7 @@ namespace usql { if (tokenType() == type) { nextToken(); } else { - throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " + - typeToString(type)); + throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type)); } } @@ -215,8 +220,8 @@ namespace usql { (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) return TokenType::comment; - // if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') - // return TokenType::string_literal; + if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') + return TokenType::string_literal; if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') return TokenType::string_literal; @@ -233,9 +238,6 @@ namespace usql { if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; - if (m_index + 1 >= m_tokens.size()) - return TokenType::eof; - return TokenType::undef; } diff --git a/lexer.h b/lexer.h index 2552ae2..2281fdf 100644 --- a/lexer.h +++ b/lexer.h @@ -83,7 +83,9 @@ namespace usql { Token currentToken(); - Token consumeCurrentToken(); + Token consumeToken(); + + Token consumeToken(TokenType type); void nextToken(); diff --git a/parser.cpp b/parser.cpp index c98b772..b5951b6 100644 --- a/parser.cpp +++ b/parser.cpp @@ -47,10 +47,7 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_create); m_lexer.skipToken(TokenType::keyword_table); - if (m_lexer.tokenType() != TokenType::identifier) - throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string); - - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; // create as select if (m_lexer.tokenType() == TokenType::keyword_as) { @@ -72,7 +69,7 @@ namespace usql { if (m_lexer.tokenType() != TokenType::identifier) { throw Exception("syntax error, expected identifier"); } - database_value = m_lexer.consumeCurrentToken().token_string; + database_value = m_lexer.consumeToken().token_string; // column type and optionally len if (m_lexer.tokenType() == TokenType::keyword_integer) { @@ -85,11 +82,7 @@ namespace usql { column_type = ColumnType::varchar_type; m_lexer.nextToken(); m_lexer.skipToken(TokenType::open_paren); - if (m_lexer.tokenType() == TokenType::int_number) { - column_len = std::stoi(m_lexer.consumeCurrentToken().token_string); - } else { - throw Exception("syntax error, expected int number"); - } + column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string); m_lexer.skipToken(TokenType::close_paren); } else if (m_lexer.tokenType() == TokenType::keyword_date) { column_type = ColumnType::date_type; @@ -125,11 +118,11 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_load); m_lexer.skipTokenOptional(TokenType::keyword_into); - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; m_lexer.skipTokenOptional(TokenType::keyword_from); - std::string file_name = m_lexer.consumeCurrentToken().token_string; + std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string; return std::make_unique(table_name, file_name); } @@ -138,11 +131,11 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_save); m_lexer.skipTokenOptional(TokenType::keyword_table); - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; m_lexer.skipTokenOptional(TokenType::keyword_into); - std::string file_name = m_lexer.consumeCurrentToken().token_string; + std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string; return std::make_unique(table_name, file_name); } @@ -151,7 +144,7 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_drop); m_lexer.skipTokenOptional(TokenType::keyword_table); - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; return std::make_unique(table_name); } @@ -159,13 +152,9 @@ namespace usql { std::unique_ptr Parser::parse_set() { m_lexer.skipToken(TokenType::keyword_set); - if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set name"); - std::string name = m_lexer.consumeCurrentToken().token_string; - + std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string; m_lexer.skipTokenOptional(TokenType::equal); - - if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set value"); - std::string value = m_lexer.consumeCurrentToken().token_string; + std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string; return std::make_unique(name, value); } @@ -173,8 +162,7 @@ namespace usql { std::unique_ptr Parser::parse_show() { m_lexer.skipToken(TokenType::keyword_show); - if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal on show parameter name"); - std::string name = m_lexer.consumeCurrentToken().token_string; + std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string; return std::make_unique(name); } @@ -187,18 +175,12 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_into); // table name - if (m_lexer.tokenType() != TokenType::identifier) - throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string); - - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; // column names m_lexer.skipToken(TokenType::open_paren); do { - if (m_lexer.tokenType() != TokenType::identifier) - throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string); - - database_values.emplace_back(m_lexer.consumeCurrentToken().token_string); + database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string); m_lexer.skipTokenOptional(TokenType::comma); } while (m_lexer.tokenType() != TokenType::close_paren); @@ -233,7 +215,7 @@ namespace usql { int i = 1; while (m_lexer.tokenType() != TokenType::keyword_from) { if (m_lexer.tokenType()==TokenType::multiply) { - std::string name = m_lexer.consumeCurrentToken().token_string; + std::string name = m_lexer.consumeToken().token_string; auto multiply_char = std::make_unique(name); cols->push_back(SelectColNode{std::move(multiply_char), "*"}); @@ -243,7 +225,7 @@ namespace usql { if (m_lexer.tokenType() == TokenType::keyword_as) { m_lexer.skipToken(TokenType::keyword_as); - column_alias = m_lexer.consumeCurrentToken().token_string; + column_alias = m_lexer.consumeToken(TokenType::identifier).token_string; } else { if (column_value->node_type == NodeType::database_value) { column_alias = ((DatabaseValueNode*) column_value.get())->col_name; @@ -262,7 +244,7 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_from); - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; std::unique_ptr where_node = parse_where_clause(); @@ -278,7 +260,7 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_delete); m_lexer.skipToken(TokenType::keyword_from); - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; std::unique_ptr where_node = parse_where_clause(); @@ -289,7 +271,7 @@ namespace usql { m_lexer.skipToken(TokenType::keyword_update); m_lexer.skipTokenOptional(TokenType::keyword_table); - std::string table_name = m_lexer.consumeCurrentToken().token_string; + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; m_lexer.skipToken(TokenType::keyword_set); @@ -297,7 +279,7 @@ namespace usql { std::vector> values; do { - cols_names.emplace_back(m_lexer.consumeCurrentToken().token_string); + cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string); m_lexer.skipToken(TokenType::equal); std::unique_ptr left = Parser::parse_value(); @@ -333,7 +315,7 @@ namespace usql { bool asc = true; auto token_type = m_lexer.tokenType(); - std::string tokenString = m_lexer.consumeCurrentToken().token_string; + std::string tokenString = m_lexer.consumeToken().token_string; switch (token_type) { case TokenType::int_number: col_index = std::stoi(tokenString); @@ -365,20 +347,12 @@ namespace usql { if (m_lexer.tokenType() == TokenType::keyword_offset) { m_lexer.skipToken(TokenType::keyword_offset); - - if (m_lexer.tokenType() != TokenType::int_number) - throw Exception("expecting integer in offset clause"); - - offset = std::stoi(m_lexer.consumeCurrentToken().token_string); + offset = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string); } if (m_lexer.tokenType() == TokenType::keyword_limit) { m_lexer.skipToken(TokenType::keyword_limit); - - if (m_lexer.tokenType() != TokenType::int_number) - throw Exception("expecting integer in limit clause"); - - limit = std::stoi(m_lexer.consumeCurrentToken().token_string); + limit = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string); } return OffsetLimitNode{offset, limit}; @@ -446,7 +420,7 @@ namespace usql { // function call if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) { - std::string function_name = m_lexer.consumeCurrentToken().token_string; + std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string; std::vector> pars; m_lexer.skipToken(TokenType::open_paren); @@ -459,7 +433,7 @@ namespace usql { } // numbers and strings - std::string tokenString = m_lexer.consumeCurrentToken().token_string; + std::string tokenString = m_lexer.consumeToken().token_string; if (token_type == TokenType::int_number) return std::make_unique(std::stoi(tokenString)); @@ -480,7 +454,7 @@ namespace usql { } RelationalOperatorType Parser::parse_relational_operator() { - auto op = m_lexer.consumeCurrentToken(); + auto op = m_lexer.consumeToken(); switch (op.type) { case TokenType::equal: return RelationalOperatorType::equal; @@ -500,7 +474,7 @@ namespace usql { } LogicalOperatorType Parser::parse_logical_operator() { - auto op = m_lexer.consumeCurrentToken(); + auto op = m_lexer.consumeToken(); switch (op.type) { case TokenType::logical_and: return LogicalOperatorType::and_operator; @@ -512,7 +486,7 @@ namespace usql { } ArithmeticalOperatorType Parser::parse_arithmetical_operator() { - auto op = m_lexer.consumeCurrentToken(); + auto op = m_lexer.consumeToken(); switch (op.type) { case TokenType::plus: return ArithmeticalOperatorType::plus_operator;