more strict parsing

This commit is contained in:
VaclavT 2021-08-31 18:53:49 +02:00
parent be89b55b17
commit 4e54c6d134
3 changed files with 40 additions and 62 deletions

View File

@ -63,12 +63,18 @@ namespace usql {
Token Lexer::currentToken() { return m_tokens[m_index]; } Token Lexer::currentToken() { return m_tokens[m_index]; }
Token Lexer::consumeCurrentToken() { Token Lexer::consumeToken() {
int i = m_index; int i = m_index;
nextToken(); nextToken();
return m_tokens[i]; return m_tokens[i];
} }
Token Lexer::consumeToken(TokenType type) {
int i = m_index;
skipToken(type);
return m_tokens[i];
}
void Lexer::nextToken() { void Lexer::nextToken() {
if (m_index < m_tokens.size()) { if (m_index < m_tokens.size()) {
m_index++; m_index++;
@ -79,8 +85,7 @@ namespace usql {
if (tokenType() == type) { if (tokenType() == type) {
nextToken(); nextToken();
} else { } else {
throw Exception("ERROR unexpected token " + consumeCurrentToken().token_string + ", instead of " + throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type));
typeToString(type));
} }
} }
@ -215,8 +220,8 @@ namespace usql {
(token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
return TokenType::comment; return TokenType::comment;
// if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
// return TokenType::string_literal; return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'')
return TokenType::string_literal; return TokenType::string_literal;
@ -233,9 +238,6 @@ namespace usql {
if (std::regex_match(token, k_identifier_regex)) if (std::regex_match(token, k_identifier_regex))
return TokenType::identifier; return TokenType::identifier;
if (m_index + 1 >= m_tokens.size())
return TokenType::eof;
return TokenType::undef; return TokenType::undef;
} }

View File

@ -83,7 +83,9 @@ namespace usql {
Token currentToken(); Token currentToken();
Token consumeCurrentToken(); Token consumeToken();
Token consumeToken(TokenType type);
void nextToken(); void nextToken();

View File

@ -47,10 +47,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_create); m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_table); m_lexer.skipToken(TokenType::keyword_table);
if (m_lexer.tokenType() != TokenType::identifier) std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// create as select // create as select
if (m_lexer.tokenType() == TokenType::keyword_as) { if (m_lexer.tokenType() == TokenType::keyword_as) {
@ -72,7 +69,7 @@ namespace usql {
if (m_lexer.tokenType() != TokenType::identifier) { if (m_lexer.tokenType() != TokenType::identifier) {
throw Exception("syntax error, expected identifier"); throw Exception("syntax error, expected identifier");
} }
database_value = m_lexer.consumeCurrentToken().token_string; database_value = m_lexer.consumeToken().token_string;
// column type and optionally len // column type and optionally len
if (m_lexer.tokenType() == TokenType::keyword_integer) { if (m_lexer.tokenType() == TokenType::keyword_integer) {
@ -85,11 +82,7 @@ namespace usql {
column_type = ColumnType::varchar_type; column_type = ColumnType::varchar_type;
m_lexer.nextToken(); m_lexer.nextToken();
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
if (m_lexer.tokenType() == TokenType::int_number) { column_len = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
column_len = std::stoi(m_lexer.consumeCurrentToken().token_string);
} else {
throw Exception("syntax error, expected int number");
}
m_lexer.skipToken(TokenType::close_paren); m_lexer.skipToken(TokenType::close_paren);
} else if (m_lexer.tokenType() == TokenType::keyword_date) { } else if (m_lexer.tokenType() == TokenType::keyword_date) {
column_type = ColumnType::date_type; column_type = ColumnType::date_type;
@ -125,11 +118,11 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_load); m_lexer.skipToken(TokenType::keyword_load);
m_lexer.skipTokenOptional(TokenType::keyword_into); m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipTokenOptional(TokenType::keyword_from); m_lexer.skipTokenOptional(TokenType::keyword_from);
std::string file_name = m_lexer.consumeCurrentToken().token_string; std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<LoadIntoTableNode>(table_name, file_name); return std::make_unique<LoadIntoTableNode>(table_name, file_name);
} }
@ -138,11 +131,11 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_save); m_lexer.skipToken(TokenType::keyword_save);
m_lexer.skipTokenOptional(TokenType::keyword_table); m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipTokenOptional(TokenType::keyword_into); m_lexer.skipTokenOptional(TokenType::keyword_into);
std::string file_name = m_lexer.consumeCurrentToken().token_string; std::string file_name = m_lexer.consumeToken(TokenType::string_literal).token_string;
return std::make_unique<SaveTableNode>(table_name, file_name); return std::make_unique<SaveTableNode>(table_name, file_name);
} }
@ -151,7 +144,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_drop); m_lexer.skipToken(TokenType::keyword_drop);
m_lexer.skipTokenOptional(TokenType::keyword_table); m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
return std::make_unique<DropTableNode>(table_name); return std::make_unique<DropTableNode>(table_name);
} }
@ -159,13 +152,9 @@ namespace usql {
std::unique_ptr<Node> Parser::parse_set() { std::unique_ptr<Node> Parser::parse_set() {
m_lexer.skipToken(TokenType::keyword_set); m_lexer.skipToken(TokenType::keyword_set);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set name"); std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
std::string name = m_lexer.consumeCurrentToken().token_string;
m_lexer.skipTokenOptional(TokenType::equal); m_lexer.skipTokenOptional(TokenType::equal);
std::string value = m_lexer.consumeToken(TokenType::string_literal).token_string;
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal in set value");
std::string value = m_lexer.consumeCurrentToken().token_string;
return std::make_unique<SetNode>(name, value); return std::make_unique<SetNode>(name, value);
} }
@ -173,8 +162,7 @@ namespace usql {
std::unique_ptr<Node> Parser::parse_show() { std::unique_ptr<Node> Parser::parse_show() {
m_lexer.skipToken(TokenType::keyword_show); m_lexer.skipToken(TokenType::keyword_show);
if (m_lexer.currentToken().type!=TokenType::string_literal) throw Exception("Expecting literal on show parameter name"); std::string name = m_lexer.consumeToken(TokenType::string_literal).token_string;
std::string name = m_lexer.consumeCurrentToken().token_string;
return std::make_unique<ShowNode>(name); return std::make_unique<ShowNode>(name);
} }
@ -187,18 +175,12 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_into); m_lexer.skipToken(TokenType::keyword_into);
// table name // table name
if (m_lexer.tokenType() != TokenType::identifier) std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
std::string table_name = m_lexer.consumeCurrentToken().token_string;
// column names // column names
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
do { do {
if (m_lexer.tokenType() != TokenType::identifier) database_values.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
throw Exception("syntax error, expecting identifier but found " + m_lexer.currentToken().token_string);
database_values.emplace_back(m_lexer.consumeCurrentToken().token_string);
m_lexer.skipTokenOptional(TokenType::comma); m_lexer.skipTokenOptional(TokenType::comma);
} while (m_lexer.tokenType() != TokenType::close_paren); } while (m_lexer.tokenType() != TokenType::close_paren);
@ -233,7 +215,7 @@ namespace usql {
int i = 1; int i = 1;
while (m_lexer.tokenType() != TokenType::keyword_from) { while (m_lexer.tokenType() != TokenType::keyword_from) {
if (m_lexer.tokenType()==TokenType::multiply) { if (m_lexer.tokenType()==TokenType::multiply) {
std::string name = m_lexer.consumeCurrentToken().token_string; std::string name = m_lexer.consumeToken().token_string;
auto multiply_char = std::make_unique<DatabaseValueNode>(name); auto multiply_char = std::make_unique<DatabaseValueNode>(name);
cols->push_back(SelectColNode{std::move(multiply_char), "*"}); cols->push_back(SelectColNode{std::move(multiply_char), "*"});
@ -243,7 +225,7 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_as) { if (m_lexer.tokenType() == TokenType::keyword_as) {
m_lexer.skipToken(TokenType::keyword_as); m_lexer.skipToken(TokenType::keyword_as);
column_alias = m_lexer.consumeCurrentToken().token_string; column_alias = m_lexer.consumeToken(TokenType::identifier).token_string;
} else { } else {
if (column_value->node_type == NodeType::database_value) { if (column_value->node_type == NodeType::database_value) {
column_alias = ((DatabaseValueNode*) column_value.get())->col_name; column_alias = ((DatabaseValueNode*) column_value.get())->col_name;
@ -262,7 +244,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_from); m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause(); std::unique_ptr<Node> where_node = parse_where_clause();
@ -278,7 +260,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_delete); m_lexer.skipToken(TokenType::keyword_delete);
m_lexer.skipToken(TokenType::keyword_from); m_lexer.skipToken(TokenType::keyword_from);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::unique_ptr<Node> where_node = parse_where_clause(); std::unique_ptr<Node> where_node = parse_where_clause();
@ -289,7 +271,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_update); m_lexer.skipToken(TokenType::keyword_update);
m_lexer.skipTokenOptional(TokenType::keyword_table); m_lexer.skipTokenOptional(TokenType::keyword_table);
std::string table_name = m_lexer.consumeCurrentToken().token_string; std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::keyword_set); m_lexer.skipToken(TokenType::keyword_set);
@ -297,7 +279,7 @@ namespace usql {
std::vector<std::unique_ptr<Node>> values; std::vector<std::unique_ptr<Node>> values;
do { do {
cols_names.emplace_back(m_lexer.consumeCurrentToken().token_string); cols_names.emplace_back(m_lexer.consumeToken(TokenType::identifier).token_string);
m_lexer.skipToken(TokenType::equal); m_lexer.skipToken(TokenType::equal);
std::unique_ptr<Node> left = Parser::parse_value(); std::unique_ptr<Node> left = Parser::parse_value();
@ -333,7 +315,7 @@ namespace usql {
bool asc = true; bool asc = true;
auto token_type = m_lexer.tokenType(); auto token_type = m_lexer.tokenType();
std::string tokenString = m_lexer.consumeCurrentToken().token_string; std::string tokenString = m_lexer.consumeToken().token_string;
switch (token_type) { switch (token_type) {
case TokenType::int_number: case TokenType::int_number:
col_index = std::stoi(tokenString); col_index = std::stoi(tokenString);
@ -365,20 +347,12 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_offset) { if (m_lexer.tokenType() == TokenType::keyword_offset) {
m_lexer.skipToken(TokenType::keyword_offset); m_lexer.skipToken(TokenType::keyword_offset);
offset = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
if (m_lexer.tokenType() != TokenType::int_number)
throw Exception("expecting integer in offset clause");
offset = std::stoi(m_lexer.consumeCurrentToken().token_string);
} }
if (m_lexer.tokenType() == TokenType::keyword_limit) { if (m_lexer.tokenType() == TokenType::keyword_limit) {
m_lexer.skipToken(TokenType::keyword_limit); m_lexer.skipToken(TokenType::keyword_limit);
limit = std::stoi(m_lexer.consumeToken(TokenType::int_number).token_string);
if (m_lexer.tokenType() != TokenType::int_number)
throw Exception("expecting integer in limit clause");
limit = std::stoi(m_lexer.consumeCurrentToken().token_string);
} }
return OffsetLimitNode{offset, limit}; return OffsetLimitNode{offset, limit};
@ -446,7 +420,7 @@ namespace usql {
// function call // function call
if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) { if (token_type == TokenType::identifier && m_lexer.nextTokenType() == TokenType::open_paren) {
std::string function_name = m_lexer.consumeCurrentToken().token_string; std::string function_name = m_lexer.consumeToken(TokenType::identifier).token_string;
std::vector<std::unique_ptr<Node>> pars; std::vector<std::unique_ptr<Node>> pars;
m_lexer.skipToken(TokenType::open_paren); m_lexer.skipToken(TokenType::open_paren);
@ -459,7 +433,7 @@ namespace usql {
} }
// numbers and strings // numbers and strings
std::string tokenString = m_lexer.consumeCurrentToken().token_string; std::string tokenString = m_lexer.consumeToken().token_string;
if (token_type == TokenType::int_number) if (token_type == TokenType::int_number)
return std::make_unique<IntValueNode>(std::stoi(tokenString)); return std::make_unique<IntValueNode>(std::stoi(tokenString));
@ -480,7 +454,7 @@ namespace usql {
} }
RelationalOperatorType Parser::parse_relational_operator() { RelationalOperatorType Parser::parse_relational_operator() {
auto op = m_lexer.consumeCurrentToken(); auto op = m_lexer.consumeToken();
switch (op.type) { switch (op.type) {
case TokenType::equal: case TokenType::equal:
return RelationalOperatorType::equal; return RelationalOperatorType::equal;
@ -500,7 +474,7 @@ namespace usql {
} }
LogicalOperatorType Parser::parse_logical_operator() { LogicalOperatorType Parser::parse_logical_operator() {
auto op = m_lexer.consumeCurrentToken(); auto op = m_lexer.consumeToken();
switch (op.type) { switch (op.type) {
case TokenType::logical_and: case TokenType::logical_and:
return LogicalOperatorType::and_operator; return LogicalOperatorType::and_operator;
@ -512,7 +486,7 @@ namespace usql {
} }
ArithmeticalOperatorType Parser::parse_arithmetical_operator() { ArithmeticalOperatorType Parser::parse_arithmetical_operator() {
auto op = m_lexer.consumeCurrentToken(); auto op = m_lexer.consumeToken();
switch (op.type) { switch (op.type) {
case TokenType::plus: case TokenType::plus:
return ArithmeticalOperatorType::plus_operator; return ArithmeticalOperatorType::plus_operator;