diff --git a/csvreader.cpp b/csvreader.cpp index d4f5031..2d6b72a 100644 --- a/csvreader.cpp +++ b/csvreader.cpp @@ -1,4 +1,4 @@ -#include +#include #include "exception.h" #include "csvreader.h" @@ -7,15 +7,15 @@ namespace usql { - CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) { - skip_header = skip_hdr; - field_separator = field_sep; - quote_character = quote_ch; - line_separator = line_sep; - line_separator2 = line_sep2; +CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) { + skip_header = skip_hdr; + field_separator = field_sep; + quote_character = quote_ch; + line_separator = line_sep; + line_separator2 = line_sep2; - header_skiped = !skip_hdr; - } + header_skiped = !skip_hdr; +} int CsvReader::parseCSV(const std::string &filename, std::vector &cols_def, Table &table) { @@ -41,7 +41,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co size_t len = 0; - int read_chars; + long read_chars; while ((read_chars = getline(&line_str, &len, fp)) != -1) { if (skip_header && !header_skiped) { header_skiped = true; @@ -59,7 +59,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co if (*aChar == quote_character) { inQuote = !inQuote; } else if (*aChar == field_separator) { - if (inQuote == true) { + if (inQuote) { field += *aChar; } else { line.push_back(field); @@ -80,9 +80,6 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co field.clear(); line.clear(); -// DEBUG -// if (row_cnt > 50000) break; -// } fclose(fp); @@ -93,53 +90,53 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co return row_cnt; } - int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table) { - int row_cnt = 0; - bool inQuote(false); - bool newLine(false); - std::string field; +int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table) { + int row_cnt = 0; + bool inQuote(false); + bool newLine(false); + std::string field; - std::vector line; - line.reserve(32); + std::vector line; + line.reserve(32); - std::string::const_iterator aChar = csvSource.begin(); - while (aChar != csvSource.end()) { - if (*aChar == quote_character) { - newLine = false; - inQuote = !inQuote; - } else if (*aChar == field_separator) { - newLine = false; - if (inQuote == true) { - field += *aChar; - } else { - line.push_back(field); - field.clear(); - } - } else if (*aChar == line_separator || *aChar == line_separator2) { - if (inQuote == true) { - field += *aChar; - } else { - if (newLine == false) { - line.push_back(field); - if (header_skiped) { - table.create_row_from_vector(cols_def, line); - row_cnt++; - } - header_skiped = true; - field.clear(); - line.clear(); - newLine = true; - } - } - } else { - newLine = false; - field.push_back(*aChar); - } + std::string::const_iterator aChar = csvSource.begin(); + while (aChar != csvSource.end()) { + if (*aChar == quote_character) { + newLine = false; + inQuote = !inQuote; + } else if (*aChar == field_separator) { + newLine = false; + if (inQuote) { + field += *aChar; + } else { + line.push_back(field); + field.clear(); + } + } else if (*aChar == line_separator || *aChar == line_separator2) { + if (inQuote) { + field += *aChar; + } else { + if (!newLine) { + line.push_back(field); + if (header_skiped) { + table.create_row_from_vector(cols_def, line); + row_cnt++; + } + header_skiped = true; + field.clear(); + line.clear(); + newLine = true; + } + } + } else { + newLine = false; + field.push_back(*aChar); + } - aChar++; - } + aChar++; + } - if (!field.empty()) line.push_back(field); + if (!field.empty()) line.push_back(field); if (header_skiped) { table.create_row_from_vector(cols_def, line); diff --git a/csvreader.h b/csvreader.h index 7940606..1778437 100644 --- a/csvreader.h +++ b/csvreader.h @@ -24,7 +24,7 @@ namespace usql { bool header_skiped; public: - CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); + explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); int parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table); diff --git a/debug.h b/debug.h index 9b5ca77..387d3b3 100644 --- a/debug.h +++ b/debug.h @@ -25,6 +25,8 @@ std::vector k_debug_sql_commands { "insert into a (i, s) values(2, 'two')", "insert into a (i, s) values(2, 'second two')", "insert into a (i, s) values(3, 'three')", + "insert into a (i, s) values(4, 'four')", + "save a into '/tmp/a.csv'", "set 'USE_INDEXSCAN' = 'true'", // "select * from a where 1 = i", // "delete from a where i = 2 and s ='two'", diff --git a/lexer.cpp b/lexer.cpp index 0a1723e..9bd09a6 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -5,292 +5,292 @@ namespace usql { - Token::Token(const std::string &token_str, TokenType typ) { - token_string = token_str; - type = typ; - } +Token::Token(const std::string &token_str, TokenType typ) { + token_string = token_str; + type = typ; +} - Lexer::Lexer() { - k_words_regex = - "[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" - ",;:\?]|!=|<>|==|>=|<=|~=|>|<|=|;|~|\\||\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"; - k_int_regex = "[-+]?[0-9]+"; +Lexer::Lexer() { + k_words_regex = + "[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" + ",;:\?]|!=|<>|==|>=|<=|~=|>|<|=|;|~|\\||\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"; + k_int_regex = "[-+]?[0-9]+"; k_int_underscored_regex = "[-+]?[0-9][0-9_]+[0-9]"; k_double_regex = "[-+]?[0-9]+\\.[0-9]+"; k_identifier_regex = "[A-Za-z]+[A-Za-z0-9_#]*"; - } +} - void Lexer::parse(const std::string &code) { - if (code.empty()) - throw Exception("Lexer.parse empty code"); +void Lexer::parse(const std::string &code) { + if (code.empty()) + throw Exception("Lexer.parse empty code"); - m_tokens.clear(); - m_tokens.reserve(64); + m_tokens.clear(); + m_tokens.reserve(64); - m_code_str = code; - if (!m_code_str.empty() && m_code_str.back() != '\n') { - m_code_str.append("\n"); // temp solution to prevent possible situation when last line is a comment - } + m_code_str = code; + if (!m_code_str.empty() && m_code_str.back() != '\n') { + m_code_str.append("\n"); // temp solution to prevent possible situation when last line is a comment + } - auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex); - auto words_end = std::sregex_iterator(); + auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex); + auto words_end = std::sregex_iterator(); - for (std::sregex_iterator i = words_begin; i != words_end; ++i) { - std::smatch match = *i; - std::string match_str = match.str(); - TokenType token_type = type(match_str); - if (token_type == TokenType::undef) - throw Exception("Lexer.parse unknown token type: " + match_str); - if (token_type == TokenType::string_literal) - match_str = stringLiteral(match_str); + for (std::sregex_iterator i = words_begin; i != words_end; ++i) { + std::smatch match = *i; + std::string match_str = match.str(); + TokenType token_type = type(match_str); + if (token_type == TokenType::undef) + throw Exception("Lexer.parse unknown token type: " + match_str); + if (token_type == TokenType::string_literal) + match_str = stringLiteral(match_str); - if (token_type != TokenType::newline) - m_tokens.emplace_back(match_str, token_type); - } + if (token_type != TokenType::newline) + m_tokens.emplace_back(match_str, token_type); + } - // DEBUG IT - // debugTokens(); + // DEBUG IT + // debugTokens(); - m_index = 0; - } + m_index = 0; +} - void Lexer::debugTokens() { - int i = 0; - for (auto & m_token : m_tokens) { - std::cerr << i << "\t" << m_token.token_string << std::endl; - i++; - } - } +void Lexer::debugTokens() { + int i = 0; + for (auto & m_token : m_tokens) { + std::cerr << i << "\t" << m_token.token_string << std::endl; + i++; + } +} - Token Lexer::currentToken() { return m_tokens[m_index]; } +Token Lexer::currentToken() { return m_tokens[m_index]; } - Token Lexer::consumeToken() { - int i = m_index; - nextToken(); - return m_tokens[i]; - } +Token Lexer::consumeToken() { + int i = m_index; + nextToken(); + return m_tokens[i]; +} - Token Lexer::consumeToken(TokenType type) { - int i = m_index; - skipToken(type); - return m_tokens[i]; - } +Token Lexer::consumeToken(TokenType type) { + int i = m_index; + skipToken(type); + return m_tokens[i]; +} - void Lexer::nextToken() { - if (m_index < m_tokens.size()) { - m_index++; - } - } +void Lexer::nextToken() { + if (m_index < m_tokens.size()) { + m_index++; + } +} - void Lexer::skipToken(TokenType type) { - if (tokenType() == type) { - nextToken(); - } else { - throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type)); - } - } +void Lexer::skipToken(TokenType type) { + if (tokenType() == type) { + nextToken(); + } else { + throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type)); + } +} - void Lexer::skipTokenOptional(TokenType type) { - if (tokenType() == type) { - nextToken(); - } - } +void Lexer::skipTokenOptional(TokenType type) { + if (tokenType() == type) { + nextToken(); + } +} - TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } +TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } - TokenType Lexer::nextTokenType() { - return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; - } +TokenType Lexer::nextTokenType() { + return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; +} - bool Lexer::isRelationalOperator(TokenType token_type) { - return (token_type == TokenType::equal || token_type == TokenType::not_equal || - token_type == TokenType::greater || token_type == TokenType::greater_equal || - token_type == TokenType::lesser || token_type == TokenType::lesser_equal || - token_type == TokenType::is); - } +bool Lexer::isRelationalOperator(TokenType token_type) { + return (token_type == TokenType::equal || token_type == TokenType::not_equal || + token_type == TokenType::greater || token_type == TokenType::greater_equal || + token_type == TokenType::lesser || token_type == TokenType::lesser_equal || + token_type == TokenType::is); +} - bool Lexer::isLogicalOperator(TokenType token_type) { - return (token_type == TokenType::logical_and || token_type == TokenType::logical_or); - } +bool Lexer::isLogicalOperator(TokenType token_type) { + return (token_type == TokenType::logical_and || token_type == TokenType::logical_or); +} - bool Lexer::isArithmeticalOperator(TokenType token_type) { - return (token_type == TokenType::plus || token_type == TokenType::minus || - token_type == TokenType::multiply || - token_type == TokenType::divide); - } +bool Lexer::isArithmeticalOperator(TokenType token_type) { + return (token_type == TokenType::plus || token_type == TokenType::minus || + token_type == TokenType::multiply || + token_type == TokenType::divide); +} - TokenType Lexer::type(const std::string &token) { - if (token == ";") return TokenType::semicolon; - if (token == "+") return TokenType::plus; - if (token == "-") return TokenType::minus; - if (token == "*") return TokenType::multiply; - if (token == "/") return TokenType::divide; - if (token == "(") return TokenType::open_paren; - if (token == ")") return TokenType::close_paren; - if (token == "=") return TokenType::equal; - if (token == "!=" || token == "<>") return TokenType::not_equal; - if (token == ">") return TokenType::greater; - if (token == ">=") return TokenType::greater_equal; - if (token == "<") return TokenType::lesser; - if (token == "<=") return TokenType::lesser_equal; - if (token == "is") return TokenType::is; - if (token == "as") return TokenType::keyword_as; - if (token == "create") return TokenType::keyword_create; - if (token == "drop") return TokenType::keyword_drop; - if (token == "where") return TokenType::keyword_where; - if (token == "order") return TokenType::keyword_order; - if (token == "by") return TokenType::keyword_by; - if (token == "offset") return TokenType::keyword_offset; - if (token == "limit") return TokenType::keyword_limit; - if (token == "asc") return TokenType::keyword_asc; - if (token == "desc") return TokenType::keyword_desc; - if (token == "from") return TokenType::keyword_from; - if (token == "delete") return TokenType::keyword_delete; - if (token == "table") return TokenType::keyword_table; - if (token == "index") return TokenType::keyword_index; - if (token == "on") return TokenType::keyword_on; - if (token == "insert") return TokenType::keyword_insert; - if (token == "into") return TokenType::keyword_into; - if (token == "values") return TokenType::keyword_values; - if (token == "select") return TokenType::keyword_select; - if (token == "set") return TokenType::keyword_set; - if (token == "copy") return TokenType::keyword_copy; - if (token == "update") return TokenType::keyword_update; - if (token == "load") return TokenType::keyword_load; - if (token == "save") return TokenType::keyword_save; - if (token == "not") return TokenType::keyword_not; - if (token == "null") return TokenType::keyword_null; - if (token == "integer") return TokenType::keyword_integer; - if (token == "float") return TokenType::keyword_float; - if (token == "varchar") return TokenType::keyword_varchar; - if (token == "date") return TokenType::keyword_date; - if (token == "boolean") return TokenType::keyword_bool; - if (token == "true") return TokenType::keyword_true; - if (token == "false") return TokenType::keyword_false; - if (token == "distinct") return TokenType::keyword_distinct; - if (token == "show") return TokenType::keyword_show; - if (token == "or") return TokenType::logical_or; - if (token == "and") return TokenType::logical_and; - if (token == ",") return TokenType::comma; - if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline; +TokenType Lexer::type(const std::string &token) { + if (token == ";") return TokenType::semicolon; + if (token == "+") return TokenType::plus; + if (token == "-") return TokenType::minus; + if (token == "*") return TokenType::multiply; + if (token == "/") return TokenType::divide; + if (token == "(") return TokenType::open_paren; + if (token == ")") return TokenType::close_paren; + if (token == "=") return TokenType::equal; + if (token == "!=" || token == "<>") return TokenType::not_equal; + if (token == ">") return TokenType::greater; + if (token == ">=") return TokenType::greater_equal; + if (token == "<") return TokenType::lesser; + if (token == "<=") return TokenType::lesser_equal; + if (token == "is") return TokenType::is; + if (token == "as") return TokenType::keyword_as; + if (token == "create") return TokenType::keyword_create; + if (token == "drop") return TokenType::keyword_drop; + if (token == "where") return TokenType::keyword_where; + if (token == "order") return TokenType::keyword_order; + if (token == "by") return TokenType::keyword_by; + if (token == "offset") return TokenType::keyword_offset; + if (token == "limit") return TokenType::keyword_limit; + if (token == "asc") return TokenType::keyword_asc; + if (token == "desc") return TokenType::keyword_desc; + if (token == "from") return TokenType::keyword_from; + if (token == "delete") return TokenType::keyword_delete; + if (token == "table") return TokenType::keyword_table; + if (token == "index") return TokenType::keyword_index; + if (token == "on") return TokenType::keyword_on; + if (token == "insert") return TokenType::keyword_insert; + if (token == "into") return TokenType::keyword_into; + if (token == "values") return TokenType::keyword_values; + if (token == "select") return TokenType::keyword_select; + if (token == "set") return TokenType::keyword_set; + if (token == "copy") return TokenType::keyword_copy; + if (token == "update") return TokenType::keyword_update; + if (token == "load") return TokenType::keyword_load; + if (token == "save") return TokenType::keyword_save; + if (token == "not") return TokenType::keyword_not; + if (token == "null") return TokenType::keyword_null; + if (token == "integer") return TokenType::keyword_integer; + if (token == "float") return TokenType::keyword_float; + if (token == "varchar") return TokenType::keyword_varchar; + if (token == "date") return TokenType::keyword_date; + if (token == "boolean") return TokenType::keyword_bool; + if (token == "true") return TokenType::keyword_true; + if (token == "false") return TokenType::keyword_false; + if (token == "distinct") return TokenType::keyword_distinct; + if (token == "show") return TokenType::keyword_show; + if (token == "or") return TokenType::logical_or; + if (token == "and") return TokenType::logical_and; + if (token == ",") return TokenType::comma; + if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline; - if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) - return TokenType::comment; + if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) + return TokenType::comment; - if (token.length() >= 2 && token.at(0) == '"') - return (token.at(token.length() - 1) == '"') ? TokenType::string_literal : TokenType::undef; + if (token.length() >= 2 && token.at(0) == '"') + return (token.at(token.length() - 1) == '"') ? TokenType::string_literal : TokenType::undef; - if (token.length() >= 2 && token.at(0) == '\'') - return (token.at(token.length() - 1) == '\'') ? TokenType::string_literal : TokenType::undef; + if (token.length() >= 2 && token.at(0) == '\'') + return (token.at(token.length() - 1) == '\'') ? TokenType::string_literal : TokenType::undef; - if (std::regex_match(token, k_int_regex)) return TokenType::int_number; - if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number; - if (std::regex_match(token, k_double_regex)) return TokenType::double_number; - if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; + if (std::regex_match(token, k_int_regex)) return TokenType::int_number; + if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number; + if (std::regex_match(token, k_double_regex)) return TokenType::double_number; + if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; - return TokenType::undef; - } + return TokenType::undef; +} - std::string Lexer::stringLiteral(std::string token) { - // remove ' or " from the literal ends - bool replace = token[0] == '\'' && token[token.size() - 1] == '\''; +std::string Lexer::stringLiteral(std::string token) { + // remove ' or " from the literal ends + bool replace = token[0] == '\'' && token[token.size() - 1] == '\''; - std::string str = token.substr(1, token.size() - 2); - if (!replace) { - return str; - } - std::string out; - out.reserve(str.size()); + std::string str = token.substr(1, token.size() - 2); + if (!replace) { + return str; + } + std::string out; + out.reserve(str.size()); - for (std::string::size_type i = 0; i < str.size(); ++i) { - if (str[i] == '\'' && i < str.size() - 1) { - if (str[i + 1] == '\'') { - out.append(1, '\''); - i++; - } else { - out.append(1, str[i]); - } - } else if (str[i] == '\\' && i < str.size() - 1) { - if (str[i + 1] == 'n') { - out.append(1, '\n'); - i++; - } else if (str[i + 1] == 't') { - out.append(1, '\t'); - i++; - } else { - out.append(1, str[i]); - } - } else { - out.append(1, str[i]); - } - } - return out; - } + for (std::string::size_type i = 0; i < str.size(); ++i) { + if (str[i] == '\'' && i < str.size() - 1) { + if (str[i + 1] == '\'') { + out.append(1, '\''); + i++; + } else { + out.append(1, str[i]); + } + } else if (str[i] == '\\' && i < str.size() - 1) { + if (str[i + 1] == 'n') { + out.append(1, '\n'); + i++; + } else if (str[i + 1] == 't') { + out.append(1, '\t'); + i++; + } else { + out.append(1, str[i]); + } + } else { + out.append(1, str[i]); + } + } + return out; +} - std::string Lexer::typeToString(TokenType token_type) { - switch (token_type) { - case TokenType::undef: return "undef"; - case TokenType::identifier: return "identifier"; - case TokenType::plus: return "+"; - case TokenType::minus: return "-"; - case TokenType::multiply: return "*"; - case TokenType::divide: return "/"; - case TokenType::equal: return "=="; - case TokenType::not_equal: return "!="; - case TokenType::greater: return ">"; - case TokenType::greater_equal: return ">="; - case TokenType::lesser: return "<"; - case TokenType::lesser_equal: return "<="; - case TokenType::is: return "is"; - case TokenType::keyword_as: return "as"; - case TokenType::keyword_create: return "create"; - case TokenType::keyword_drop: return "drop"; - case TokenType::keyword_where: return "where"; - case TokenType::keyword_order: return "order"; - case TokenType::keyword_by: return "by"; - case TokenType::keyword_offset: return "offset"; - case TokenType::keyword_limit: return "limit"; - case TokenType::keyword_asc: return "asc"; - case TokenType::keyword_desc: return "desc"; - case TokenType::keyword_table: return "table"; - case TokenType::keyword_index: return "index"; - case TokenType::keyword_on: return "on"; - case TokenType::keyword_into: return "into"; - case TokenType::keyword_values: return "values"; - case TokenType::keyword_select: return "select"; - case TokenType::keyword_set: return "set"; - case TokenType::keyword_copy: return "copy"; - case TokenType::keyword_update: return "update"; - case TokenType::keyword_load: return "load"; - case TokenType::keyword_save: return "save"; - case TokenType::keyword_not: return "not"; - case TokenType::keyword_null: return "null"; - case TokenType::keyword_integer: return "integer"; - case TokenType::keyword_float: return "float"; - case TokenType::keyword_varchar: return "varchar"; - case TokenType::keyword_date: return "date"; - case TokenType::keyword_bool: return "boolean"; - case TokenType::keyword_true: return "true"; - case TokenType::keyword_false: return "false"; - case TokenType::keyword_distinct: return "distinct"; - case TokenType::keyword_show: return "show"; - case TokenType::int_number: return "int number"; - case TokenType::double_number: return "double number"; - case TokenType::string_literal: return "string literal"; - case TokenType::open_paren: return "("; - case TokenType::close_paren: return ")"; - case TokenType::logical_and: return "and"; - case TokenType::logical_or: return "or"; - case TokenType::semicolon: return ";"; - case TokenType::comma: return ","; - case TokenType::newline: return "newline"; - case TokenType::comment: return "comment"; - case TokenType::eof: return "eof"; - default: - return "FIXME, unknown token type"; - } - } +std::string Lexer::typeToString(TokenType token_type) { + switch (token_type) { + case TokenType::undef: return "undef"; + case TokenType::identifier: return "identifier"; + case TokenType::plus: return "+"; + case TokenType::minus: return "-"; + case TokenType::multiply: return "*"; + case TokenType::divide: return "/"; + case TokenType::equal: return "=="; + case TokenType::not_equal: return "!="; + case TokenType::greater: return ">"; + case TokenType::greater_equal: return ">="; + case TokenType::lesser: return "<"; + case TokenType::lesser_equal: return "<="; + case TokenType::is: return "is"; + case TokenType::keyword_as: return "as"; + case TokenType::keyword_create: return "create"; + case TokenType::keyword_drop: return "drop"; + case TokenType::keyword_where: return "where"; + case TokenType::keyword_order: return "order"; + case TokenType::keyword_by: return "by"; + case TokenType::keyword_offset: return "offset"; + case TokenType::keyword_limit: return "limit"; + case TokenType::keyword_asc: return "asc"; + case TokenType::keyword_desc: return "desc"; + case TokenType::keyword_table: return "table"; + case TokenType::keyword_index: return "index"; + case TokenType::keyword_on: return "on"; + case TokenType::keyword_into: return "into"; + case TokenType::keyword_values: return "values"; + case TokenType::keyword_select: return "select"; + case TokenType::keyword_set: return "set"; + case TokenType::keyword_copy: return "copy"; + case TokenType::keyword_update: return "update"; + case TokenType::keyword_load: return "load"; + case TokenType::keyword_save: return "save"; + case TokenType::keyword_not: return "not"; + case TokenType::keyword_null: return "null"; + case TokenType::keyword_integer: return "integer"; + case TokenType::keyword_float: return "float"; + case TokenType::keyword_varchar: return "varchar"; + case TokenType::keyword_date: return "date"; + case TokenType::keyword_bool: return "boolean"; + case TokenType::keyword_true: return "true"; + case TokenType::keyword_false: return "false"; + case TokenType::keyword_distinct: return "distinct"; + case TokenType::keyword_show: return "show"; + case TokenType::int_number: return "int number"; + case TokenType::double_number: return "double number"; + case TokenType::string_literal: return "string literal"; + case TokenType::open_paren: return "("; + case TokenType::close_paren: return ")"; + case TokenType::logical_and: return "and"; + case TokenType::logical_or: return "or"; + case TokenType::semicolon: return ";"; + case TokenType::comma: return ","; + case TokenType::newline: return "newline"; + case TokenType::comment: return "comment"; + case TokenType::eof: return "eof"; + default: + return "FIXME, unknown token type"; + } +} -} \ No newline at end of file +} // namespace usql \ No newline at end of file diff --git a/parser.h b/parser.h index 8a78fb9..a18505c 100644 --- a/parser.h +++ b/parser.h @@ -13,15 +13,15 @@ static const int FUNCTION_CALL = -1; namespace usql { - enum class ColumnType { +enum class ColumnType { integer_type, float_type, varchar_type, date_type, bool_type - }; +}; - enum class NodeType { +enum class NodeType { true_node, null_value, int_value, @@ -49,463 +49,463 @@ namespace usql { function, column_def, error - }; +}; - struct Node { - NodeType node_type; +struct Node { + NodeType node_type; - explicit Node(const NodeType type) : node_type(type) {} - virtual ~Node() = default; + explicit Node(const NodeType type) : node_type(type) {} + virtual ~Node() = default; - virtual void dump() const { - std::cout << "type: Node" << std::endl; - } - }; + virtual void dump() const { + std::cout << "type: Node" << std::endl; + } +}; - struct ColOrderNode : Node { - std::string col_name; - int col_index; - bool ascending; +struct ColOrderNode : Node { + std::string col_name; + int col_index; + bool ascending; - ColOrderNode(std::string name, bool asc) : Node(NodeType::column_order), col_name(std::move(name)), col_index(-1), ascending(asc) {} - ColOrderNode(int index, bool asc) : Node(NodeType::database_value), col_index(index), ascending(asc) {} + ColOrderNode(std::string name, bool asc) : Node(NodeType::column_order), col_name(std::move(name)), col_index(-1), ascending(asc) {} + ColOrderNode(int index, bool asc) : Node(NodeType::database_value), col_index(index), ascending(asc) {} - void dump() const override { - std::cout << "type: ColOrderNode, col_name: " << col_name << ", col_index: " << col_index << ", asc: " << ascending << std::endl; - } - }; + void dump() const override { + std::cout << "type: ColOrderNode, col_name: " << col_name << ", col_index: " << col_index << ", asc: " << ascending << std::endl; + } +}; - - struct OffsetLimitNode : Node { - int offset; - int limit; - OffsetLimitNode(int off, int lim) : Node(NodeType::offset_limit), offset(off), limit(lim) {} +struct OffsetLimitNode : Node { + int offset; + int limit; - void dump() const override { - std::cout << "type: OffsetLimitNode, offset: " << offset << ", limit: " << limit << std::endl; - } - }; + OffsetLimitNode(int off, int lim) : Node(NodeType::offset_limit), offset(off), limit(lim) {} + void dump() const override { + std::cout << "type: OffsetLimitNode, offset: " << offset << ", limit: " << limit << std::endl; + } +}; - struct SelectColNode : Node { - std::unique_ptr value; - std::string name; - SelectColNode(std::unique_ptr column, std::string alias) : - Node(NodeType::database_value), value(std::move(column)), name(std::move(alias)) {} +struct SelectColNode : Node { + std::unique_ptr value; + std::string name; - void dump() const override { - std::cout << "type: SelectColNode, name:" << name << "value:" << std::endl; - value->dump(); - } - }; + SelectColNode(std::unique_ptr column, std::string alias) : + Node(NodeType::database_value), value(std::move(column)), name(std::move(alias)) {} - struct ColDefNode : Node { - std::string name; - ColumnType type; - int order; - int length; - bool null; + void dump() const override { + std::cout << "type: SelectColNode, name:" << name << "value:" << std::endl; + value->dump(); + } +}; - ColDefNode(std::string col_name, ColumnType col_type, int col_order, int col_len, bool nullable) : - Node(NodeType::column_def), name(std::move(col_name)), type(col_type), order(col_order), length(col_len), - null(nullable) {} +struct ColDefNode : Node { + std::string name; + ColumnType type; + int order; + int length; + bool null; - void dump() const override { - std::cout << "type: ColDefNode, name: " << name << ", type: " << (int)type << " TODO add more" << std::endl; - } - }; + ColDefNode(std::string col_name, ColumnType col_type, int col_order, int col_len, bool nullable) : + Node(NodeType::column_def), name(std::move(col_name)), type(col_type), order(col_order), length(col_len), + null(nullable) {} - struct FunctionNode : Node { - std::string function; // TODO use enum - std::vector> params; + void dump() const override { + std::cout << "type: ColDefNode, name: " << name << ", type: " << (int)type << " TODO add more" << std::endl; + } +}; - FunctionNode(std::string func_name, std::vector> pars) : - Node(NodeType::function), function(std::move(func_name)), params(std::move(pars)) {} +struct FunctionNode : Node { + std::string function; // TODO use enum + std::vector> params; - void dump() const override { - std::cout << "type: FunctionNode, function: " << function << " TODO add more" << std::endl; - } - }; + FunctionNode(std::string func_name, std::vector> pars) : + Node(NodeType::function), function(std::move(func_name)), params(std::move(pars)) {} - struct TrueNode : Node { - TrueNode() : Node(NodeType::true_node) {} + void dump() const override { + std::cout << "type: FunctionNode, function: " << function << " TODO add more" << std::endl; + } +}; - void dump() const override { - std::cout << "type: TrueNode," << std::endl; - } - }; +struct TrueNode : Node { + TrueNode() : Node(NodeType::true_node) {} - struct ValueNode : Node { - explicit ValueNode(NodeType type) : Node(type) {} + void dump() const override { + std::cout << "type: TrueNode," << std::endl; + } +}; - virtual bool isNull() const { return false; } - virtual long getIntegerValue() const = 0; - virtual double getDoubleValue() const = 0; - virtual std::string getStringValue() const = 0; - virtual long getDateValue() const = 0; - virtual bool getBooleanValue() const = 0; +struct ValueNode : Node { + explicit ValueNode(NodeType type) : Node(type) {} - ~ValueNode() override = default; - }; + virtual bool isNull() const { return false; } + virtual long getIntegerValue() const = 0; + virtual double getDoubleValue() const = 0; + virtual std::string getStringValue() const = 0; + virtual long getDateValue() const = 0; + virtual bool getBooleanValue() const = 0; - struct NullValueNode : ValueNode { + ~ValueNode() override = default; +}; - NullValueNode() : ValueNode(NodeType::null_value) {} +struct NullValueNode : ValueNode { - bool isNull() const override { return true; } + NullValueNode() : ValueNode(NodeType::null_value) {} - long getIntegerValue() const override { throw Exception("getIntegerValue not supported on NullValueNode"); }; - double getDoubleValue() const override { throw Exception("getDoubleValue not supported on NullValueNode"); }; - std::string getStringValue() const override { throw Exception("getStringValue not supported on NullValueNode"); }; - long getDateValue() const override { throw Exception("getDateValue not supported on NullValueNode"); }; - bool getBooleanValue() const override { throw Exception("getBooleanValue not supported on NullValueNode"); }; + bool isNull() const override { return true; } - void dump() const override { - std::cout << "type: NullValueNode," << std::endl; - } - }; + long getIntegerValue() const override { throw Exception("getIntegerValue not supported on NullValueNode"); }; + double getDoubleValue() const override { throw Exception("getDoubleValue not supported on NullValueNode"); }; + std::string getStringValue() const override { throw Exception("getStringValue not supported on NullValueNode"); }; + long getDateValue() const override { throw Exception("getDateValue not supported on NullValueNode"); }; + bool getBooleanValue() const override { throw Exception("getBooleanValue not supported on NullValueNode"); }; - struct IntValueNode : ValueNode { - long value; - - explicit IntValueNode(long value) : ValueNode(NodeType::int_value), value(value) {} - - long getIntegerValue() const override { return value; }; - double getDoubleValue() const override { return (double) value; }; - std::string getStringValue() const override { return Settings::long_to_string(value); } - long getDateValue() const override { return value; }; - bool getBooleanValue() const override { return value != 0; }; - - void dump() const override { - std::cout << "type: IntValueNode, value: " << value << std::endl; - } - }; - - struct DoubleValueNode : ValueNode { - double value; - - explicit DoubleValueNode(double value) : ValueNode(NodeType::float_value), value(value) {} - - long getIntegerValue() const override { return (long) value; }; - double getDoubleValue() const override { return value; }; - std::string getStringValue() const override { return Settings::double_to_string(value); } - long getDateValue() const override { return (long) value; }; - bool getBooleanValue() const override { return value != 0.0; }; - - void dump() const override { - std::cout << "type: DoubleValueNode, value: " << value << std::endl; - } - }; - - struct StringValueNode : ValueNode { - std::string value; - - explicit StringValueNode(std::string value) : ValueNode(NodeType::string_value), value(std::move(value)) {} - - long getIntegerValue() const override { return Settings::string_to_long(value); }; - double getDoubleValue() const override { return Settings::string_to_double(value); }; - std::string getStringValue() const override { return value; }; - long getDateValue() const override { return Settings::string_to_date(value); }; - bool getBooleanValue() const override { return Settings::string_to_bool(value); }; - - void dump() const override { - std::cout << "type: StringValueNode, value: " << value << std::endl; - } - }; - - struct BooleanValueNode : ValueNode { - bool value; - - explicit BooleanValueNode(bool value) : ValueNode(NodeType::bool_value), value(value) {} - - long getIntegerValue() const override { return (long) value; }; - double getDoubleValue() const override { return (double) value; }; - std::string getStringValue() const override { return Settings::bool_to_string(value); } - long getDateValue() const override { return (long) value; }; - bool getBooleanValue() const override { return value; }; - - void dump() const override { - std::cout << "type: BooleanValueNode, value: " << value << std::endl; - } - }; - - - struct DatabaseValueNode : Node { - std::string col_name; - - explicit DatabaseValueNode(std::string name) : Node(NodeType::database_value), col_name(std::move(name)) {} - - void dump() const override { - std::cout << "type: DatabaseValueNode, col_name: " << col_name << std::endl; - } - }; - - enum class LogicalOperatorType { - and_operator, - or_operator - // not_operator - }; - - struct LogicalOperatorNode : Node { - LogicalOperatorType op; - std::unique_ptr left; - std::unique_ptr right; - - LogicalOperatorNode(LogicalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - - void dump() const override { - std::cout << "type: LogicalOperatorNode, op: " << (int)op << std::endl; - left->dump(); - right->dump(); - } - }; - - enum class RelationalOperatorType { - equal, - greater, - greater_equal, - lesser, - lesser_equal, - not_equal, - is, - is_not - // like - }; - - struct RelationalOperatorNode : Node { - RelationalOperatorType op; - - std::unique_ptr left; - std::unique_ptr right; - - RelationalOperatorNode(RelationalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - - void dump() const override { - std::cout << "type: RelationalOperatorNode, op: " << (int)op << std::endl; - left->dump(); - right->dump(); - } - }; - - enum class ArithmeticalOperatorType { - copy_value, // just copy lef value and do nothing with it - plus_operator, - minus_operator, - multiply_operator, - divide_operator - }; - - struct ArithmeticalOperatorNode : Node { - ArithmeticalOperatorType op; - - std::unique_ptr left; - std::unique_ptr right; - - ArithmeticalOperatorNode(ArithmeticalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - - void dump() const override { - std::cout << "type: ArithmeticalOperatorNode, op: " << (int)op << std::endl; - left->dump(); - right->dump(); - } - }; - - struct CreateTableNode : Node { - std::string table_name; - std::vector cols_defs; - - CreateTableNode(std::string name, std::vector defs) : - Node(NodeType::create_table), table_name(std::move(name)), cols_defs(std::move(defs)) {} - - void dump() const override { - std::cout << "type: CreateTableNode, table_name: " << table_name << "TODO complete me" << std::endl; - } - }; - - struct InsertIntoTableNode : Node { - std::string table_name; - std::vector cols_names; - std::vector> cols_values; - - InsertIntoTableNode(std::string name, std::vector names, std::vector> values) : - Node(NodeType::insert_into), table_name(std::move(name)), cols_names(std::move(names)), cols_values(std::move(values)) {} - - void dump() const override { - std::cout << "type: InsertIntoTableNode, table_name: " << table_name << "TODO complete me" << std::endl; - } - }; - - struct SelectFromTableNode : Node { - std::string table_name; - std::unique_ptr> cols_names; - std::unique_ptr where; - std::vector order_by; - OffsetLimitNode offset_limit; - bool distinct; - - SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause, std::vector orderby, OffsetLimitNode offlim, bool distinct_): - Node(NodeType::select_from), table_name(std::move(name)), cols_names(std::move(names)), where(std::move(where_clause)), order_by(std::move(orderby)), offset_limit(std::move(offlim)), distinct(distinct_) {} - - void dump() const override { - std::cout << "type: SelectFromTableNode, table_name: " << table_name << "TODO complete me" << std::endl; - where->dump(); - } - }; - - struct CreateTableAsSelectNode : Node { - std::string table_name; - std::unique_ptr select_table; - - CreateTableAsSelectNode(std::string name, std::unique_ptr table) : - Node(NodeType::create_table_as_select), table_name(std::move(name)), select_table(std::move(table)) {} - - void dump() const override { - std::cout << "type: CreateTableAsSelectNode, table_name: " << table_name << std::endl; - select_table->dump(); - } - }; - - struct UpdateTableNode : Node { - std::string table_name; - std::vector cols_names; - std::vector> values; - std::unique_ptr where; - - UpdateTableNode(std::string name, std::vector names, std::vector> vals, - std::unique_ptr where_clause) : - Node(NodeType::update_table), table_name(std::move(name)), cols_names(std::move(names)), values(std::move(vals)), - where(std::move(where_clause)) {} - - void dump() const override { - std::cout << "type: UpdateTableNode, table_name: " << table_name << "TODO complete me" << std::endl; - where->dump(); - } - }; - - struct LoadIntoTableNode : Node { - std::string table_name; - std::string filename; - - LoadIntoTableNode(std::string name, std::string file) : - Node(NodeType::load_table), table_name(std::move(name)), filename(std::move(file)) {} - - void dump() const override { - std::cout << "type: LoadIntoTableNode, table_name: " << table_name << ", filename" << filename << std::endl; - } - }; - - struct SaveTableNode : Node { - std::string table_name; - std::string filename; - - SaveTableNode(std::string name, std::string file) : - Node(NodeType::save_table), table_name(std::move(name)), filename(std::move(file)) {} - - void dump() const override { - std::cout << "type: SaveTableNode, table_name: " << table_name << ", filename" << filename << std::endl; - } - }; - - struct DropTableNode : Node { - std::string table_name; - - explicit DropTableNode(std::string name) : Node(NodeType::drop_table), table_name(std::move(name)) {} - - void dump() const override { - std::cout << "type: SelectFromTableNode, table_name: " << table_name << std::endl; - } - }; - - struct DeleteFromTableNode : Node { - std::string table_name; - std::unique_ptr where; - - DeleteFromTableNode(std::string name, std::unique_ptr where_clause) : - Node(NodeType::delete_from), table_name(std::move(name)), where(std::move(where_clause)) {} - - void dump() const override { - std::cout << "type: DeleteFromTableNode, table_name: " << table_name << std::endl; - where->dump(); - } - }; - - struct SetNode : Node { - std::string name; - std::string value; - - SetNode(std::string node_name, std::string node_value) : - Node(NodeType::set), name(std::move(node_name)), value(std::move(node_value)) {} - - void dump() const override { - std::cout << "type: SetNode, name: " << name << ", value: " << value << std::endl; - } - }; - - struct ShowNode : Node { - std::string name; - - explicit ShowNode(std::string node_name) : Node(NodeType::show), name(std::move(node_name)) {} - - void dump() const override { - std::cout << "type: ShowNode, name: " << name << std::endl; - } - }; - - struct CreateIndexNode : Node { - std::string index_name; - std::string table_name; - std::string column_name; - - CreateIndexNode(std::string idx_name, std::string tbl_name, std::string col_name) : - Node(NodeType::create_index), index_name(std::move(idx_name)), table_name(std::move(tbl_name)), column_name(std::move(col_name)) {} - - void dump() const override { - std::cout << "type: CreateIndexNode, table_name: " << table_name << ", index_name: " << index_name << ", column_name: " << column_name << std::endl; - } - }; - - class Parser { - private: - public: - Parser(); - - std::unique_ptr parse(const std::string &code); - - private: - std::unique_ptr parse_create_table(); - std::unique_ptr parse_drop_table(); - std::unique_ptr parse_load_table(); - std::unique_ptr parse_save_table(); - std::unique_ptr parse_set(); - std::unique_ptr parse_show(); - - std::unique_ptr parse_insert_into_table(); - std::unique_ptr parse_select_from_table(); - std::unique_ptr parse_delete_from_table(); - std::unique_ptr parse_update_table(); - std::unique_ptr parse_create_index(); - - std::vector parse_order_by_clause(); - OffsetLimitNode parse_offset_limit_clause(); - - std::unique_ptr parse_where_clause(); - std::unique_ptr parse_expression(); - std::unique_ptr parse_expression(std::unique_ptr left); - - std::unique_ptr parse_value(); - RelationalOperatorType parse_relational_operator(); - LogicalOperatorType parse_logical_operator(); - ArithmeticalOperatorType parse_arithmetical_operator(); - - private: - Lexer m_lexer; - }; + void dump() const override { + std::cout << "type: NullValueNode," << std::endl; + } +}; + +struct IntValueNode : ValueNode { + long value; + + explicit IntValueNode(long value) : ValueNode(NodeType::int_value), value(value) {} + + long getIntegerValue() const override { return value; }; + double getDoubleValue() const override { return (double) value; }; + std::string getStringValue() const override { return Settings::long_to_string(value); } + long getDateValue() const override { return value; }; + bool getBooleanValue() const override { return value != 0; }; + + void dump() const override { + std::cout << "type: IntValueNode, value: " << value << std::endl; + } +}; + +struct DoubleValueNode : ValueNode { + double value; + + explicit DoubleValueNode(double value) : ValueNode(NodeType::float_value), value(value) {} + + long getIntegerValue() const override { return (long) value; }; + double getDoubleValue() const override { return value; }; + std::string getStringValue() const override { return Settings::double_to_string(value); } + long getDateValue() const override { return (long) value; }; + bool getBooleanValue() const override { return value != 0.0; }; + + void dump() const override { + std::cout << "type: DoubleValueNode, value: " << value << std::endl; + } +}; + +struct StringValueNode : ValueNode { + std::string value; + + explicit StringValueNode(std::string value) : ValueNode(NodeType::string_value), value(std::move(value)) {} + + long getIntegerValue() const override { return Settings::string_to_long(value); }; + double getDoubleValue() const override { return Settings::string_to_double(value); }; + std::string getStringValue() const override { return value; }; + long getDateValue() const override { return Settings::string_to_date(value); }; + bool getBooleanValue() const override { return Settings::string_to_bool(value); }; + + void dump() const override { + std::cout << "type: StringValueNode, value: " << value << std::endl; + } +}; + +struct BooleanValueNode : ValueNode { + bool value; + + explicit BooleanValueNode(bool value) : ValueNode(NodeType::bool_value), value(value) {} + + long getIntegerValue() const override { return (long) value; }; + double getDoubleValue() const override { return (double) value; }; + std::string getStringValue() const override { return Settings::bool_to_string(value); } + long getDateValue() const override { return (long) value; }; + bool getBooleanValue() const override { return value; }; + + void dump() const override { + std::cout << "type: BooleanValueNode, value: " << value << std::endl; + } +}; + + +struct DatabaseValueNode : Node { + std::string col_name; + + explicit DatabaseValueNode(std::string name) : Node(NodeType::database_value), col_name(std::move(name)) {} + + void dump() const override { + std::cout << "type: DatabaseValueNode, col_name: " << col_name << std::endl; + } +}; + +enum class LogicalOperatorType { + and_operator, + or_operator + // not_operator +}; + +struct LogicalOperatorNode : Node { + LogicalOperatorType op; + std::unique_ptr left; + std::unique_ptr right; + + LogicalOperatorNode(LogicalOperatorType op, std::unique_ptr left, std::unique_ptr right) : + Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; + + void dump() const override { + std::cout << "type: LogicalOperatorNode, op: " << (int)op << std::endl; + left->dump(); + right->dump(); + } +}; + +enum class RelationalOperatorType { + equal, + greater, + greater_equal, + lesser, + lesser_equal, + not_equal, + is, + is_not + // like +}; + +struct RelationalOperatorNode : Node { + RelationalOperatorType op; + + std::unique_ptr left; + std::unique_ptr right; + + RelationalOperatorNode(RelationalOperatorType op, std::unique_ptr left, std::unique_ptr right) : + Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {}; + + void dump() const override { + std::cout << "type: RelationalOperatorNode, op: " << (int)op << std::endl; + left->dump(); + right->dump(); + } +}; + +enum class ArithmeticalOperatorType { + copy_value, // just copy lef value and do nothing with it + plus_operator, + minus_operator, + multiply_operator, + divide_operator +}; + +struct ArithmeticalOperatorNode : Node { + ArithmeticalOperatorType op; + + std::unique_ptr left; + std::unique_ptr right; + + ArithmeticalOperatorNode(ArithmeticalOperatorType op, std::unique_ptr left, std::unique_ptr right) : + Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; + + void dump() const override { + std::cout << "type: ArithmeticalOperatorNode, op: " << (int)op << std::endl; + left->dump(); + right->dump(); + } +}; + +struct CreateTableNode : Node { + std::string table_name; + std::vector cols_defs; + + CreateTableNode(std::string name, std::vector defs) : + Node(NodeType::create_table), table_name(std::move(name)), cols_defs(std::move(defs)) {} + + void dump() const override { + std::cout << "type: CreateTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + } +}; + +struct InsertIntoTableNode : Node { + std::string table_name; + std::vector cols_names; + std::vector> cols_values; + + InsertIntoTableNode(std::string name, std::vector names, std::vector> values) : + Node(NodeType::insert_into), table_name(std::move(name)), cols_names(std::move(names)), cols_values(std::move(values)) {} + + void dump() const override { + std::cout << "type: InsertIntoTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + } +}; + +struct SelectFromTableNode : Node { + std::string table_name; + std::unique_ptr> cols_names; + std::unique_ptr where; + std::vector order_by; + OffsetLimitNode offset_limit; + bool distinct; + + SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause, std::vector orderby, OffsetLimitNode offlim, bool distinct_): + Node(NodeType::select_from), table_name(std::move(name)), cols_names(std::move(names)), where(std::move(where_clause)), order_by(std::move(orderby)), offset_limit(std::move(offlim)), distinct(distinct_) {} + + void dump() const override { + std::cout << "type: SelectFromTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + where->dump(); + } +}; + +struct CreateTableAsSelectNode : Node { + std::string table_name; + std::unique_ptr select_table; + + CreateTableAsSelectNode(std::string name, std::unique_ptr table) : + Node(NodeType::create_table_as_select), table_name(std::move(name)), select_table(std::move(table)) {} + + void dump() const override { + std::cout << "type: CreateTableAsSelectNode, table_name: " << table_name << std::endl; + select_table->dump(); + } +}; + +struct UpdateTableNode : Node { + std::string table_name; + std::vector cols_names; + std::vector> values; + std::unique_ptr where; + + UpdateTableNode(std::string name, std::vector names, std::vector> vals, + std::unique_ptr where_clause) : + Node(NodeType::update_table), table_name(std::move(name)), cols_names(std::move(names)), values(std::move(vals)), + where(std::move(where_clause)) {} + + void dump() const override { + std::cout << "type: UpdateTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + where->dump(); + } +}; + +struct LoadIntoTableNode : Node { + std::string table_name; + std::string filename; + + LoadIntoTableNode(std::string name, std::string file) : + Node(NodeType::load_table), table_name(std::move(name)), filename(std::move(file)) {} + + void dump() const override { + std::cout << "type: LoadIntoTableNode, table_name: " << table_name << ", filename" << filename << std::endl; + } +}; + +struct SaveTableNode : Node { + std::string table_name; + std::string filename; + + SaveTableNode(std::string name, std::string file) : + Node(NodeType::save_table), table_name(std::move(name)), filename(std::move(file)) {} + + void dump() const override { + std::cout << "type: SaveTableNode, table_name: " << table_name << ", filename" << filename << std::endl; + } +}; + +struct DropTableNode : Node { + std::string table_name; + + explicit DropTableNode(std::string name) : Node(NodeType::drop_table), table_name(std::move(name)) {} + + void dump() const override { + std::cout << "type: SelectFromTableNode, table_name: " << table_name << std::endl; + } +}; + +struct DeleteFromTableNode : Node { + std::string table_name; + std::unique_ptr where; + + DeleteFromTableNode(std::string name, std::unique_ptr where_clause) : + Node(NodeType::delete_from), table_name(std::move(name)), where(std::move(where_clause)) {} + + void dump() const override { + std::cout << "type: DeleteFromTableNode, table_name: " << table_name << std::endl; + where->dump(); + } +}; + +struct SetNode : Node { + std::string name; + std::string value; + + SetNode(std::string node_name, std::string node_value) : + Node(NodeType::set), name(std::move(node_name)), value(std::move(node_value)) {} + + void dump() const override { + std::cout << "type: SetNode, name: " << name << ", value: " << value << std::endl; + } +}; + +struct ShowNode : Node { + std::string name; + + explicit ShowNode(std::string node_name) : Node(NodeType::show), name(std::move(node_name)) {} + + void dump() const override { + std::cout << "type: ShowNode, name: " << name << std::endl; + } +}; + +struct CreateIndexNode : Node { + std::string index_name; + std::string table_name; + std::string column_name; + + CreateIndexNode(std::string idx_name, std::string tbl_name, std::string col_name) : + Node(NodeType::create_index), index_name(std::move(idx_name)), table_name(std::move(tbl_name)), column_name(std::move(col_name)) {} + + void dump() const override { + std::cout << "type: CreateIndexNode, table_name: " << table_name << ", index_name: " << index_name << ", column_name: " << column_name << std::endl; + } +}; + +class Parser { +private: +public: + Parser(); + + std::unique_ptr parse(const std::string &code); + +private: + std::unique_ptr parse_create_table(); + std::unique_ptr parse_drop_table(); + std::unique_ptr parse_load_table(); + std::unique_ptr parse_save_table(); + std::unique_ptr parse_set(); + std::unique_ptr parse_show(); + + std::unique_ptr parse_insert_into_table(); + std::unique_ptr parse_select_from_table(); + std::unique_ptr parse_delete_from_table(); + std::unique_ptr parse_update_table(); + std::unique_ptr parse_create_index(); + + std::vector parse_order_by_clause(); + OffsetLimitNode parse_offset_limit_clause(); + + std::unique_ptr parse_where_clause(); + std::unique_ptr parse_expression(); + std::unique_ptr parse_expression(std::unique_ptr left); + + std::unique_ptr parse_value(); + RelationalOperatorType parse_relational_operator(); + LogicalOperatorType parse_logical_operator(); + ArithmeticalOperatorType parse_arithmetical_operator(); + +private: + Lexer m_lexer; +}; } // namespace diff --git a/row.cpp b/row.cpp index 6472525..930f172 100644 --- a/row.cpp +++ b/row.cpp @@ -29,6 +29,18 @@ int ColStringValue::compare(ColValue &other) const { return other.isNull() ? 1 : m_string->compare(other.getStringValue()); // null goes to end } +std::string ColStringValue::getCsvStringValue() const { + auto src_str = getStringValue(); + std::string toSearch{"\""}, replaceStr{"\\\""}; + + size_t pos = src_str.find(toSearch); + while(pos != std::string::npos) { + src_str.replace(pos, toSearch.size(), replaceStr); + pos =src_str.find(toSearch, pos + replaceStr.size()); + } + return src_str; +} + int ColDateValue::compare(ColValue &other) const { long r = m_date - other.getIntegerValue(); return other.isNull() ? 1 : r > 0 ? 1 : r == 0 ? 0 : -1; diff --git a/row.h b/row.h index a11e9e4..81d9642 100644 --- a/row.h +++ b/row.h @@ -88,11 +88,7 @@ struct ColStringValue : ColValue { long getIntegerValue() const override { return std::stoi(*m_string); }; double getDoubleValue() const override { return std::stod(*m_string); }; std::string getStringValue() const override { return *m_string; }; - std::string getCsvStringValue() const override { - // TODO handle correctly CSV string - // ?? return std::regex_replace(getStringValue(), std::regex( "\"" ), "\\\"" ); - return getStringValue(); - }; + std::string getCsvStringValue() const override;; long getDateValue() const override { return std::stoi(*m_string); }; bool getBoolValue() const override { throw Exception("Not supported on ColStringValue"); }; diff --git a/utils/cp_to_mlisp.sh b/utils/cp_to_mlisp.sh old mode 100644 new mode 100755 index 38c1bd6..738e55f --- a/utils/cp_to_mlisp.sh +++ b/utils/cp_to_mlisp.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash cp \ csvreader.h \ @@ -21,4 +21,4 @@ parser.cpp \ table.h \ table.cpp \ index.h \ -../mlisp/usql \ No newline at end of file +../mlisp/usql