From 5c925f2608d2ecea8b04fd1643c5fd8ae733b549 Mon Sep 17 00:00:00 2001 From: VaclavT Date: Sun, 19 Dec 2021 13:33:47 +0100 Subject: [PATCH] usql update --- CMakeLists.txt | 1 + ml_usql.cpp | 4 +- usql/csvreader.cpp | 111 +++++---- usql/csvreader.h | 2 +- usql/exception.cpp | 2 +- usql/exception.h | 4 +- usql/index.h | 119 ++++++++++ usql/lexer.cpp | 499 +++++++++++++++++++++-------------------- usql/lexer.h | 2 + usql/main.cpp | 165 -------------- usql/parser.cpp | 16 +- usql/parser.h | 441 +++++++++++++++++++++++------------- usql/row.cpp | 34 ++- usql/row.h | 181 ++++++++------- usql/settings.cpp | 41 ++-- usql/settings.h | 9 +- usql/table.cpp | 210 +++++++++++++---- usql/table.h | 61 +++-- usql/usql.cpp | 229 ++++--------------- usql/usql.h | 60 +++-- usql/usql_ddl.cpp | 33 ++- usql/usql_dml.cpp | 302 ++++++++++++++++--------- usql/usql_function.cpp | 168 ++++++++++++++ 23 files changed, 1570 insertions(+), 1124 deletions(-) create mode 100644 usql/index.h delete mode 100644 usql/main.cpp create mode 100644 usql/usql_function.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bd82555..0e63abb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ set(SOURCE usql/usql.cpp usql/usql_dml.cpp usql/usql_ddl.cpp + usql/usql_function.cpp usql/table.cpp usql/table.h usql/row.cpp diff --git a/ml_usql.cpp b/ml_usql.cpp index 03f43c6..025a6b0 100644 --- a/ml_usql.cpp +++ b/ml_usql.cpp @@ -24,9 +24,9 @@ MlValue uSQL::ivaluize(const usql::Table *table) { if (c.isNull()) { columns.push_back(MlValue::nil()); } else if (type == ColumnType::integer_type || type == ColumnType::date_type) { - columns.push_back(MlValue(c.getIntValue())); + columns.push_back(MlValue(c.getIntegerValue())); } else if (type == ColumnType::bool_type) { - columns.push_back(c.getBoolValue() ? MlValue(c.getIntValue()) : MlValue::nil()); + columns.push_back(c.getBoolValue() ? MlValue(c.getIntegerValue()) : MlValue::nil()); } else if (type == ColumnType::float_type) { columns.push_back(MlValue(c.getDoubleValue())); } else { diff --git a/usql/csvreader.cpp b/usql/csvreader.cpp index d4f5031..2d6b72a 100644 --- a/usql/csvreader.cpp +++ b/usql/csvreader.cpp @@ -1,4 +1,4 @@ -#include +#include #include "exception.h" #include "csvreader.h" @@ -7,15 +7,15 @@ namespace usql { - CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) { - skip_header = skip_hdr; - field_separator = field_sep; - quote_character = quote_ch; - line_separator = line_sep; - line_separator2 = line_sep2; +CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) { + skip_header = skip_hdr; + field_separator = field_sep; + quote_character = quote_ch; + line_separator = line_sep; + line_separator2 = line_sep2; - header_skiped = !skip_hdr; - } + header_skiped = !skip_hdr; +} int CsvReader::parseCSV(const std::string &filename, std::vector &cols_def, Table &table) { @@ -41,7 +41,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co size_t len = 0; - int read_chars; + long read_chars; while ((read_chars = getline(&line_str, &len, fp)) != -1) { if (skip_header && !header_skiped) { header_skiped = true; @@ -59,7 +59,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co if (*aChar == quote_character) { inQuote = !inQuote; } else if (*aChar == field_separator) { - if (inQuote == true) { + if (inQuote) { field += *aChar; } else { line.push_back(field); @@ -80,9 +80,6 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co field.clear(); line.clear(); -// DEBUG -// if (row_cnt > 50000) break; -// } fclose(fp); @@ -93,53 +90,53 @@ int CsvReader::parseCSV(const std::string &filename, std::vector &co return row_cnt; } - int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table) { - int row_cnt = 0; - bool inQuote(false); - bool newLine(false); - std::string field; +int CsvReader::parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table) { + int row_cnt = 0; + bool inQuote(false); + bool newLine(false); + std::string field; - std::vector line; - line.reserve(32); + std::vector line; + line.reserve(32); - std::string::const_iterator aChar = csvSource.begin(); - while (aChar != csvSource.end()) { - if (*aChar == quote_character) { - newLine = false; - inQuote = !inQuote; - } else if (*aChar == field_separator) { - newLine = false; - if (inQuote == true) { - field += *aChar; - } else { - line.push_back(field); - field.clear(); - } - } else if (*aChar == line_separator || *aChar == line_separator2) { - if (inQuote == true) { - field += *aChar; - } else { - if (newLine == false) { - line.push_back(field); - if (header_skiped) { - table.create_row_from_vector(cols_def, line); - row_cnt++; - } - header_skiped = true; - field.clear(); - line.clear(); - newLine = true; - } - } - } else { - newLine = false; - field.push_back(*aChar); - } + std::string::const_iterator aChar = csvSource.begin(); + while (aChar != csvSource.end()) { + if (*aChar == quote_character) { + newLine = false; + inQuote = !inQuote; + } else if (*aChar == field_separator) { + newLine = false; + if (inQuote) { + field += *aChar; + } else { + line.push_back(field); + field.clear(); + } + } else if (*aChar == line_separator || *aChar == line_separator2) { + if (inQuote) { + field += *aChar; + } else { + if (!newLine) { + line.push_back(field); + if (header_skiped) { + table.create_row_from_vector(cols_def, line); + row_cnt++; + } + header_skiped = true; + field.clear(); + line.clear(); + newLine = true; + } + } + } else { + newLine = false; + field.push_back(*aChar); + } - aChar++; - } + aChar++; + } - if (!field.empty()) line.push_back(field); + if (!field.empty()) line.push_back(field); if (header_skiped) { table.create_row_from_vector(cols_def, line); diff --git a/usql/csvreader.h b/usql/csvreader.h index 7940606..1778437 100644 --- a/usql/csvreader.h +++ b/usql/csvreader.h @@ -24,7 +24,7 @@ namespace usql { bool header_skiped; public: - CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); + explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n'); int parseCSV2(const std::string &csvSource, std::vector &cols_def, Table& table); diff --git a/usql/exception.cpp b/usql/exception.cpp index 028cdea..04b660e 100644 --- a/usql/exception.cpp +++ b/usql/exception.cpp @@ -2,6 +2,6 @@ namespace usql { -Exception::Exception(const std::string msg) : std::runtime_error(msg) {} +Exception::Exception(std::string msg) : std::runtime_error(msg) {} } // namespace diff --git a/usql/exception.h b/usql/exception.h index 9670174..7d3c2af 100644 --- a/usql/exception.h +++ b/usql/exception.h @@ -1,14 +1,12 @@ #pragma once -#include "lexer.h" - #include namespace usql { class Exception : public std::runtime_error { public: - Exception(const std::string msg); + explicit Exception(std::string msg); }; } // namespace \ No newline at end of file diff --git a/usql/index.h b/usql/index.h new file mode 100644 index 0000000..8e1fae4 --- /dev/null +++ b/usql/index.h @@ -0,0 +1,119 @@ +#pragma once + +#include "exception.h" +#include "parser.h" +#include "row.h" + +#include +#include +#include +#include +#include + + +namespace usql { + +using IndexValue = std::variant; +using rowid_t = size_t; // int is now enough but size_t is correct + +static const int k_default_rowids_size = 16; + + +class Index { +public: + Index(std::string index_name, std::string col_name, ColumnType type) : + m_index_name(std::move(index_name)), m_column_name(std::move(col_name)), + m_data_type(type), m_uniq(false) { + if (type != ColumnType::integer_type && type != ColumnType::varchar_type) + throw Exception("creating index on unsupported type"); + } + + + std::vector search(const ValueNode *key) { + return search(to_index_value(key)); + } + + void insert(const ColValue *key, rowid_t rowid) { + return insert(to_index_value(key), rowid); + } + + void remove(const ColValue *key, rowid_t rowid) { + return remove(to_index_value(key), rowid); + } + + void truncate() { + m_index.clear(); + } + + [[nodiscard]] const std::string &get_column_name() const { + return m_column_name; + } + + [[nodiscard]] const std::string &get_index_name() const { + return m_index_name; + } + + +private: + IndexValue to_index_value(const ValueNode *key) { + if (m_data_type == ColumnType::integer_type) + return key->getIntegerValue(); + else if (m_data_type == ColumnType::varchar_type) + return key->getStringValue(); + else + throw Exception("using index on unsupported type"); + } + + IndexValue to_index_value(const ColValue *key) { + if (m_data_type == ColumnType::integer_type) + return key->getIntegerValue(); + else if (m_data_type == ColumnType::varchar_type) + return key->getStringValue(); + else + throw Exception("using index on unsupported type"); + } + + void insert(const IndexValue& key, rowid_t rowid) { + auto search = m_index.find(key); + if (search != m_index.end()) { + if (m_uniq) + throw Exception("Inserting duplicate value into unique index"); + + search->second.push_back(rowid); + } else { + std::vector rowids{rowid}; + if (!m_uniq) + rowids.reserve(k_default_rowids_size); + m_index[key] = rowids; + } + } + + void remove(const IndexValue& key, rowid_t rowid) { + auto search = m_index.find(key); + if (search != m_index.end()) { + search->second.erase(find(search->second.begin(), search->second.end(), rowid)); + if (search->second.empty()) + m_index.erase(search); + } + } + + std::vector search(const IndexValue& key) { + auto search = m_index.find(key); + if (search != m_index.end()) { + return search->second; + } else { + return std::vector{}; + } + } + + +private: + bool m_uniq; + std::string m_index_name; + std::string m_column_name; + ColumnType m_data_type; + + std::map > m_index; +}; + +} // namespace \ No newline at end of file diff --git a/usql/lexer.cpp b/usql/lexer.cpp index 57a0c55..9bd09a6 100644 --- a/usql/lexer.cpp +++ b/usql/lexer.cpp @@ -5,287 +5,292 @@ namespace usql { - Token::Token(const std::string &token_str, TokenType typ) { - token_string = token_str; - type = typ; - } +Token::Token(const std::string &token_str, TokenType typ) { + token_string = token_str; + type = typ; +} - Lexer::Lexer() { - k_words_regex = - "[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" - ",;:\?]|!=|<>|==|>=|<=|~=|>|<|=|;|~|\\||\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"; - k_int_regex = "[-+]?[0-9]+"; +Lexer::Lexer() { + k_words_regex = + "[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/" + ",;:\?]|!=|<>|==|>=|<=|~=|>|<|=|;|~|\\||\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n"; + k_int_regex = "[-+]?[0-9]+"; k_int_underscored_regex = "[-+]?[0-9][0-9_]+[0-9]"; k_double_regex = "[-+]?[0-9]+\\.[0-9]+"; k_identifier_regex = "[A-Za-z]+[A-Za-z0-9_#]*"; - } +} - void Lexer::parse(const std::string &code) { - if (code.empty()) - throw Exception("empty code"); +void Lexer::parse(const std::string &code) { + if (code.empty()) + throw Exception("Lexer.parse empty code"); - m_tokens.clear(); - m_tokens.reserve(64); + m_tokens.clear(); + m_tokens.reserve(64); - m_code_str = code; - if (!m_code_str.empty() && m_code_str.back() != '\n') { - m_code_str.append("\n"); // temp solution to prevent possible situation when last line is a comment - } + m_code_str = code; + if (!m_code_str.empty() && m_code_str.back() != '\n') { + m_code_str.append("\n"); // temp solution to prevent possible situation when last line is a comment + } - auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex); - auto words_end = std::sregex_iterator(); + auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex); + auto words_end = std::sregex_iterator(); - for (std::sregex_iterator i = words_begin; i != words_end; ++i) { - std::smatch match = *i; - std::string match_str = match.str(); - TokenType token_type = type(match_str); - if (token_type == TokenType::string_literal) - match_str = stringLiteral(match_str); + for (std::sregex_iterator i = words_begin; i != words_end; ++i) { + std::smatch match = *i; + std::string match_str = match.str(); + TokenType token_type = type(match_str); + if (token_type == TokenType::undef) + throw Exception("Lexer.parse unknown token type: " + match_str); + if (token_type == TokenType::string_literal) + match_str = stringLiteral(match_str); - if (token_type != TokenType::newline) - m_tokens.emplace_back(match_str, token_type); - } + if (token_type != TokenType::newline) + m_tokens.emplace_back(match_str, token_type); + } - // DEBUG IT - // debugTokens(); + // DEBUG IT + // debugTokens(); - m_index = 0; - } + m_index = 0; +} - void Lexer::debugTokens() { - int i = 0; - for (auto & m_token : m_tokens) { - std::cerr << i << "\t" << m_token.token_string << std::endl; - i++; - } - } +void Lexer::debugTokens() { + int i = 0; + for (auto & m_token : m_tokens) { + std::cerr << i << "\t" << m_token.token_string << std::endl; + i++; + } +} - Token Lexer::currentToken() { return m_tokens[m_index]; } +Token Lexer::currentToken() { return m_tokens[m_index]; } - Token Lexer::consumeToken() { - int i = m_index; - nextToken(); - return m_tokens[i]; - } +Token Lexer::consumeToken() { + int i = m_index; + nextToken(); + return m_tokens[i]; +} - Token Lexer::consumeToken(TokenType type) { - int i = m_index; - skipToken(type); - return m_tokens[i]; - } +Token Lexer::consumeToken(TokenType type) { + int i = m_index; + skipToken(type); + return m_tokens[i]; +} - void Lexer::nextToken() { - if (m_index < m_tokens.size()) { - m_index++; - } - } +void Lexer::nextToken() { + if (m_index < m_tokens.size()) { + m_index++; + } +} - void Lexer::skipToken(TokenType type) { - if (tokenType() == type) { - nextToken(); - } else { - throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type)); - } - } +void Lexer::skipToken(TokenType type) { + if (tokenType() == type) { + nextToken(); + } else { + throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type)); + } +} - void Lexer::skipTokenOptional(TokenType type) { - if (tokenType() == type) { - nextToken(); - } - } +void Lexer::skipTokenOptional(TokenType type) { + if (tokenType() == type) { + nextToken(); + } +} - TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } +TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; } - TokenType Lexer::nextTokenType() { - return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; - } +TokenType Lexer::nextTokenType() { + return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof; +} - bool Lexer::isRelationalOperator(TokenType token_type) { - return (token_type == TokenType::equal || token_type == TokenType::not_equal || - token_type == TokenType::greater || token_type == TokenType::greater_equal || - token_type == TokenType::lesser || token_type == TokenType::lesser_equal || - token_type == TokenType::is); - } +bool Lexer::isRelationalOperator(TokenType token_type) { + return (token_type == TokenType::equal || token_type == TokenType::not_equal || + token_type == TokenType::greater || token_type == TokenType::greater_equal || + token_type == TokenType::lesser || token_type == TokenType::lesser_equal || + token_type == TokenType::is); +} - bool Lexer::isLogicalOperator(TokenType token_type) { - return (token_type == TokenType::logical_and || token_type == TokenType::logical_or); - } +bool Lexer::isLogicalOperator(TokenType token_type) { + return (token_type == TokenType::logical_and || token_type == TokenType::logical_or); +} - bool Lexer::isArithmeticalOperator(TokenType token_type) { - return (token_type == TokenType::plus || token_type == TokenType::minus || - token_type == TokenType::multiply || - token_type == TokenType::divide); - } +bool Lexer::isArithmeticalOperator(TokenType token_type) { + return (token_type == TokenType::plus || token_type == TokenType::minus || + token_type == TokenType::multiply || + token_type == TokenType::divide); +} - TokenType Lexer::type(const std::string &token) { - // FIXME 'one is evaluated as identifier - if (token == ";") return TokenType::semicolon; - if (token == "+") return TokenType::plus; - if (token == "-") return TokenType::minus; - if (token == "*") return TokenType::multiply; - if (token == "/") return TokenType::divide; - if (token == "(") return TokenType::open_paren; - if (token == ")") return TokenType::close_paren; - if (token == "=") return TokenType::equal; - if (token == "!=" || token == "<>") return TokenType::not_equal; - if (token == ">") return TokenType::greater; - if (token == ">=") return TokenType::greater_equal; - if (token == "<") return TokenType::lesser; - if (token == "<=") return TokenType::lesser_equal; - if (token == "is") return TokenType::is; - if (token == "as") return TokenType::keyword_as; - if (token == "create") return TokenType::keyword_create; - if (token == "drop") return TokenType::keyword_drop; - if (token == "where") return TokenType::keyword_where; - if (token == "order") return TokenType::keyword_order; - if (token == "by") return TokenType::keyword_by; - if (token == "offset") return TokenType::keyword_offset; - if (token == "limit") return TokenType::keyword_limit; - if (token == "asc") return TokenType::keyword_asc; - if (token == "desc") return TokenType::keyword_desc; - if (token == "from") return TokenType::keyword_from; - if (token == "delete") return TokenType::keyword_delete; - if (token == "table") return TokenType::keyword_table; - if (token == "insert") return TokenType::keyword_insert; - if (token == "into") return TokenType::keyword_into; - if (token == "values") return TokenType::keyword_values; - if (token == "select") return TokenType::keyword_select; - if (token == "set") return TokenType::keyword_set; - if (token == "copy") return TokenType::keyword_copy; - if (token == "update") return TokenType::keyword_update; - if (token == "load") return TokenType::keyword_load; - if (token == "save") return TokenType::keyword_save; - if (token == "not") return TokenType::keyword_not; - if (token == "null") return TokenType::keyword_null; - if (token == "integer") return TokenType::keyword_integer; - if (token == "float") return TokenType::keyword_float; - if (token == "varchar") return TokenType::keyword_varchar; - if (token == "date") return TokenType::keyword_date; - if (token == "boolean") return TokenType::keyword_bool; - if (token == "true") return TokenType::keyword_true; - if (token == "false") return TokenType::keyword_false; - if (token == "distinct") return TokenType::keyword_distinct; - if (token == "show") return TokenType::keyword_show; - if (token == "or") return TokenType::logical_or; - if (token == "and") return TokenType::logical_and; - if (token == ",") return TokenType::comma; - if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline; +TokenType Lexer::type(const std::string &token) { + if (token == ";") return TokenType::semicolon; + if (token == "+") return TokenType::plus; + if (token == "-") return TokenType::minus; + if (token == "*") return TokenType::multiply; + if (token == "/") return TokenType::divide; + if (token == "(") return TokenType::open_paren; + if (token == ")") return TokenType::close_paren; + if (token == "=") return TokenType::equal; + if (token == "!=" || token == "<>") return TokenType::not_equal; + if (token == ">") return TokenType::greater; + if (token == ">=") return TokenType::greater_equal; + if (token == "<") return TokenType::lesser; + if (token == "<=") return TokenType::lesser_equal; + if (token == "is") return TokenType::is; + if (token == "as") return TokenType::keyword_as; + if (token == "create") return TokenType::keyword_create; + if (token == "drop") return TokenType::keyword_drop; + if (token == "where") return TokenType::keyword_where; + if (token == "order") return TokenType::keyword_order; + if (token == "by") return TokenType::keyword_by; + if (token == "offset") return TokenType::keyword_offset; + if (token == "limit") return TokenType::keyword_limit; + if (token == "asc") return TokenType::keyword_asc; + if (token == "desc") return TokenType::keyword_desc; + if (token == "from") return TokenType::keyword_from; + if (token == "delete") return TokenType::keyword_delete; + if (token == "table") return TokenType::keyword_table; + if (token == "index") return TokenType::keyword_index; + if (token == "on") return TokenType::keyword_on; + if (token == "insert") return TokenType::keyword_insert; + if (token == "into") return TokenType::keyword_into; + if (token == "values") return TokenType::keyword_values; + if (token == "select") return TokenType::keyword_select; + if (token == "set") return TokenType::keyword_set; + if (token == "copy") return TokenType::keyword_copy; + if (token == "update") return TokenType::keyword_update; + if (token == "load") return TokenType::keyword_load; + if (token == "save") return TokenType::keyword_save; + if (token == "not") return TokenType::keyword_not; + if (token == "null") return TokenType::keyword_null; + if (token == "integer") return TokenType::keyword_integer; + if (token == "float") return TokenType::keyword_float; + if (token == "varchar") return TokenType::keyword_varchar; + if (token == "date") return TokenType::keyword_date; + if (token == "boolean") return TokenType::keyword_bool; + if (token == "true") return TokenType::keyword_true; + if (token == "false") return TokenType::keyword_false; + if (token == "distinct") return TokenType::keyword_distinct; + if (token == "show") return TokenType::keyword_show; + if (token == "or") return TokenType::logical_or; + if (token == "and") return TokenType::logical_and; + if (token == ",") return TokenType::comma; + if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline; - if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) - return TokenType::comment; + if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) + return TokenType::comment; - if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') - return TokenType::string_literal; + if (token.length() >= 2 && token.at(0) == '"') + return (token.at(token.length() - 1) == '"') ? TokenType::string_literal : TokenType::undef; - if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') - return TokenType::string_literal; + if (token.length() >= 2 && token.at(0) == '\'') + return (token.at(token.length() - 1) == '\'') ? TokenType::string_literal : TokenType::undef; - if (std::regex_match(token, k_int_regex)) return TokenType::int_number; - if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number; - if (std::regex_match(token, k_double_regex)) return TokenType::double_number; - if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; + if (std::regex_match(token, k_int_regex)) return TokenType::int_number; + if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number; + if (std::regex_match(token, k_double_regex)) return TokenType::double_number; + if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; - return TokenType::undef; - } + return TokenType::undef; +} - std::string Lexer::stringLiteral(std::string token) { - // remove ' or " from the literal ends - bool replace = token[0] == '\'' && token[token.size() - 1] == '\''; +std::string Lexer::stringLiteral(std::string token) { + // remove ' or " from the literal ends + bool replace = token[0] == '\'' && token[token.size() - 1] == '\''; - std::string str = token.substr(1, token.size() - 2); - if (!replace) { - return str; - } - std::string out; - out.reserve(str.size()); + std::string str = token.substr(1, token.size() - 2); + if (!replace) { + return str; + } + std::string out; + out.reserve(str.size()); - for (std::string::size_type i = 0; i < str.size(); ++i) { - if (str[i] == '\'' && i < str.size() - 1) { - if (str[i + 1] == '\'') { - out.append(1, '\''); - i++; - } else { - out.append(1, str[i]); - } - } else if (str[i] == '\\' && i < str.size() - 1) { - if (str[i + 1] == 'n') { - out.append(1, '\n'); - i++; - } else if (str[i + 1] == 't') { - out.append(1, '\t'); - i++; - } else { - out.append(1, str[i]); - } - } else { - out.append(1, str[i]); - } - } - return out; - } + for (std::string::size_type i = 0; i < str.size(); ++i) { + if (str[i] == '\'' && i < str.size() - 1) { + if (str[i + 1] == '\'') { + out.append(1, '\''); + i++; + } else { + out.append(1, str[i]); + } + } else if (str[i] == '\\' && i < str.size() - 1) { + if (str[i + 1] == 'n') { + out.append(1, '\n'); + i++; + } else if (str[i + 1] == 't') { + out.append(1, '\t'); + i++; + } else { + out.append(1, str[i]); + } + } else { + out.append(1, str[i]); + } + } + return out; +} - std::string Lexer::typeToString(TokenType token_type) { - switch (token_type) { - case TokenType::undef: return "undef"; - case TokenType::identifier: return "identifier"; - case TokenType::plus: return "+"; - case TokenType::minus: return "-"; - case TokenType::multiply: return "*"; - case TokenType::divide: return "/"; - case TokenType::equal: return "=="; - case TokenType::not_equal: return "!="; - case TokenType::greater: return ">"; - case TokenType::greater_equal: return ">="; - case TokenType::lesser: return "<"; - case TokenType::lesser_equal: return "<="; - case TokenType::is: return "is"; - case TokenType::keyword_as: return "as"; - case TokenType::keyword_create: return "create"; - case TokenType::keyword_drop: return "drop"; - case TokenType::keyword_where: return "where"; - case TokenType::keyword_order: return "order"; - case TokenType::keyword_by: return "by"; - case TokenType::keyword_offset: return "offset"; - case TokenType::keyword_limit: return "limit"; - case TokenType::keyword_asc: return "asc"; - case TokenType::keyword_desc: return "desc"; - case TokenType::keyword_table: return "table"; - case TokenType::keyword_into: return "into"; - case TokenType::keyword_values: return "values"; - case TokenType::keyword_select: return "select"; - case TokenType::keyword_set: return "set"; - case TokenType::keyword_copy: return "copy"; - case TokenType::keyword_update: return "update"; - case TokenType::keyword_load: return "load"; - case TokenType::keyword_save: return "save"; - case TokenType::keyword_not: return "not"; - case TokenType::keyword_null: return "null"; - case TokenType::keyword_integer: return "integer"; - case TokenType::keyword_float: return "float"; - case TokenType::keyword_varchar: return "varchar"; - case TokenType::keyword_date: return "date"; - case TokenType::keyword_bool: return "boolean"; - case TokenType::keyword_true: return "true"; - case TokenType::keyword_false: return "false"; - case TokenType::keyword_distinct: return "distinct"; - case TokenType::keyword_show: return "show"; - case TokenType::int_number: return "int number"; - case TokenType::double_number: return "double number"; - case TokenType::string_literal: return "string literal"; - case TokenType::open_paren: return "("; - case TokenType::close_paren: return ")"; - case TokenType::logical_and: return "and"; - case TokenType::logical_or: return "or"; - case TokenType::semicolon: return ";"; - case TokenType::comma: return ","; - case TokenType::newline: return "newline"; - case TokenType::comment: return "comment"; - case TokenType::eof: return "eof"; - default: - return "FIXME, unknown token type"; - } - } +std::string Lexer::typeToString(TokenType token_type) { + switch (token_type) { + case TokenType::undef: return "undef"; + case TokenType::identifier: return "identifier"; + case TokenType::plus: return "+"; + case TokenType::minus: return "-"; + case TokenType::multiply: return "*"; + case TokenType::divide: return "/"; + case TokenType::equal: return "=="; + case TokenType::not_equal: return "!="; + case TokenType::greater: return ">"; + case TokenType::greater_equal: return ">="; + case TokenType::lesser: return "<"; + case TokenType::lesser_equal: return "<="; + case TokenType::is: return "is"; + case TokenType::keyword_as: return "as"; + case TokenType::keyword_create: return "create"; + case TokenType::keyword_drop: return "drop"; + case TokenType::keyword_where: return "where"; + case TokenType::keyword_order: return "order"; + case TokenType::keyword_by: return "by"; + case TokenType::keyword_offset: return "offset"; + case TokenType::keyword_limit: return "limit"; + case TokenType::keyword_asc: return "asc"; + case TokenType::keyword_desc: return "desc"; + case TokenType::keyword_table: return "table"; + case TokenType::keyword_index: return "index"; + case TokenType::keyword_on: return "on"; + case TokenType::keyword_into: return "into"; + case TokenType::keyword_values: return "values"; + case TokenType::keyword_select: return "select"; + case TokenType::keyword_set: return "set"; + case TokenType::keyword_copy: return "copy"; + case TokenType::keyword_update: return "update"; + case TokenType::keyword_load: return "load"; + case TokenType::keyword_save: return "save"; + case TokenType::keyword_not: return "not"; + case TokenType::keyword_null: return "null"; + case TokenType::keyword_integer: return "integer"; + case TokenType::keyword_float: return "float"; + case TokenType::keyword_varchar: return "varchar"; + case TokenType::keyword_date: return "date"; + case TokenType::keyword_bool: return "boolean"; + case TokenType::keyword_true: return "true"; + case TokenType::keyword_false: return "false"; + case TokenType::keyword_distinct: return "distinct"; + case TokenType::keyword_show: return "show"; + case TokenType::int_number: return "int number"; + case TokenType::double_number: return "double number"; + case TokenType::string_literal: return "string literal"; + case TokenType::open_paren: return "("; + case TokenType::close_paren: return ")"; + case TokenType::logical_and: return "and"; + case TokenType::logical_or: return "or"; + case TokenType::semicolon: return ";"; + case TokenType::comma: return ","; + case TokenType::newline: return "newline"; + case TokenType::comment: return "comment"; + case TokenType::eof: return "eof"; + default: + return "FIXME, unknown token type"; + } +} -} \ No newline at end of file +} // namespace usql \ No newline at end of file diff --git a/usql/lexer.h b/usql/lexer.h index 09399b9..77342a7 100644 --- a/usql/lexer.h +++ b/usql/lexer.h @@ -25,6 +25,8 @@ namespace usql { keyword_create, keyword_drop, keyword_table, + keyword_index, + keyword_on, keyword_where, keyword_order, keyword_by, diff --git a/usql/main.cpp b/usql/main.cpp deleted file mode 100644 index 3e65234..0000000 --- a/usql/main.cpp +++ /dev/null @@ -1,165 +0,0 @@ -#include "parser.h" -#include "usql.h" - -#include "linenoise.h" - -// https://dev.to/joaoh82/what-would-sqlite-look-like-if-written-in-rust-part-1-2np4 - -using namespace std::chrono; - -const std::vector commands { - "select", "create", "load", "table" -}; - -std::string get_history_file_dir() { - std::string file{"/.usql_history.txt"}; - const char *t = std::getenv("HOME"); - - if (t == nullptr) return "/tmp/" + file; - else return std::string{t} + "/" + file; -} - - -size_t last_token_index( std::string str ) { - // remove trailing white space - while( !str.empty() && std::isspace( str.back() ) ) str.pop_back() ; - - // locate the last white space - return str.find_last_of( "() \t\n" ) ; -} - - -void completion(const char *buf, linenoiseCompletions *lc) { - if (buf != nullptr) { - std::string str{buf}; - - const auto pos = last_token_index(str); - if (pos == std::string::npos) - return; // cannot find what to complete - - std::string token = str.substr(pos + 1); - std::string begining = str.substr(0, pos + 1); - - for (const auto & command : commands) { - if (command.find(token) == 0) { - std::string completion_string = begining + command; - linenoiseAddCompletion(lc, completion_string.c_str()); - } - } - } -} - -char *hints(const char *buf, int *color, int *bold) { - // if (!strcasecmp(buf,"hello")) { - // *color = 35; - // *bold = 0; - // return " World"; - // } - return nullptr; -} - - -void setup_linenoise() { - std::string history_file = get_history_file_dir(); - - linenoiseHistorySetMaxLen(500); - linenoiseSetCompletionCallback(completion); - linenoiseSetHintsCallback(hints); - linenoiseSetMultiLine(1); - linenoiseHistoryLoad(history_file.c_str()); -} - -void linenoise_line_read(char *line) { - linenoiseHistoryAdd(line); -} - -void close_linenoise() { - std::string history_file = get_history_file_dir(); - - linenoiseHistorySave(history_file.c_str()); -} - - -void repl() { - std::string code; - std::string input; - - setup_linenoise(); - - usql::USql uSql{}; - - - while (true) { - char *line = linenoise(">>> "); - if (line == nullptr) break; - - linenoise_line_read(line); - - input = std::string(line); - - if (input == "!quit" || input == "!q") - break; - else if (input == "!export" || input == "!x") { - std::cout << "File to export to: "; - std::getline(std::cin, input); - - //write_file_contents(input, code); - } else if (!input.empty()) { - try { - time_point start_time = high_resolution_clock::now(); - auto result = uSql.execute(input); - time_point end_time = high_resolution_clock::now(); - - std::cout << input << std::endl; - std::cout << "run time: " << duration_cast(end_time - start_time).count() << " ms " << std::endl <print(); - - code += input + "\n"; - } catch (std::exception &e) { - std::cerr << e.what() << std::endl; - } - } - } - - close_linenoise(); -} - -void debug() { - std::vector sql_commands { - "set 'DATE_FORMAT' = '%Y-%m-%d' ", - "create table history_earnings_dates (datetime date, symbol varchar(8), time varchar(18), title varchar(256))", - "insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')", - "insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')", - "delete from history_earnings_dates where symbol='BABA' and datetime=to_date('2021-11-04', '%Y-%m-%d')", - "select * from history_earnings_dates" - }; - - usql::USql uSql{}; - - for (const auto &command : sql_commands) { - time_point start_time = high_resolution_clock::now(); - auto result = uSql.execute(command); - time_point end_time = high_resolution_clock::now(); - - std::cout << command << std::endl; - std::cout << "run time: " << duration_cast(end_time - start_time).count() << " ms " - << std::endl << std::endl; - - result->print(); - } - - std::cout << std::endl << std::endl; -} - - -int main(int argc, char *argv[]) { - -#ifdef NDEBUG - repl(); -#else - debug(); -#endif - - return 0; -} diff --git a/usql/parser.cpp b/usql/parser.cpp index 1109241..d93b775 100644 --- a/usql/parser.cpp +++ b/usql/parser.cpp @@ -15,6 +15,8 @@ namespace usql { if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_table) return parse_create_table(); + if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_index) + return parse_create_index(); if (m_lexer.tokenType() == TokenType::keyword_drop) return parse_drop_table(); @@ -302,6 +304,18 @@ namespace usql { return std::make_unique(table_name, cols_names, std::move(values), std::move(where_node)); } + std::unique_ptr Parser::parse_create_index() { + m_lexer.skipToken(TokenType::keyword_create); + m_lexer.skipToken(TokenType::keyword_index); + std::string index_name = m_lexer.consumeToken(TokenType::identifier).token_string; + m_lexer.skipToken(TokenType::keyword_on); + std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string; + m_lexer.skipToken(TokenType::open_paren); + std::string column_name = m_lexer.consumeToken(TokenType::identifier).token_string; + m_lexer.skipToken(TokenType::close_paren); + + return std::make_unique(index_name, table_name, column_name); + } std::vector Parser::parse_order_by_clause() { std::vector order_cols; @@ -331,7 +345,7 @@ namespace usql { order_cols.emplace_back(cspec_token, asc); break; default: - throw Exception("order by column can be either column index or identifier"); + throw Exception("order by column can be either column m_index or identifier"); } m_lexer.skipTokenOptional(TokenType::comma); diff --git a/usql/parser.h b/usql/parser.h index 15b8d94..a18505c 100644 --- a/usql/parser.h +++ b/usql/parser.h @@ -6,21 +6,22 @@ #include "settings.h" #include +#include #include static const int FUNCTION_CALL = -1; namespace usql { - enum class ColumnType { +enum class ColumnType { integer_type, float_type, varchar_type, date_type, bool_type - }; +}; - enum class NodeType { +enum class NodeType { true_node, null_value, int_value, @@ -39,173 +40,232 @@ namespace usql { load_table, save_table, drop_table, + create_index, set, show, database_value, offset_limit, column_order, - column_value, function, column_def, error - }; +}; - struct Node { +struct Node { NodeType node_type; explicit Node(const NodeType type) : node_type(type) {} virtual ~Node() = default; - }; + + virtual void dump() const { + std::cout << "type: Node" << std::endl; + } +}; - struct ColOrderNode : Node { +struct ColOrderNode : Node { std::string col_name; int col_index; bool ascending; - ColOrderNode(const std::string& name, bool asc) : Node(NodeType::column_order), col_name(name), col_index(-1), ascending(asc) {} - ColOrderNode(int index, bool asc) : Node(NodeType::database_value), col_name(""), col_index(index), ascending(asc) {} - }; + ColOrderNode(std::string name, bool asc) : Node(NodeType::column_order), col_name(std::move(name)), col_index(-1), ascending(asc) {} + ColOrderNode(int index, bool asc) : Node(NodeType::database_value), col_index(index), ascending(asc) {} + + void dump() const override { + std::cout << "type: ColOrderNode, col_name: " << col_name << ", col_index: " << col_index << ", asc: " << ascending << std::endl; + } +}; - - struct OffsetLimitNode : Node { + +struct OffsetLimitNode : Node { int offset; int limit; OffsetLimitNode(int off, int lim) : Node(NodeType::offset_limit), offset(off), limit(lim) {} - }; + + void dump() const override { + std::cout << "type: OffsetLimitNode, offset: " << offset << ", limit: " << limit << std::endl; + } +}; - struct SelectColNode : Node { - std::unique_ptr value; - std::string name; +struct SelectColNode : Node { + std::unique_ptr value; + std::string name; - SelectColNode(std::unique_ptr column, const std::string &alias) : - Node(NodeType::database_value), value(std::move(column)), name(alias) {} - }; + SelectColNode(std::unique_ptr column, std::string alias) : + Node(NodeType::database_value), value(std::move(column)), name(std::move(alias)) {} - struct ColDefNode : Node { + void dump() const override { + std::cout << "type: SelectColNode, name:" << name << "value:" << std::endl; + value->dump(); + } +}; + +struct ColDefNode : Node { std::string name; ColumnType type; int order; int length; bool null; - ColDefNode(const std::string& col_name, ColumnType col_type, int col_order, int col_len, bool nullable) : - Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len), - null(nullable) {} - }; + ColDefNode(std::string col_name, ColumnType col_type, int col_order, int col_len, bool nullable) : + Node(NodeType::column_def), name(std::move(col_name)), type(col_type), order(col_order), length(col_len), + null(nullable) {} - struct FunctionNode : Node { - std::string function; // TODO use enum - std::vector> params; + void dump() const override { + std::cout << "type: ColDefNode, name: " << name << ", type: " << (int)type << " TODO add more" << std::endl; + } +}; - FunctionNode(const std::string& func_name, std::vector> pars) : - Node(NodeType::function), function(func_name), params(std::move(pars)) {} - }; +struct FunctionNode : Node { + std::string function; // TODO use enum + std::vector> params; - struct TrueNode : Node { + FunctionNode(std::string func_name, std::vector> pars) : + Node(NodeType::function), function(std::move(func_name)), params(std::move(pars)) {} + + void dump() const override { + std::cout << "type: FunctionNode, function: " << function << " TODO add more" << std::endl; + } +}; + +struct TrueNode : Node { TrueNode() : Node(NodeType::true_node) {} - }; - struct ValueNode : Node { + void dump() const override { + std::cout << "type: TrueNode," << std::endl; + } +}; + +struct ValueNode : Node { explicit ValueNode(NodeType type) : Node(type) {} - virtual bool isNull() { return false; } - virtual long getIntegerValue() = 0; - virtual double getDoubleValue() = 0; - virtual std::string getStringValue() = 0; - virtual long getDateValue() = 0; - virtual bool getBooleanValue() = 0; + virtual bool isNull() const { return false; } + virtual long getIntegerValue() const = 0; + virtual double getDoubleValue() const = 0; + virtual std::string getStringValue() const = 0; + virtual long getDateValue() const = 0; + virtual bool getBooleanValue() const = 0; - virtual ~ValueNode() = default; - }; + ~ValueNode() override = default; +}; - struct NullValueNode : ValueNode { +struct NullValueNode : ValueNode { - NullValueNode() : ValueNode(NodeType::null_value) {} + NullValueNode() : ValueNode(NodeType::null_value) {} - bool isNull() override { return true; } + bool isNull() const override { return true; } - long getIntegerValue() override { throw Exception("getIntegerValue not supported on NullValueNode"); }; - double getDoubleValue() override { throw Exception("getDoubleValue not supported on NullValueNode"); }; - std::string getStringValue() override { throw Exception("getStringValue not supported on NullValueNode"); }; - long getDateValue() override { throw Exception("getDateValue not supported on NullValueNode"); }; - bool getBooleanValue() override { throw Exception("getBooleanValue not supported on NullValueNode"); }; - }; + long getIntegerValue() const override { throw Exception("getIntegerValue not supported on NullValueNode"); }; + double getDoubleValue() const override { throw Exception("getDoubleValue not supported on NullValueNode"); }; + std::string getStringValue() const override { throw Exception("getStringValue not supported on NullValueNode"); }; + long getDateValue() const override { throw Exception("getDateValue not supported on NullValueNode"); }; + bool getBooleanValue() const override { throw Exception("getBooleanValue not supported on NullValueNode"); }; - struct IntValueNode : ValueNode { + void dump() const override { + std::cout << "type: NullValueNode," << std::endl; + } +}; + +struct IntValueNode : ValueNode { long value; explicit IntValueNode(long value) : ValueNode(NodeType::int_value), value(value) {} - long getIntegerValue() override { return value; }; - double getDoubleValue() override { return (double) value; }; - std::string getStringValue() override { return Settings::int_to_string(value); } - long getDateValue() override { return value; }; - bool getBooleanValue() override { return value != 0; }; - }; + long getIntegerValue() const override { return value; }; + double getDoubleValue() const override { return (double) value; }; + std::string getStringValue() const override { return Settings::long_to_string(value); } + long getDateValue() const override { return value; }; + bool getBooleanValue() const override { return value != 0; }; - struct DoubleValueNode : ValueNode { + void dump() const override { + std::cout << "type: IntValueNode, value: " << value << std::endl; + } +}; + +struct DoubleValueNode : ValueNode { double value; explicit DoubleValueNode(double value) : ValueNode(NodeType::float_value), value(value) {} - long getIntegerValue() override { return (long) value; }; - double getDoubleValue() override { return value; }; - std::string getStringValue() override { return Settings::double_to_string(value); } - long getDateValue() override { return (long) value; }; - bool getBooleanValue() override { return value != 0.0; }; - }; + long getIntegerValue() const override { return (long) value; }; + double getDoubleValue() const override { return value; }; + std::string getStringValue() const override { return Settings::double_to_string(value); } + long getDateValue() const override { return (long) value; }; + bool getBooleanValue() const override { return value != 0.0; }; - struct StringValueNode : ValueNode { + void dump() const override { + std::cout << "type: DoubleValueNode, value: " << value << std::endl; + } +}; + +struct StringValueNode : ValueNode { std::string value; - explicit StringValueNode(const std::string &value) : ValueNode(NodeType::string_value), value(value) {} + explicit StringValueNode(std::string value) : ValueNode(NodeType::string_value), value(std::move(value)) {} - long getIntegerValue() override { return Settings::string_to_int(value); }; - double getDoubleValue() override { return Settings::string_to_double(value); }; - std::string getStringValue() override { return value; }; - long getDateValue() override { return Settings::string_to_date(value); }; - bool getBooleanValue() override { return Settings::string_to_bool(value); }; - }; + long getIntegerValue() const override { return Settings::string_to_long(value); }; + double getDoubleValue() const override { return Settings::string_to_double(value); }; + std::string getStringValue() const override { return value; }; + long getDateValue() const override { return Settings::string_to_date(value); }; + bool getBooleanValue() const override { return Settings::string_to_bool(value); }; - struct BooleanValueNode : ValueNode { - bool value; + void dump() const override { + std::cout << "type: StringValueNode, value: " << value << std::endl; + } +}; - explicit BooleanValueNode(bool value) : ValueNode(NodeType::bool_value), value(value) {} +struct BooleanValueNode : ValueNode { + bool value; - long getIntegerValue() override { return (long) value; }; - double getDoubleValue() override { return (double) value; }; - std::string getStringValue() override { return Settings::bool_to_string(value); } - long getDateValue() override { return (long) value; }; - bool getBooleanValue() override { return value; }; - }; + explicit BooleanValueNode(bool value) : ValueNode(NodeType::bool_value), value(value) {} + + long getIntegerValue() const override { return (long) value; }; + double getDoubleValue() const override { return (double) value; }; + std::string getStringValue() const override { return Settings::bool_to_string(value); } + long getDateValue() const override { return (long) value; }; + bool getBooleanValue() const override { return value; }; + + void dump() const override { + std::cout << "type: BooleanValueNode, value: " << value << std::endl; + } +}; - struct DatabaseValueNode : Node { +struct DatabaseValueNode : Node { std::string col_name; - explicit DatabaseValueNode(const std::string &name) : Node(NodeType::database_value), col_name(name) {} - }; + explicit DatabaseValueNode(std::string name) : Node(NodeType::database_value), col_name(std::move(name)) {} - enum class LogicalOperatorType { + void dump() const override { + std::cout << "type: DatabaseValueNode, col_name: " << col_name << std::endl; + } +}; + +enum class LogicalOperatorType { and_operator, - or_operator, - not_operator - }; + or_operator + // not_operator +}; - struct LogicalOperatorNode : Node { +struct LogicalOperatorNode : Node { LogicalOperatorType op; std::unique_ptr left; std::unique_ptr right; LogicalOperatorNode(LogicalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - }; + Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - enum class RelationalOperatorType { + void dump() const override { + std::cout << "type: LogicalOperatorNode, op: " << (int)op << std::endl; + left->dump(); + right->dump(); + } +}; + +enum class RelationalOperatorType { equal, greater, greater_equal, @@ -215,54 +275,74 @@ namespace usql { is, is_not // like - }; +}; - struct RelationalOperatorNode : Node { +struct RelationalOperatorNode : Node { RelationalOperatorType op; std::unique_ptr left; std::unique_ptr right; RelationalOperatorNode(RelationalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - }; + Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - enum class ArithmeticalOperatorType { + void dump() const override { + std::cout << "type: RelationalOperatorNode, op: " << (int)op << std::endl; + left->dump(); + right->dump(); + } +}; + +enum class ArithmeticalOperatorType { copy_value, // just copy lef value and do nothing with it plus_operator, minus_operator, multiply_operator, divide_operator - }; +}; - struct ArithmeticalOperatorNode : Node { +struct ArithmeticalOperatorNode : Node { ArithmeticalOperatorType op; std::unique_ptr left; std::unique_ptr right; ArithmeticalOperatorNode(ArithmeticalOperatorType op, std::unique_ptr left, std::unique_ptr right) : - Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - }; + Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {}; - struct CreateTableNode : Node { + void dump() const override { + std::cout << "type: ArithmeticalOperatorNode, op: " << (int)op << std::endl; + left->dump(); + right->dump(); + } +}; + +struct CreateTableNode : Node { std::string table_name; std::vector cols_defs; - CreateTableNode(const std::string& name, std::vector defs) : - Node(NodeType::create_table), table_name(name), cols_defs(std::move(defs)) {} - }; + CreateTableNode(std::string name, std::vector defs) : + Node(NodeType::create_table), table_name(std::move(name)), cols_defs(std::move(defs)) {} - struct InsertIntoTableNode : Node { + void dump() const override { + std::cout << "type: CreateTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + } +}; + +struct InsertIntoTableNode : Node { std::string table_name; std::vector cols_names; std::vector> cols_values; - InsertIntoTableNode(const std::string& name, std::vector names, std::vector> values) : - Node(NodeType::insert_into), table_name(name), cols_names(std::move(names)), cols_values(std::move(values)) {} - }; + InsertIntoTableNode(std::string name, std::vector names, std::vector> values) : + Node(NodeType::insert_into), table_name(std::move(name)), cols_names(std::move(names)), cols_values(std::move(values)) {} - struct SelectFromTableNode : Node { + void dump() const override { + std::cout << "type: InsertIntoTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + } +}; + +struct SelectFromTableNode : Node { std::string table_name; std::unique_ptr> cols_names; std::unique_ptr where; @@ -271,84 +351,134 @@ namespace usql { bool distinct; SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause, std::vector orderby, OffsetLimitNode offlim, bool distinct_): - Node(NodeType::select_from), table_name(std::move(name)), cols_names(std::move(names)), where(std::move(where_clause)), order_by(std::move(orderby)), offset_limit(offlim), distinct(distinct_) {} - }; + Node(NodeType::select_from), table_name(std::move(name)), cols_names(std::move(names)), where(std::move(where_clause)), order_by(std::move(orderby)), offset_limit(std::move(offlim)), distinct(distinct_) {} - struct CreateTableAsSelectNode : Node { - std::string table_name; - std::unique_ptr select_table; + void dump() const override { + std::cout << "type: SelectFromTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + where->dump(); + } +}; - CreateTableAsSelectNode(const std::string& name, std::unique_ptr table) : - Node(NodeType::create_table_as_select), table_name(name), select_table(std::move(table)) {} - }; +struct CreateTableAsSelectNode : Node { + std::string table_name; + std::unique_ptr select_table; - struct UpdateTableNode : Node { + CreateTableAsSelectNode(std::string name, std::unique_ptr table) : + Node(NodeType::create_table_as_select), table_name(std::move(name)), select_table(std::move(table)) {} + + void dump() const override { + std::cout << "type: CreateTableAsSelectNode, table_name: " << table_name << std::endl; + select_table->dump(); + } +}; + +struct UpdateTableNode : Node { std::string table_name; std::vector cols_names; std::vector> values; std::unique_ptr where; - UpdateTableNode(const std::string &name, std::vector names, std::vector> vals, + UpdateTableNode(std::string name, std::vector names, std::vector> vals, std::unique_ptr where_clause) : - Node(NodeType::update_table), table_name(name), cols_names(names), values(std::move(vals)), - where(std::move(where_clause)) {} - }; + Node(NodeType::update_table), table_name(std::move(name)), cols_names(std::move(names)), values(std::move(vals)), + where(std::move(where_clause)) {} - struct LoadIntoTableNode : Node { + void dump() const override { + std::cout << "type: UpdateTableNode, table_name: " << table_name << "TODO complete me" << std::endl; + where->dump(); + } +}; + +struct LoadIntoTableNode : Node { std::string table_name; std::string filename; - LoadIntoTableNode(const std::string& name, const std::string &file) : - Node(NodeType::load_table), table_name(name), filename(file) {} - }; + LoadIntoTableNode(std::string name, std::string file) : + Node(NodeType::load_table), table_name(std::move(name)), filename(std::move(file)) {} - struct SaveTableNode : Node { - std::string table_name; - std::string filename; + void dump() const override { + std::cout << "type: LoadIntoTableNode, table_name: " << table_name << ", filename" << filename << std::endl; + } +}; - SaveTableNode(const std::string& name, const std::string &file) : - Node(NodeType::save_table), table_name(name), filename(file) {} - }; +struct SaveTableNode : Node { + std::string table_name; + std::string filename; - struct DropTableNode : Node { - std::string table_name; + SaveTableNode(std::string name, std::string file) : + Node(NodeType::save_table), table_name(std::move(name)), filename(std::move(file)) {} - explicit DropTableNode(const std::string& name) : Node(NodeType::drop_table), table_name(name) {} - }; + void dump() const override { + std::cout << "type: SaveTableNode, table_name: " << table_name << ", filename" << filename << std::endl; + } +}; - struct DeleteFromTableNode : Node { +struct DropTableNode : Node { + std::string table_name; + + explicit DropTableNode(std::string name) : Node(NodeType::drop_table), table_name(std::move(name)) {} + + void dump() const override { + std::cout << "type: SelectFromTableNode, table_name: " << table_name << std::endl; + } +}; + +struct DeleteFromTableNode : Node { std::string table_name; std::unique_ptr where; - DeleteFromTableNode(const std::string& name, std::unique_ptr where_clause) : - Node(NodeType::delete_from), table_name(name), where(std::move(where_clause)) {} - }; + DeleteFromTableNode(std::string name, std::unique_ptr where_clause) : + Node(NodeType::delete_from), table_name(std::move(name)), where(std::move(where_clause)) {} - struct SetNode : Node { + void dump() const override { + std::cout << "type: DeleteFromTableNode, table_name: " << table_name << std::endl; + where->dump(); + } +}; + +struct SetNode : Node { std::string name; std::string value; - SetNode(const std::string& name_, const std::string& value_) : - Node(NodeType::set), name(name_), value(value_) {} - }; + SetNode(std::string node_name, std::string node_value) : + Node(NodeType::set), name(std::move(node_name)), value(std::move(node_value)) {} - struct ShowNode : Node { + void dump() const override { + std::cout << "type: SetNode, name: " << name << ", value: " << value << std::endl; + } +}; + +struct ShowNode : Node { std::string name; - explicit ShowNode(const std::string& name_) : Node(NodeType::show), name(name_) {} - }; + explicit ShowNode(std::string node_name) : Node(NodeType::show), name(std::move(node_name)) {} + void dump() const override { + std::cout << "type: ShowNode, name: " << name << std::endl; + } +}; +struct CreateIndexNode : Node { + std::string index_name; + std::string table_name; + std::string column_name; - class Parser { - private: + CreateIndexNode(std::string idx_name, std::string tbl_name, std::string col_name) : + Node(NodeType::create_index), index_name(std::move(idx_name)), table_name(std::move(tbl_name)), column_name(std::move(col_name)) {} - public: + void dump() const override { + std::cout << "type: CreateIndexNode, table_name: " << table_name << ", index_name: " << index_name << ", column_name: " << column_name << std::endl; + } +}; + +class Parser { +private: +public: Parser(); std::unique_ptr parse(const std::string &code); - private: +private: std::unique_ptr parse_create_table(); std::unique_ptr parse_drop_table(); std::unique_ptr parse_load_table(); @@ -360,6 +490,7 @@ namespace usql { std::unique_ptr parse_select_from_table(); std::unique_ptr parse_delete_from_table(); std::unique_ptr parse_update_table(); + std::unique_ptr parse_create_index(); std::vector parse_order_by_clause(); OffsetLimitNode parse_offset_limit_clause(); @@ -373,8 +504,8 @@ namespace usql { LogicalOperatorType parse_logical_operator(); ArithmeticalOperatorType parse_arithmetical_operator(); - private: +private: Lexer m_lexer; - }; +}; } // namespace diff --git a/usql/row.cpp b/usql/row.cpp index 7557d0c..930f172 100644 --- a/usql/row.cpp +++ b/usql/row.cpp @@ -4,16 +4,16 @@ namespace usql { -int ColNullValue::compare(ColValue &other) { +int ColNullValue::compare(ColValue &other) const { return other.isNull() ? 0 : -1; // null goes to end } -int ColIntegerValue::compare(ColValue &other) { - long r = m_integer - other.getIntValue(); +int ColIntegerValue::compare(ColValue &other) const { + long r = m_integer - other.getIntegerValue(); return other.isNull() ? 1 : r > 0 ? 1 : r == 0 ? 0 : -1; } -int ColDoubleValue::compare(ColValue &other) { +int ColDoubleValue::compare(ColValue &other) const { if (other.isNull()) return 1; // null goes to end double c = m_double - other.getDoubleValue(); @@ -25,22 +25,34 @@ ColStringValue & ColStringValue::operator=(ColStringValue other) { return *this; } -int ColStringValue::compare(ColValue &other) { +int ColStringValue::compare(ColValue &other) const { return other.isNull() ? 1 : m_string->compare(other.getStringValue()); // null goes to end } -int ColDateValue::compare(ColValue &other) { - long r = m_date - other.getIntValue(); +std::string ColStringValue::getCsvStringValue() const { + auto src_str = getStringValue(); + std::string toSearch{"\""}, replaceStr{"\\\""}; + + size_t pos = src_str.find(toSearch); + while(pos != std::string::npos) { + src_str.replace(pos, toSearch.size(), replaceStr); + pos =src_str.find(toSearch, pos + replaceStr.size()); + } + return src_str; +} + +int ColDateValue::compare(ColValue &other) const { + long r = m_date - other.getIntegerValue(); return other.isNull() ? 1 : r > 0 ? 1 : r == 0 ? 0 : -1; } -int ColBooleanValue::compare(ColValue &other) { +int ColBooleanValue::compare(ColValue &other) const { if (other.isNull()) return 1; // null goes to end return m_bool == other.getBoolValue() ? 0 : m_bool && !other.getBoolValue() ? -1 : 1; // true first } -Row::Row(const Row &other) : m_columns(other.m_columns.size()) { +Row::Row(const Row &other) : m_columns(other.m_columns.size()), m_visible(other.m_visible) { for (int i = 0; i < other.m_columns.size(); i++) { if (other[i].isNull()) continue; // for null NOP @@ -48,7 +60,7 @@ Row::Row(const Row &other) : m_columns(other.m_columns.size()) { ColumnType col_type = other[i].getColType(); switch (col_type) { case ColumnType::integer_type : - setIntColumnValue(i, other[i].getIntValue()); + setIntColumnValue(i, other[i].getIntegerValue()); break; case ColumnType::float_type : setFloatColumnValue(i, other[i].getDoubleValue()); @@ -110,7 +122,7 @@ void Row::setBoolColumnValue(int col_index, const std::string &value) { void Row::setColumnValue(ColDefNode *col_def, ColValue &col_value) { if (!col_value.isNull()) { if (col_def->type == ColumnType::integer_type) - setIntColumnValue(col_def->order, col_value.getIntValue()); + setIntColumnValue(col_def->order, col_value.getIntegerValue()); else if (col_def->type == ColumnType::float_type) setFloatColumnValue(col_def->order, col_value.getDoubleValue()); else if (col_def->type == ColumnType::varchar_type) diff --git a/usql/row.h b/usql/row.h index 046119a..81d9642 100644 --- a/usql/row.h +++ b/usql/row.h @@ -9,135 +9,136 @@ namespace usql { - struct ColValue { - virtual bool isNull() { return false; }; - virtual ColumnType getColType() = 0; - virtual long getIntValue() = 0; - virtual double getDoubleValue() = 0; - virtual std::string getStringValue() = 0; - virtual long getDateValue() = 0; - virtual bool getBoolValue() = 0; +struct ColValue { + virtual bool isNull() const { return false; }; + virtual ColumnType getColType() const = 0; + virtual long getIntegerValue() const = 0; + virtual double getDoubleValue() const = 0; + virtual std::string getStringValue() const = 0; + virtual std::string getCsvStringValue() const { return getStringValue(); }; + virtual long getDateValue() const = 0; + virtual bool getBoolValue() const = 0; - virtual int compare(ColValue &other) = 0; + virtual int compare(ColValue &other) const = 0; virtual ~ColValue() = default; - - }; +}; - struct ColNullValue : ColValue { - bool isNull() override { return true; }; - ColumnType getColType() override { throw Exception("getColType not supported on ColNullValue"); } - long getIntValue() override { throw Exception("getIntValue not supported on ColNullValue"); }; - double getDoubleValue() override { throw Exception("getDoubleValue not supported on ColNullValue"); }; - std::string getStringValue() override { return "null"; }; - long getDateValue() override { throw Exception("getDateValue not supported on ColNullValue"); }; - bool getBoolValue() override { throw Exception("getDateValue not supported on ColNullValue"); }; +struct ColNullValue : ColValue { + bool isNull() const override { return true; }; + ColumnType getColType() const override { throw Exception("getColType not supported on ColNullValue"); } + long getIntegerValue() const override { throw Exception("getIntegerValue not supported on ColNullValue"); }; + double getDoubleValue() const override { throw Exception("getDoubleValue not supported on ColNullValue"); }; + std::string getStringValue() const override { return "null"; }; + long getDateValue() const override { throw Exception("getDateValue not supported on ColNullValue"); }; + bool getBoolValue() const override { throw Exception("getDateValue not supported on ColNullValue"); }; - int compare(ColValue &other) override; + int compare(ColValue &other) const override; - virtual ~ColNullValue() = default; - }; + ~ColNullValue() override = default; +}; - struct ColIntegerValue : ColValue { +struct ColIntegerValue : ColValue { explicit ColIntegerValue(long value) : m_integer(value) {}; ColIntegerValue(const ColIntegerValue &other) : m_integer(other.m_integer) {}; - ColumnType getColType() override { return ColumnType::integer_type; }; - long getIntValue() override { return m_integer; }; - double getDoubleValue() override { return (double) m_integer; }; - std::string getStringValue() override { return std::to_string(m_integer); }; - long getDateValue() override { return m_integer; }; - bool getBoolValue() override { throw Exception("Not supported on ColIntegerValue"); }; + ColumnType getColType() const override { return ColumnType::integer_type; }; + long getIntegerValue() const override { return m_integer; }; + double getDoubleValue() const override { return (double) m_integer; }; + std::string getStringValue() const override { return std::to_string(m_integer); }; + long getDateValue() const override { return m_integer; }; + bool getBoolValue() const override { throw Exception("Not supported on ColIntegerValue"); }; - int compare(ColValue &other) override; + int compare(ColValue &other) const override; + + ~ColIntegerValue() override = default; long m_integer; - - virtual ~ColIntegerValue() = default; - }; +}; - struct ColDoubleValue : ColValue { +struct ColDoubleValue : ColValue { explicit ColDoubleValue(double value) : m_double(value) {}; ColDoubleValue(const ColDoubleValue &other) : m_double(other.m_double) {} - ColumnType getColType() override { return ColumnType::float_type; }; - long getIntValue() override { return (long) m_double; }; - double getDoubleValue() override { return m_double; }; - std::string getStringValue() override { return Settings::double_to_string(m_double); }; - long getDateValue() override { return (long) m_double; }; - bool getBoolValue() override { throw Exception("Not supported on ColDoubleValue"); }; + ColumnType getColType() const override { return ColumnType::float_type; }; + long getIntegerValue() const override { return (long) m_double; }; + double getDoubleValue() const override { return m_double; }; + std::string getStringValue() const override { return Settings::double_to_string(m_double); }; + long getDateValue() const override { return (long) m_double; }; + bool getBoolValue() const override { throw Exception("Not supported on ColDoubleValue"); }; - int compare(ColValue &other) override; + int compare(ColValue &other) const override; - virtual ~ColDoubleValue() = default; + ~ColDoubleValue() override = default; double m_double; - }; +}; - struct ColStringValue : ColValue { +struct ColStringValue : ColValue { explicit ColStringValue(const std::string &value) : m_string(std::make_unique(value)) {}; ColStringValue(const ColStringValue &other) : m_string(std::make_unique(*other.m_string)) {}; ColStringValue & operator=(ColStringValue other); - ColumnType getColType() override { return ColumnType::varchar_type; }; - long getIntValue() override { return std::stoi(*m_string); }; - double getDoubleValue() override { return std::stod(*m_string); }; - std::string getStringValue() override { return *m_string; }; - long getDateValue() override { return std::stoi(*m_string); }; - bool getBoolValue() override { throw Exception("Not supported on ColStringValue"); }; + ColumnType getColType() const override { return ColumnType::varchar_type; }; + long getIntegerValue() const override { return std::stoi(*m_string); }; + double getDoubleValue() const override { return std::stod(*m_string); }; + std::string getStringValue() const override { return *m_string; }; + std::string getCsvStringValue() const override;; + long getDateValue() const override { return std::stoi(*m_string); }; + bool getBoolValue() const override { throw Exception("Not supported on ColStringValue"); }; - int compare(ColValue &other) override; + int compare(ColValue &other) const override; std::unique_ptr m_string; - }; +}; - struct ColDateValue : ColValue { - explicit ColDateValue(long value) : m_date(value) {}; - ColDateValue(const ColDateValue &other) : m_date(other.m_date) {}; +struct ColDateValue : ColValue { + explicit ColDateValue(long value) : m_date(value) {}; + ColDateValue(const ColDateValue &other) : m_date(other.m_date) {}; - ColumnType getColType() override { return ColumnType::date_type; }; - long getIntValue() override { return m_date; }; - double getDoubleValue() override { return (double) m_date; }; - std::string getStringValue() override { return Settings::date_to_string(m_date); }; - long getDateValue() override { return m_date; }; - bool getBoolValue() override { throw Exception("Not supported on ColDateValue"); }; + ColumnType getColType() const override { return ColumnType::date_type; }; + long getIntegerValue() const override { return m_date; }; + double getDoubleValue() const override { return (double) m_date; }; + std::string getStringValue() const override { return Settings::date_to_string(m_date); }; + long getDateValue() const override { return m_date; }; + bool getBoolValue() const override { throw Exception("Not supported on ColDateValue"); }; - int compare(ColValue &other) override; + int compare(ColValue &other) const override; - virtual ~ColDateValue() = default; + ~ColDateValue() override = default; - long m_date; // seconds since epoch for now - }; + long m_date; // seconds since epoch for now +}; - struct ColBooleanValue : ColValue { - explicit ColBooleanValue(bool value) : m_bool(value) {}; - ColBooleanValue(const ColBooleanValue &other) : m_bool(other.m_bool) {}; +struct ColBooleanValue : ColValue { + explicit ColBooleanValue(bool value) : m_bool(value) {}; + ColBooleanValue(const ColBooleanValue &other) : m_bool(other.m_bool) {}; - ColumnType getColType() override { return ColumnType::bool_type; }; - long getIntValue() override { return (long) m_bool; }; - double getDoubleValue() override { return (double) m_bool; }; - std::string getStringValue() override { return m_bool ? "Y" : "N"; }; - long getDateValue() override { throw Exception("Not supported on ColBooleanValue"); }; - bool getBoolValue() override { return m_bool; }; + ColumnType getColType() const override { return ColumnType::bool_type; }; + long getIntegerValue() const override { return (long) m_bool; }; + double getDoubleValue() const override { return (double) m_bool; }; + std::string getStringValue() const override { return m_bool ? "Y" : "N"; }; + long getDateValue() const override { throw Exception("Not supported on ColBooleanValue"); }; + bool getBoolValue() const override { return m_bool; }; - int compare(ColValue &other) override; + int compare(ColValue &other) const override; - virtual ~ColBooleanValue() = default; + ~ColBooleanValue() override = default; - bool m_bool; - }; + bool m_bool; +}; - class Row { +class Row { - public: - explicit Row(int cols_count) : m_columns(cols_count) {}; +public: + explicit Row(int cols_count, bool visible) : m_columns(cols_count), m_visible(visible) {}; Row(const Row &other); Row &operator=(Row other); @@ -154,7 +155,7 @@ namespace usql { void setColumnValue(ColDefNode *col_def, ColValue &col_value); void setColumnValue(ColDefNode *col_def, ValueNode *col_value); - ColValue &operator[](int i) const { + ColValue &operator[](int i) const { auto type_index = m_columns[i].index(); switch (type_index) { case 0: @@ -169,17 +170,23 @@ namespace usql { return (ColValue &) *std::get_if(&m_columns[i]); case 5: return (ColValue &) *std::get_if(&m_columns[i]); + default: + throw Exception("should not happen"); } - throw Exception("should not happen"); } - int compare(const Row &other) const; + [[nodiscard]] int compare(const Row &other) const; void print(const std::vector &col_defs); static int print_get_column_size(const ColDefNode &col_def); - private: - // xx std::vector> m_columns; + + [[nodiscard]] bool is_visible() const { return m_visible; }; + void set_visible() { m_visible = true; }; + void set_deleted() { m_visible = true; }; + + private: + bool m_visible; std::vector> m_columns; - }; +}; } // namespace \ No newline at end of file diff --git a/usql/settings.cpp b/usql/settings.cpp index ffe7420..ca36be8 100644 --- a/usql/settings.cpp +++ b/usql/settings.cpp @@ -1,4 +1,6 @@ +#include "fast_double_parser.h" + #include "settings.h" #include "exception.h" #include "ml_date.h" @@ -9,21 +11,31 @@ std::vector> Settings::m_settings = { std::make_pair("DATE_FORMAT", "%Y-%m-%d %H:%M:%S"), std::make_pair("BOOL_TRUE_LITERAL", "Y"), std::make_pair("BOOL_FALSE_LITERAL", "N"), - std::make_pair("DOUBLE_FORMAT", "%.2f") }; + std::make_pair("DOUBLE_FORMAT", "%.2f"), + std::make_pair("USE_INDEXSCAN", "N") }; -long Settings::string_to_int(const std::string &intstr) { - return std::stoi(intstr); +long Settings::string_to_long(const std::string &intstr) { + try { + return std::stol(intstr); + } catch (std::invalid_argument &e) { + throw Exception("error parsing as integer: " + intstr); + } } -std::string Settings::int_to_string(long intval) { +std::string Settings::long_to_string(long intval) { return std::to_string(intval); } double Settings::string_to_double(const std::string &doublestr) { - return std::stod(doublestr); // TODO use fast parsing + double result; + const char * endptr = fast_double_parser::parse_number(doublestr.c_str(), &result); + if (endptr == nullptr) { + throw Exception("error parsing as double: " + doublestr); + } + return result; } std::string Settings::double_to_string(double d) { @@ -47,23 +59,20 @@ std::string Settings::date_to_string(long date) { } -bool Settings::string_to_bool(const std::string &boolstr) { - if (boolstr=="true" || boolstr == get_setting("BOOL_TRUE_LITERAL")) +bool Settings::string_to_bool(const std::string &value) { + if (value == "true" || value == get_setting("BOOL_TRUE_LITERAL")) return true; - if (boolstr=="false" || boolstr == get_setting("BOOL_FALSE_LITERAL")) + if (value == "false" || value == get_setting("BOOL_FALSE_LITERAL")) return false; - throw Exception("string_to_bool, unrecognized value: " + boolstr); + throw Exception("string_to_bool, unrecognized value: " + value); } -std::string Settings::bool_to_string(bool boolval) { - return boolval ? "true" : "false"; +std::string Settings::bool_to_string(bool value) { + return value ? "true" : "false"; } - - - std::string Settings::get_setting(const std::string &name) { for(const auto& pair : m_settings) { if (pair.first == name) return pair.second; @@ -71,6 +80,10 @@ std::string Settings::get_setting(const std::string &name) { throw Exception("unsupported setting name: " + name); } +bool Settings::get_bool_setting(const std::string &name) { + return string_to_bool(get_setting(name)); +} + void Settings::set_setting(const std::string &name, const std::string &value) { for (auto it = begin(m_settings); it != end(m_settings); ++it) { if (it->first == name) { diff --git a/usql/settings.h b/usql/settings.h index 91ed765..014787c 100644 --- a/usql/settings.h +++ b/usql/settings.h @@ -10,9 +10,10 @@ class Settings { public: static void set_setting(const std::string &name, const std::string &value); static std::string get_setting(const std::string &name); + static bool get_bool_setting(const std::string &name); - static long string_to_int(const std::string &intstr); - static std::string int_to_string(long intval); + static long string_to_long(const std::string &intstr); + static std::string long_to_string(long intval); static double string_to_double(const std::string &doublestr); static std::string double_to_string(double doubleval); @@ -20,8 +21,8 @@ public: static long string_to_date(const std::string &datestr); static std::string date_to_string(long dateval); - static bool string_to_bool(const std::string &boolstr); - static std::string bool_to_string(bool boolval); + static bool string_to_bool(const std::string &value); + static std::string bool_to_string(bool value); private: static std::vector> m_settings; diff --git a/usql/table.cpp b/usql/table.cpp index 7055707..aa92412 100644 --- a/usql/table.cpp +++ b/usql/table.cpp @@ -1,7 +1,6 @@ #include "table.h" #include "csvreader.h" #include "ml_string.h" -#include "fast_double_parser.h" #include #include @@ -19,15 +18,16 @@ Table::Table(const Table &other) { m_name = other.m_name; m_col_defs = other.m_col_defs; m_rows.reserve(other.m_rows.size()); - for(const Row& orig_row : other.m_rows) { - commit_copy_of_row(orig_row); - } + + for(const Row& orig_row : other.m_rows) + if (orig_row.is_visible()) + commit_copy_of_row((Row&)orig_row); } ColDefNode Table::get_column_def(const std::string &col_name) { auto name_cmp = [col_name](const ColDefNode& cd) { return cd.name == col_name; }; - auto col_def = std::find_if(begin(m_col_defs), end(m_col_defs), name_cmp); + auto col_def = std::find_if(std::begin(m_col_defs), std::end(m_col_defs), name_cmp); if (col_def != std::end(m_col_defs)) { return *col_def; } else { @@ -39,35 +39,43 @@ ColDefNode Table::get_column_def(int col_index) { if (col_index >= 0 && col_index < columns_count()) { return m_col_defs[col_index]; } else { - throw Exception("column with this index does not exists (" + std::to_string(col_index) + ")"); + throw Exception("column with this m_index does not exists (" + std::to_string(col_index) + ")"); } } Row& Table::create_empty_row() { - m_rows.emplace_back(columns_count()); + m_rows.emplace_back(columns_count(), false); return m_rows.back(); } std::string Table::csv_string() { - // header + const size_t k_row_size_est = m_col_defs.size() * 16; + std::string out_string; + out_string.reserve(m_rows.size() * k_row_size_est); + + // header for(int i = 0; i < m_col_defs.size(); i++) { - if (i > 0) out_string += ","; + if (i > 0) out_string += ','; out_string += m_col_defs[i].name; } // rows - for (auto & m_row : m_rows) { - std::string csv_line{"\n"}; - for(int i = 0; i < m_col_defs.size(); i++) { - if (i > 0) csv_line += ","; + for (auto & row : m_rows) { + if (row.is_visible()) { + std::string csv_line{"\n"}; + csv_line.reserve(k_row_size_est); - auto & col = m_row[i]; - if (!col.isNull()) { - csv_line += col.getStringValue(); // TODO handle enclosing commas etc + for (int i = 0; i < m_col_defs.size(); i++) { + if (i > 0) csv_line += ','; + + auto &col = row[i]; + if (!col.isNull()) { + csv_line += col.getCsvStringValue(); + } } + out_string += csv_line; } - out_string += csv_line; } return out_string; @@ -89,18 +97,18 @@ int Table::load_csv_file(const std::string &filename) { int line_size = 128; std::ifstream in(filename, std::ifstream::ate | std::ifstream::binary); - auto file_size = in.tellg(); + auto file_size = in.tellg(); std::ifstream infile(filename); if (infile.good()) { std::string sLine; std::getline(infile, sLine); - line_size = (int)sLine.size(); + line_size = (int)sLine.size() + 1; } infile.close(); if (file_size > 0) { - auto new_size = m_rows.size() + int(file_size / line_size * 1.20); + auto new_size = m_rows.size() + int((file_size / line_size) * 1.20); m_rows.reserve(new_size); } @@ -116,15 +124,15 @@ void Table::create_row_from_vector(const std::vector &colDefs, const Row& new_row = create_empty_row(); // copy values - for (int i = 0; i < std::min(columns_count(), csv_line.size()); i++) { + for (size_t i = 0; i < std::min(columns_count(), csv_line.size()); i++) { const ColDefNode & col_def = colDefs[i]; if (csv_line[i].empty()) { new_row.setColumnNull(col_def.order); } else if (col_def.type == ColumnType::integer_type) { - new_row.setIntColumnValue(col_def.order, string_to_long(csv_line[i])); + new_row.setIntColumnValue(col_def.order, Settings::string_to_long(csv_line[i])); } else if (col_def.type == ColumnType::float_type) { - new_row.setFloatColumnValue(col_def.order, string_to_double(csv_line[i])); + new_row.setFloatColumnValue(col_def.order, Settings::string_to_double(csv_line[i])); } else if (col_def.type == ColumnType::varchar_type) { new_row.setStringColumnValue(col_def.order, csv_line[i]); } else if (col_def.type == ColumnType::date_type) { @@ -139,23 +147,6 @@ void Table::create_row_from_vector(const std::vector &colDefs, const commit_row(new_row); } -double Table::string_to_double(const std::string &s) { - double result; - const char * endptr = fast_double_parser::parse_number(s.c_str(), &result); - if (endptr == nullptr) { - throw Exception("error parsing as double: " + s); - } - return result; -} - -long Table::string_to_long(const std::string &s) { - try { - return std::stol(s); - } catch (std::invalid_argument &e) { - throw Exception("error parsing as integer: " + s); - } -} - void Table::print() { std::string out{"| "}; std::string out2{"+-"}; @@ -181,16 +172,23 @@ void Table::print() { std::cout << std::endl; } -void Table::commit_row(const Row &row) { +size_t Table::get_rowid(const Row &row) const { + const Row* row_addr = (Row*)&row; + const Row* begin_addr = &(*m_rows.begin()); + + return row_addr - begin_addr; +} + +void Table::commit_row(Row &row) { try { validate_row(row); + index_row(row); } catch (Exception &e) { - m_rows.erase(m_rows.end() - 1); throw e; } } -void Table::commit_copy_of_row(const Row &row) { +void Table::commit_copy_of_row(Row &row) { Row& new_row = create_empty_row(); for(int i = 0; i < m_col_defs.size(); i++) { @@ -200,7 +198,7 @@ void Table::commit_copy_of_row(const Row &row) { new_row.setColumnNull(i); } else { if (m_col_defs[i].type == ColumnType::integer_type) { - new_row.setIntColumnValue(i, row[i].getIntValue()); + new_row.setIntColumnValue(i, row[i].getIntegerValue()); } else if (m_col_defs[i].type == ColumnType::float_type) { new_row.setFloatColumnValue(i, row[i].getDoubleValue()); } else if (m_col_defs[i].type == ColumnType::varchar_type) { @@ -215,6 +213,7 @@ void Table::commit_copy_of_row(const Row &row) { } validate_row(new_row); + index_row(row); } void Table::validate_column(const ColDefNode *col_def, ValueNode *col_val) { @@ -235,13 +234,134 @@ void Table::validate_column(const ColDefNode *col_def, ColValue &col_val) { } } -void Table::validate_row(const Row &row) { +void Table::validate_row(Row &row) { for(int i = 0; i < m_col_defs.size(); i++) { ColDefNode col_def = m_col_defs[i]; ColValue &col_val = row[i]; validate_column(&col_def, col_val); } + row.set_visible(); +} + +void Table::create_index(const Index& index) { + m_indexes.push_back(index); +} + +bool Table::drop_index(const std::string &index_name) { + auto it = std::find_if(m_indexes.begin(), m_indexes.end(), + [&index_name](const Index &idx) { + return idx.get_index_name() == index_name; + }); + + if (it != m_indexes.end()) { + m_indexes.erase(it); + return true; + } + return false; +} + +void Table::index_row(Index &index, const ColDefNode &col_def, const Row &row, const size_t rowid) { + index.insert(reinterpret_cast(&row[col_def.order]), rowid); +} + +void Table::unindex_row(Index &index, const ColDefNode &col_def, const Row &row, const size_t rowid) { + index.remove(reinterpret_cast(&row[col_def.order]), rowid); +} + +void Table::reindex_row(Index &index, const ColDefNode &col_def, const Row &old_row, const Row &new_row, size_t rowid) { + unindex_row(index, col_def, old_row, rowid); + index_row(index, col_def, new_row, rowid); +} + +void Table::index_row(const Row &row) { + if (!m_indexes.empty()) { + const size_t rowid = get_rowid(row); + for (auto &idx : m_indexes) { + ColDefNode cDef = get_column_def(idx.get_column_name()); + index_row(idx, cDef, row, rowid); + } + } +} + +void Table::unindex_row(const Row &row) { + if (!m_indexes.empty()) { + const size_t rowid = get_rowid(row); + for (auto &idx : m_indexes) { + ColDefNode cDef = get_column_def(idx.get_column_name()); + unindex_row(idx, cDef, row, rowid); + } + } +} + +void Table::reindex_row(const Row &old_row, const Row &new_row) { + if (!m_indexes.empty()) { + const size_t rowid = get_rowid(new_row); + for (auto &idx : m_indexes) { + ColDefNode cDef = get_column_def(idx.get_column_name()); + reindex_row(idx, cDef, old_row, new_row, rowid); + } + } +} + +void Table::index_rows(const std::string &index_name) { + auto index = get_index(index_name); + + ColDefNode cDef = get_column_def(index->get_column_name()); + size_t rowid = 0; + for(const Row& r : m_rows) { + index_row(*index, cDef, r, rowid); + rowid++; + } +} + +Index * Table::get_index(const std::string &index_name) { + auto it = std::find_if(m_indexes.begin(), m_indexes.end(), + [&index_name](const Index &idx) { + return idx.get_index_name() == index_name; + }); + + return (it != m_indexes.end()) ? &(*it) : nullptr; +} + +Index * Table::get_index_for_column(const std::string &col_name) { + auto it = std::find_if(m_indexes.begin(), m_indexes.end(), + [&col_name](const Index &idx) { + return idx.get_column_name() == col_name; + }); + + return (it != m_indexes.end()) ? &(*it) : nullptr; +} + +bool Table::empty() { + if (m_rows.empty()) return true; + for (const auto & r : m_rows) + if (r.is_visible()) return false; + + return true; +} + +Row *Table::rows_scanner::next() { + if (m_use_rowids) { + while (m_rowids_idx < m_rowids.size()) { + auto row_ptr = &m_table->m_rows[m_rowids[m_rowids_idx]]; + if (row_ptr->is_visible()) { + m_rowids_idx++; + return row_ptr; + } + m_rowids_idx++; + } + } else { + while (m_fscan_itr != m_table->m_rows.end()) { + if (m_fscan_itr->is_visible()) { + auto i = m_fscan_itr; + m_fscan_itr++; + return &(*i); + } + m_fscan_itr++; + } + } + return nullptr; } } // namespace diff --git a/usql/table.h b/usql/table.h index e7bcce0..736de59 100644 --- a/usql/table.h +++ b/usql/table.h @@ -1,14 +1,16 @@ #pragma once +#include "index.h" #include "parser.h" #include "row.h" +#include #include + namespace usql { - struct Table { - +struct Table { Table(const Table &other); Table(const std::string& name, const std::vector& columns); @@ -18,13 +20,15 @@ namespace usql { [[nodiscard]] int columns_count() const { return (int) m_col_defs.size(); }; [[nodiscard]] size_t rows_count() const { return m_rows.size(); }; - Row& create_empty_row(); - void commit_row(const Row &row); - void commit_copy_of_row(const Row &row); + [[nodiscard]] size_t get_rowid(const Row &row) const; + + Row &create_empty_row(); + void commit_row(Row &row); + void commit_copy_of_row(Row &row); static void validate_column(const ColDefNode *col_def, ValueNode *col_val); static void validate_column(const ColDefNode *col_def, ColValue &col_val); - void validate_row(const Row &row); + void validate_row(Row &row); std::string csv_string(); int load_csv_string(const std::string &content); @@ -32,14 +36,45 @@ namespace usql { void print(); - std::string m_name; + std::string m_name; std::vector m_col_defs; - std::vector m_rows; - - static long string_to_long(const std::string &s) ; - static double string_to_double(const std::string &s) ; + std::vector m_rows; + std::vector m_indexes; void create_row_from_vector(const std::vector &colDefs, const std::vector &csv_line); - }; -} + void create_index(const Index& index); + bool drop_index(const std::string &index_name); + + static void index_row(Index &index, const ColDefNode &col_def, const Row &row, size_t rowid); + static void unindex_row(Index &index, const ColDefNode &col_def, const Row &row, size_t rowid); + static void reindex_row(Index &index, const ColDefNode &col_def, const Row &old_row, const Row &new_row, size_t rowid); + + void index_row(const Row &row); + void unindex_row(const Row &row); + void reindex_row(const Row &old_row, const Row &new_row); + + void index_rows(const std::string &index_name); + + Index * get_index(const std::string &index_name); + Index * get_index_for_column(const std::string &col_name); + + bool empty(); + + struct rows_scanner { + explicit rows_scanner(Table *tbl) : m_use_rowids(false), m_table(tbl), m_fscan_itr(tbl->m_rows.begin()) {} + rows_scanner(Table *tbl, std::vector rowids) : m_use_rowids(true), m_table(tbl), m_rowids(std::move(rowids)), m_rowids_idx(0) {} + + Row *next(); + + private: + bool m_use_rowids; + Table * m_table; + std::vector::iterator m_fscan_itr; + std::vector m_rowids; + size_t m_rowids_idx{}; + }; + +}; + +} // namespace diff --git a/usql/usql.cpp b/usql/usql.cpp index 42c9728..dc901fe 100644 --- a/usql/usql.cpp +++ b/usql/usql.cpp @@ -1,10 +1,8 @@ #include "usql.h" #include "exception.h" -#include "ml_date.h" #include "ml_string.h" #include -#include namespace usql { @@ -20,10 +18,11 @@ std::unique_ptr USql::execute(const std::string &command) { } std::unique_ptr
USql::execute(Node &node) { - // TODO optimize execution nodes here switch (node.node_type) { case NodeType::create_table: return execute_create_table(static_cast(node)); + case NodeType::create_index: + return execute_create_index(static_cast(node)); case NodeType::create_table_as_select: return execute_create_table_as_table(static_cast(node)); case NodeType::drop_table: @@ -65,7 +64,7 @@ bool USql::eval_relational_operator(const RelationalOperatorNode &filter, Table return !all_null; return false; } else if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::int_value) { - comparator = left_value->getIntegerValue() - right_value->getIntegerValue(); + comparator = (double)(left_value->getIntegerValue() - right_value->getIntegerValue()); } else if ((left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::float_value) || (left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::int_value) || (left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::float_value)) { @@ -108,7 +107,7 @@ std::unique_ptr USql::eval_value_node(Table *table, Row &row, Node *n if (node->node_type == NodeType::database_value) { return eval_database_value_node(table, row, node); } else if (node->node_type == NodeType::int_value || node->node_type == NodeType::float_value || node->node_type == NodeType::string_value || node->node_type == NodeType::bool_value) { - return eval_literal_value_node(table, row, node); + return eval_literal_value_node(row, node); } else if (node->node_type == NodeType::function) { return eval_function_value_node(table, row, node, col_def_node, agg_func_value); } else if (node->node_type == NodeType::null_value) { @@ -129,7 +128,7 @@ std::unique_ptr USql::eval_database_value_node(Table *table, Row &row return std::make_unique(); if (col_def.type == ColumnType::integer_type) - return std::make_unique(db_value.getIntValue()); + return std::make_unique(db_value.getIntegerValue()); if (col_def.type == ColumnType::float_type) return std::make_unique(db_value.getDoubleValue()); if (col_def.type == ColumnType::varchar_type) @@ -137,13 +136,13 @@ std::unique_ptr USql::eval_database_value_node(Table *table, Row &row if (col_def.type == ColumnType::bool_type) return std::make_unique(db_value.getBoolValue()); if (col_def.type == ColumnType::date_type) - return std::make_unique(db_value.getIntValue()); + return std::make_unique(db_value.getIntegerValue()); throw Exception("unknown database value type"); } -std::unique_ptr USql::eval_literal_value_node(Table *table, Row &row, Node *node) { +std::unique_ptr USql::eval_literal_value_node(Row &row, Node *node) { if (node->node_type == NodeType::int_value) { auto *ivl = static_cast(node); return std::make_unique(ivl->value); @@ -166,8 +165,7 @@ std::unique_ptr USql::eval_literal_value_node(Table *table, Row &row, } -std::unique_ptr -USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) { +std::unique_ptr USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) { auto *fnc = static_cast(node); std::vector> evaluatedPars; @@ -177,7 +175,8 @@ USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *c // at this moment no functions without parameter(s) or first param can be null if (evaluatedPars.empty() || evaluatedPars[0]->isNull()) - return std::make_unique(); + throw Exception("eval_function_value_node, no function parameter or first is null, function: " + fnc->function); + // return std::make_unique(); // TODO use some enum if (fnc->function == "lower") return lower_function(evaluatedPars); @@ -193,23 +192,13 @@ USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *c throw Exception("invalid function: " + fnc->function); } -std::unique_ptr USql::count_function(ColValue *agg_func_value, const std::vector> &evaluatedPars) { - long c = 1; - if (!agg_func_value->isNull()) { - c = agg_func_value->getIntValue() + 1; - } - return std::make_unique(c); -} - bool USql::eval_logical_operator(LogicalOperatorNode &node, Table *pTable, Row &row) { - //bool left = eval_relational_operator(static_cast(*node.left), pTable, row); bool left = eval_where(&(*node.left), pTable, row); if ((node.op == LogicalOperatorType::and_operator && !left) || (node.op == LogicalOperatorType::or_operator && left)) return left; - //bool right = eval_relational_operator(static_cast(*node.right), pTable, row); bool right = eval_where(&(*node.right), pTable, row); return right; } @@ -227,8 +216,8 @@ std::unique_ptr USql::eval_arithmetic_operator(ColumnType outType, Ar return std::make_unique(); if (outType == ColumnType::float_type) { - double l = ((ValueNode *) left.get())->getDoubleValue(); - double r = ((ValueNode *) right.get())->getDoubleValue(); + auto l = left->getDoubleValue(); + auto r = right->getDoubleValue(); switch (node.op) { case ArithmeticalOperatorType::plus_operator: return std::make_unique(l + r); @@ -239,12 +228,12 @@ std::unique_ptr USql::eval_arithmetic_operator(ColumnType outType, Ar case ArithmeticalOperatorType::divide_operator: return std::make_unique(l / r); default: - throw Exception("implement me!!"); + throw Exception("eval_arithmetic_operator, float type implement me!!"); } } else if (outType == ColumnType::integer_type) { - long l = ((ValueNode *) left.get())->getIntegerValue(); - long r = ((ValueNode *) right.get())->getIntegerValue(); + auto l = left->getIntegerValue(); + auto r = right->getIntegerValue(); switch (node.op) { case ArithmeticalOperatorType::plus_operator: return std::make_unique(l + r); @@ -255,179 +244,57 @@ std::unique_ptr USql::eval_arithmetic_operator(ColumnType outType, Ar case ArithmeticalOperatorType::divide_operator: return std::make_unique(l / r); default: - throw Exception("implement me!!"); + throw Exception("eval_arithmetic_operator, integer type implement me!!"); } } else if (outType == ColumnType::varchar_type) { - std::string l = ((ValueNode *) left.get())->getStringValue(); - std::string r = ((ValueNode *) right.get())->getStringValue(); + auto l = left->getStringValue(); + auto r = right->getStringValue(); switch (node.op) { case ArithmeticalOperatorType::plus_operator: return std::make_unique(l + r); default: - throw Exception("implement me!!"); + throw Exception("eval_arithmetic_operator, varchar type implement me!!"); + } + + } else if (outType == ColumnType::date_type) { + auto l = left->getDateValue(); + auto r = right->getDateValue(); + switch (node.op) { + case ArithmeticalOperatorType::plus_operator: + return std::make_unique(l + r); + case ArithmeticalOperatorType::minus_operator: + return std::make_unique(l - r); + default: + throw Exception("eval_arithmetic_operator, date_type type implement me!!"); } } - // TODO date node should support addition and subtraction - - throw Exception("implement me!!"); + throw Exception("eval_arithmetic_operator, implement me!!"); } - -std::unique_ptr USql::to_string_function(const std::vector> &evaluatedPars) { - long date = evaluatedPars[0]->getDateValue(); - std::string format = evaluatedPars[1]->getStringValue(); - std::string formatted_date = date_to_string(date, format); - return std::make_unique(formatted_date); -} - -std::unique_ptr USql::to_date_function(const std::vector> &evaluatedPars) { - std::string date = evaluatedPars[0]->getStringValue(); - std::string format = evaluatedPars[1]->getStringValue(); - long epoch_time = string_to_date(date, format); - return std::make_unique(epoch_time); // No DateValueNode for now -} - -std::unique_ptr USql::date_add_function(const std::vector> &evaluatedPars) { - long datetime = evaluatedPars[0]->getDateValue(); - long quantity = evaluatedPars[1]->getIntegerValue(); - std::string part = evaluatedPars[2]->getStringValue(); - - long new_date = add_to_date(datetime, quantity, part); - return std::make_unique(new_date); // No DateValueNode for now -} - - -std::unique_ptr USql::upper_function(const std::vector> &evaluatedPars) { - std::string str = evaluatedPars[0]->getStringValue(); - std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return toupper(c); }); - return std::make_unique(str); -} - -std::unique_ptr USql::lower_function(const std::vector> &evaluatedPars) { - std::string str = evaluatedPars[0]->getStringValue(); - std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return tolower(c); }); - return std::make_unique(str); -} - -std::unique_ptr USql::pp_function(const std::vector> &evaluatedPars) { - auto &parsed_value = evaluatedPars[0]; - - if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) { - std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : ""; - char buf[20] {0}; // TODO constant here - double value = parsed_value->getDoubleValue(); - - if (format == "100%") - std::snprintf(buf, 20, "%.2f%%", value); - else if (format == "%.2f") - std::snprintf(buf, 20, "%.2f", value); - else if (value >= 1000000000000) - std::snprintf(buf, 20, "%7.2fT", value/1000000000000); - else if (value >= 1000000000) - std::sprintf(buf, "%7.2fB", value/1000000000); - else if (value >= 1000000) - std::snprintf(buf, 20, "%7.2fM", value/1000000); - else if (value >= 100000) - std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M - else if (value <= -1000000000000) - std::snprintf(buf, 20, "%7.2fT", value/1000000000000); - else if (value <= -1000000000) - std::snprintf(buf, 20, "%7.2fB", value/1000000000); - else if (value <= -1000000) - std::snprintf(buf, 20, "%7.2fM", value/1000000); - else if (value <= -100000) - std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M - else if (value == 0) - buf[0]='0'; - else - return std::make_unique(parsed_value->getStringValue().substr(0, 10)); - // TODO introduce constant for 10 - std::string s {buf}; - return std::make_unique(string_padd(s.erase(s.find_last_not_of(" ")+1), 10, ' ', false)); - } - return std::make_unique(parsed_value->getStringValue()); -} - -std::unique_ptr -USql::max_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, - ColValue *agg_func_value) { - if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { - if (!evaluatedPars[0]->isNull()) { - long val = evaluatedPars[0]->getIntegerValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::max(val, agg_func_value->getIntValue())); - } - } else { - return std::make_unique(agg_func_value->getIntValue()); - } - } else if (col_def_node->type == ColumnType::float_type) { - if (!evaluatedPars[0]->isNull()) { - double val = evaluatedPars[0]->getDoubleValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::max(val, agg_func_value->getDoubleValue())); - } - } else { - return std::make_unique(agg_func_value->getDoubleValue()); - } - } - - // TODO string and boolean - throw Exception("unsupported data type for max function"); -} - -std::unique_ptr -USql::min_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, - ColValue *agg_func_value) { - if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { - if (!evaluatedPars[0]->isNull()) { - long val = evaluatedPars[0]->getIntegerValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::min(val, agg_func_value->getIntValue())); - } - } else { - return std::make_unique(agg_func_value->getIntValue()); - } - } else if (col_def_node->type == ColumnType::float_type) { - if (!evaluatedPars[0]->isNull()) { - double val = evaluatedPars[0]->getDoubleValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::min(val, agg_func_value->getDoubleValue())); - } - } else { - return std::make_unique(agg_func_value->getDoubleValue()); - } - } - - // TODO string and boolean - throw Exception("unsupported data type for min function"); -} - -Table *USql::find_table(const std::string &name) { +Table *USql::find_table(const std::string &name) const { auto name_cmp = [name](const Table& t) { return t.m_name == name; }; + auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp); - if (table_def != std::end(m_tables)) { - return table_def.operator->(); - } else { - throw Exception("table not found (" + name + ")"); - } + if (table_def != std::end(m_tables)) + return const_cast
(table_def.operator->()); + + throw Exception("table not found (" + name + ")"); } -void USql::check_table_not_exists(const std::string &name) { +void USql::check_table_not_exists(const std::string &name) const { auto name_cmp = [name](const Table& t) { return t.m_name == name; }; + auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp); - if (table_def != std::end(m_tables)) { + if (table_def != std::end(m_tables)) throw Exception("table already exists"); - } } -} // namespace +void USql::check_index_not_exists(const std::string &index_name) { + for (auto &table : m_tables) + if (table.get_index(index_name) != nullptr) + throw Exception("index already exists"); +} + +} // namespace \ No newline at end of file diff --git a/usql/usql.h b/usql/usql.h index c576e18..1e13978 100644 --- a/usql/usql.h +++ b/usql/usql.h @@ -1,7 +1,9 @@ #pragma once +#include "settings.h" #include "parser.h" #include "table.h" +#include "index.h" #include #include @@ -18,18 +20,19 @@ public: private: std::unique_ptr
execute(Node &node); - std::unique_ptr
execute_create_table(CreateTableNode &node); - std::unique_ptr
execute_create_table_as_table(CreateTableAsSelectNode &node); - std::unique_ptr
execute_load(LoadIntoTableNode &node); - std::unique_ptr
execute_save(SaveTableNode &node); - std::unique_ptr
execute_drop(DropTableNode &node); - static std::unique_ptr
execute_set(SetNode &node); - static std::unique_ptr
execute_show(ShowNode &node); + std::unique_ptr
execute_create_table(const CreateTableNode &node); + std::unique_ptr
execute_create_index(const CreateIndexNode &node); + std::unique_ptr
execute_create_table_as_table(const CreateTableAsSelectNode &node); + std::unique_ptr
execute_load(const LoadIntoTableNode &node); + std::unique_ptr
execute_save(const SaveTableNode &node); + std::unique_ptr
execute_drop(const DropTableNode &node); + static std::unique_ptr
execute_set(const SetNode &node); + static std::unique_ptr
execute_show(const ShowNode &node); - std::unique_ptr
execute_insert_into_table(InsertIntoTableNode &node); - std::unique_ptr
execute_select(SelectFromTableNode &node); - std::unique_ptr
execute_delete(DeleteFromTableNode &node); - std::unique_ptr
execute_update(UpdateTableNode &node); + std::unique_ptr
execute_insert_into_table(const InsertIntoTableNode &node); + std::unique_ptr
execute_select(SelectFromTableNode &node) const; + std::unique_ptr
execute_delete(const DeleteFromTableNode &node); + std::unique_ptr
execute_update(const UpdateTableNode &node); private: @@ -37,7 +40,7 @@ private: static std::unique_ptr eval_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value); static std::unique_ptr eval_database_value_node(Table *table, Row &row, Node *node); - static std::unique_ptr eval_literal_value_node(Table *table, Row &row, Node *node); + static std::unique_ptr eval_literal_value_node(Row &row, Node *node); static std::unique_ptr eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value); @@ -50,22 +53,23 @@ private: static std::tuple get_column_definition(Table *table, SelectColNode *select_col_node, int col_order); static ColDefNode get_db_column_definition(Table *table, Node *node); static std::tuple get_node_definition(Table *table, Node *select_col_node, const std::string & col_name, int col_order); - Table *find_table(const std::string &name); + [[nodiscard]] Table *find_table(const std::string &name) const; - void check_table_not_exists(const std::string &name); + void check_table_not_exists(const std::string &name) const; + void check_index_not_exists(const std::string &index_name); private: Parser m_parser; std::list
m_tables; - static void execute_distinct(SelectFromTableNode &node, Table *result) ; - static void execute_order_by(SelectFromTableNode &node, Table *table, Table *result) ; - static void execute_offset_limit(OffsetLimitNode &node, Table *result) ; + static void execute_distinct(SelectFromTableNode &node, Table *result); + static void execute_order_by(SelectFromTableNode &node, Table *result); + static void execute_offset_limit(OffsetLimitNode &node, Table *result); - void expand_asterix_char(SelectFromTableNode &node, Table *table) const; - void setup_order_columns(std::vector &node, Table *table) const; + static void expand_asterix_char(SelectFromTableNode &node, Table *table) ; + static void setup_order_columns(std::vector &node, Table *table) ; - bool check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const; + static bool check_for_aggregate_only_functions(SelectFromTableNode &node, size_t result_cols_cnt) ; static std::unique_ptr lower_function(const std::vector> &evaluatedPars); static std::unique_ptr upper_function(const std::vector> &evaluatedPars); @@ -77,8 +81,20 @@ private: static std::unique_ptr max_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value); static std::unique_ptr min_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value); - static std::unique_ptr - count_function(ColValue *agg_func_value, const std::vector> &evaluatedPars); + static std::unique_ptr count_function(ColValue *agg_func_value, const std::vector> &evaluatedPars); + + static void select_row(SelectFromTableNode &where_node, + Table *src_table, Row *src_row, + Table *rslt_table, + const std::vector &rslt_tbl_col_defs, const std::vector &src_table_col_index, + bool is_aggregated) ; + + std::pair> probe_index_scan(const Node *where, Table *table) const; + std::pair> look_for_usable_index(const Node *where, Table *table) const; + bool normalize_where(const Node *node) const; + + Table::rows_scanner get_iterator(Table *table, const Node *where) const; }; + } // namespace \ No newline at end of file diff --git a/usql/usql_ddl.cpp b/usql/usql_ddl.cpp index 38a84d9..c6b1e18 100644 --- a/usql/usql_ddl.cpp +++ b/usql/usql_ddl.cpp @@ -1,6 +1,5 @@ #include "usql.h" #include "exception.h" -#include "ml_date.h" #include "ml_string.h" #include @@ -10,7 +9,7 @@ namespace usql { -std::unique_ptr
USql::execute_create_table(CreateTableNode &node) { +std::unique_ptr
USql::execute_create_table(const CreateTableNode &node) { check_table_not_exists(node.table_name); Table table{node.table_name, node.cols_defs}; @@ -20,7 +19,23 @@ std::unique_ptr
USql::execute_create_table(CreateTableNode &node) { } -std::unique_ptr
USql::execute_create_table_as_table(CreateTableAsSelectNode &node) { +std::unique_ptr
USql::execute_create_index(const CreateIndexNode &node) { + Table *table_def = find_table(node.table_name); // throws exception if not found + ColDefNode col_def = table_def->get_column_def(node.column_name); // throws exception if not found + + check_index_not_exists(node.index_name); + if (col_def.null) throw Exception("index on not null supported only"); + if (table_def->get_index_for_column(node.column_name) != nullptr) throw Exception("column is already indexed"); + + table_def->create_index({node.index_name, node.column_name, col_def.type}); + + table_def->index_rows(node.index_name); + + return create_stmt_result_table(0, "index created", 0); +} + + +std::unique_ptr
USql::execute_create_table_as_table(const CreateTableAsSelectNode &node) { check_table_not_exists(node.table_name); auto select = execute_select((SelectFromTableNode &) *node.select_table); @@ -43,7 +58,7 @@ std::unique_ptr
USql::execute_create_table_as_table(CreateTableAsSelectNo -std::unique_ptr
USql::execute_drop(DropTableNode &node) { +std::unique_ptr
USql::execute_drop(const DropTableNode &node) { auto name_cmp = [node](const Table& t) { return t.m_name == node.table_name; }; auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp); @@ -55,12 +70,12 @@ std::unique_ptr
USql::execute_drop(DropTableNode &node) { throw Exception("table not found (" + node.table_name + ")"); } -std::unique_ptr
USql::execute_set(SetNode &node) { +std::unique_ptr
USql::execute_set(const SetNode &node) { Settings::set_setting(node.name, node.value); return create_stmt_result_table(0, "set succeeded", 1); } -std::unique_ptr
USql::execute_show(ShowNode &node) { +std::unique_ptr
USql::execute_show(const ShowNode &node) { std::string value = Settings::get_setting(node.name); return create_stmt_result_table(0, "show succeeded: " + value, 1); } @@ -70,7 +85,7 @@ std::unique_ptr
USql::create_stmt_result_table(long code, const std::stri std::vector result_tbl_col_defs{}; result_tbl_col_defs.emplace_back("code", ColumnType::integer_type, 0, 1, false); result_tbl_col_defs.emplace_back("desc", ColumnType::varchar_type, 1, 48, false); - result_tbl_col_defs.emplace_back("affected_rows", ColumnType::integer_type, 0, 1, true); + result_tbl_col_defs.emplace_back("aff_rows", ColumnType::integer_type, 0, 1, true); auto table_def = std::make_unique
("result", result_tbl_col_defs); @@ -85,7 +100,7 @@ std::unique_ptr
USql::create_stmt_result_table(long code, const std::stri -std::unique_ptr
USql::execute_load(LoadIntoTableNode &node) { +std::unique_ptr
USql::execute_load(const LoadIntoTableNode &node) { // find source table Table *table_def = find_table(node.table_name); @@ -101,7 +116,7 @@ std::unique_ptr
USql::execute_load(LoadIntoTableNode &node) { } -std::unique_ptr
USql::execute_save(SaveTableNode &node) { +std::unique_ptr
USql::execute_save(const SaveTableNode &node) { // find source table Table *table_def = find_table(node.table_name); diff --git a/usql/usql_dml.cpp b/usql/usql_dml.cpp index 54a297f..0e810fe 100644 --- a/usql/usql_dml.cpp +++ b/usql/usql_dml.cpp @@ -1,87 +1,118 @@ #include "usql.h" #include "exception.h" -#include "ml_date.h" #include "ml_string.h" #include -#include namespace usql { -std::unique_ptr
USql::execute_select(SelectFromTableNode &node) { - // find source table - Table *table = find_table(node.table_name); +std::pair> USql::probe_index_scan(const Node *where, Table *table) const { + bool indexscan_possible = normalize_where(where); - // expand * - expand_asterix_char(node, table); - - // create result table - std::vector result_tbl_col_defs{}; - std::vector source_table_col_index{}; - for (int i = 0; i < node.cols_names->size(); i++) { - SelectColNode * col_node = &node.cols_names->operator[](i); - auto [src_tbl_col_index, rst_tbl_col_def] = get_column_definition(table, col_node, i); - - source_table_col_index.push_back(src_tbl_col_index); - result_tbl_col_defs.push_back(rst_tbl_col_def); + if (indexscan_possible && Settings::get_bool_setting("USE_INDEXSCAN")) { + // where->dump(); + return look_for_usable_index(where, table); } - // check for aggregate function - bool aggregate_funcs = check_for_aggregate_only_functions(node, result_tbl_col_defs.size()); - - // prepare result table structure - auto result = std::make_unique
("result", result_tbl_col_defs); - - // replace possible order by col names to col indexes and validate - setup_order_columns(node.order_by, result.get()); - - // execute access plan - Row* new_row = nullptr; - for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) { - // eval where for row - if (eval_where(node.where.get(), table, *row)) { - // prepare empty row and copy column values - // when agregate functions in result only one row for table - if (!aggregate_funcs || result->rows_count()==0) { - new_row = &result->create_empty_row(); - } - - for (auto idx = 0; idx < result->columns_count(); idx++) { - auto src_table_col_idx = source_table_col_index[idx]; - - if (src_table_col_idx == FUNCTION_CALL) { - auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[](idx).value.get(), &result_tbl_col_defs[idx], &new_row->operator[](idx)); - ValueNode *col_value = evaluated_value.get(); - - new_row->setColumnValue(&result_tbl_col_defs[idx], col_value); - } else { - ColValue &col_value = row->operator[](src_table_col_idx); - new_row->setColumnValue(&result_tbl_col_defs[idx], col_value); - } - } - - // add row to result - if (aggregate_funcs == 0) { - result->commit_row(*new_row); - } - } - } - // when aggregates commit this one row - if (aggregate_funcs && new_row != nullptr) { - result->commit_row(*new_row); - } - - execute_distinct(node, result.get()); - - execute_order_by(node, table, result.get()); - - execute_offset_limit(node.offset_limit, result.get()); - - return result; + // no index scan + return std::make_pair(false, std::vector{}); } -bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const { +std::pair> USql::look_for_usable_index(const Node *where, Table *table) const { + if (where->node_type == NodeType::relational_operator) { + auto * ron = (RelationalOperatorNode *)where; + // TODO implement >, >=, <=, < + // https://en.cppreference.com/w/cpp/container/map/upper_bound + if (ron->op == RelationalOperatorType::equal) { + if (ron->left->node_type == NodeType::database_value && + ((ron->right->node_type == NodeType::int_value) || (ron->right->node_type == NodeType::string_value)) + ) { + auto col_name = ((DatabaseValueNode *)ron->left.get())->col_name; + + Index * used_index = table->get_index_for_column(col_name); + if (used_index != nullptr) { + std::vector rowids = used_index->search((ValueNode *)ron->right.get()); +#ifndef NDEBUG + std::cout << "using index " << table->m_name << "(" << used_index->get_column_name() << "), " << rowids.size() << "/" << table->rows_count() << std::endl; +#endif + return std::make_pair(true, rowids); + } + } + } + } else if (where->node_type == NodeType::logical_operator) { + auto * operatorNode = (LogicalOperatorNode *)where; + if (operatorNode->op == LogicalOperatorType::and_operator) { + auto [use_index, rowids] = look_for_usable_index(operatorNode->left.get(), table); + if (use_index) { + return std::make_pair(true, rowids); + } + return look_for_usable_index(operatorNode->right.get(), table); + } + } + + // no index available + return std::make_pair(false, std::vector{}); + } + +bool USql::normalize_where(const Node *node) const { + // normalize relational operators "layout" and check whether index scan even possible + + // unify relational operators tha left node is always database value + if (node->node_type == NodeType::relational_operator) { + // TODO more optimizations here, for example node 1 = 2 etc + auto * ron = (RelationalOperatorNode *)node; + if (ron->right->node_type == NodeType::database_value && ((ron->left->node_type == NodeType::int_value) || (ron->left->node_type == NodeType::string_value)) ) { + std::swap(ron->left, ron->right); + } + return true; + } else if (node->node_type == NodeType::logical_operator) { + auto * operatorNode = (LogicalOperatorNode *)node; + if (operatorNode->op == LogicalOperatorType::or_operator) { + return false; + } + bool left_subnode = normalize_where(operatorNode->left.get()); + bool right_subnode = normalize_where(operatorNode->left.get()); + return left_subnode && right_subnode; + } + return true; +} + +void USql::select_row(SelectFromTableNode &where_node, + Table *src_table, Row *src_row, + Table *rslt_table, + const std::vector &rslt_tbl_col_defs, + const std::vector &src_table_col_index, + bool is_aggregated) { + + Row *rslt_row = nullptr; + + // when aggregate functions in rslt_table only one row exists + if (is_aggregated && !rslt_table->empty()) + rslt_row = &rslt_table->m_rows[0]; + else + rslt_row = &rslt_table->create_empty_row(); + + for (auto idx = 0; idx < rslt_table->columns_count(); idx++) { + auto src_table_col_idx = src_table_col_index[idx]; + + if (src_table_col_idx == FUNCTION_CALL) { + auto evaluated_value = eval_value_node(src_table, *src_row, where_node.cols_names->operator[](idx).value.get(), + const_cast(&rslt_tbl_col_defs[idx]), &rslt_row->operator[](idx)); + ValueNode *col_value = evaluated_value.get(); + + rslt_row->setColumnValue((ColDefNode *) &rslt_tbl_col_defs[idx], col_value); + } else { + ColValue &col_value = src_row->operator[](src_table_col_idx); + rslt_row->setColumnValue((ColDefNode *) &rslt_tbl_col_defs[idx], col_value); + } + } + + // for aggregate is validated more than needed + rslt_table->commit_row(*rslt_row); +} + +bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, size_t result_cols_cnt) { int aggregate_funcs = 0; for (int i = 0; i < node.cols_names->size(); i++) { SelectColNode * col_node = &node.cols_names->operator[](i); @@ -99,7 +130,7 @@ bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, int res return aggregate_funcs > 0; } -void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const { +void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) { if (node.cols_names->size() == 1 && node.cols_names->operator[](0).name == "*") { node.cols_names->clear(); node.cols_names->reserve(table->columns_count()); @@ -109,7 +140,7 @@ void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const { } } -void USql::setup_order_columns(std::vector &node, Table *table) const { +void USql::setup_order_columns(std::vector &node, Table *table) { for (auto& order_node : node) { if (!order_node.col_name.empty()) { ColDefNode col_def = table->get_column_def(order_node.col_name); @@ -120,19 +151,19 @@ void USql::setup_order_columns(std::vector &node, Table *table) co if (order_node.col_index < 0 || order_node.col_index >= table->columns_count()) throw Exception("unknown column in order by clause (" + order_node.col_name + ")"); - } + } } void USql::execute_distinct(SelectFromTableNode &node, Table *result) { if (!node.distinct) return; - + auto compare_rows = [](const Row &a, const Row &b) { return a.compare(b) >= 0; }; std::sort(result->m_rows.begin(), result->m_rows.end(), compare_rows); result->m_rows.erase(std::unique(result->m_rows.begin(), result->m_rows.end()), result->m_rows.end()); } -void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *result) { +void USql::execute_order_by(SelectFromTableNode &node, Table *result) { if (node.order_by.empty()) return; auto compare_rows = [&node, &result](const Row &a, const Row &b) { @@ -160,6 +191,21 @@ void USql::execute_offset_limit(OffsetLimitNode &node, Table *result) { result->m_rows.erase(result->m_rows.begin() + node.limit, result->m_rows.end()); } +bool USql::eval_where(Node *where, Table *table, Row &row) +{ + switch (where->node_type) + { + case NodeType::true_node: + return true; + case NodeType::relational_operator: // just one condition + return eval_relational_operator(*((RelationalOperatorNode *)where), table, row); + case NodeType::logical_operator: + return eval_logical_operator(*((LogicalOperatorNode *)where), table, row); + default: + throw Exception("Wrong node type"); + } +} + std::tuple USql::get_column_definition(Table *table, SelectColNode *select_col_node, int col_order ) { return get_node_definition(table, select_col_node->value.get(), select_col_node->name, col_order ); } @@ -218,7 +264,7 @@ std::tuple USql::get_node_definition(Table *table, Node * node, auto [left_col_index, left_tbl_col_def] = get_node_definition(table, ari_node->left.get(), col_name, col_order ); auto [right_col_index, right_tbl_col_def] = get_node_definition(table, ari_node->right.get(), col_name, col_order ); - ColumnType col_type; // TODO handle varchar and it len + ColumnType col_type; // TODO handle varchar and its len if (left_tbl_col_def.type==ColumnType::float_type || right_tbl_col_def.type==ColumnType::float_type) col_type = ColumnType::float_type; else @@ -249,8 +295,7 @@ std::tuple USql::get_node_definition(Table *table, Node * node, - -std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node) { +std::unique_ptr
USql::execute_insert_into_table(const InsertIntoTableNode &node) { // find table Table *table_def = find_table(node.table_name); @@ -276,45 +321,52 @@ std::unique_ptr
USql::execute_insert_into_table(InsertIntoTableNode &node -std::unique_ptr
USql::execute_delete(DeleteFromTableNode &node) { +std::unique_ptr
USql::execute_delete(const DeleteFromTableNode &node) { + size_t affected_rows = 0; + // find source table Table *table = find_table(node.table_name); // execute access plan - auto affected_rows = table->rows_count(); + Table::rows_scanner i = get_iterator(table, node.where.get()); + while(Row *row = i.next()) { + if (eval_where(node.where.get(), table, *row)) { + row->set_deleted(); + table->unindex_row(*row); - table->m_rows.erase( - std::remove_if(table->m_rows.begin(), table->m_rows.end(), - [&node, table](Row &row){return eval_where(node.where.get(), table, row);}), - table->m_rows.end()); - - affected_rows -= table->rows_count(); + affected_rows++; + } + } return create_stmt_result_table(0, "delete succeeded", affected_rows); } -std::unique_ptr
USql::execute_update(UpdateTableNode &node) { +std::unique_ptr
USql::execute_update(const UpdateTableNode &node) { + size_t affected_rows = 0; + // find source table Table *table = find_table(node.table_name); // execute access plan - int affected_rows = 0; - for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) { - // eval where for row + Table::rows_scanner i = get_iterator(table, node.where.get()); + while(Row *row = i.next()) { if (eval_where(node.where.get(), table, *row)) { - int i = 0; + Row old_row = * row; + + int col_idx = 0; for (const auto& col : node.cols_names) { - // TODO cache it like in select + // PERF cache it like in select ColDefNode col_def = table->get_column_def(col.col_name); std::unique_ptr new_val = eval_arithmetic_operator(col_def.type, - static_cast(*node.values[i]), - table, *row); + static_cast(*node.values[col_idx]), table, *row); usql::Table::validate_column(&col_def, new_val.get()); row->setColumnValue(&col_def, new_val.get()); - i++; + col_idx++; } + table->reindex_row(old_row, *row); + affected_rows++; // TODO tady je problem, ze kdyz to zfajluje na jednom radku ostatni by se nemely provest } @@ -324,20 +376,58 @@ std::unique_ptr
USql::execute_update(UpdateTableNode &node) { } -bool USql::eval_where(Node *where, Table *table, Row &row) { - switch (where->node_type) { - case NodeType::true_node: - return true; - case NodeType::relational_operator: // just one condition - return eval_relational_operator(*((RelationalOperatorNode *) where), table, row); - case NodeType::logical_operator: - return eval_logical_operator(*((LogicalOperatorNode *) where), table, row); - default: - throw Exception("Wrong node type"); +std::unique_ptr
USql::execute_select(SelectFromTableNode &node) const { + // find source table + Table *table = find_table(node.table_name); + + // expand * + expand_asterix_char(node, table); + + // create result table + std::vector result_tbl_col_defs{}; + std::vector source_table_col_index{}; + for (int i = 0; i < node.cols_names->size(); i++) { + SelectColNode *col_node = &node.cols_names->operator[](i); + auto [src_tbl_col_index, rst_tbl_col_def] = get_column_definition(table, col_node, i); + + source_table_col_index.push_back(src_tbl_col_index); + result_tbl_col_defs.push_back(rst_tbl_col_def); } - return false; + // check for aggregate function + bool is_aggregated = check_for_aggregate_only_functions(node, result_tbl_col_defs.size()); + + // prepare result table structure + auto result = std::make_unique
("result", result_tbl_col_defs); + + // replace possible order by col names to col indexes and validate + setup_order_columns(node.order_by, result.get()); + + // execute access plan + Table::rows_scanner i = get_iterator(table, node.where.get()); + while(Row *row = i.next()) { + if (eval_where(node.where.get(), table, *row)) { // put it into row_scanner.next + select_row(node, table, row, result.get(), result_tbl_col_defs, source_table_col_index, is_aggregated); + } + } + + execute_distinct(node, result.get()); + + execute_order_by(node, result.get()); + + execute_offset_limit(node.offset_limit, result.get()); + + return result; } +Table::rows_scanner USql::get_iterator(Table *table, const Node *where) const { + auto[use_index, rowids] = probe_index_scan(where, table); + + if (use_index) + return Table::rows_scanner(table, rowids); + else + return Table::rows_scanner(table); +} + } // namespace diff --git a/usql/usql_function.cpp b/usql/usql_function.cpp new file mode 100644 index 0000000..dba9ccc --- /dev/null +++ b/usql/usql_function.cpp @@ -0,0 +1,168 @@ +#include "usql.h" +#include "exception.h" +#include "ml_date.h" +#include "ml_string.h" + +#include + +namespace usql { + +std::unique_ptr USql::to_string_function(const std::vector> &evaluatedPars) { + long date = evaluatedPars[0]->getDateValue(); + std::string format = evaluatedPars[1]->getStringValue(); + std::string formatted_date = date_to_string(date, format); + return std::make_unique(formatted_date); +} + +std::unique_ptr USql::to_date_function(const std::vector> &evaluatedPars) { + std::string date = evaluatedPars[0]->getStringValue(); + std::string format = evaluatedPars[1]->getStringValue(); + long epoch_time = string_to_date(date, format); + return std::make_unique(epoch_time); // No DateValueNode for now +} + +std::unique_ptr USql::date_add_function(const std::vector> &evaluatedPars) { + long datetime = evaluatedPars[0]->getDateValue(); + long quantity = evaluatedPars[1]->getIntegerValue(); + std::string part = evaluatedPars[2]->getStringValue(); + + long new_date = add_to_date(datetime, quantity, part); + return std::make_unique(new_date); // No DateValueNode for now +} + + +std::unique_ptr USql::upper_function(const std::vector> &evaluatedPars) { + std::string str = evaluatedPars[0]->getStringValue(); + std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return toupper(c); }); + return std::make_unique(str); +} + +std::unique_ptr USql::lower_function(const std::vector> &evaluatedPars) { + std::string str = evaluatedPars[0]->getStringValue(); + std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return tolower(c); }); + return std::make_unique(str); +} + +std::unique_ptr USql::pp_function(const std::vector> &evaluatedPars) { + auto &parsed_value = evaluatedPars[0]; + + if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) { + std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : ""; + char buf[20] {0}; // TODO constant here + double value = parsed_value->getDoubleValue(); + + if (format == "100%") + std::snprintf(buf, 20, "%.2f%%", value); + else if (format == "%.2f") + std::snprintf(buf, 20, "%.2f", value); + else if (value >= 1000000000000) + std::snprintf(buf, 20, "%7.2fT", value/1000000000000); + else if (value >= 1000000000) + std::sprintf(buf, "%7.2fB", value/1000000000); + else if (value >= 1000000) + std::snprintf(buf, 20, "%7.2fM", value/1000000); + else if (value >= 100000) + std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M + else if (value <= -1000000000000) + std::snprintf(buf, 20, "%7.2fT", value/1000000000000); + else if (value <= -1000000000) + std::snprintf(buf, 20, "%7.2fB", value/1000000000); + else if (value <= -1000000) + std::snprintf(buf, 20, "%7.2fM", value/1000000); + else if (value <= -100000) + std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M + else if (value == 0) + buf[0]='0'; + else + return std::make_unique(parsed_value->getStringValue().substr(0, 10)); + // TODO introduce constant for 10 + std::string s {buf}; + return std::make_unique(string_padd(s.erase(s.find_last_not_of(' ')+1), 10, ' ', false)); + } + return std::make_unique(parsed_value->getStringValue()); +} + +std::unique_ptr +USql::max_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value) { + if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getIntegerValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::max(val, agg_func_value->getIntegerValue())); + } else { + return std::make_unique(agg_func_value->getIntegerValue()); + } + } else if (col_def_node->type == ColumnType::float_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getDoubleValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::max(val, agg_func_value->getDoubleValue())); + } else { + return std::make_unique(agg_func_value->getDoubleValue()); + } + } else if (col_def_node->type == ColumnType::varchar_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getStringValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::max(val, agg_func_value->getStringValue())); + } else { + return std::make_unique(agg_func_value->getStringValue()); + } + } + + throw Exception("unsupported data type for max function"); +} + +std::unique_ptr +USql::min_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, + ColValue *agg_func_value) { + if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { + if (!evaluatedPars[0]->isNull()) { + long val = evaluatedPars[0]->getIntegerValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::min(val, agg_func_value->getIntegerValue())); + } else { + return std::make_unique(agg_func_value->getIntegerValue()); + } + } else if (col_def_node->type == ColumnType::float_type) { + if (!evaluatedPars[0]->isNull()) { + double val = evaluatedPars[0]->getDoubleValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::min(val, agg_func_value->getDoubleValue())); + } else { + return std::make_unique(agg_func_value->getDoubleValue()); + } + } else if (col_def_node->type == ColumnType::varchar_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getStringValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::min(val, agg_func_value->getStringValue())); + } else { + return std::make_unique(agg_func_value->getStringValue()); + } + } + + throw Exception("unsupported data type for min function"); +} + +std::unique_ptr USql::count_function(ColValue *agg_func_value, const std::vector> &evaluatedPars) { + long c = 1; + if (!agg_func_value->isNull()) { + c = agg_func_value->getIntegerValue() + 1; + } + return std::make_unique(c); +} + +} // namespace \ No newline at end of file