diff --git a/CMakeLists.txt b/CMakeLists.txt index ebf7f0e..d5ed9be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ set(PROJECT_NAME usql) include_directories(${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR}) set(SOURCE - exception.cpp lexer.cpp parser.cpp usql.cpp usql_ddl.cpp usql_dml.cpp main.cpp table.cpp row.cpp row.h csvreader.cpp ml_date.cpp settings.cpp clib/ml_string.cpp clib/linenoise.c) + exception.cpp lexer.cpp parser.cpp usql.cpp usql_ddl.cpp usql_dml.cpp usql_function.cpp main.cpp table.cpp row.cpp row.h csvreader.cpp ml_date.cpp settings.cpp clib/ml_string.cpp clib/linenoise.c) add_executable(${PROJECT_NAME} ${SOURCE}) diff --git a/debug.h b/debug.h index 84059ae..9018fc8 100644 --- a/debug.h +++ b/debug.h @@ -32,6 +32,6 @@ std::vector k_debug_sql_commands { "update a set i = 5 where i = 2", "select * from a where i = 5", // "select max(i) from a where s = 'two'" - "select min(i), max(i) from a" + "select min(i), max(i), count(i) from a" }; diff --git a/index.h b/index.h index a5307b3..8e1fae4 100644 --- a/index.h +++ b/index.h @@ -97,7 +97,7 @@ private: } } - std::vector search(IndexValue key) { + std::vector search(const IndexValue& key) { auto search = m_index.find(key); if (search != m_index.end()) { return search->second; diff --git a/lexer.cpp b/lexer.cpp index 1a4a9ce..0a1723e 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -23,7 +23,7 @@ namespace usql { void Lexer::parse(const std::string &code) { if (code.empty()) - throw Exception("empty code"); + throw Exception("Lexer.parse empty code"); m_tokens.clear(); m_tokens.reserve(64); @@ -40,7 +40,9 @@ namespace usql { std::smatch match = *i; std::string match_str = match.str(); TokenType token_type = type(match_str); - if (token_type == TokenType::string_literal) + if (token_type == TokenType::undef) + throw Exception("Lexer.parse unknown token type: " + match_str); + if (token_type == TokenType::string_literal) match_str = stringLiteral(match_str); if (token_type != TokenType::newline) @@ -119,7 +121,6 @@ namespace usql { } TokenType Lexer::type(const std::string &token) { - // FIXME 'one is evaluated as identifier if (token == ";") return TokenType::semicolon; if (token == "+") return TokenType::plus; if (token == "-") return TokenType::minus; @@ -177,16 +178,16 @@ namespace usql { if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r')) return TokenType::comment; - if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"') - return TokenType::string_literal; + if (token.length() >= 2 && token.at(0) == '"') + return (token.at(token.length() - 1) == '"') ? TokenType::string_literal : TokenType::undef; - if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'') - return TokenType::string_literal; + if (token.length() >= 2 && token.at(0) == '\'') + return (token.at(token.length() - 1) == '\'') ? TokenType::string_literal : TokenType::undef; if (std::regex_match(token, k_int_regex)) return TokenType::int_number; if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number; if (std::regex_match(token, k_double_regex)) return TokenType::double_number; - if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; + if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier; return TokenType::undef; } diff --git a/table.h b/table.h index d45137f..a7128f9 100644 --- a/table.h +++ b/table.h @@ -10,8 +10,6 @@ namespace usql { -using IndexValue=std::variant; - struct Table { Table(const Table &other); Table(const std::string& name, const std::vector& columns); @@ -75,7 +73,7 @@ struct Table { bool m_use_rowids; Table * m_table; std::vector::iterator m_fscan_itr; - std::vector m_rowids; // TODO long here + std::vector m_rowids; size_t m_rowids_idx{}; }; diff --git a/usql.cpp b/usql.cpp index 7498077..3cd3e3a 100644 --- a/usql.cpp +++ b/usql.cpp @@ -19,7 +19,6 @@ std::unique_ptr USql::execute(const std::string &command) { } std::unique_ptr
USql::execute(Node &node) { - // TODO optimize execution nodes here switch (node.node_type) { case NodeType::create_table: return execute_create_table(static_cast(node)); @@ -167,8 +166,7 @@ std::unique_ptr USql::eval_literal_value_node(Row &row, Node *node) { } -std::unique_ptr -USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) { +std::unique_ptr USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) { auto *fnc = static_cast(node); std::vector> evaluatedPars; @@ -194,14 +192,6 @@ USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *c throw Exception("invalid function: " + fnc->function); } -std::unique_ptr USql::count_function(ColValue *agg_func_value, const std::vector> &evaluatedPars) { - long c = 1; - if (!agg_func_value->isNull()) { - c = agg_func_value->getIntegerValue() + 1; - } - return std::make_unique(c); -} - bool USql::eval_logical_operator(LogicalOperatorNode &node, Table *pTable, Row &row) { //bool left = eval_relational_operator(static_cast(*node.left), pTable, row); @@ -274,144 +264,6 @@ std::unique_ptr USql::eval_arithmetic_operator(ColumnType outType, Ar throw Exception("implement me!!"); } - -std::unique_ptr USql::to_string_function(const std::vector> &evaluatedPars) { - long date = evaluatedPars[0]->getDateValue(); - std::string format = evaluatedPars[1]->getStringValue(); - std::string formatted_date = date_to_string(date, format); - return std::make_unique(formatted_date); -} - -std::unique_ptr USql::to_date_function(const std::vector> &evaluatedPars) { - std::string date = evaluatedPars[0]->getStringValue(); - std::string format = evaluatedPars[1]->getStringValue(); - long epoch_time = string_to_date(date, format); - return std::make_unique(epoch_time); // No DateValueNode for now -} - -std::unique_ptr USql::date_add_function(const std::vector> &evaluatedPars) { - long datetime = evaluatedPars[0]->getDateValue(); - long quantity = evaluatedPars[1]->getIntegerValue(); - std::string part = evaluatedPars[2]->getStringValue(); - - long new_date = add_to_date(datetime, quantity, part); - return std::make_unique(new_date); // No DateValueNode for now -} - - -std::unique_ptr USql::upper_function(const std::vector> &evaluatedPars) { - std::string str = evaluatedPars[0]->getStringValue(); - std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return toupper(c); }); - return std::make_unique(str); -} - -std::unique_ptr USql::lower_function(const std::vector> &evaluatedPars) { - std::string str = evaluatedPars[0]->getStringValue(); - std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return tolower(c); }); - return std::make_unique(str); -} - -std::unique_ptr USql::pp_function(const std::vector> &evaluatedPars) { - auto &parsed_value = evaluatedPars[0]; - - if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) { - std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : ""; - char buf[20] {0}; // TODO constant here - double value = parsed_value->getDoubleValue(); - - if (format == "100%") - std::snprintf(buf, 20, "%.2f%%", value); - else if (format == "%.2f") - std::snprintf(buf, 20, "%.2f", value); - else if (value >= 1000000000000) - std::snprintf(buf, 20, "%7.2fT", value/1000000000000); - else if (value >= 1000000000) - std::sprintf(buf, "%7.2fB", value/1000000000); - else if (value >= 1000000) - std::snprintf(buf, 20, "%7.2fM", value/1000000); - else if (value >= 100000) - std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M - else if (value <= -1000000000000) - std::snprintf(buf, 20, "%7.2fT", value/1000000000000); - else if (value <= -1000000000) - std::snprintf(buf, 20, "%7.2fB", value/1000000000); - else if (value <= -1000000) - std::snprintf(buf, 20, "%7.2fM", value/1000000); - else if (value <= -100000) - std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M - else if (value == 0) - buf[0]='0'; - else - return std::make_unique(parsed_value->getStringValue().substr(0, 10)); - // TODO introduce constant for 10 - std::string s {buf}; - return std::make_unique(string_padd(s.erase(s.find_last_not_of(' ')+1), 10, ' ', false)); - } - return std::make_unique(parsed_value->getStringValue()); -} - -std::unique_ptr -USql::max_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, - ColValue *agg_func_value) { - if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { - if (!evaluatedPars[0]->isNull()) { - long val = evaluatedPars[0]->getIntegerValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::max(val, agg_func_value->getIntegerValue())); - } - } else { - return std::make_unique(agg_func_value->getIntegerValue()); - } - } else if (col_def_node->type == ColumnType::float_type) { - if (!evaluatedPars[0]->isNull()) { - double val = evaluatedPars[0]->getDoubleValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::max(val, agg_func_value->getDoubleValue())); - } - } else { - return std::make_unique(agg_func_value->getDoubleValue()); - } - } - - // TODO string and boolean - throw Exception("unsupported data type for max function"); -} - -std::unique_ptr -USql::min_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, - ColValue *agg_func_value) { - if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { - if (!evaluatedPars[0]->isNull()) { - long val = evaluatedPars[0]->getIntegerValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::min(val, agg_func_value->getIntegerValue())); - } - } else { - return std::make_unique(agg_func_value->getIntegerValue()); - } - } else if (col_def_node->type == ColumnType::float_type) { - if (!evaluatedPars[0]->isNull()) { - double val = evaluatedPars[0]->getDoubleValue(); - if (agg_func_value->isNull()) { - return std::make_unique(val); - } else { - return std::make_unique(std::min(val, agg_func_value->getDoubleValue())); - } - } else { - return std::make_unique(agg_func_value->getDoubleValue()); - } - } - - // TODO string and boolean - throw Exception("unsupported data type for min function"); -} - Table *USql::find_table(const std::string &name) const { auto name_cmp = [name](const Table& t) { return t.m_name == name; }; @@ -437,4 +289,4 @@ void USql::check_index_not_exists(const std::string &index_name) { throw Exception("index already exists"); } -} // namespace +} // namespace \ No newline at end of file diff --git a/usql_function.cpp b/usql_function.cpp new file mode 100644 index 0000000..dba9ccc --- /dev/null +++ b/usql_function.cpp @@ -0,0 +1,168 @@ +#include "usql.h" +#include "exception.h" +#include "ml_date.h" +#include "ml_string.h" + +#include + +namespace usql { + +std::unique_ptr USql::to_string_function(const std::vector> &evaluatedPars) { + long date = evaluatedPars[0]->getDateValue(); + std::string format = evaluatedPars[1]->getStringValue(); + std::string formatted_date = date_to_string(date, format); + return std::make_unique(formatted_date); +} + +std::unique_ptr USql::to_date_function(const std::vector> &evaluatedPars) { + std::string date = evaluatedPars[0]->getStringValue(); + std::string format = evaluatedPars[1]->getStringValue(); + long epoch_time = string_to_date(date, format); + return std::make_unique(epoch_time); // No DateValueNode for now +} + +std::unique_ptr USql::date_add_function(const std::vector> &evaluatedPars) { + long datetime = evaluatedPars[0]->getDateValue(); + long quantity = evaluatedPars[1]->getIntegerValue(); + std::string part = evaluatedPars[2]->getStringValue(); + + long new_date = add_to_date(datetime, quantity, part); + return std::make_unique(new_date); // No DateValueNode for now +} + + +std::unique_ptr USql::upper_function(const std::vector> &evaluatedPars) { + std::string str = evaluatedPars[0]->getStringValue(); + std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return toupper(c); }); + return std::make_unique(str); +} + +std::unique_ptr USql::lower_function(const std::vector> &evaluatedPars) { + std::string str = evaluatedPars[0]->getStringValue(); + std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return tolower(c); }); + return std::make_unique(str); +} + +std::unique_ptr USql::pp_function(const std::vector> &evaluatedPars) { + auto &parsed_value = evaluatedPars[0]; + + if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) { + std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : ""; + char buf[20] {0}; // TODO constant here + double value = parsed_value->getDoubleValue(); + + if (format == "100%") + std::snprintf(buf, 20, "%.2f%%", value); + else if (format == "%.2f") + std::snprintf(buf, 20, "%.2f", value); + else if (value >= 1000000000000) + std::snprintf(buf, 20, "%7.2fT", value/1000000000000); + else if (value >= 1000000000) + std::sprintf(buf, "%7.2fB", value/1000000000); + else if (value >= 1000000) + std::snprintf(buf, 20, "%7.2fM", value/1000000); + else if (value >= 100000) + std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M + else if (value <= -1000000000000) + std::snprintf(buf, 20, "%7.2fT", value/1000000000000); + else if (value <= -1000000000) + std::snprintf(buf, 20, "%7.2fB", value/1000000000); + else if (value <= -1000000) + std::snprintf(buf, 20, "%7.2fM", value/1000000); + else if (value <= -100000) + std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M + else if (value == 0) + buf[0]='0'; + else + return std::make_unique(parsed_value->getStringValue().substr(0, 10)); + // TODO introduce constant for 10 + std::string s {buf}; + return std::make_unique(string_padd(s.erase(s.find_last_not_of(' ')+1), 10, ' ', false)); + } + return std::make_unique(parsed_value->getStringValue()); +} + +std::unique_ptr +USql::max_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value) { + if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getIntegerValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::max(val, agg_func_value->getIntegerValue())); + } else { + return std::make_unique(agg_func_value->getIntegerValue()); + } + } else if (col_def_node->type == ColumnType::float_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getDoubleValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::max(val, agg_func_value->getDoubleValue())); + } else { + return std::make_unique(agg_func_value->getDoubleValue()); + } + } else if (col_def_node->type == ColumnType::varchar_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getStringValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::max(val, agg_func_value->getStringValue())); + } else { + return std::make_unique(agg_func_value->getStringValue()); + } + } + + throw Exception("unsupported data type for max function"); +} + +std::unique_ptr +USql::min_function(const std::vector> &evaluatedPars, const ColDefNode *col_def_node, + ColValue *agg_func_value) { + if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) { + if (!evaluatedPars[0]->isNull()) { + long val = evaluatedPars[0]->getIntegerValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::min(val, agg_func_value->getIntegerValue())); + } else { + return std::make_unique(agg_func_value->getIntegerValue()); + } + } else if (col_def_node->type == ColumnType::float_type) { + if (!evaluatedPars[0]->isNull()) { + double val = evaluatedPars[0]->getDoubleValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::min(val, agg_func_value->getDoubleValue())); + } else { + return std::make_unique(agg_func_value->getDoubleValue()); + } + } else if (col_def_node->type == ColumnType::varchar_type) { + if (!evaluatedPars[0]->isNull()) { + auto val = evaluatedPars[0]->getStringValue(); + if (agg_func_value->isNull()) + return std::make_unique(val); + else + return std::make_unique(std::min(val, agg_func_value->getStringValue())); + } else { + return std::make_unique(agg_func_value->getStringValue()); + } + } + + throw Exception("unsupported data type for min function"); +} + +std::unique_ptr USql::count_function(ColValue *agg_func_value, const std::vector> &evaluatedPars) { + long c = 1; + if (!agg_func_value->isNull()) { + c = agg_func_value->getIntegerValue() + 1; + } + return std::make_unique(c); +} + +} // namespace \ No newline at end of file diff --git a/utils/cp_to_mlisp.sh b/utils/cp_to_mlisp.sh index 1e1aa8f..38c1bd6 100644 --- a/utils/cp_to_mlisp.sh +++ b/utils/cp_to_mlisp.sh @@ -14,6 +14,7 @@ usql.h \ usql.cpp \ usql_dml.cpp \ usql_ddl.cpp \ +usql_function.cpp \ row.h \ row.cpp \ parser.cpp \