extract functions to own file

This commit is contained in:
2021-12-18 13:37:42 +01:00
parent c9c4f0fba3
commit 906df74847
8 changed files with 184 additions and 164 deletions

View File

@@ -16,7 +16,7 @@ set(PROJECT_NAME usql)
include_directories(${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR})
set(SOURCE
exception.cpp lexer.cpp parser.cpp usql.cpp usql_ddl.cpp usql_dml.cpp main.cpp table.cpp row.cpp row.h csvreader.cpp ml_date.cpp settings.cpp clib/ml_string.cpp clib/linenoise.c)
exception.cpp lexer.cpp parser.cpp usql.cpp usql_ddl.cpp usql_dml.cpp usql_function.cpp main.cpp table.cpp row.cpp row.h csvreader.cpp ml_date.cpp settings.cpp clib/ml_string.cpp clib/linenoise.c)
add_executable(${PROJECT_NAME} ${SOURCE})

View File

@@ -32,6 +32,6 @@ std::vector<std::string> k_debug_sql_commands {
"update a set i = 5 where i = 2",
"select * from a where i = 5",
// "select max(i) from a where s = 'two'"
"select min(i), max(i) from a"
"select min(i), max(i), count(i) from a"
};

View File

@@ -97,7 +97,7 @@ private:
}
}
std::vector<rowid_t> search(IndexValue key) {
std::vector<rowid_t> search(const IndexValue& key) {
auto search = m_index.find(key);
if (search != m_index.end()) {
return search->second;

View File

@@ -23,7 +23,7 @@ namespace usql {
void Lexer::parse(const std::string &code) {
if (code.empty())
throw Exception("empty code");
throw Exception("Lexer.parse empty code");
m_tokens.clear();
m_tokens.reserve(64);
@@ -40,7 +40,9 @@ namespace usql {
std::smatch match = *i;
std::string match_str = match.str();
TokenType token_type = type(match_str);
if (token_type == TokenType::string_literal)
if (token_type == TokenType::undef)
throw Exception("Lexer.parse unknown token type: " + match_str);
if (token_type == TokenType::string_literal)
match_str = stringLiteral(match_str);
if (token_type != TokenType::newline)
@@ -119,7 +121,6 @@ namespace usql {
}
TokenType Lexer::type(const std::string &token) {
// FIXME 'one is evaluated as identifier
if (token == ";") return TokenType::semicolon;
if (token == "+") return TokenType::plus;
if (token == "-") return TokenType::minus;
@@ -177,16 +178,16 @@ namespace usql {
if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
return TokenType::comment;
if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '"')
return (token.at(token.length() - 1) == '"') ? TokenType::string_literal : TokenType::undef;
if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'')
return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '\'')
return (token.at(token.length() - 1) == '\'') ? TokenType::string_literal : TokenType::undef;
if (std::regex_match(token, k_int_regex)) return TokenType::int_number;
if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number;
if (std::regex_match(token, k_double_regex)) return TokenType::double_number;
if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier;
if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier;
return TokenType::undef;
}

View File

@@ -10,8 +10,6 @@
namespace usql {
using IndexValue=std::variant<long, std::string>;
struct Table {
Table(const Table &other);
Table(const std::string& name, const std::vector<ColDefNode>& columns);
@@ -75,7 +73,7 @@ struct Table {
bool m_use_rowids;
Table * m_table;
std::vector<Row>::iterator m_fscan_itr;
std::vector<rowid_t> m_rowids; // TODO long here
std::vector<rowid_t> m_rowids;
size_t m_rowids_idx{};
};

152
usql.cpp
View File

@@ -19,7 +19,6 @@ std::unique_ptr<Table> USql::execute(const std::string &command) {
}
std::unique_ptr<Table> USql::execute(Node &node) {
// TODO optimize execution nodes here
switch (node.node_type) {
case NodeType::create_table:
return execute_create_table(static_cast<CreateTableNode &>(node));
@@ -167,8 +166,7 @@ std::unique_ptr<ValueNode> USql::eval_literal_value_node(Row &row, Node *node) {
}
std::unique_ptr<ValueNode>
USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) {
std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) {
auto *fnc = static_cast<FunctionNode *>(node);
std::vector<std::unique_ptr<ValueNode>> evaluatedPars;
@@ -194,14 +192,6 @@ USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *c
throw Exception("invalid function: " + fnc->function);
}
std::unique_ptr<ValueNode> USql::count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
long c = 1;
if (!agg_func_value->isNull()) {
c = agg_func_value->getIntegerValue() + 1;
}
return std::make_unique<IntValueNode>(c);
}
bool USql::eval_logical_operator(LogicalOperatorNode &node, Table *pTable, Row &row) {
//bool left = eval_relational_operator(static_cast<const RelationalOperatorNode &>(*node.left), pTable, row);
@@ -274,144 +264,6 @@ std::unique_ptr<ValueNode> USql::eval_arithmetic_operator(ColumnType outType, Ar
throw Exception("implement me!!");
}
std::unique_ptr<ValueNode> USql::to_string_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
long date = evaluatedPars[0]->getDateValue();
std::string format = evaluatedPars[1]->getStringValue();
std::string formatted_date = date_to_string(date, format);
return std::make_unique<StringValueNode>(formatted_date);
}
std::unique_ptr<ValueNode> USql::to_date_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
std::string date = evaluatedPars[0]->getStringValue();
std::string format = evaluatedPars[1]->getStringValue();
long epoch_time = string_to_date(date, format);
return std::make_unique<IntValueNode>(epoch_time); // No DateValueNode for now
}
std::unique_ptr<ValueNode> USql::date_add_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
long datetime = evaluatedPars[0]->getDateValue();
long quantity = evaluatedPars[1]->getIntegerValue();
std::string part = evaluatedPars[2]->getStringValue();
long new_date = add_to_date(datetime, quantity, part);
return std::make_unique<IntValueNode>(new_date); // No DateValueNode for now
}
std::unique_ptr<ValueNode> USql::upper_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
std::string str = evaluatedPars[0]->getStringValue();
std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return toupper(c); });
return std::make_unique<StringValueNode>(str);
}
std::unique_ptr<ValueNode> USql::lower_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
std::string str = evaluatedPars[0]->getStringValue();
std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return tolower(c); });
return std::make_unique<StringValueNode>(str);
}
std::unique_ptr<ValueNode> USql::pp_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
auto &parsed_value = evaluatedPars[0];
if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) {
std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
char buf[20] {0}; // TODO constant here
double value = parsed_value->getDoubleValue();
if (format == "100%")
std::snprintf(buf, 20, "%.2f%%", value);
else if (format == "%.2f")
std::snprintf(buf, 20, "%.2f", value);
else if (value >= 1000000000000)
std::snprintf(buf, 20, "%7.2fT", value/1000000000000);
else if (value >= 1000000000)
std::sprintf(buf, "%7.2fB", value/1000000000);
else if (value >= 1000000)
std::snprintf(buf, 20, "%7.2fM", value/1000000);
else if (value >= 100000)
std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M
else if (value <= -1000000000000)
std::snprintf(buf, 20, "%7.2fT", value/1000000000000);
else if (value <= -1000000000)
std::snprintf(buf, 20, "%7.2fB", value/1000000000);
else if (value <= -1000000)
std::snprintf(buf, 20, "%7.2fM", value/1000000);
else if (value <= -100000)
std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M
else if (value == 0)
buf[0]='0';
else
return std::make_unique<StringValueNode>(parsed_value->getStringValue().substr(0, 10));
// TODO introduce constant for 10
std::string s {buf};
return std::make_unique<StringValueNode>(string_padd(s.erase(s.find_last_not_of(' ')+1), 10, ' ', false));
}
return std::make_unique<StringValueNode>(parsed_value->getStringValue());
}
std::unique_ptr<ValueNode>
USql::max_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node,
ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
long val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull()) {
return std::make_unique<IntValueNode>(val);
} else {
return std::make_unique<IntValueNode>(std::max(val, agg_func_value->getIntegerValue()));
}
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntegerValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
double val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull()) {
return std::make_unique<DoubleValueNode>(val);
} else {
return std::make_unique<DoubleValueNode>(std::max(val, agg_func_value->getDoubleValue()));
}
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
}
// TODO string and boolean
throw Exception("unsupported data type for max function");
}
std::unique_ptr<ValueNode>
USql::min_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node,
ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
long val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull()) {
return std::make_unique<IntValueNode>(val);
} else {
return std::make_unique<IntValueNode>(std::min(val, agg_func_value->getIntegerValue()));
}
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntegerValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
double val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull()) {
return std::make_unique<DoubleValueNode>(val);
} else {
return std::make_unique<DoubleValueNode>(std::min(val, agg_func_value->getDoubleValue()));
}
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
}
// TODO string and boolean
throw Exception("unsupported data type for min function");
}
Table *USql::find_table(const std::string &name) const {
auto name_cmp = [name](const Table& t) { return t.m_name == name; };
@@ -437,4 +289,4 @@ void USql::check_index_not_exists(const std::string &index_name) {
throw Exception("index already exists");
}
} // namespace
} // namespace

168
usql_function.cpp Normal file
View File

@@ -0,0 +1,168 @@
#include "usql.h"
#include "exception.h"
#include "ml_date.h"
#include "ml_string.h"
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <string>
namespace usql {
std::unique_ptr<ValueNode> USql::to_string_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
long date = evaluatedPars[0]->getDateValue();
std::string format = evaluatedPars[1]->getStringValue();
std::string formatted_date = date_to_string(date, format);
return std::make_unique<StringValueNode>(formatted_date);
}
std::unique_ptr<ValueNode> USql::to_date_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
std::string date = evaluatedPars[0]->getStringValue();
std::string format = evaluatedPars[1]->getStringValue();
long epoch_time = string_to_date(date, format);
return std::make_unique<IntValueNode>(epoch_time); // No DateValueNode for now
}
std::unique_ptr<ValueNode> USql::date_add_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
long datetime = evaluatedPars[0]->getDateValue();
long quantity = evaluatedPars[1]->getIntegerValue();
std::string part = evaluatedPars[2]->getStringValue();
long new_date = add_to_date(datetime, quantity, part);
return std::make_unique<IntValueNode>(new_date); // No DateValueNode for now
}
std::unique_ptr<ValueNode> USql::upper_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
std::string str = evaluatedPars[0]->getStringValue();
std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return toupper(c); });
return std::make_unique<StringValueNode>(str);
}
std::unique_ptr<ValueNode> USql::lower_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
std::string str = evaluatedPars[0]->getStringValue();
std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return tolower(c); });
return std::make_unique<StringValueNode>(str);
}
std::unique_ptr<ValueNode> USql::pp_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
auto &parsed_value = evaluatedPars[0];
if (parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value) {
std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
char buf[20] {0}; // TODO constant here
double value = parsed_value->getDoubleValue();
if (format == "100%")
std::snprintf(buf, 20, "%.2f%%", value);
else if (format == "%.2f")
std::snprintf(buf, 20, "%.2f", value);
else if (value >= 1000000000000)
std::snprintf(buf, 20, "%7.2fT", value/1000000000000);
else if (value >= 1000000000)
std::sprintf(buf, "%7.2fB", value/1000000000);
else if (value >= 1000000)
std::snprintf(buf, 20, "%7.2fM", value/1000000);
else if (value >= 100000)
std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M
else if (value <= -1000000000000)
std::snprintf(buf, 20, "%7.2fT", value/1000000000000);
else if (value <= -1000000000)
std::snprintf(buf, 20, "%7.2fB", value/1000000000);
else if (value <= -1000000)
std::snprintf(buf, 20, "%7.2fM", value/1000000);
else if (value <= -100000)
std::snprintf(buf, 20, "%7.2fM", value/100000); // 0.12M
else if (value == 0)
buf[0]='0';
else
return std::make_unique<StringValueNode>(parsed_value->getStringValue().substr(0, 10));
// TODO introduce constant for 10
std::string s {buf};
return std::make_unique<StringValueNode>(string_padd(s.erase(s.find_last_not_of(' ')+1), 10, ' ', false));
}
return std::make_unique<StringValueNode>(parsed_value->getStringValue());
}
std::unique_ptr<ValueNode>
USql::max_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull())
return std::make_unique<IntValueNode>(val);
else
return std::make_unique<IntValueNode>(std::max(val, agg_func_value->getIntegerValue()));
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntegerValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull())
return std::make_unique<DoubleValueNode>(val);
else
return std::make_unique<DoubleValueNode>(std::max(val, agg_func_value->getDoubleValue()));
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
} else if (col_def_node->type == ColumnType::varchar_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getStringValue();
if (agg_func_value->isNull())
return std::make_unique<StringValueNode>(val);
else
return std::make_unique<StringValueNode>(std::max(val, agg_func_value->getStringValue()));
} else {
return std::make_unique<StringValueNode>(agg_func_value->getStringValue());
}
}
throw Exception("unsupported data type for max function");
}
std::unique_ptr<ValueNode>
USql::min_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node,
ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
long val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull())
return std::make_unique<IntValueNode>(val);
else
return std::make_unique<IntValueNode>(std::min(val, agg_func_value->getIntegerValue()));
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntegerValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
double val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull())
return std::make_unique<DoubleValueNode>(val);
else
return std::make_unique<DoubleValueNode>(std::min(val, agg_func_value->getDoubleValue()));
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
} else if (col_def_node->type == ColumnType::varchar_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getStringValue();
if (agg_func_value->isNull())
return std::make_unique<StringValueNode>(val);
else
return std::make_unique<StringValueNode>(std::min(val, agg_func_value->getStringValue()));
} else {
return std::make_unique<StringValueNode>(agg_func_value->getStringValue());
}
}
throw Exception("unsupported data type for min function");
}
// Advances the running count of an aggregate by one.
//   agg_func_value - the count so far; a null accumulator starts the count at 1
//   evaluatedPars  - not consulted: every call counts, whatever the parameters hold
// Returns an IntValueNode holding the new count.
std::unique_ptr<ValueNode> USql::count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    const long counted = agg_func_value->isNull() ? 1 : agg_func_value->getIntegerValue() + 1;
    return std::make_unique<IntValueNode>(counted);
}
} // namespace

View File

@@ -14,6 +14,7 @@ usql.h \
usql.cpp \
usql_dml.cpp \
usql_ddl.cpp \
usql_function.cpp \
row.h \
row.cpp \
parser.cpp \