usql update

This commit is contained in:
VaclavT 2021-12-19 13:33:47 +01:00
parent 37d0d9b3f5
commit 5c925f2608
23 changed files with 1570 additions and 1124 deletions

View File

@ -44,6 +44,7 @@ set(SOURCE
usql/usql.cpp
usql/usql_dml.cpp
usql/usql_ddl.cpp
usql/usql_function.cpp
usql/table.cpp
usql/table.h
usql/row.cpp

View File

@ -24,9 +24,9 @@ MlValue uSQL::ivaluize(const usql::Table *table) {
if (c.isNull()) {
columns.push_back(MlValue::nil());
} else if (type == ColumnType::integer_type || type == ColumnType::date_type) {
columns.push_back(MlValue(c.getIntValue()));
columns.push_back(MlValue(c.getIntegerValue()));
} else if (type == ColumnType::bool_type) {
columns.push_back(c.getBoolValue() ? MlValue(c.getIntValue()) : MlValue::nil());
columns.push_back(c.getBoolValue() ? MlValue(c.getIntegerValue()) : MlValue::nil());
} else if (type == ColumnType::float_type) {
columns.push_back(MlValue(c.getDoubleValue()));
} else {

View File

@ -1,4 +1,4 @@
#include <errno.h>
#include <cerrno>
#include "exception.h"
#include "csvreader.h"
@ -7,15 +7,15 @@
namespace usql {
CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
skip_header = skip_hdr;
field_separator = field_sep;
quote_character = quote_ch;
line_separator = line_sep;
line_separator2 = line_sep2;
CsvReader::CsvReader(bool skip_hdr, char field_sep, char quote_ch, char line_sep, char line_sep2) {
skip_header = skip_hdr;
field_separator = field_sep;
quote_character = quote_ch;
line_separator = line_sep;
line_separator2 = line_sep2;
header_skiped = !skip_hdr;
}
header_skiped = !skip_hdr;
}
int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &cols_def, Table &table) {
@ -41,7 +41,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
size_t len = 0;
int read_chars;
long read_chars;
while ((read_chars = getline(&line_str, &len, fp)) != -1) {
if (skip_header && !header_skiped) {
header_skiped = true;
@ -59,7 +59,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
if (*aChar == quote_character) {
inQuote = !inQuote;
} else if (*aChar == field_separator) {
if (inQuote == true) {
if (inQuote) {
field += *aChar;
} else {
line.push_back(field);
@ -80,9 +80,6 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
field.clear();
line.clear();
// DEBUG
// if (row_cnt > 50000) break;
//
}
fclose(fp);
@ -93,53 +90,53 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
return row_cnt;
}
int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
int row_cnt = 0;
bool inQuote(false);
bool newLine(false);
std::string field;
int CsvReader::parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table) {
int row_cnt = 0;
bool inQuote(false);
bool newLine(false);
std::string field;
std::vector<std::string> line;
line.reserve(32);
std::vector<std::string> line;
line.reserve(32);
std::string::const_iterator aChar = csvSource.begin();
while (aChar != csvSource.end()) {
if (*aChar == quote_character) {
newLine = false;
inQuote = !inQuote;
} else if (*aChar == field_separator) {
newLine = false;
if (inQuote == true) {
field += *aChar;
} else {
line.push_back(field);
field.clear();
}
} else if (*aChar == line_separator || *aChar == line_separator2) {
if (inQuote == true) {
field += *aChar;
} else {
if (newLine == false) {
line.push_back(field);
if (header_skiped) {
table.create_row_from_vector(cols_def, line);
row_cnt++;
}
header_skiped = true;
field.clear();
line.clear();
newLine = true;
}
}
} else {
newLine = false;
field.push_back(*aChar);
}
std::string::const_iterator aChar = csvSource.begin();
while (aChar != csvSource.end()) {
if (*aChar == quote_character) {
newLine = false;
inQuote = !inQuote;
} else if (*aChar == field_separator) {
newLine = false;
if (inQuote) {
field += *aChar;
} else {
line.push_back(field);
field.clear();
}
} else if (*aChar == line_separator || *aChar == line_separator2) {
if (inQuote) {
field += *aChar;
} else {
if (!newLine) {
line.push_back(field);
if (header_skiped) {
table.create_row_from_vector(cols_def, line);
row_cnt++;
}
header_skiped = true;
field.clear();
line.clear();
newLine = true;
}
}
} else {
newLine = false;
field.push_back(*aChar);
}
aChar++;
}
aChar++;
}
if (!field.empty()) line.push_back(field);
if (!field.empty()) line.push_back(field);
if (header_skiped) {
table.create_row_from_vector(cols_def, line);

View File

@ -24,7 +24,7 @@ namespace usql {
bool header_skiped;
public:
CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
explicit CsvReader(bool skip_hdr = true, char field_sep = ',', char quote_ch = '"', char line_sep = '\r', char line_sep2 = '\n');
int parseCSV2(const std::string &csvSource, std::vector<ColDefNode> &cols_def, Table& table);

View File

@ -2,6 +2,6 @@
namespace usql {
Exception::Exception(const std::string msg) : std::runtime_error(msg) {}
Exception::Exception(std::string msg) : std::runtime_error(msg) {}
} // namespace

View File

@ -1,14 +1,12 @@
#pragma once
#include "lexer.h"
#include <string>
namespace usql {
class Exception : public std::runtime_error {
public:
Exception(const std::string msg);
explicit Exception(std::string msg);
};
} // namespace

119
usql/index.h Normal file
View File

@ -0,0 +1,119 @@
#pragma once
#include "exception.h"
#include "parser.h"
#include "row.h"
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <variant>
#include <vector>
namespace usql {
using IndexValue = std::variant<long, std::string>;
using rowid_t = size_t; // int is now enough but size_t is correct
static const int k_default_rowids_size = 16;
class Index {
public:
Index(std::string index_name, std::string col_name, ColumnType type) :
m_index_name(std::move(index_name)), m_column_name(std::move(col_name)),
m_data_type(type), m_uniq(false) {
if (type != ColumnType::integer_type && type != ColumnType::varchar_type)
throw Exception("creating index on unsupported type");
}
std::vector<rowid_t> search(const ValueNode *key) {
return search(to_index_value(key));
}
void insert(const ColValue *key, rowid_t rowid) {
return insert(to_index_value(key), rowid);
}
void remove(const ColValue *key, rowid_t rowid) {
return remove(to_index_value(key), rowid);
}
void truncate() {
m_index.clear();
}
[[nodiscard]] const std::string &get_column_name() const {
return m_column_name;
}
[[nodiscard]] const std::string &get_index_name() const {
return m_index_name;
}
private:
IndexValue to_index_value(const ValueNode *key) {
if (m_data_type == ColumnType::integer_type)
return key->getIntegerValue();
else if (m_data_type == ColumnType::varchar_type)
return key->getStringValue();
else
throw Exception("using index on unsupported type");
}
IndexValue to_index_value(const ColValue *key) {
if (m_data_type == ColumnType::integer_type)
return key->getIntegerValue();
else if (m_data_type == ColumnType::varchar_type)
return key->getStringValue();
else
throw Exception("using index on unsupported type");
}
void insert(const IndexValue& key, rowid_t rowid) {
auto search = m_index.find(key);
if (search != m_index.end()) {
if (m_uniq)
throw Exception("Inserting duplicate value into unique index");
search->second.push_back(rowid);
} else {
std::vector<rowid_t> rowids{rowid};
if (!m_uniq)
rowids.reserve(k_default_rowids_size);
m_index[key] = rowids;
}
}
void remove(const IndexValue& key, rowid_t rowid) {
auto search = m_index.find(key);
if (search != m_index.end()) {
search->second.erase(find(search->second.begin(), search->second.end(), rowid));
if (search->second.empty())
m_index.erase(search);
}
}
std::vector<rowid_t> search(const IndexValue& key) {
auto search = m_index.find(key);
if (search != m_index.end()) {
return search->second;
} else {
return std::vector<rowid_t>{};
}
}
private:
bool m_uniq;
std::string m_index_name;
std::string m_column_name;
ColumnType m_data_type;
std::map<IndexValue, std::vector<rowid_t> > m_index;
};
} // namespace

View File

@ -5,287 +5,292 @@
namespace usql {
Token::Token(const std::string &token_str, TokenType typ) {
token_string = token_str;
type = typ;
}
// Builds a token from its text and its classified type.
Token::Token(const std::string &token_str, TokenType typ) {
    this->token_string = token_str;
    this->type = typ;
}
Lexer::Lexer() {
k_words_regex =
"[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/"
",;:\?]|!=|<>|==|>=|<=|~=|>|<|=|;|~|\\||\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n";
k_int_regex = "[-+]?[0-9]+";
Lexer::Lexer() {
k_words_regex =
"[-+]?[0-9]+\\.[0-9]+|[-+]?[0-9]+|[A-Za-z]+[A-Za-z0-9_#]*|[\\(\\)\\[\\]\\{\\}]|[-\\+\\*/"
",;:\?]|!=|<>|==|>=|<=|~=|>|<|=|;|~|\\||\n|\r|\r\n|'([^']|'')*'|\".*?\"|%.*?\n";
k_int_regex = "[-+]?[0-9]+";
k_int_underscored_regex = "[-+]?[0-9][0-9_]+[0-9]";
k_double_regex = "[-+]?[0-9]+\\.[0-9]+";
k_identifier_regex = "[A-Za-z]+[A-Za-z0-9_#]*";
}
}
void Lexer::parse(const std::string &code) {
if (code.empty())
throw Exception("empty code");
void Lexer::parse(const std::string &code) {
if (code.empty())
throw Exception("Lexer.parse empty code");
m_tokens.clear();
m_tokens.reserve(64);
m_tokens.clear();
m_tokens.reserve(64);
m_code_str = code;
if (!m_code_str.empty() && m_code_str.back() != '\n') {
m_code_str.append("\n"); // temp solution to prevent possible situation when last line is a comment
}
m_code_str = code;
if (!m_code_str.empty() && m_code_str.back() != '\n') {
m_code_str.append("\n"); // temp solution to prevent possible situation when last line is a comment
}
auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex);
auto words_end = std::sregex_iterator();
auto words_begin = std::sregex_iterator(m_code_str.begin(), m_code_str.end(), k_words_regex);
auto words_end = std::sregex_iterator();
for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
std::smatch match = *i;
std::string match_str = match.str();
TokenType token_type = type(match_str);
if (token_type == TokenType::string_literal)
match_str = stringLiteral(match_str);
for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
std::smatch match = *i;
std::string match_str = match.str();
TokenType token_type = type(match_str);
if (token_type == TokenType::undef)
throw Exception("Lexer.parse unknown token type: " + match_str);
if (token_type == TokenType::string_literal)
match_str = stringLiteral(match_str);
if (token_type != TokenType::newline)
m_tokens.emplace_back(match_str, token_type);
}
if (token_type != TokenType::newline)
m_tokens.emplace_back(match_str, token_type);
}
// DEBUG IT
// debugTokens();
// DEBUG IT
// debugTokens();
m_index = 0;
}
m_index = 0;
}
void Lexer::debugTokens() {
int i = 0;
for (auto & m_token : m_tokens) {
std::cerr << i << "\t" << m_token.token_string << std::endl;
i++;
}
}
void Lexer::debugTokens() {
int i = 0;
for (auto & m_token : m_tokens) {
std::cerr << i << "\t" << m_token.token_string << std::endl;
i++;
}
}
Token Lexer::currentToken() { return m_tokens[m_index]; }
// Returns a copy of the token at the current position.
// Throws Exception instead of reading past the token vector when the
// position is already at (or beyond) the end of input.
Token Lexer::currentToken() {
    if (m_index >= m_tokens.size())
        throw Exception("Lexer.currentToken no token at current position");
    return m_tokens[m_index];
}
Token Lexer::consumeToken() {
int i = m_index;
nextToken();
return m_tokens[i];
}
// Returns the current token and advances to the next one.
// Throws Exception when there is no current token (end of input) rather
// than indexing past the token vector.
Token Lexer::consumeToken() {
    if (m_index >= m_tokens.size())
        throw Exception("Lexer.consumeToken unexpected end of input");
    const auto consumed = m_index;  // keep the index's own type, no narrowing to int
    nextToken();
    return m_tokens[consumed];
}
Token Lexer::consumeToken(TokenType type) {
int i = m_index;
skipToken(type);
return m_tokens[i];
}
// Requires the current token to be of the given type, advances past it and
// returns it. skipToken() throws Exception on a type mismatch.
Token Lexer::consumeToken(TokenType type) {
    const auto consumed = m_index;  // keep the index's own type, no narrowing to int
    skipToken(type);
    // skipToken() accepts TokenType::eof at the end of input, where there is
    // no stored token to hand back.
    if (consumed >= m_tokens.size())
        throw Exception("Lexer.consumeToken no token to return at end of input");
    return m_tokens[consumed];
}
void Lexer::nextToken() {
if (m_index < m_tokens.size()) {
m_index++;
}
}
// Moves to the following token; stays put once the position has reached the end.
void Lexer::nextToken() {
    if (m_index >= m_tokens.size())
        return;
    m_index++;
}
void Lexer::skipToken(TokenType type) {
if (tokenType() == type) {
nextToken();
} else {
throw Exception("ERROR unexpected token " + consumeToken().token_string + ", instead of " + typeToString(type));
}
}
// Advances past the current token when it has the expected type; otherwise
// throws Exception naming the token found and the type that was expected.
void Lexer::skipToken(TokenType type) {
    if (tokenType() == type) {
        nextToken();
        return;
    }
    // At end of input there is no token text to quote; report "eof" instead
    // of reading past the token vector.
    const std::string found = m_index < m_tokens.size()
                                  ? consumeToken().token_string
                                  : typeToString(TokenType::eof);
    throw Exception("ERROR unexpected token " + found + ", instead of " + typeToString(type));
}
void Lexer::skipTokenOptional(TokenType type) {
if (tokenType() == type) {
nextToken();
}
}
// Advances past the current token only when it has the given type.
void Lexer::skipTokenOptional(TokenType type) {
    if (tokenType() != type)
        return;
    nextToken();
}
TokenType Lexer::tokenType() { return m_index < m_tokens.size() ? currentToken().type : TokenType::eof; }
// Type of the current token, or TokenType::eof once the position is past the last token.
TokenType Lexer::tokenType() {
    if (m_index >= m_tokens.size())
        return TokenType::eof;
    return currentToken().type;
}
TokenType Lexer::nextTokenType() {
return m_index < m_tokens.size() - 1 ? m_tokens[m_index + 1].type : TokenType::eof;
}
// Type of the token after the current one, or TokenType::eof when there is none.
TokenType Lexer::nextTokenType() {
    // "m_index + 1 < size()" instead of "m_index < size() - 1": the latter
    // wraps around for an empty token list and then indexes out of bounds.
    return m_index + 1 < m_tokens.size() ? m_tokens[m_index + 1].type : TokenType::eof;
}
bool Lexer::isRelationalOperator(TokenType token_type) {
return (token_type == TokenType::equal || token_type == TokenType::not_equal ||
token_type == TokenType::greater || token_type == TokenType::greater_equal ||
token_type == TokenType::lesser || token_type == TokenType::lesser_equal ||
token_type == TokenType::is);
}
// True for the comparison token types: =, !=, >, >=, <, <= and "is".
bool Lexer::isRelationalOperator(TokenType token_type) {
    switch (token_type) {
        case TokenType::equal:
        case TokenType::not_equal:
        case TokenType::greater:
        case TokenType::greater_equal:
        case TokenType::lesser:
        case TokenType::lesser_equal:
        case TokenType::is:
            return true;
        default:
            return false;
    }
}
bool Lexer::isLogicalOperator(TokenType token_type) {
return (token_type == TokenType::logical_and || token_type == TokenType::logical_or);
}
// True for the "and" / "or" token types.
bool Lexer::isLogicalOperator(TokenType token_type) {
    switch (token_type) {
        case TokenType::logical_and:
        case TokenType::logical_or:
            return true;
        default:
            return false;
    }
}
bool Lexer::isArithmeticalOperator(TokenType token_type) {
return (token_type == TokenType::plus || token_type == TokenType::minus ||
token_type == TokenType::multiply ||
token_type == TokenType::divide);
}
// True for the +, -, * and / token types.
bool Lexer::isArithmeticalOperator(TokenType token_type) {
    switch (token_type) {
        case TokenType::plus:
        case TokenType::minus:
        case TokenType::multiply:
        case TokenType::divide:
            return true;
        default:
            return false;
    }
}
TokenType Lexer::type(const std::string &token) {
// FIXME 'one is evaluated as identifier
if (token == ";") return TokenType::semicolon;
if (token == "+") return TokenType::plus;
if (token == "-") return TokenType::minus;
if (token == "*") return TokenType::multiply;
if (token == "/") return TokenType::divide;
if (token == "(") return TokenType::open_paren;
if (token == ")") return TokenType::close_paren;
if (token == "=") return TokenType::equal;
if (token == "!=" || token == "<>") return TokenType::not_equal;
if (token == ">") return TokenType::greater;
if (token == ">=") return TokenType::greater_equal;
if (token == "<") return TokenType::lesser;
if (token == "<=") return TokenType::lesser_equal;
if (token == "is") return TokenType::is;
if (token == "as") return TokenType::keyword_as;
if (token == "create") return TokenType::keyword_create;
if (token == "drop") return TokenType::keyword_drop;
if (token == "where") return TokenType::keyword_where;
if (token == "order") return TokenType::keyword_order;
if (token == "by") return TokenType::keyword_by;
if (token == "offset") return TokenType::keyword_offset;
if (token == "limit") return TokenType::keyword_limit;
if (token == "asc") return TokenType::keyword_asc;
if (token == "desc") return TokenType::keyword_desc;
if (token == "from") return TokenType::keyword_from;
if (token == "delete") return TokenType::keyword_delete;
if (token == "table") return TokenType::keyword_table;
if (token == "insert") return TokenType::keyword_insert;
if (token == "into") return TokenType::keyword_into;
if (token == "values") return TokenType::keyword_values;
if (token == "select") return TokenType::keyword_select;
if (token == "set") return TokenType::keyword_set;
if (token == "copy") return TokenType::keyword_copy;
if (token == "update") return TokenType::keyword_update;
if (token == "load") return TokenType::keyword_load;
if (token == "save") return TokenType::keyword_save;
if (token == "not") return TokenType::keyword_not;
if (token == "null") return TokenType::keyword_null;
if (token == "integer") return TokenType::keyword_integer;
if (token == "float") return TokenType::keyword_float;
if (token == "varchar") return TokenType::keyword_varchar;
if (token == "date") return TokenType::keyword_date;
if (token == "boolean") return TokenType::keyword_bool;
if (token == "true") return TokenType::keyword_true;
if (token == "false") return TokenType::keyword_false;
if (token == "distinct") return TokenType::keyword_distinct;
if (token == "show") return TokenType::keyword_show;
if (token == "or") return TokenType::logical_or;
if (token == "and") return TokenType::logical_and;
if (token == ",") return TokenType::comma;
if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline;
TokenType Lexer::type(const std::string &token) {
if (token == ";") return TokenType::semicolon;
if (token == "+") return TokenType::plus;
if (token == "-") return TokenType::minus;
if (token == "*") return TokenType::multiply;
if (token == "/") return TokenType::divide;
if (token == "(") return TokenType::open_paren;
if (token == ")") return TokenType::close_paren;
if (token == "=") return TokenType::equal;
if (token == "!=" || token == "<>") return TokenType::not_equal;
if (token == ">") return TokenType::greater;
if (token == ">=") return TokenType::greater_equal;
if (token == "<") return TokenType::lesser;
if (token == "<=") return TokenType::lesser_equal;
if (token == "is") return TokenType::is;
if (token == "as") return TokenType::keyword_as;
if (token == "create") return TokenType::keyword_create;
if (token == "drop") return TokenType::keyword_drop;
if (token == "where") return TokenType::keyword_where;
if (token == "order") return TokenType::keyword_order;
if (token == "by") return TokenType::keyword_by;
if (token == "offset") return TokenType::keyword_offset;
if (token == "limit") return TokenType::keyword_limit;
if (token == "asc") return TokenType::keyword_asc;
if (token == "desc") return TokenType::keyword_desc;
if (token == "from") return TokenType::keyword_from;
if (token == "delete") return TokenType::keyword_delete;
if (token == "table") return TokenType::keyword_table;
if (token == "index") return TokenType::keyword_index;
if (token == "on") return TokenType::keyword_on;
if (token == "insert") return TokenType::keyword_insert;
if (token == "into") return TokenType::keyword_into;
if (token == "values") return TokenType::keyword_values;
if (token == "select") return TokenType::keyword_select;
if (token == "set") return TokenType::keyword_set;
if (token == "copy") return TokenType::keyword_copy;
if (token == "update") return TokenType::keyword_update;
if (token == "load") return TokenType::keyword_load;
if (token == "save") return TokenType::keyword_save;
if (token == "not") return TokenType::keyword_not;
if (token == "null") return TokenType::keyword_null;
if (token == "integer") return TokenType::keyword_integer;
if (token == "float") return TokenType::keyword_float;
if (token == "varchar") return TokenType::keyword_varchar;
if (token == "date") return TokenType::keyword_date;
if (token == "boolean") return TokenType::keyword_bool;
if (token == "true") return TokenType::keyword_true;
if (token == "false") return TokenType::keyword_false;
if (token == "distinct") return TokenType::keyword_distinct;
if (token == "show") return TokenType::keyword_show;
if (token == "or") return TokenType::logical_or;
if (token == "and") return TokenType::logical_and;
if (token == ",") return TokenType::comma;
if (token == "\n" || token == "\r\n" || token == "\r") return TokenType::newline;
if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
return TokenType::comment;
if (token.length() > 1 && token.at(0) == '%' && (token.at(token.length() - 1) == '\n' || token.at(token.length() - 1) == '\r'))
return TokenType::comment;
if (token.length() >= 2 && token.at(0) == '"' && token.at(token.length() - 1) == '"')
return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '"')
return (token.at(token.length() - 1) == '"') ? TokenType::string_literal : TokenType::undef;
if (token.length() >= 2 && token.at(0) == '\'' && token.at(token.length() - 1) == '\'')
return TokenType::string_literal;
if (token.length() >= 2 && token.at(0) == '\'')
return (token.at(token.length() - 1) == '\'') ? TokenType::string_literal : TokenType::undef;
if (std::regex_match(token, k_int_regex)) return TokenType::int_number;
if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number;
if (std::regex_match(token, k_double_regex)) return TokenType::double_number;
if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier;
if (std::regex_match(token, k_int_regex)) return TokenType::int_number;
if (std::regex_match(token, k_int_underscored_regex)) return TokenType::int_number;
if (std::regex_match(token, k_double_regex)) return TokenType::double_number;
if (std::regex_match(token, k_identifier_regex)) return TokenType::identifier;
return TokenType::undef;
}
return TokenType::undef;
}
std::string Lexer::stringLiteral(std::string token) {
// remove ' or " from the literal ends
bool replace = token[0] == '\'' && token[token.size() - 1] == '\'';
std::string Lexer::stringLiteral(std::string token) {
// remove ' or " from the literal ends
bool replace = token[0] == '\'' && token[token.size() - 1] == '\'';
std::string str = token.substr(1, token.size() - 2);
if (!replace) {
return str;
}
std::string out;
out.reserve(str.size());
std::string str = token.substr(1, token.size() - 2);
if (!replace) {
return str;
}
std::string out;
out.reserve(str.size());
for (std::string::size_type i = 0; i < str.size(); ++i) {
if (str[i] == '\'' && i < str.size() - 1) {
if (str[i + 1] == '\'') {
out.append(1, '\'');
i++;
} else {
out.append(1, str[i]);
}
} else if (str[i] == '\\' && i < str.size() - 1) {
if (str[i + 1] == 'n') {
out.append(1, '\n');
i++;
} else if (str[i + 1] == 't') {
out.append(1, '\t');
i++;
} else {
out.append(1, str[i]);
}
} else {
out.append(1, str[i]);
}
}
return out;
}
for (std::string::size_type i = 0; i < str.size(); ++i) {
if (str[i] == '\'' && i < str.size() - 1) {
if (str[i + 1] == '\'') {
out.append(1, '\'');
i++;
} else {
out.append(1, str[i]);
}
} else if (str[i] == '\\' && i < str.size() - 1) {
if (str[i + 1] == 'n') {
out.append(1, '\n');
i++;
} else if (str[i + 1] == 't') {
out.append(1, '\t');
i++;
} else {
out.append(1, str[i]);
}
} else {
out.append(1, str[i]);
}
}
return out;
}
std::string Lexer::typeToString(TokenType token_type) {
switch (token_type) {
case TokenType::undef: return "undef";
case TokenType::identifier: return "identifier";
case TokenType::plus: return "+";
case TokenType::minus: return "-";
case TokenType::multiply: return "*";
case TokenType::divide: return "/";
case TokenType::equal: return "==";
case TokenType::not_equal: return "!=";
case TokenType::greater: return ">";
case TokenType::greater_equal: return ">=";
case TokenType::lesser: return "<";
case TokenType::lesser_equal: return "<=";
case TokenType::is: return "is";
case TokenType::keyword_as: return "as";
case TokenType::keyword_create: return "create";
case TokenType::keyword_drop: return "drop";
case TokenType::keyword_where: return "where";
case TokenType::keyword_order: return "order";
case TokenType::keyword_by: return "by";
case TokenType::keyword_offset: return "offset";
case TokenType::keyword_limit: return "limit";
case TokenType::keyword_asc: return "asc";
case TokenType::keyword_desc: return "desc";
case TokenType::keyword_table: return "table";
case TokenType::keyword_into: return "into";
case TokenType::keyword_values: return "values";
case TokenType::keyword_select: return "select";
case TokenType::keyword_set: return "set";
case TokenType::keyword_copy: return "copy";
case TokenType::keyword_update: return "update";
case TokenType::keyword_load: return "load";
case TokenType::keyword_save: return "save";
case TokenType::keyword_not: return "not";
case TokenType::keyword_null: return "null";
case TokenType::keyword_integer: return "integer";
case TokenType::keyword_float: return "float";
case TokenType::keyword_varchar: return "varchar";
case TokenType::keyword_date: return "date";
case TokenType::keyword_bool: return "boolean";
case TokenType::keyword_true: return "true";
case TokenType::keyword_false: return "false";
case TokenType::keyword_distinct: return "distinct";
case TokenType::keyword_show: return "show";
case TokenType::int_number: return "int number";
case TokenType::double_number: return "double number";
case TokenType::string_literal: return "string literal";
case TokenType::open_paren: return "(";
case TokenType::close_paren: return ")";
case TokenType::logical_and: return "and";
case TokenType::logical_or: return "or";
case TokenType::semicolon: return ";";
case TokenType::comma: return ",";
case TokenType::newline: return "newline";
case TokenType::comment: return "comment";
case TokenType::eof: return "eof";
default:
return "FIXME, unknown token type";
}
}
// Human-readable name of a token type, used when building error messages.
// Keywords and operators are rendered as the text the lexer recognises for
// them; any type without an entry falls through to the FIXME string.
std::string Lexer::typeToString(TokenType token_type) {
    switch (token_type) {
        case TokenType::undef: return "undef";
        case TokenType::identifier: return "identifier";
        case TokenType::plus: return "+";
        case TokenType::minus: return "-";
        case TokenType::multiply: return "*";
        case TokenType::divide: return "/";
        // the lexer classifies "=" (not "==") as TokenType::equal
        case TokenType::equal: return "=";
        case TokenType::not_equal: return "!=";
        case TokenType::greater: return ">";
        case TokenType::greater_equal: return ">=";
        case TokenType::lesser: return "<";
        case TokenType::lesser_equal: return "<=";
        case TokenType::is: return "is";
        case TokenType::keyword_as: return "as";
        case TokenType::keyword_create: return "create";
        case TokenType::keyword_drop: return "drop";
        case TokenType::keyword_where: return "where";
        case TokenType::keyword_order: return "order";
        case TokenType::keyword_by: return "by";
        case TokenType::keyword_offset: return "offset";
        case TokenType::keyword_limit: return "limit";
        case TokenType::keyword_asc: return "asc";
        case TokenType::keyword_desc: return "desc";
        case TokenType::keyword_from: return "from";
        case TokenType::keyword_delete: return "delete";
        case TokenType::keyword_table: return "table";
        case TokenType::keyword_index: return "index";
        case TokenType::keyword_on: return "on";
        case TokenType::keyword_insert: return "insert";
        case TokenType::keyword_into: return "into";
        case TokenType::keyword_values: return "values";
        case TokenType::keyword_select: return "select";
        case TokenType::keyword_set: return "set";
        case TokenType::keyword_copy: return "copy";
        case TokenType::keyword_update: return "update";
        case TokenType::keyword_load: return "load";
        case TokenType::keyword_save: return "save";
        case TokenType::keyword_not: return "not";
        case TokenType::keyword_null: return "null";
        case TokenType::keyword_integer: return "integer";
        case TokenType::keyword_float: return "float";
        case TokenType::keyword_varchar: return "varchar";
        case TokenType::keyword_date: return "date";
        case TokenType::keyword_bool: return "boolean";
        case TokenType::keyword_true: return "true";
        case TokenType::keyword_false: return "false";
        case TokenType::keyword_distinct: return "distinct";
        case TokenType::keyword_show: return "show";
        case TokenType::int_number: return "int number";
        case TokenType::double_number: return "double number";
        case TokenType::string_literal: return "string literal";
        case TokenType::open_paren: return "(";
        case TokenType::close_paren: return ")";
        case TokenType::logical_and: return "and";
        case TokenType::logical_or: return "or";
        case TokenType::semicolon: return ";";
        case TokenType::comma: return ",";
        case TokenType::newline: return "newline";
        case TokenType::comment: return "comment";
        case TokenType::eof: return "eof";
        default:
            return "FIXME, unknown token type";
    }
}
}
} // namespace usql

View File

@ -25,6 +25,8 @@ namespace usql {
keyword_create,
keyword_drop,
keyword_table,
keyword_index,
keyword_on,
keyword_where,
keyword_order,
keyword_by,

View File

@ -1,165 +0,0 @@
#include "parser.h"
#include "usql.h"
#include "linenoise.h"
// https://dev.to/joaoh82/what-would-sqlite-look-like-if-written-in-rust-part-1-2np4
using namespace std::chrono;
// Words offered as candidates by the tab-completion callback (see completion()).
const std::vector<std::string> commands {
"select", "create", "load", "table"
};
// Returns the full path of the REPL history file: "$HOME/.usql_history.txt",
// or "/tmp/.usql_history.txt" when HOME is not set.
std::string get_history_file_dir() {
    // no leading slash here: the directory separator is added below, so the
    // result no longer contains a doubled "//"
    const std::string file{".usql_history.txt"};
    const char *home = std::getenv("HOME");
    if (home == nullptr) return "/tmp/" + file;
    return std::string{home} + "/" + file;
}
// Returns the position of the last separator (one of "() \t\n") in str after
// trailing white space has been dropped, or std::string::npos when the
// trimmed text contains no separator.
size_t last_token_index( std::string str ) {
    // remove trailing white space; std::isspace requires a value representable
    // as unsigned char, so cast to avoid undefined behaviour on negative chars
    while( !str.empty() && std::isspace( static_cast<unsigned char>( str.back() ) ) ) str.pop_back() ;
    // locate the last separator
    return str.find_last_of( "() \t\n" ) ;
}
// linenoise completion callback: offers every entry of `commands` that starts
// with the last word of the edited line, keeping the text before that word.
void completion(const char *buf, linenoiseCompletions *lc) {
    if (buf == nullptr)
        return;

    const std::string line{buf};
    const auto separator = last_token_index(line);
    // No separator means the whole line is the word being typed, so complete
    // from position 0 instead of giving up (otherwise the first word of a
    // statement could never be completed).
    const std::string::size_type token_start =
        (separator == std::string::npos) ? 0 : separator + 1;

    const std::string token = line.substr(token_start);
    const std::string prefix = line.substr(0, token_start);

    for (const auto &command : commands) {
        // prefix match: command starts with token
        if (command.compare(0, token.size(), token) == 0) {
            const std::string completion_string = prefix + command;
            linenoiseAddCompletion(lc, completion_string.c_str());
        }
    }
}
// linenoise hints callback. No hints are implemented: the arguments are
// ignored and nullptr is always returned. (A hint would be returned as a
// string, with *color and *bold set, e.g. " World" after typing "hello".)
char *hints(const char * /*buf*/, int * /*color*/, int * /*bold*/) {
    return nullptr;
}
// Configures linenoise for the REPL: history length, completion and hints
// callbacks, multi-line editing, then loads the saved history file.
void setup_linenoise() {
    linenoiseHistorySetMaxLen(500);
    linenoiseSetCompletionCallback(completion);
    linenoiseSetHintsCallback(hints);
    linenoiseSetMultiLine(1);

    const std::string history_path = get_history_file_dir();
    linenoiseHistoryLoad(history_path.c_str());
}
// Records a line that was just read in the linenoise history.
void linenoise_line_read(char *line) {
linenoiseHistoryAdd(line);
}
// Writes the linenoise history to the history file.
void close_linenoise() {
    const std::string history_path = get_history_file_dir();
    linenoiseHistorySave(history_path.c_str());
}
void repl() {
std::string code;
std::string input;
setup_linenoise();
usql::USql uSql{};
while (true) {
char *line = linenoise(">>> ");
if (line == nullptr) break;
linenoise_line_read(line);
input = std::string(line);
if (input == "!quit" || input == "!q")
break;
else if (input == "!export" || input == "!x") {
std::cout << "File to export to: ";
std::getline(std::cin, input);
//write_file_contents(input, code);
} else if (!input.empty()) {
try {
time_point<high_resolution_clock> start_time = high_resolution_clock::now();
auto result = uSql.execute(input);
time_point<high_resolution_clock> end_time = high_resolution_clock::now();
std::cout << input << std::endl;
std::cout << "run time: " << duration_cast<milliseconds>(end_time - start_time).count() << " ms " << std::endl <<std::endl;
result->print();
code += input + "\n";
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
}
}
}
close_linenoise();
}
void debug() {
std::vector<std::string> sql_commands {
"set 'DATE_FORMAT' = '%Y-%m-%d' ",
"create table history_earnings_dates (datetime date, symbol varchar(8), time varchar(18), title varchar(256))",
"insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')",
"insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')",
"delete from history_earnings_dates where symbol='BABA' and datetime=to_date('2021-11-04', '%Y-%m-%d')",
"select * from history_earnings_dates"
};
usql::USql uSql{};
for (const auto &command : sql_commands) {
time_point<high_resolution_clock> start_time = high_resolution_clock::now();
auto result = uSql.execute(command);
time_point<high_resolution_clock> end_time = high_resolution_clock::now();
std::cout << command << std::endl;
std::cout << "run time: " << duration_cast<milliseconds>(end_time - start_time).count() << " ms "
<< std::endl << std::endl;
result->print();
}
std::cout << std::endl << std::endl;
}
// Program entry point. Builds with NDEBUG defined start the interactive
// REPL; all other builds run the fixed statement list in debug().
// The command-line arguments are not used.
int main(int argc, char *argv[]) {
#ifdef NDEBUG
repl();
#else
debug();
#endif
return 0;
}

View File

@ -15,6 +15,8 @@ namespace usql {
if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_table)
return parse_create_table();
if (m_lexer.tokenType() == TokenType::keyword_create && m_lexer.nextTokenType() == TokenType::keyword_index)
return parse_create_index();
if (m_lexer.tokenType() == TokenType::keyword_drop)
return parse_drop_table();
@ -302,6 +304,18 @@ namespace usql {
return std::make_unique<UpdateTableNode>(table_name, cols_names, std::move(values), std::move(where_node));
}
std::unique_ptr<Node> Parser::parse_create_index() {
m_lexer.skipToken(TokenType::keyword_create);
m_lexer.skipToken(TokenType::keyword_index);
std::string index_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::keyword_on);
std::string table_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::open_paren);
std::string column_name = m_lexer.consumeToken(TokenType::identifier).token_string;
m_lexer.skipToken(TokenType::close_paren);
return std::make_unique<CreateIndexNode>(index_name, table_name, column_name);
}
std::vector<ColOrderNode> Parser::parse_order_by_clause() {
std::vector<ColOrderNode> order_cols;
@ -331,7 +345,7 @@ namespace usql {
order_cols.emplace_back(cspec_token, asc);
break;
default:
throw Exception("order by column can be either column index or identifier");
throw Exception("order by column can be either column m_index or identifier");
}
m_lexer.skipTokenOptional(TokenType::comma);

View File

@ -6,21 +6,22 @@
#include "settings.h"
#include <string>
#include <utility>
#include <vector>
static const int FUNCTION_CALL = -1;
namespace usql {
enum class ColumnType {
enum class ColumnType {
integer_type,
float_type,
varchar_type,
date_type,
bool_type
};
};
enum class NodeType {
enum class NodeType {
true_node,
null_value,
int_value,
@ -39,173 +40,232 @@ namespace usql {
load_table,
save_table,
drop_table,
create_index,
set,
show,
database_value,
offset_limit,
column_order,
column_value,
function,
column_def,
error
};
};
struct Node {
struct Node {
NodeType node_type;
explicit Node(const NodeType type) : node_type(type) {}
virtual ~Node() = default;
};
virtual void dump() const {
std::cout << "type: Node" << std::endl;
}
};
struct ColOrderNode : Node {
struct ColOrderNode : Node {
std::string col_name;
int col_index;
bool ascending;
ColOrderNode(const std::string& name, bool asc) : Node(NodeType::column_order), col_name(name), col_index(-1), ascending(asc) {}
ColOrderNode(int index, bool asc) : Node(NodeType::database_value), col_name(""), col_index(index), ascending(asc) {}
};
ColOrderNode(std::string name, bool asc) : Node(NodeType::column_order), col_name(std::move(name)), col_index(-1), ascending(asc) {}
ColOrderNode(int index, bool asc) : Node(NodeType::database_value), col_index(index), ascending(asc) {}
void dump() const override {
std::cout << "type: ColOrderNode, col_name: " << col_name << ", col_index: " << col_index << ", asc: " << ascending << std::endl;
}
};
struct OffsetLimitNode : Node {
struct OffsetLimitNode : Node {
int offset;
int limit;
OffsetLimitNode(int off, int lim) : Node(NodeType::offset_limit), offset(off), limit(lim) {}
};
void dump() const override {
std::cout << "type: OffsetLimitNode, offset: " << offset << ", limit: " << limit << std::endl;
}
};
struct SelectColNode : Node {
std::unique_ptr<Node> value;
std::string name;
struct SelectColNode : Node {
std::unique_ptr<Node> value;
std::string name;
SelectColNode(std::unique_ptr<Node> column, const std::string &alias) :
Node(NodeType::database_value), value(std::move(column)), name(alias) {}
};
SelectColNode(std::unique_ptr<Node> column, std::string alias) :
Node(NodeType::database_value), value(std::move(column)), name(std::move(alias)) {}
struct ColDefNode : Node {
void dump() const override {
std::cout << "type: SelectColNode, name:" << name << "value:" << std::endl;
value->dump();
}
};
struct ColDefNode : Node {
std::string name;
ColumnType type;
int order;
int length;
bool null;
ColDefNode(const std::string& col_name, ColumnType col_type, int col_order, int col_len, bool nullable) :
Node(NodeType::column_def), name(col_name), type(col_type), order(col_order), length(col_len),
null(nullable) {}
};
ColDefNode(std::string col_name, ColumnType col_type, int col_order, int col_len, bool nullable) :
Node(NodeType::column_def), name(std::move(col_name)), type(col_type), order(col_order), length(col_len),
null(nullable) {}
struct FunctionNode : Node {
std::string function; // TODO use enum
std::vector<std::unique_ptr<Node>> params;
void dump() const override {
std::cout << "type: ColDefNode, name: " << name << ", type: " << (int)type << " TODO add more" << std::endl;
}
};
FunctionNode(const std::string& func_name, std::vector<std::unique_ptr<Node>> pars) :
Node(NodeType::function), function(func_name), params(std::move(pars)) {}
};
struct FunctionNode : Node {
std::string function; // TODO use enum
std::vector<std::unique_ptr<Node>> params;
struct TrueNode : Node {
FunctionNode(std::string func_name, std::vector<std::unique_ptr<Node>> pars) :
Node(NodeType::function), function(std::move(func_name)), params(std::move(pars)) {}
void dump() const override {
std::cout << "type: FunctionNode, function: " << function << " TODO add more" << std::endl;
}
};
struct TrueNode : Node {
TrueNode() : Node(NodeType::true_node) {}
};
struct ValueNode : Node {
void dump() const override {
std::cout << "type: TrueNode," << std::endl;
}
};
struct ValueNode : Node {
explicit ValueNode(NodeType type) : Node(type) {}
virtual bool isNull() { return false; }
virtual long getIntegerValue() = 0;
virtual double getDoubleValue() = 0;
virtual std::string getStringValue() = 0;
virtual long getDateValue() = 0;
virtual bool getBooleanValue() = 0;
virtual bool isNull() const { return false; }
virtual long getIntegerValue() const = 0;
virtual double getDoubleValue() const = 0;
virtual std::string getStringValue() const = 0;
virtual long getDateValue() const = 0;
virtual bool getBooleanValue() const = 0;
virtual ~ValueNode() = default;
};
~ValueNode() override = default;
};
struct NullValueNode : ValueNode {
struct NullValueNode : ValueNode {
NullValueNode() : ValueNode(NodeType::null_value) {}
NullValueNode() : ValueNode(NodeType::null_value) {}
bool isNull() override { return true; }
bool isNull() const override { return true; }
long getIntegerValue() override { throw Exception("getIntegerValue not supported on NullValueNode"); };
double getDoubleValue() override { throw Exception("getDoubleValue not supported on NullValueNode"); };
std::string getStringValue() override { throw Exception("getStringValue not supported on NullValueNode"); };
long getDateValue() override { throw Exception("getDateValue not supported on NullValueNode"); };
bool getBooleanValue() override { throw Exception("getBooleanValue not supported on NullValueNode"); };
};
long getIntegerValue() const override { throw Exception("getIntegerValue not supported on NullValueNode"); };
double getDoubleValue() const override { throw Exception("getDoubleValue not supported on NullValueNode"); };
std::string getStringValue() const override { throw Exception("getStringValue not supported on NullValueNode"); };
long getDateValue() const override { throw Exception("getDateValue not supported on NullValueNode"); };
bool getBooleanValue() const override { throw Exception("getBooleanValue not supported on NullValueNode"); };
struct IntValueNode : ValueNode {
void dump() const override {
std::cout << "type: NullValueNode," << std::endl;
}
};
struct IntValueNode : ValueNode {
long value;
explicit IntValueNode(long value) : ValueNode(NodeType::int_value), value(value) {}
long getIntegerValue() override { return value; };
double getDoubleValue() override { return (double) value; };
std::string getStringValue() override { return Settings::int_to_string(value); }
long getDateValue() override { return value; };
bool getBooleanValue() override { return value != 0; };
};
long getIntegerValue() const override { return value; };
double getDoubleValue() const override { return (double) value; };
std::string getStringValue() const override { return Settings::long_to_string(value); }
long getDateValue() const override { return value; };
bool getBooleanValue() const override { return value != 0; };
struct DoubleValueNode : ValueNode {
void dump() const override {
std::cout << "type: IntValueNode, value: " << value << std::endl;
}
};
struct DoubleValueNode : ValueNode {
double value;
explicit DoubleValueNode(double value) : ValueNode(NodeType::float_value), value(value) {}
long getIntegerValue() override { return (long) value; };
double getDoubleValue() override { return value; };
std::string getStringValue() override { return Settings::double_to_string(value); }
long getDateValue() override { return (long) value; };
bool getBooleanValue() override { return value != 0.0; };
};
long getIntegerValue() const override { return (long) value; };
double getDoubleValue() const override { return value; };
std::string getStringValue() const override { return Settings::double_to_string(value); }
long getDateValue() const override { return (long) value; };
bool getBooleanValue() const override { return value != 0.0; };
struct StringValueNode : ValueNode {
void dump() const override {
std::cout << "type: DoubleValueNode, value: " << value << std::endl;
}
};
struct StringValueNode : ValueNode {
std::string value;
explicit StringValueNode(const std::string &value) : ValueNode(NodeType::string_value), value(value) {}
explicit StringValueNode(std::string value) : ValueNode(NodeType::string_value), value(std::move(value)) {}
long getIntegerValue() override { return Settings::string_to_int(value); };
double getDoubleValue() override { return Settings::string_to_double(value); };
std::string getStringValue() override { return value; };
long getDateValue() override { return Settings::string_to_date(value); };
bool getBooleanValue() override { return Settings::string_to_bool(value); };
};
long getIntegerValue() const override { return Settings::string_to_long(value); };
double getDoubleValue() const override { return Settings::string_to_double(value); };
std::string getStringValue() const override { return value; };
long getDateValue() const override { return Settings::string_to_date(value); };
bool getBooleanValue() const override { return Settings::string_to_bool(value); };
struct BooleanValueNode : ValueNode {
bool value;
void dump() const override {
std::cout << "type: StringValueNode, value: " << value << std::endl;
}
};
explicit BooleanValueNode(bool value) : ValueNode(NodeType::bool_value), value(value) {}
struct BooleanValueNode : ValueNode {
bool value;
long getIntegerValue() override { return (long) value; };
double getDoubleValue() override { return (double) value; };
std::string getStringValue() override { return Settings::bool_to_string(value); }
long getDateValue() override { return (long) value; };
bool getBooleanValue() override { return value; };
};
explicit BooleanValueNode(bool value) : ValueNode(NodeType::bool_value), value(value) {}
long getIntegerValue() const override { return (long) value; };
double getDoubleValue() const override { return (double) value; };
std::string getStringValue() const override { return Settings::bool_to_string(value); }
long getDateValue() const override { return (long) value; };
bool getBooleanValue() const override { return value; };
void dump() const override {
std::cout << "type: BooleanValueNode, value: " << value << std::endl;
}
};
struct DatabaseValueNode : Node {
struct DatabaseValueNode : Node {
std::string col_name;
explicit DatabaseValueNode(const std::string &name) : Node(NodeType::database_value), col_name(name) {}
};
explicit DatabaseValueNode(std::string name) : Node(NodeType::database_value), col_name(std::move(name)) {}
enum class LogicalOperatorType {
void dump() const override {
std::cout << "type: DatabaseValueNode, col_name: " << col_name << std::endl;
}
};
enum class LogicalOperatorType {
and_operator,
or_operator,
not_operator
};
or_operator
// not_operator
};
struct LogicalOperatorNode : Node {
struct LogicalOperatorNode : Node {
LogicalOperatorType op;
std::unique_ptr<Node> left;
std::unique_ptr<Node> right;
LogicalOperatorNode(LogicalOperatorType op, std::unique_ptr<Node> left, std::unique_ptr<Node> right) :
Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {};
};
Node(NodeType::logical_operator), op(op), left(std::move(left)), right(std::move(right)) {};
enum class RelationalOperatorType {
void dump() const override {
std::cout << "type: LogicalOperatorNode, op: " << (int)op << std::endl;
left->dump();
right->dump();
}
};
enum class RelationalOperatorType {
equal,
greater,
greater_equal,
@ -215,54 +275,74 @@ namespace usql {
is,
is_not
// like
};
};
struct RelationalOperatorNode : Node {
struct RelationalOperatorNode : Node {
RelationalOperatorType op;
std::unique_ptr<Node> left;
std::unique_ptr<Node> right;
RelationalOperatorNode(RelationalOperatorType op, std::unique_ptr<Node> left, std::unique_ptr<Node> right) :
Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {};
};
Node(NodeType::relational_operator), op(op), left(std::move(left)), right(std::move(right)) {};
enum class ArithmeticalOperatorType {
void dump() const override {
std::cout << "type: RelationalOperatorNode, op: " << (int)op << std::endl;
left->dump();
right->dump();
}
};
enum class ArithmeticalOperatorType {
copy_value, // just copy lef value and do nothing with it
plus_operator,
minus_operator,
multiply_operator,
divide_operator
};
};
struct ArithmeticalOperatorNode : Node {
struct ArithmeticalOperatorNode : Node {
ArithmeticalOperatorType op;
std::unique_ptr<Node> left;
std::unique_ptr<Node> right;
ArithmeticalOperatorNode(ArithmeticalOperatorType op, std::unique_ptr<Node> left, std::unique_ptr<Node> right) :
Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {};
};
Node(NodeType::arithmetical_operator), op(op), left(std::move(left)), right(std::move(right)) {};
struct CreateTableNode : Node {
void dump() const override {
std::cout << "type: ArithmeticalOperatorNode, op: " << (int)op << std::endl;
left->dump();
right->dump();
}
};
struct CreateTableNode : Node {
std::string table_name;
std::vector<ColDefNode> cols_defs;
CreateTableNode(const std::string& name, std::vector<ColDefNode> defs) :
Node(NodeType::create_table), table_name(name), cols_defs(std::move(defs)) {}
};
CreateTableNode(std::string name, std::vector<ColDefNode> defs) :
Node(NodeType::create_table), table_name(std::move(name)), cols_defs(std::move(defs)) {}
struct InsertIntoTableNode : Node {
void dump() const override {
std::cout << "type: CreateTableNode, table_name: " << table_name << "TODO complete me" << std::endl;
}
};
struct InsertIntoTableNode : Node {
std::string table_name;
std::vector<DatabaseValueNode> cols_names;
std::vector<std::unique_ptr<Node>> cols_values;
InsertIntoTableNode(const std::string& name, std::vector<DatabaseValueNode> names, std::vector<std::unique_ptr<Node>> values) :
Node(NodeType::insert_into), table_name(name), cols_names(std::move(names)), cols_values(std::move(values)) {}
};
InsertIntoTableNode(std::string name, std::vector<DatabaseValueNode> names, std::vector<std::unique_ptr<Node>> values) :
Node(NodeType::insert_into), table_name(std::move(name)), cols_names(std::move(names)), cols_values(std::move(values)) {}
struct SelectFromTableNode : Node {
void dump() const override {
std::cout << "type: InsertIntoTableNode, table_name: " << table_name << "TODO complete me" << std::endl;
}
};
struct SelectFromTableNode : Node {
std::string table_name;
std::unique_ptr<std::vector<SelectColNode>> cols_names;
std::unique_ptr<Node> where;
@ -271,84 +351,134 @@ namespace usql {
bool distinct;
SelectFromTableNode(std::string name, std::unique_ptr<std::vector<SelectColNode>> names, std::unique_ptr<Node> where_clause, std::vector<ColOrderNode> orderby, OffsetLimitNode offlim, bool distinct_):
Node(NodeType::select_from), table_name(std::move(name)), cols_names(std::move(names)), where(std::move(where_clause)), order_by(std::move(orderby)), offset_limit(offlim), distinct(distinct_) {}
};
Node(NodeType::select_from), table_name(std::move(name)), cols_names(std::move(names)), where(std::move(where_clause)), order_by(std::move(orderby)), offset_limit(std::move(offlim)), distinct(distinct_) {}
struct CreateTableAsSelectNode : Node {
std::string table_name;
std::unique_ptr<Node> select_table;
void dump() const override {
std::cout << "type: SelectFromTableNode, table_name: " << table_name << "TODO complete me" << std::endl;
where->dump();
}
};
CreateTableAsSelectNode(const std::string& name, std::unique_ptr<Node> table) :
Node(NodeType::create_table_as_select), table_name(name), select_table(std::move(table)) {}
};
struct CreateTableAsSelectNode : Node {
std::string table_name;
std::unique_ptr<Node> select_table;
struct UpdateTableNode : Node {
CreateTableAsSelectNode(std::string name, std::unique_ptr<Node> table) :
Node(NodeType::create_table_as_select), table_name(std::move(name)), select_table(std::move(table)) {}
void dump() const override {
std::cout << "type: CreateTableAsSelectNode, table_name: " << table_name << std::endl;
select_table->dump();
}
};
struct UpdateTableNode : Node {
std::string table_name;
std::vector<DatabaseValueNode> cols_names;
std::vector<std::unique_ptr<Node>> values;
std::unique_ptr<Node> where;
UpdateTableNode(const std::string &name, std::vector<DatabaseValueNode> names, std::vector<std::unique_ptr<Node>> vals,
UpdateTableNode(std::string name, std::vector<DatabaseValueNode> names, std::vector<std::unique_ptr<Node>> vals,
std::unique_ptr<Node> where_clause) :
Node(NodeType::update_table), table_name(name), cols_names(names), values(std::move(vals)),
where(std::move(where_clause)) {}
};
Node(NodeType::update_table), table_name(std::move(name)), cols_names(std::move(names)), values(std::move(vals)),
where(std::move(where_clause)) {}
struct LoadIntoTableNode : Node {
void dump() const override {
std::cout << "type: UpdateTableNode, table_name: " << table_name << "TODO complete me" << std::endl;
where->dump();
}
};
struct LoadIntoTableNode : Node {
std::string table_name;
std::string filename;
LoadIntoTableNode(const std::string& name, const std::string &file) :
Node(NodeType::load_table), table_name(name), filename(file) {}
};
LoadIntoTableNode(std::string name, std::string file) :
Node(NodeType::load_table), table_name(std::move(name)), filename(std::move(file)) {}
struct SaveTableNode : Node {
std::string table_name;
std::string filename;
void dump() const override {
std::cout << "type: LoadIntoTableNode, table_name: " << table_name << ", filename" << filename << std::endl;
}
};
SaveTableNode(const std::string& name, const std::string &file) :
Node(NodeType::save_table), table_name(name), filename(file) {}
};
struct SaveTableNode : Node {
std::string table_name;
std::string filename;
struct DropTableNode : Node {
std::string table_name;
SaveTableNode(std::string name, std::string file) :
Node(NodeType::save_table), table_name(std::move(name)), filename(std::move(file)) {}
explicit DropTableNode(const std::string& name) : Node(NodeType::drop_table), table_name(name) {}
};
void dump() const override {
std::cout << "type: SaveTableNode, table_name: " << table_name << ", filename" << filename << std::endl;
}
};
struct DeleteFromTableNode : Node {
/// AST node for a drop-table statement; carries only the name of the table.
struct DropTableNode : Node {
    std::string table_name;

    explicit DropTableNode(std::string name) : Node(NodeType::drop_table), table_name(std::move(name)) {}

    /// Prints this node's type and table name to stdout.
    void dump() const override {
        // label fixed: it used to print "SelectFromTableNode"
        std::cout << "type: DropTableNode, table_name: " << table_name << std::endl;
    }
};
struct DeleteFromTableNode : Node {
std::string table_name;
std::unique_ptr<Node> where;
DeleteFromTableNode(const std::string& name, std::unique_ptr<Node> where_clause) :
Node(NodeType::delete_from), table_name(name), where(std::move(where_clause)) {}
};
DeleteFromTableNode(std::string name, std::unique_ptr<Node> where_clause) :
Node(NodeType::delete_from), table_name(std::move(name)), where(std::move(where_clause)) {}
struct SetNode : Node {
void dump() const override {
std::cout << "type: DeleteFromTableNode, table_name: " << table_name << std::endl;
where->dump();
}
};
struct SetNode : Node {
std::string name;
std::string value;
SetNode(const std::string& name_, const std::string& value_) :
Node(NodeType::set), name(name_), value(value_) {}
};
SetNode(std::string node_name, std::string node_value) :
Node(NodeType::set), name(std::move(node_name)), value(std::move(node_value)) {}
struct ShowNode : Node {
void dump() const override {
std::cout << "type: SetNode, name: " << name << ", value: " << value << std::endl;
}
};
struct ShowNode : Node {
std::string name;
explicit ShowNode(const std::string& name_) : Node(NodeType::show), name(name_) {}
};
explicit ShowNode(std::string node_name) : Node(NodeType::show), name(std::move(node_name)) {}
void dump() const override {
std::cout << "type: ShowNode, name: " << name << std::endl;
}
};
struct CreateIndexNode : Node {
std::string index_name;
std::string table_name;
std::string column_name;
class Parser {
private:
CreateIndexNode(std::string idx_name, std::string tbl_name, std::string col_name) :
Node(NodeType::create_index), index_name(std::move(idx_name)), table_name(std::move(tbl_name)), column_name(std::move(col_name)) {}
public:
void dump() const override {
std::cout << "type: CreateIndexNode, table_name: " << table_name << ", index_name: " << index_name << ", column_name: " << column_name << std::endl;
}
};
class Parser {
private:
public:
Parser();
std::unique_ptr<Node> parse(const std::string &code);
private:
private:
std::unique_ptr<Node> parse_create_table();
std::unique_ptr<Node> parse_drop_table();
std::unique_ptr<Node> parse_load_table();
@ -360,6 +490,7 @@ namespace usql {
std::unique_ptr<Node> parse_select_from_table();
std::unique_ptr<Node> parse_delete_from_table();
std::unique_ptr<Node> parse_update_table();
std::unique_ptr<Node> parse_create_index();
std::vector<ColOrderNode> parse_order_by_clause();
OffsetLimitNode parse_offset_limit_clause();
@ -373,8 +504,8 @@ namespace usql {
LogicalOperatorType parse_logical_operator();
ArithmeticalOperatorType parse_arithmetical_operator();
private:
private:
Lexer m_lexer;
};
};
} // namespace

View File

@ -4,16 +4,16 @@
namespace usql {
// Ordering of a null value against `other`:
// 0 when `other` is null as well, -1 for any non-null `other`.
int ColNullValue::compare(ColValue &other) const {
    if (other.isNull())
        return 0;

    return -1;  // null goes to end
}
// Three-way comparison of this integer with `other`.
// Returns 1 when `other` is null or smaller, 0 when equal, -1 when greater.
int ColIntegerValue::compare(ColValue &other) const {
    // Test for null BEFORE reading the value: ColNullValue::getIntegerValue()
    // throws, so fetching it first made the null case unreachable.
    if (other.isNull())
        return 1;   // null goes to end

    // compare directly rather than via subtraction, which can overflow
    const long rhs = other.getIntegerValue();
    if (m_integer > rhs)
        return 1;
    if (m_integer < rhs)
        return -1;
    return 0;
}
int ColDoubleValue::compare(ColValue &other) {
int ColDoubleValue::compare(ColValue &other) const {
if (other.isNull()) return 1; // null goes to end
double c = m_double - other.getDoubleValue();
@ -25,22 +25,34 @@ ColStringValue & ColStringValue::operator=(ColStringValue other) {
return *this;
}
// Compares this string with `other`'s string representation.
// A null `other` yields 1; otherwise the result of std::string::compare.
int ColStringValue::compare(ColValue &other) const {
    if (other.isNull())
        return 1;   // null goes to end

    const std::string rhs = other.getStringValue();
    return m_string->compare(rhs);
}
int ColDateValue::compare(ColValue &other) {
long r = m_date - other.getIntValue();
// Returns the string value with every double quote (") prefixed by a
// backslash, i.e. each " becomes \". All other characters are copied as is.
std::string ColStringValue::getCsvStringValue() const {
    const std::string raw = getStringValue();

    std::string escaped;
    escaped.reserve(raw.size());

    for (const char ch : raw) {
        if (ch == '"')
            escaped += '\\';
        escaped += ch;
    }

    return escaped;
}
// Three-way comparison of this date with `other`, using `other`'s integer value.
// Returns 1 when `other` is null or earlier, 0 when equal, -1 when later.
int ColDateValue::compare(ColValue &other) const {
    // Test for null BEFORE reading the value: ColNullValue::getIntegerValue()
    // throws, so fetching it first made the null case unreachable.
    if (other.isNull())
        return 1;   // null goes to end

    // compare directly rather than via subtraction, which can overflow
    const long rhs = other.getIntegerValue();
    if (m_date > rhs)
        return 1;
    if (m_date < rhs)
        return -1;
    return 0;
}
// Compares this boolean with `other`.
// Returns 1 for a null `other`, 0 when both hold the same value,
// -1 when this is true and `other` is false, and 1 in the opposite case.
int ColBooleanValue::compare(ColValue &other) const {
    if (other.isNull())
        return 1;   // null goes to end

    const bool rhs = other.getBoolValue();
    if (m_bool == rhs)
        return 0;

    // the values differ here, so m_bool alone decides the order
    return m_bool ? -1 : 1;   // true first
}
Row::Row(const Row &other) : m_columns(other.m_columns.size()) {
Row::Row(const Row &other) : m_columns(other.m_columns.size()), m_visible(other.m_visible) {
for (int i = 0; i < other.m_columns.size(); i++) {
if (other[i].isNull())
continue; // for null NOP
@ -48,7 +60,7 @@ Row::Row(const Row &other) : m_columns(other.m_columns.size()) {
ColumnType col_type = other[i].getColType();
switch (col_type) {
case ColumnType::integer_type :
setIntColumnValue(i, other[i].getIntValue());
setIntColumnValue(i, other[i].getIntegerValue());
break;
case ColumnType::float_type :
setFloatColumnValue(i, other[i].getDoubleValue());
@ -110,7 +122,7 @@ void Row::setBoolColumnValue(int col_index, const std::string &value) {
void Row::setColumnValue(ColDefNode *col_def, ColValue &col_value) {
if (!col_value.isNull()) {
if (col_def->type == ColumnType::integer_type)
setIntColumnValue(col_def->order, col_value.getIntValue());
setIntColumnValue(col_def->order, col_value.getIntegerValue());
else if (col_def->type == ColumnType::float_type)
setFloatColumnValue(col_def->order, col_value.getDoubleValue());
else if (col_def->type == ColumnType::varchar_type)

View File

@ -9,135 +9,136 @@
namespace usql {
struct ColValue {
virtual bool isNull() { return false; };
virtual ColumnType getColType() = 0;
virtual long getIntValue() = 0;
virtual double getDoubleValue() = 0;
virtual std::string getStringValue() = 0;
virtual long getDateValue() = 0;
virtual bool getBoolValue() = 0;
struct ColValue {
virtual bool isNull() const { return false; };
virtual ColumnType getColType() const = 0;
virtual long getIntegerValue() const = 0;
virtual double getDoubleValue() const = 0;
virtual std::string getStringValue() const = 0;
virtual std::string getCsvStringValue() const { return getStringValue(); };
virtual long getDateValue() const = 0;
virtual bool getBoolValue() const = 0;
virtual int compare(ColValue &other) = 0;
virtual int compare(ColValue &other) const = 0;
virtual ~ColValue() = default;
};
};
struct ColNullValue : ColValue {
bool isNull() override { return true; };
ColumnType getColType() override { throw Exception("getColType not supported on ColNullValue"); }
long getIntValue() override { throw Exception("getIntValue not supported on ColNullValue"); };
double getDoubleValue() override { throw Exception("getDoubleValue not supported on ColNullValue"); };
std::string getStringValue() override { return "null"; };
long getDateValue() override { throw Exception("getDateValue not supported on ColNullValue"); };
bool getBoolValue() override { throw Exception("getDateValue not supported on ColNullValue"); };
struct ColNullValue : ColValue {
bool isNull() const override { return true; };
ColumnType getColType() const override { throw Exception("getColType not supported on ColNullValue"); }
long getIntegerValue() const override { throw Exception("getIntegerValue not supported on ColNullValue"); };
double getDoubleValue() const override { throw Exception("getDoubleValue not supported on ColNullValue"); };
std::string getStringValue() const override { return "null"; };
long getDateValue() const override { throw Exception("getDateValue not supported on ColNullValue"); };
bool getBoolValue() const override { throw Exception("getDateValue not supported on ColNullValue"); };
int compare(ColValue &other) override;
int compare(ColValue &other) const override;
virtual ~ColNullValue() = default;
};
~ColNullValue() override = default;
};
struct ColIntegerValue : ColValue {
struct ColIntegerValue : ColValue {
explicit ColIntegerValue(long value) : m_integer(value) {};
ColIntegerValue(const ColIntegerValue &other) : m_integer(other.m_integer) {};
ColumnType getColType() override { return ColumnType::integer_type; };
long getIntValue() override { return m_integer; };
double getDoubleValue() override { return (double) m_integer; };
std::string getStringValue() override { return std::to_string(m_integer); };
long getDateValue() override { return m_integer; };
bool getBoolValue() override { throw Exception("Not supported on ColIntegerValue"); };
ColumnType getColType() const override { return ColumnType::integer_type; };
long getIntegerValue() const override { return m_integer; };
double getDoubleValue() const override { return (double) m_integer; };
std::string getStringValue() const override { return std::to_string(m_integer); };
long getDateValue() const override { return m_integer; };
bool getBoolValue() const override { throw Exception("Not supported on ColIntegerValue"); };
int compare(ColValue &other) override;
int compare(ColValue &other) const override;
~ColIntegerValue() override = default;
long m_integer;
virtual ~ColIntegerValue() = default;
};
};
struct ColDoubleValue : ColValue {
struct ColDoubleValue : ColValue {
explicit ColDoubleValue(double value) : m_double(value) {};
ColDoubleValue(const ColDoubleValue &other) : m_double(other.m_double) {}
ColumnType getColType() override { return ColumnType::float_type; };
long getIntValue() override { return (long) m_double; };
double getDoubleValue() override { return m_double; };
std::string getStringValue() override { return Settings::double_to_string(m_double); };
long getDateValue() override { return (long) m_double; };
bool getBoolValue() override { throw Exception("Not supported on ColDoubleValue"); };
ColumnType getColType() const override { return ColumnType::float_type; };
long getIntegerValue() const override { return (long) m_double; };
double getDoubleValue() const override { return m_double; };
std::string getStringValue() const override { return Settings::double_to_string(m_double); };
long getDateValue() const override { return (long) m_double; };
bool getBoolValue() const override { throw Exception("Not supported on ColDoubleValue"); };
int compare(ColValue &other) override;
int compare(ColValue &other) const override;
virtual ~ColDoubleValue() = default;
~ColDoubleValue() override = default;
double m_double;
};
};
struct ColStringValue : ColValue {
struct ColStringValue : ColValue {
explicit ColStringValue(const std::string &value) : m_string(std::make_unique<std::string>(value)) {};
ColStringValue(const ColStringValue &other) : m_string(std::make_unique<std::string>(*other.m_string)) {};
ColStringValue & operator=(ColStringValue other);
ColumnType getColType() override { return ColumnType::varchar_type; };
long getIntValue() override { return std::stoi(*m_string); };
double getDoubleValue() override { return std::stod(*m_string); };
std::string getStringValue() override { return *m_string; };
long getDateValue() override { return std::stoi(*m_string); };
bool getBoolValue() override { throw Exception("Not supported on ColStringValue"); };
ColumnType getColType() const override { return ColumnType::varchar_type; };
long getIntegerValue() const override { return std::stoi(*m_string); };
double getDoubleValue() const override { return std::stod(*m_string); };
std::string getStringValue() const override { return *m_string; };
std::string getCsvStringValue() const override;;
long getDateValue() const override { return std::stoi(*m_string); };
bool getBoolValue() const override { throw Exception("Not supported on ColStringValue"); };
int compare(ColValue &other) override;
int compare(ColValue &other) const override;
std::unique_ptr<std::string> m_string;
};
};
struct ColDateValue : ColValue {
explicit ColDateValue(long value) : m_date(value) {};
ColDateValue(const ColDateValue &other) : m_date(other.m_date) {};
struct ColDateValue : ColValue {
explicit ColDateValue(long value) : m_date(value) {};
ColDateValue(const ColDateValue &other) : m_date(other.m_date) {};
ColumnType getColType() override { return ColumnType::date_type; };
long getIntValue() override { return m_date; };
double getDoubleValue() override { return (double) m_date; };
std::string getStringValue() override { return Settings::date_to_string(m_date); };
long getDateValue() override { return m_date; };
bool getBoolValue() override { throw Exception("Not supported on ColDateValue"); };
ColumnType getColType() const override { return ColumnType::date_type; };
long getIntegerValue() const override { return m_date; };
double getDoubleValue() const override { return (double) m_date; };
std::string getStringValue() const override { return Settings::date_to_string(m_date); };
long getDateValue() const override { return m_date; };
bool getBoolValue() const override { throw Exception("Not supported on ColDateValue"); };
int compare(ColValue &other) override;
int compare(ColValue &other) const override;
virtual ~ColDateValue() = default;
~ColDateValue() override = default;
long m_date; // seconds since epoch for now
};
long m_date; // seconds since epoch for now
};
struct ColBooleanValue : ColValue {
explicit ColBooleanValue(bool value) : m_bool(value) {};
ColBooleanValue(const ColBooleanValue &other) : m_bool(other.m_bool) {};
struct ColBooleanValue : ColValue {
explicit ColBooleanValue(bool value) : m_bool(value) {};
ColBooleanValue(const ColBooleanValue &other) : m_bool(other.m_bool) {};
ColumnType getColType() override { return ColumnType::bool_type; };
long getIntValue() override { return (long) m_bool; };
double getDoubleValue() override { return (double) m_bool; };
std::string getStringValue() override { return m_bool ? "Y" : "N"; };
long getDateValue() override { throw Exception("Not supported on ColBooleanValue"); };
bool getBoolValue() override { return m_bool; };
ColumnType getColType() const override { return ColumnType::bool_type; };
long getIntegerValue() const override { return (long) m_bool; };
double getDoubleValue() const override { return (double) m_bool; };
std::string getStringValue() const override { return m_bool ? "Y" : "N"; };
long getDateValue() const override { throw Exception("Not supported on ColBooleanValue"); };
bool getBoolValue() const override { return m_bool; };
int compare(ColValue &other) override;
int compare(ColValue &other) const override;
virtual ~ColBooleanValue() = default;
~ColBooleanValue() override = default;
bool m_bool;
};
bool m_bool;
};
class Row {
class Row {
public:
explicit Row(int cols_count) : m_columns(cols_count) {};
public:
explicit Row(int cols_count, bool visible) : m_columns(cols_count), m_visible(visible) {};
Row(const Row &other);
Row &operator=(Row other);
@ -154,7 +155,7 @@ namespace usql {
void setColumnValue(ColDefNode *col_def, ColValue &col_value);
void setColumnValue(ColDefNode *col_def, ValueNode *col_value);
ColValue &operator[](int i) const {
ColValue &operator[](int i) const {
auto type_index = m_columns[i].index();
switch (type_index) {
case 0:
@ -169,17 +170,23 @@ namespace usql {
return (ColValue &) *std::get_if<ColDateValue>(&m_columns[i]);
case 5:
return (ColValue &) *std::get_if<ColBooleanValue>(&m_columns[i]);
default:
throw Exception("should not happen");
}
throw Exception("should not happen");
}
int compare(const Row &other) const;
[[nodiscard]] int compare(const Row &other) const;
void print(const std::vector<ColDefNode> &col_defs);
static int print_get_column_size(const ColDefNode &col_def);
private:
// xx std::vector<std::unique_ptr<ColValue>> m_columns;
[[nodiscard]] bool is_visible() const { return m_visible; };
void set_visible() { m_visible = true; };
// Marks the row as deleted by clearing the flag reported by is_visible().
// (Previously this assigned `true`, making it indistinguishable from set_visible().)
void set_deleted() { m_visible = false; };
private:
bool m_visible;
std::vector<std::variant<ColNullValue, ColIntegerValue, ColDoubleValue, ColStringValue, ColDateValue, ColBooleanValue>> m_columns;
};
};
} // namespace

View File

@ -1,4 +1,6 @@
#include "fast_double_parser.h"
#include "settings.h"
#include "exception.h"
#include "ml_date.h"
@ -9,21 +11,31 @@ std::vector<std::pair<std::string, std::string>> Settings::m_settings =
{ std::make_pair("DATE_FORMAT", "%Y-%m-%d %H:%M:%S"),
std::make_pair("BOOL_TRUE_LITERAL", "Y"),
std::make_pair("BOOL_FALSE_LITERAL", "N"),
std::make_pair("DOUBLE_FORMAT", "%.2f") };
std::make_pair("DOUBLE_FORMAT", "%.2f"),
std::make_pair("USE_INDEXSCAN", "N") };
long Settings::string_to_int(const std::string &intstr) {
return std::stoi(intstr);
// Converts text to a `long` using std::stol.
//
// @param intstr text to parse
// @return the parsed value
// @throws Exception when the text is not a number or does not fit into `long`
long Settings::string_to_long(const std::string &intstr) {
    try {
        return std::stol(intstr);
    } catch (const std::invalid_argument &) {
        throw Exception("error parsing as integer: " + intstr);
    } catch (const std::out_of_range &) {
        // std::stol reports overflow with out_of_range; translate it as well so that
        // callers only ever see the project's Exception type from this function.
        throw Exception("error parsing as integer: " + intstr);
    }
}
std::string Settings::int_to_string(long intval) {
std::string Settings::long_to_string(long intval) {
return std::to_string(intval);
}
double Settings::string_to_double(const std::string &doublestr) {
return std::stod(doublestr); // TODO use fast parsing
double result;
const char * endptr = fast_double_parser::parse_number(doublestr.c_str(), &result);
if (endptr == nullptr) {
throw Exception("error parsing as double: " + doublestr);
}
return result;
}
std::string Settings::double_to_string(double d) {
@ -47,23 +59,20 @@ std::string Settings::date_to_string(long date) {
}
bool Settings::string_to_bool(const std::string &boolstr) {
if (boolstr=="true" || boolstr == get_setting("BOOL_TRUE_LITERAL"))
bool Settings::string_to_bool(const std::string &value) {
if (value == "true" || value == get_setting("BOOL_TRUE_LITERAL"))
return true;
if (boolstr=="false" || boolstr == get_setting("BOOL_FALSE_LITERAL"))
if (value == "false" || value == get_setting("BOOL_FALSE_LITERAL"))
return false;
throw Exception("string_to_bool, unrecognized value: " + boolstr);
throw Exception("string_to_bool, unrecognized value: " + value);
}
std::string Settings::bool_to_string(bool boolval) {
return boolval ? "true" : "false";
std::string Settings::bool_to_string(bool value) {
return value ? "true" : "false";
}
std::string Settings::get_setting(const std::string &name) {
for(const auto& pair : m_settings) {
if (pair.first == name) return pair.second;
@ -71,6 +80,10 @@ std::string Settings::get_setting(const std::string &name) {
throw Exception("unsupported setting name: " + name);
}
// Looks up the setting called `name` and interprets its text through string_to_bool().
// Any exception raised by get_setting() or string_to_bool() propagates to the caller.
bool Settings::get_bool_setting(const std::string &name) {
    const std::string raw_value = get_setting(name);
    return string_to_bool(raw_value);
}
void Settings::set_setting(const std::string &name, const std::string &value) {
for (auto it = begin(m_settings); it != end(m_settings); ++it) {
if (it->first == name) {

View File

@ -10,9 +10,10 @@ class Settings {
public:
static void set_setting(const std::string &name, const std::string &value);
static std::string get_setting(const std::string &name);
static bool get_bool_setting(const std::string &name);
static long string_to_int(const std::string &intstr);
static std::string int_to_string(long intval);
static long string_to_long(const std::string &intstr);
static std::string long_to_string(long intval);
static double string_to_double(const std::string &doublestr);
static std::string double_to_string(double doubleval);
@ -20,8 +21,8 @@ public:
static long string_to_date(const std::string &datestr);
static std::string date_to_string(long dateval);
static bool string_to_bool(const std::string &boolstr);
static std::string bool_to_string(bool boolval);
static bool string_to_bool(const std::string &value);
static std::string bool_to_string(bool value);
private:
static std::vector<std::pair<std::string, std::string>> m_settings;

View File

@ -1,7 +1,6 @@
#include "table.h"
#include "csvreader.h"
#include "ml_string.h"
#include "fast_double_parser.h"
#include <charconv>
#include <fstream>
@ -19,15 +18,16 @@ Table::Table(const Table &other) {
m_name = other.m_name;
m_col_defs = other.m_col_defs;
m_rows.reserve(other.m_rows.size());
for(const Row& orig_row : other.m_rows) {
commit_copy_of_row(orig_row);
}
for(const Row& orig_row : other.m_rows)
if (orig_row.is_visible())
commit_copy_of_row((Row&)orig_row);
}
ColDefNode Table::get_column_def(const std::string &col_name) {
auto name_cmp = [col_name](const ColDefNode& cd) { return cd.name == col_name; };
auto col_def = std::find_if(begin(m_col_defs), end(m_col_defs), name_cmp);
auto col_def = std::find_if(std::begin(m_col_defs), std::end(m_col_defs), name_cmp);
if (col_def != std::end(m_col_defs)) {
return *col_def;
} else {
@ -39,35 +39,43 @@ ColDefNode Table::get_column_def(int col_index) {
if (col_index >= 0 && col_index < columns_count()) {
return m_col_defs[col_index];
} else {
throw Exception("column with this index does not exists (" + std::to_string(col_index) + ")");
// The message refers to the column index supplied by the caller ("m_index" was a rename
// that leaked into the string literal).
throw Exception("column with this index does not exists (" + std::to_string(col_index) + ")");
}
}
Row& Table::create_empty_row() {
m_rows.emplace_back(columns_count());
m_rows.emplace_back(columns_count(), false);
return m_rows.back();
}
std::string Table::csv_string() {
// header
const size_t k_row_size_est = m_col_defs.size() * 16;
std::string out_string;
out_string.reserve(m_rows.size() * k_row_size_est);
// header
for(int i = 0; i < m_col_defs.size(); i++) {
if (i > 0) out_string += ",";
if (i > 0) out_string += ',';
out_string += m_col_defs[i].name;
}
// rows
for (auto & m_row : m_rows) {
std::string csv_line{"\n"};
for(int i = 0; i < m_col_defs.size(); i++) {
if (i > 0) csv_line += ",";
for (auto & row : m_rows) {
if (row.is_visible()) {
std::string csv_line{"\n"};
csv_line.reserve(k_row_size_est);
auto & col = m_row[i];
if (!col.isNull()) {
csv_line += col.getStringValue(); // TODO handle enclosing commas etc
for (int i = 0; i < m_col_defs.size(); i++) {
if (i > 0) csv_line += ',';
auto &col = row[i];
if (!col.isNull()) {
csv_line += col.getCsvStringValue();
}
}
out_string += csv_line;
}
out_string += csv_line;
}
return out_string;
@ -89,18 +97,18 @@ int Table::load_csv_file(const std::string &filename) {
int line_size = 128;
std::ifstream in(filename, std::ifstream::ate | std::ifstream::binary);
auto file_size = in.tellg();
auto file_size = in.tellg();
std::ifstream infile(filename);
if (infile.good()) {
std::string sLine;
std::getline(infile, sLine);
line_size = (int)sLine.size();
line_size = (int)sLine.size() + 1;
}
infile.close();
if (file_size > 0) {
auto new_size = m_rows.size() + int(file_size / line_size * 1.20);
auto new_size = m_rows.size() + int((file_size / line_size) * 1.20);
m_rows.reserve(new_size);
}
@ -116,15 +124,15 @@ void Table::create_row_from_vector(const std::vector<ColDefNode> &colDefs, const
Row& new_row = create_empty_row();
// copy values
for (int i = 0; i < std::min<int>(columns_count(), csv_line.size()); i++) {
for (size_t i = 0; i < std::min<size_t>(columns_count(), csv_line.size()); i++) {
const ColDefNode & col_def = colDefs[i];
if (csv_line[i].empty()) {
new_row.setColumnNull(col_def.order);
} else if (col_def.type == ColumnType::integer_type) {
new_row.setIntColumnValue(col_def.order, string_to_long(csv_line[i]));
new_row.setIntColumnValue(col_def.order, Settings::string_to_long(csv_line[i]));
} else if (col_def.type == ColumnType::float_type) {
new_row.setFloatColumnValue(col_def.order, string_to_double(csv_line[i]));
new_row.setFloatColumnValue(col_def.order, Settings::string_to_double(csv_line[i]));
} else if (col_def.type == ColumnType::varchar_type) {
new_row.setStringColumnValue(col_def.order, csv_line[i]);
} else if (col_def.type == ColumnType::date_type) {
@ -139,23 +147,6 @@ void Table::create_row_from_vector(const std::vector<ColDefNode> &colDefs, const
commit_row(new_row);
}
// Parses `s` as a double via fast_double_parser::parse_number.
// Throws Exception when the parser returns a null end pointer.
double Table::string_to_double(const std::string &s) {
    double parsed;
    if (fast_double_parser::parse_number(s.c_str(), &parsed) == nullptr)
        throw Exception("error parsing as double: " + s);
    return parsed;
}
// Converts text to a `long` using std::stol.
//
// @param s text to parse
// @return the parsed value
// @throws Exception when the text is not a number or does not fit into `long`
long Table::string_to_long(const std::string &s) {
    try {
        return std::stol(s);
    } catch (const std::invalid_argument &) {
        throw Exception("error parsing as integer: " + s);
    } catch (const std::out_of_range &) {
        // Overflow is reported by std::stol as out_of_range; translate it too.
        throw Exception("error parsing as integer: " + s);
    }
}
void Table::print() {
std::string out{"| "};
std::string out2{"+-"};
@ -181,16 +172,23 @@ void Table::print() {
std::cout << std::endl;
}
void Table::commit_row(const Row &row) {
size_t Table::get_rowid(const Row &row) const {
const Row* row_addr = (Row*)&row;
const Row* begin_addr = &(*m_rows.begin());
return row_addr - begin_addr;
}
// Validates `row` and adds it to the table's indexes.
//
// On failure the last element of m_rows is removed and the original exception is
// rethrown unchanged (`throw;` keeps its dynamic type; `throw e;` would copy/slice it).
// NOTE(review): the rollback assumes `row` is the most recently created row, i.e. the
// one returned by create_empty_row() - confirm at call sites.
// NOTE(review): if index_row() fails after updating some indexes, those entries are not
// rolled back here.
void Table::commit_row(Row &row) {
    try {
        validate_row(row);
        index_row(row);
    } catch (const Exception &) {
        m_rows.pop_back();
        throw;
    }
}
void Table::commit_copy_of_row(const Row &row) {
void Table::commit_copy_of_row(Row &row) {
Row& new_row = create_empty_row();
for(int i = 0; i < m_col_defs.size(); i++) {
@ -200,7 +198,7 @@ void Table::commit_copy_of_row(const Row &row) {
new_row.setColumnNull(i);
} else {
if (m_col_defs[i].type == ColumnType::integer_type) {
new_row.setIntColumnValue(i, row[i].getIntValue());
new_row.setIntColumnValue(i, row[i].getIntegerValue());
} else if (m_col_defs[i].type == ColumnType::float_type) {
new_row.setFloatColumnValue(i, row[i].getDoubleValue());
} else if (m_col_defs[i].type == ColumnType::varchar_type) {
@ -215,6 +213,7 @@ void Table::commit_copy_of_row(const Row &row) {
}
validate_row(new_row);
// Index the freshly created copy: get_rowid() derives the rowid from the row's address
// inside this table's m_rows, so the source `row` must not be passed here.
index_row(new_row);
}
void Table::validate_column(const ColDefNode *col_def, ValueNode *col_val) {
@ -235,13 +234,134 @@ void Table::validate_column(const ColDefNode *col_def, ColValue &col_val) {
}
}
void Table::validate_row(const Row &row) {
// Validates every column value of `row` against its column definition and, when all
// columns pass, marks the row as visible.
//
// Any exception thrown by validate_column() propagates; in that case the row's
// visibility flag is left untouched.
void Table::validate_row(Row &row) {
    for (size_t i = 0; i < m_col_defs.size(); ++i) {
        // Bind by reference: validate_column() only needs a pointer to the definition,
        // so copying the ColDefNode for every column is unnecessary.
        const ColDefNode &col_def = m_col_defs[i];
        ColValue &col_val = row[static_cast<int>(i)];
        validate_column(&col_def, col_val);
    }
    row.set_visible();
}
// Registers a copy of `index` with this table. Existing rows are not indexed here.
void Table::create_index(const Index& index) {
    m_indexes.emplace_back(index);
}
bool Table::drop_index(const std::string &index_name) {
auto it = std::find_if(m_indexes.begin(), m_indexes.end(),
[&index_name](const Index &idx) {
return idx.get_index_name() == index_name;
});
if (it != m_indexes.end()) {
m_indexes.erase(it);
return true;
}
return false;
}
// Inserts into `index` the value that `row` holds in the column described by `col_def`,
// associated with `rowid`.
void Table::index_row(Index &index, const ColDefNode &col_def, const Row &row, const size_t rowid) {
    // Row::operator[] yields a ColValue&, so its address already has the required type.
    ColValue *indexed_value = &row[col_def.order];
    index.insert(indexed_value, rowid);
}
// Removes from `index` the entry for the value that `row` holds in the column described
// by `col_def`, associated with `rowid`.
void Table::unindex_row(Index &index, const ColDefNode &col_def, const Row &row, const size_t rowid) {
    // Row::operator[] yields a ColValue&, so its address already has the required type.
    ColValue *indexed_value = &row[col_def.order];
    index.remove(indexed_value, rowid);
}
// Replaces the index entry of `rowid`: the value taken from `old_row` is removed first,
// then the value taken from `new_row` is inserted (same column, same rowid).
void Table::reindex_row(Index &index, const ColDefNode &col_def, const Row &old_row, const Row &new_row, size_t rowid) {
    ColValue *stale_value = &old_row[col_def.order];
    index.remove(stale_value, rowid);

    ColValue *fresh_value = &new_row[col_def.order];
    index.insert(fresh_value, rowid);
}
void Table::index_row(const Row &row) {
if (!m_indexes.empty()) {
const size_t rowid = get_rowid(row);
for (auto &idx : m_indexes) {
ColDefNode cDef = get_column_def(idx.get_column_name());
index_row(idx, cDef, row, rowid);
}
}
}
void Table::unindex_row(const Row &row) {
if (!m_indexes.empty()) {
const size_t rowid = get_rowid(row);
for (auto &idx : m_indexes) {
ColDefNode cDef = get_column_def(idx.get_column_name());
unindex_row(idx, cDef, row, rowid);
}
}
}
void Table::reindex_row(const Row &old_row, const Row &new_row) {
if (!m_indexes.empty()) {
const size_t rowid = get_rowid(new_row);
for (auto &idx : m_indexes) {
ColDefNode cDef = get_column_def(idx.get_column_name());
reindex_row(idx, cDef, old_row, new_row, rowid);
}
}
}
void Table::index_rows(const std::string &index_name) {
auto index = get_index(index_name);
ColDefNode cDef = get_column_def(index->get_column_name());
size_t rowid = 0;
for(const Row& r : m_rows) {
index_row(*index, cDef, r, rowid);
rowid++;
}
}
// Returns a pointer to the first index named `index_name`, or nullptr when none matches.
// The pointer refers to an element of m_indexes.
Index * Table::get_index(const std::string &index_name) {
    for (auto idx_it = m_indexes.begin(); idx_it != m_indexes.end(); ++idx_it) {
        const Index &candidate = *idx_it;
        if (candidate.get_index_name() == index_name)
            return &(*idx_it);
    }
    return nullptr;
}
// Returns a pointer to the first index built on column `col_name`, or nullptr when no
// index covers that column. The pointer refers to an element of m_indexes.
Index * Table::get_index_for_column(const std::string &col_name) {
    for (auto idx_it = m_indexes.begin(); idx_it != m_indexes.end(); ++idx_it) {
        const Index &candidate = *idx_it;
        if (candidate.get_column_name() == col_name)
            return &(*idx_it);
    }
    return nullptr;
}
bool Table::empty() {
if (m_rows.empty()) return true;
for (const auto & r : m_rows)
if (r.is_visible()) return false;
return true;
}
// Advances the scan and returns the next visible row, or nullptr once the scan is
// exhausted. Non-visible rows are skipped in both modes:
//  - rowid mode: walks m_rowids and looks each id up in the table's m_rows;
//  - full-scan mode: walks m_rows through m_fscan_itr.
Row *Table::rows_scanner::next() {
    if (m_use_rowids) {
        while (m_rowids_idx < m_rowids.size()) {
            Row &candidate = m_table->m_rows[m_rowids[m_rowids_idx]];
            ++m_rowids_idx;   // position is consumed whether or not the row is visible
            if (candidate.is_visible())
                return &candidate;
        }
        return nullptr;
    }

    while (m_fscan_itr != m_table->m_rows.end()) {
        Row &candidate = *m_fscan_itr;
        ++m_fscan_itr;        // position is consumed whether or not the row is visible
        if (candidate.is_visible())
            return &candidate;
    }
    return nullptr;
}
} // namespace

View File

@ -1,14 +1,16 @@
#pragma once
#include "index.h"
#include "parser.h"
#include "row.h"
#include <utility>
#include <vector>
namespace usql {
struct Table {
struct Table {
Table(const Table &other);
Table(const std::string& name, const std::vector<ColDefNode>& columns);
@ -18,13 +20,15 @@ namespace usql {
[[nodiscard]] int columns_count() const { return (int) m_col_defs.size(); };
[[nodiscard]] size_t rows_count() const { return m_rows.size(); };
Row& create_empty_row();
void commit_row(const Row &row);
void commit_copy_of_row(const Row &row);
[[nodiscard]] size_t get_rowid(const Row &row) const;
Row &create_empty_row();
void commit_row(Row &row);
void commit_copy_of_row(Row &row);
static void validate_column(const ColDefNode *col_def, ValueNode *col_val);
static void validate_column(const ColDefNode *col_def, ColValue &col_val);
void validate_row(const Row &row);
void validate_row(Row &row);
std::string csv_string();
int load_csv_string(const std::string &content);
@ -32,14 +36,45 @@ namespace usql {
void print();
std::string m_name;
std::string m_name;
std::vector<ColDefNode> m_col_defs;
std::vector<Row> m_rows;
static long string_to_long(const std::string &s) ;
static double string_to_double(const std::string &s) ;
std::vector<Row> m_rows;
std::vector<Index> m_indexes;
void create_row_from_vector(const std::vector<ColDefNode> &colDefs, const std::vector<std::string> &csv_line);
};
}
void create_index(const Index& index);
bool drop_index(const std::string &index_name);
static void index_row(Index &index, const ColDefNode &col_def, const Row &row, size_t rowid);
static void unindex_row(Index &index, const ColDefNode &col_def, const Row &row, size_t rowid);
static void reindex_row(Index &index, const ColDefNode &col_def, const Row &old_row, const Row &new_row, size_t rowid);
void index_row(const Row &row);
void unindex_row(const Row &row);
void reindex_row(const Row &old_row, const Row &new_row);
void index_rows(const std::string &index_name);
Index * get_index(const std::string &index_name);
Index * get_index_for_column(const std::string &col_name);
bool empty();
// Cursor over the rows of a Table, driven by repeated calls to next().
// It either walks all rows of the table (full scan) or only the rows named by an
// explicit list of rowids.
// Note: m_fscan_itr is a std::vector iterator, so anything that reallocates the table's
// m_rows while a scan is in progress invalidates it.
struct rows_scanner {
    // Full scan: starts at the first element of tbl->m_rows.
    explicit rows_scanner(Table *tbl)
        : m_use_rowids(false),
          m_table(tbl),
          m_fscan_itr(tbl->m_rows.begin()) {}

    // Rowid scan: visits the given rowids in the order supplied; the list is moved in.
    rows_scanner(Table *tbl, std::vector<rowid_t> rowids)
        : m_use_rowids(true),
          m_table(tbl),
          m_rowids(std::move(rowids)),
          m_rowids_idx(0) {}

    Row *next();

private:
    bool m_use_rowids;                       // selects rowid mode vs. full-scan mode
    Table * m_table;                         // table being scanned (not owned)
    std::vector<Row>::iterator m_fscan_itr;  // current position, full-scan mode
    std::vector<rowid_t> m_rowids;           // rowids to visit, rowid mode
    size_t m_rowids_idx{};                   // next position in m_rowids
};
};
} // namespace

View File

@ -1,10 +1,8 @@
#include "usql.h"
#include "exception.h"
#include "ml_date.h"
#include "ml_string.h"
#include <algorithm>
#include <fstream>
namespace usql {
@ -20,10 +18,11 @@ std::unique_ptr<Table> USql::execute(const std::string &command) {
}
std::unique_ptr<Table> USql::execute(Node &node) {
// TODO optimize execution nodes here
switch (node.node_type) {
case NodeType::create_table:
return execute_create_table(static_cast<CreateTableNode &>(node));
case NodeType::create_index:
return execute_create_index(static_cast<CreateIndexNode &>(node));
case NodeType::create_table_as_select:
return execute_create_table_as_table(static_cast<CreateTableAsSelectNode &>(node));
case NodeType::drop_table:
@ -65,7 +64,7 @@ bool USql::eval_relational_operator(const RelationalOperatorNode &filter, Table
return !all_null;
return false;
} else if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::int_value) {
comparator = left_value->getIntegerValue() - right_value->getIntegerValue();
comparator = (double)(left_value->getIntegerValue() - right_value->getIntegerValue());
} else if ((left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::float_value) ||
(left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::int_value) ||
(left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::float_value)) {
@ -108,7 +107,7 @@ std::unique_ptr<ValueNode> USql::eval_value_node(Table *table, Row &row, Node *n
if (node->node_type == NodeType::database_value) {
return eval_database_value_node(table, row, node);
} else if (node->node_type == NodeType::int_value || node->node_type == NodeType::float_value || node->node_type == NodeType::string_value || node->node_type == NodeType::bool_value) {
return eval_literal_value_node(table, row, node);
return eval_literal_value_node(row, node);
} else if (node->node_type == NodeType::function) {
return eval_function_value_node(table, row, node, col_def_node, agg_func_value);
} else if (node->node_type == NodeType::null_value) {
@ -129,7 +128,7 @@ std::unique_ptr<ValueNode> USql::eval_database_value_node(Table *table, Row &row
return std::make_unique<NullValueNode>();
if (col_def.type == ColumnType::integer_type)
return std::make_unique<IntValueNode>(db_value.getIntValue());
return std::make_unique<IntValueNode>(db_value.getIntegerValue());
if (col_def.type == ColumnType::float_type)
return std::make_unique<DoubleValueNode>(db_value.getDoubleValue());
if (col_def.type == ColumnType::varchar_type)
@ -137,13 +136,13 @@ std::unique_ptr<ValueNode> USql::eval_database_value_node(Table *table, Row &row
if (col_def.type == ColumnType::bool_type)
return std::make_unique<BooleanValueNode>(db_value.getBoolValue());
if (col_def.type == ColumnType::date_type)
return std::make_unique<IntValueNode>(db_value.getIntValue());
return std::make_unique<IntValueNode>(db_value.getIntegerValue());
throw Exception("unknown database value type");
}
std::unique_ptr<ValueNode> USql::eval_literal_value_node(Table *table, Row &row, Node *node) {
std::unique_ptr<ValueNode> USql::eval_literal_value_node(Row &row, Node *node) {
if (node->node_type == NodeType::int_value) {
auto *ivl = static_cast<IntValueNode *>(node);
return std::make_unique<IntValueNode>(ivl->value);
@ -166,8 +165,7 @@ std::unique_ptr<ValueNode> USql::eval_literal_value_node(Table *table, Row &row,
}
std::unique_ptr<ValueNode>
USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) {
std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value) {
auto *fnc = static_cast<FunctionNode *>(node);
std::vector<std::unique_ptr<ValueNode>> evaluatedPars;
@ -177,7 +175,8 @@ USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *c
// at this moment no functions without parameter(s) or first param can be null
if (evaluatedPars.empty() || evaluatedPars[0]->isNull())
return std::make_unique<NullValueNode>();
throw Exception("eval_function_value_node, no function parameter or first is null, function: " + fnc->function);
// return std::make_unique<NullValueNode>();
// TODO use some enum
if (fnc->function == "lower") return lower_function(evaluatedPars);
@ -193,23 +192,13 @@ USql::eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *c
throw Exception("invalid function: " + fnc->function);
}
// Aggregate step for count(): yields 1 when the accumulator `agg_func_value` is null,
// otherwise the accumulator's integer value plus one. `evaluatedPars` is not inspected.
std::unique_ptr<ValueNode> USql::count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    const long running_count = agg_func_value->isNull() ? 1 : agg_func_value->getIntValue() + 1;
    return std::make_unique<IntValueNode>(running_count);
}
// Evaluates an AND/OR node against `row` with short-circuiting: the right operand is
// only evaluated when the left one does not already decide the result.
bool USql::eval_logical_operator(LogicalOperatorNode &node, Table *pTable, Row &row) {
    const bool lhs = eval_where(&(*node.left), pTable, row);

    const bool and_decided = node.op == LogicalOperatorType::and_operator && !lhs;
    const bool or_decided = node.op == LogicalOperatorType::or_operator && lhs;
    if (and_decided || or_decided)
        return lhs;

    return eval_where(&(*node.right), pTable, row);
}
@ -227,8 +216,8 @@ std::unique_ptr<ValueNode> USql::eval_arithmetic_operator(ColumnType outType, Ar
return std::make_unique<NullValueNode>();
if (outType == ColumnType::float_type) {
double l = ((ValueNode *) left.get())->getDoubleValue();
double r = ((ValueNode *) right.get())->getDoubleValue();
auto l = left->getDoubleValue();
auto r = right->getDoubleValue();
switch (node.op) {
case ArithmeticalOperatorType::plus_operator:
return std::make_unique<DoubleValueNode>(l + r);
@ -239,12 +228,12 @@ std::unique_ptr<ValueNode> USql::eval_arithmetic_operator(ColumnType outType, Ar
case ArithmeticalOperatorType::divide_operator:
return std::make_unique<DoubleValueNode>(l / r);
default:
throw Exception("implement me!!");
throw Exception("eval_arithmetic_operator, float type implement me!!");
}
} else if (outType == ColumnType::integer_type) {
long l = ((ValueNode *) left.get())->getIntegerValue();
long r = ((ValueNode *) right.get())->getIntegerValue();
auto l = left->getIntegerValue();
auto r = right->getIntegerValue();
switch (node.op) {
case ArithmeticalOperatorType::plus_operator:
return std::make_unique<IntValueNode>(l + r);
@ -255,179 +244,57 @@ std::unique_ptr<ValueNode> USql::eval_arithmetic_operator(ColumnType outType, Ar
case ArithmeticalOperatorType::divide_operator:
return std::make_unique<IntValueNode>(l / r);
default:
throw Exception("implement me!!");
throw Exception("eval_arithmetic_operator, integer type implement me!!");
}
} else if (outType == ColumnType::varchar_type) {
std::string l = ((ValueNode *) left.get())->getStringValue();
std::string r = ((ValueNode *) right.get())->getStringValue();
auto l = left->getStringValue();
auto r = right->getStringValue();
switch (node.op) {
case ArithmeticalOperatorType::plus_operator:
return std::make_unique<StringValueNode>(l + r);
default:
throw Exception("implement me!!");
throw Exception("eval_arithmetic_operator, varchar type implement me!!");
}
} else if (outType == ColumnType::date_type) {
auto l = left->getDateValue();
auto r = right->getDateValue();
switch (node.op) {
case ArithmeticalOperatorType::plus_operator:
return std::make_unique<IntValueNode>(l + r);
case ArithmeticalOperatorType::minus_operator:
return std::make_unique<IntValueNode>(l - r);
default:
throw Exception("eval_arithmetic_operator, date_type type implement me!!");
}
}
// TODO date node should support addition and subtraction
throw Exception("implement me!!");
throw Exception("eval_arithmetic_operator, implement me!!");
}
// to_string(date, format): formats the date value of the first parameter with
// date_to_string() using the format string given as second parameter.
// Both parameters are read unconditionally.
std::unique_ptr<ValueNode> USql::to_string_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    long date_value = evaluatedPars[0]->getDateValue();
    std::string pattern = evaluatedPars[1]->getStringValue();
    return std::make_unique<StringValueNode>(date_to_string(date_value, pattern));
}
// to_date(text, format): parses the string of the first parameter with string_to_date()
// using the format given as second parameter. The result is returned as an IntValueNode
// because there is no dedicated date value node.
std::unique_ptr<ValueNode> USql::to_date_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    std::string date_text = evaluatedPars[0]->getStringValue();
    std::string pattern = evaluatedPars[1]->getStringValue();
    return std::make_unique<IntValueNode>(string_to_date(date_text, pattern)); // No DateValueNode for now
}
// date_add(date, quantity, part): passes the date value, an integer quantity and a string
// naming the date part to add_to_date(). The result is returned as an IntValueNode
// because there is no dedicated date value node.
std::unique_ptr<ValueNode> USql::date_add_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    long base_date = evaluatedPars[0]->getDateValue();
    long amount = evaluatedPars[1]->getIntegerValue();
    std::string date_part = evaluatedPars[2]->getStringValue();
    return std::make_unique<IntValueNode>(add_to_date(base_date, amount, date_part)); // No DateValueNode for now
}
// upper(text): returns the string value of the first parameter with every character
// mapped through toupper().
std::unique_ptr<ValueNode> USql::upper_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    std::string text = evaluatedPars[0]->getStringValue();
    for (char &ch : text) {
        // go through unsigned char so toupper() never receives a negative value
        const unsigned char in = static_cast<unsigned char>(ch);
        ch = static_cast<char>(static_cast<unsigned char>(toupper(in)));
    }
    return std::make_unique<StringValueNode>(text);
}
// lower(text): returns the string value of the first parameter with every character
// mapped through tolower().
std::unique_ptr<ValueNode> USql::lower_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    std::string text = evaluatedPars[0]->getStringValue();
    for (char &ch : text) {
        // go through unsigned char so tolower() never receives a negative value
        const unsigned char in = static_cast<unsigned char>(ch);
        ch = static_cast<char>(static_cast<unsigned char>(tolower(in)));
    }
    return std::make_unique<StringValueNode>(text);
}
// pp(value [, format]): "pretty prints" a value into a short string.
//
// Non-numeric parameters are returned as their plain string value. For integer and
// float parameters:
//   - format "100%"  -> value with two decimals followed by '%'
//   - format "%.2f"  -> value with two decimals
//   - |value| >= 1e12 -> value / 1e12 with suffix 'T'
//   - |value| >= 1e9  -> value / 1e9  with suffix 'B'
//   - |value| >= 1e5  -> value / 1e6  with suffix 'M' (so 120000 prints as 0.12M)
//   - value == 0      -> "0"
//   - anything else   -> the first 10 characters of the plain string value
// Formatted results have trailing spaces removed and are passed through string_padd() with width 10.
//
// Fixes compared to the previous version: values in the 100000..999999 range (and
// their negatives) were divided by 100000 while still labelled 'M', printing them ten
// times too large; and one branch used an unbounded sprintf.
std::unique_ptr<ValueNode> USql::pp_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    constexpr int k_buf_size = 20;
    constexpr int k_out_width = 10;
    constexpr double k_trillion = 1000000000000.0;
    constexpr double k_billion = 1000000000.0;
    constexpr double k_million = 1000000.0;
    constexpr double k_hundred_thousand = 100000.0;

    auto &parsed_value = evaluatedPars[0];

    const bool is_numeric = parsed_value->node_type == NodeType::int_value ||
                            parsed_value->node_type == NodeType::float_value;
    if (!is_numeric)
        return std::make_unique<StringValueNode>(parsed_value->getStringValue());

    std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
    char buf[k_buf_size] {0};
    const double value = parsed_value->getDoubleValue();
    // Thresholds are symmetric for negative values; NaN fails every comparison below
    // and ends up in the plain-string fallback.
    const double magnitude = value < 0 ? -value : value;

    if (format == "100%")
        std::snprintf(buf, sizeof(buf), "%.2f%%", value);
    else if (format == "%.2f")
        std::snprintf(buf, sizeof(buf), "%.2f", value);
    else if (magnitude >= k_trillion)
        std::snprintf(buf, sizeof(buf), "%7.2fT", value / k_trillion);
    else if (magnitude >= k_billion)
        std::snprintf(buf, sizeof(buf), "%7.2fB", value / k_billion);
    else if (magnitude >= k_hundred_thousand)
        std::snprintf(buf, sizeof(buf), "%7.2fM", value / k_million); // 0.12M
    else if (value == 0)
        buf[0] = '0';
    else
        return std::make_unique<StringValueNode>(parsed_value->getStringValue().substr(0, k_out_width));

    std::string s {buf};
    s.erase(s.find_last_not_of(' ') + 1);   // drop trailing spaces, if any
    return std::make_unique<StringValueNode>(string_padd(s, k_out_width, ' ', false));
}
std::unique_ptr<ValueNode>
USql::max_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node,
ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
long val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull()) {
return std::make_unique<IntValueNode>(val);
} else {
return std::make_unique<IntValueNode>(std::max(val, agg_func_value->getIntValue()));
}
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
double val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull()) {
return std::make_unique<DoubleValueNode>(val);
} else {
return std::make_unique<DoubleValueNode>(std::max(val, agg_func_value->getDoubleValue()));
}
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
}
// TODO string and boolean
throw Exception("unsupported data type for max function");
}
std::unique_ptr<ValueNode>
USql::min_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node,
ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
long val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull()) {
return std::make_unique<IntValueNode>(val);
} else {
return std::make_unique<IntValueNode>(std::min(val, agg_func_value->getIntValue()));
}
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
double val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull()) {
return std::make_unique<DoubleValueNode>(val);
} else {
return std::make_unique<DoubleValueNode>(std::min(val, agg_func_value->getDoubleValue()));
}
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
}
// TODO string and boolean
throw Exception("unsupported data type for min function");
}
Table *USql::find_table(const std::string &name) {
Table *USql::find_table(const std::string &name) const {
auto name_cmp = [name](const Table& t) { return t.m_name == name; };
auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp);
if (table_def != std::end(m_tables)) {
return table_def.operator->();
} else {
throw Exception("table not found (" + name + ")");
}
if (table_def != std::end(m_tables))
return const_cast<Table *>(table_def.operator->());
throw Exception("table not found (" + name + ")");
}
void USql::check_table_not_exists(const std::string &name) {
void USql::check_table_not_exists(const std::string &name) const {
auto name_cmp = [name](const Table& t) { return t.m_name == name; };
auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp);
if (table_def != std::end(m_tables)) {
if (table_def != std::end(m_tables))
throw Exception("table already exists");
}
}
} // namespace
void USql::check_index_not_exists(const std::string &index_name) {
for (auto &table : m_tables)
if (table.get_index(index_name) != nullptr)
throw Exception("index already exists");
}
} // namespace

View File

@ -1,7 +1,9 @@
#pragma once
#include "settings.h"
#include "parser.h"
#include "table.h"
#include "index.h"
#include <string>
#include <list>
@ -18,18 +20,19 @@ public:
private:
std::unique_ptr<Table> execute(Node &node);
std::unique_ptr<Table> execute_create_table(CreateTableNode &node);
std::unique_ptr<Table> execute_create_table_as_table(CreateTableAsSelectNode &node);
std::unique_ptr<Table> execute_load(LoadIntoTableNode &node);
std::unique_ptr<Table> execute_save(SaveTableNode &node);
std::unique_ptr<Table> execute_drop(DropTableNode &node);
static std::unique_ptr<Table> execute_set(SetNode &node);
static std::unique_ptr<Table> execute_show(ShowNode &node);
std::unique_ptr<Table> execute_create_table(const CreateTableNode &node);
std::unique_ptr<Table> execute_create_index(const CreateIndexNode &node);
std::unique_ptr<Table> execute_create_table_as_table(const CreateTableAsSelectNode &node);
std::unique_ptr<Table> execute_load(const LoadIntoTableNode &node);
std::unique_ptr<Table> execute_save(const SaveTableNode &node);
std::unique_ptr<Table> execute_drop(const DropTableNode &node);
static std::unique_ptr<Table> execute_set(const SetNode &node);
static std::unique_ptr<Table> execute_show(const ShowNode &node);
std::unique_ptr<Table> execute_insert_into_table(InsertIntoTableNode &node);
std::unique_ptr<Table> execute_select(SelectFromTableNode &node);
std::unique_ptr<Table> execute_delete(DeleteFromTableNode &node);
std::unique_ptr<Table> execute_update(UpdateTableNode &node);
std::unique_ptr<Table> execute_insert_into_table(const InsertIntoTableNode &node);
std::unique_ptr<Table> execute_select(SelectFromTableNode &node) const;
std::unique_ptr<Table> execute_delete(const DeleteFromTableNode &node);
std::unique_ptr<Table> execute_update(const UpdateTableNode &node);
private:
@ -37,7 +40,7 @@ private:
static std::unique_ptr<ValueNode> eval_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value);
static std::unique_ptr<ValueNode> eval_database_value_node(Table *table, Row &row, Node *node);
static std::unique_ptr<ValueNode> eval_literal_value_node(Table *table, Row &row, Node *node);
static std::unique_ptr<ValueNode> eval_literal_value_node(Row &row, Node *node);
static std::unique_ptr<ValueNode> eval_function_value_node(Table *table, Row &row, Node *node, ColDefNode *col_def_node, ColValue *agg_func_value);
@ -50,22 +53,23 @@ private:
static std::tuple<int, ColDefNode> get_column_definition(Table *table, SelectColNode *select_col_node, int col_order);
static ColDefNode get_db_column_definition(Table *table, Node *node);
static std::tuple<int, ColDefNode> get_node_definition(Table *table, Node *select_col_node, const std::string & col_name, int col_order);
Table *find_table(const std::string &name);
[[nodiscard]] Table *find_table(const std::string &name) const;
void check_table_not_exists(const std::string &name);
void check_table_not_exists(const std::string &name) const;
void check_index_not_exists(const std::string &index_name);
private:
Parser m_parser;
std::list<Table> m_tables;
static void execute_distinct(SelectFromTableNode &node, Table *result) ;
static void execute_order_by(SelectFromTableNode &node, Table *table, Table *result) ;
static void execute_offset_limit(OffsetLimitNode &node, Table *result) ;
static void execute_distinct(SelectFromTableNode &node, Table *result);
static void execute_order_by(SelectFromTableNode &node, Table *result);
static void execute_offset_limit(OffsetLimitNode &node, Table *result);
void expand_asterix_char(SelectFromTableNode &node, Table *table) const;
void setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const;
static void expand_asterix_char(SelectFromTableNode &node, Table *table) ;
static void setup_order_columns(std::vector<ColOrderNode> &node, Table *table) ;
bool check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const;
static bool check_for_aggregate_only_functions(SelectFromTableNode &node, size_t result_cols_cnt) ;
static std::unique_ptr<ValueNode> lower_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars);
static std::unique_ptr<ValueNode> upper_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars);
@ -77,8 +81,20 @@ private:
static std::unique_ptr<ValueNode> max_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value);
static std::unique_ptr<ValueNode> min_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value);
static std::unique_ptr<ValueNode>
count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars);
static std::unique_ptr<ValueNode> count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars);
static void select_row(SelectFromTableNode &where_node,
Table *src_table, Row *src_row,
Table *rslt_table,
const std::vector<ColDefNode> &rslt_tbl_col_defs, const std::vector<int> &src_table_col_index,
bool is_aggregated) ;
std::pair<bool, std::vector<rowid_t>> probe_index_scan(const Node *where, Table *table) const;
std::pair<bool, std::vector<rowid_t>> look_for_usable_index(const Node *where, Table *table) const;
bool normalize_where(const Node *node) const;
Table::rows_scanner get_iterator(Table *table, const Node *where) const;
};
} // namespace

View File

@ -1,6 +1,5 @@
#include "usql.h"
#include "exception.h"
#include "ml_date.h"
#include "ml_string.h"
#include <algorithm>
@ -10,7 +9,7 @@ namespace usql {
std::unique_ptr<Table> USql::execute_create_table(CreateTableNode &node) {
std::unique_ptr<Table> USql::execute_create_table(const CreateTableNode &node) {
check_table_not_exists(node.table_name);
Table table{node.table_name, node.cols_defs};
@ -20,7 +19,23 @@ std::unique_ptr<Table> USql::execute_create_table(CreateTableNode &node) {
}
std::unique_ptr<Table> USql::execute_create_table_as_table(CreateTableAsSelectNode &node) {
std::unique_ptr<Table> USql::execute_create_index(const CreateIndexNode &node) {
Table *table_def = find_table(node.table_name); // throws exception if not found
ColDefNode col_def = table_def->get_column_def(node.column_name); // throws exception if not found
check_index_not_exists(node.index_name);
if (col_def.null) throw Exception("index on not null supported only");
if (table_def->get_index_for_column(node.column_name) != nullptr) throw Exception("column is already indexed");
table_def->create_index({node.index_name, node.column_name, col_def.type});
table_def->index_rows(node.index_name);
return create_stmt_result_table(0, "index created", 0);
}
std::unique_ptr<Table> USql::execute_create_table_as_table(const CreateTableAsSelectNode &node) {
check_table_not_exists(node.table_name);
auto select = execute_select((SelectFromTableNode &) *node.select_table);
@ -43,7 +58,7 @@ std::unique_ptr<Table> USql::execute_create_table_as_table(CreateTableAsSelectNo
std::unique_ptr<Table> USql::execute_drop(DropTableNode &node) {
std::unique_ptr<Table> USql::execute_drop(const DropTableNode &node) {
auto name_cmp = [node](const Table& t) { return t.m_name == node.table_name; };
auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp);
@ -55,12 +70,12 @@ std::unique_ptr<Table> USql::execute_drop(DropTableNode &node) {
throw Exception("table not found (" + node.table_name + ")");
}
std::unique_ptr<Table> USql::execute_set(SetNode &node) {
std::unique_ptr<Table> USql::execute_set(const SetNode &node) {
Settings::set_setting(node.name, node.value);
return create_stmt_result_table(0, "set succeeded", 1);
}
std::unique_ptr<Table> USql::execute_show(ShowNode &node) {
std::unique_ptr<Table> USql::execute_show(const ShowNode &node) {
std::string value = Settings::get_setting(node.name);
return create_stmt_result_table(0, "show succeeded: " + value, 1);
}
@ -70,7 +85,7 @@ std::unique_ptr<Table> USql::create_stmt_result_table(long code, const std::stri
std::vector<ColDefNode> result_tbl_col_defs{};
result_tbl_col_defs.emplace_back("code", ColumnType::integer_type, 0, 1, false);
result_tbl_col_defs.emplace_back("desc", ColumnType::varchar_type, 1, 48, false);
result_tbl_col_defs.emplace_back("affected_rows", ColumnType::integer_type, 0, 1, true);
result_tbl_col_defs.emplace_back("aff_rows", ColumnType::integer_type, 0, 1, true);
auto table_def = std::make_unique<Table>("result", result_tbl_col_defs);
@ -85,7 +100,7 @@ std::unique_ptr<Table> USql::create_stmt_result_table(long code, const std::stri
std::unique_ptr<Table> USql::execute_load(LoadIntoTableNode &node) {
std::unique_ptr<Table> USql::execute_load(const LoadIntoTableNode &node) {
// find source table
Table *table_def = find_table(node.table_name);
@ -101,7 +116,7 @@ std::unique_ptr<Table> USql::execute_load(LoadIntoTableNode &node) {
}
std::unique_ptr<Table> USql::execute_save(SaveTableNode &node) {
std::unique_ptr<Table> USql::execute_save(const SaveTableNode &node) {
// find source table
Table *table_def = find_table(node.table_name);

View File

@ -1,87 +1,118 @@
#include "usql.h"
#include "exception.h"
#include "ml_date.h"
#include "ml_string.h"
#include <algorithm>
#include <fstream>
namespace usql {
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
// find source table
Table *table = find_table(node.table_name);
std::pair<bool, std::vector<rowid_t>> USql::probe_index_scan(const Node *where, Table *table) const {
bool indexscan_possible = normalize_where(where);
// expand *
expand_asterix_char(node, table);
// create result table
std::vector<ColDefNode> result_tbl_col_defs{};
std::vector<int> source_table_col_index{};
for (int i = 0; i < node.cols_names->size(); i++) {
SelectColNode * col_node = &node.cols_names->operator[](i);
auto [src_tbl_col_index, rst_tbl_col_def] = get_column_definition(table, col_node, i);
source_table_col_index.push_back(src_tbl_col_index);
result_tbl_col_defs.push_back(rst_tbl_col_def);
if (indexscan_possible && Settings::get_bool_setting("USE_INDEXSCAN")) {
// where->dump();
return look_for_usable_index(where, table);
}
// check for aggregate function
bool aggregate_funcs = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
// prepare result table structure
auto result = std::make_unique<Table>("result", result_tbl_col_defs);
// replace possible order by col names to col indexes and validate
setup_order_columns(node.order_by, result.get());
// execute access plan
Row* new_row = nullptr;
for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) {
// eval where for row
if (eval_where(node.where.get(), table, *row)) {
// prepare empty row and copy column values
// when agregate functions in result only one row for table
if (!aggregate_funcs || result->rows_count()==0) {
new_row = &result->create_empty_row();
}
for (auto idx = 0; idx < result->columns_count(); idx++) {
auto src_table_col_idx = source_table_col_index[idx];
if (src_table_col_idx == FUNCTION_CALL) {
auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[](idx).value.get(), &result_tbl_col_defs[idx], &new_row->operator[](idx));
ValueNode *col_value = evaluated_value.get();
new_row->setColumnValue(&result_tbl_col_defs[idx], col_value);
} else {
ColValue &col_value = row->operator[](src_table_col_idx);
new_row->setColumnValue(&result_tbl_col_defs[idx], col_value);
}
}
// add row to result
if (aggregate_funcs == 0) {
result->commit_row(*new_row);
}
}
}
// when aggregates commit this one row
if (aggregate_funcs && new_row != nullptr) {
result->commit_row(*new_row);
}
execute_distinct(node, result.get());
execute_order_by(node, table, result.get());
execute_offset_limit(node.offset_limit, result.get());
return result;
// no index scan
return std::make_pair(false, std::vector<rowid_t>{});
}
bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, int result_cols_cnt) const {
std::pair<bool, std::vector<rowid_t>> USql::look_for_usable_index(const Node *where, Table *table) const {
if (where->node_type == NodeType::relational_operator) {
auto * ron = (RelationalOperatorNode *)where;
// TODO implement >, >=, <=, <
// https://en.cppreference.com/w/cpp/container/map/upper_bound
if (ron->op == RelationalOperatorType::equal) {
if (ron->left->node_type == NodeType::database_value &&
((ron->right->node_type == NodeType::int_value) || (ron->right->node_type == NodeType::string_value))
) {
auto col_name = ((DatabaseValueNode *)ron->left.get())->col_name;
Index * used_index = table->get_index_for_column(col_name);
if (used_index != nullptr) {
std::vector<rowid_t> rowids = used_index->search((ValueNode *)ron->right.get());
#ifndef NDEBUG
std::cout << "using index " << table->m_name << "(" << used_index->get_column_name() << "), " << rowids.size() << "/" << table->rows_count() << std::endl;
#endif
return std::make_pair(true, rowids);
}
}
}
} else if (where->node_type == NodeType::logical_operator) {
auto * operatorNode = (LogicalOperatorNode *)where;
if (operatorNode->op == LogicalOperatorType::and_operator) {
auto [use_index, rowids] = look_for_usable_index(operatorNode->left.get(), table);
if (use_index) {
return std::make_pair(true, rowids);
}
return look_for_usable_index(operatorNode->right.get(), table);
}
}
// no index available
return std::make_pair(false, std::vector<rowid_t>{});
}
bool USql::normalize_where(const Node *node) const {
// normalize relational operators "layout" and check whether index scan even possible
// unify relational operators tha left node is always database value
if (node->node_type == NodeType::relational_operator) {
// TODO more optimizations here, for example node 1 = 2 etc
auto * ron = (RelationalOperatorNode *)node;
if (ron->right->node_type == NodeType::database_value && ((ron->left->node_type == NodeType::int_value) || (ron->left->node_type == NodeType::string_value)) ) {
std::swap(ron->left, ron->right);
}
return true;
} else if (node->node_type == NodeType::logical_operator) {
auto * operatorNode = (LogicalOperatorNode *)node;
if (operatorNode->op == LogicalOperatorType::or_operator) {
return false;
}
bool left_subnode = normalize_where(operatorNode->left.get());
bool right_subnode = normalize_where(operatorNode->left.get());
return left_subnode && right_subnode;
}
return true;
}
// Projects one source row into the result table.
//   where_node          - the select statement; supplies the column expressions
//   src_table, src_row  - row being projected and the table it belongs to
//   rslt_table          - table receiving the projected row
//   rslt_tbl_col_defs   - column definitions of the result table
//   src_table_col_index - per result column: index of the source column, or
//                         FUNCTION_CALL when the column is a computed expression
//   is_aggregated       - true when the select consists of aggregate functions
// For aggregated selects the first result row is reused once it exists, so the
// result keeps a single row; otherwise a new row is created for every call.
void USql::select_row(SelectFromTableNode &where_node,
                      Table *src_table, Row *src_row,
                      Table *rslt_table,
                      const std::vector<ColDefNode> &rslt_tbl_col_defs,
                      const std::vector<int> &src_table_col_index,
                      bool is_aggregated) {
    const bool reuse_first_row = is_aggregated && !rslt_table->empty();
    Row &target_row = reuse_first_row ? rslt_table->m_rows[0] : rslt_table->create_empty_row();

    for (int col = 0; col < rslt_table->columns_count(); col++) {
        auto *target_def = const_cast<ColDefNode *>(&rslt_tbl_col_defs[col]);
        const auto source_col = src_table_col_index[col];

        if (source_col == FUNCTION_CALL) {
            // computed column: evaluate the expression, passing the current
            // target value as the aggregate accumulator
            auto computed = eval_value_node(src_table, *src_row,
                                            where_node.cols_names->operator[](col).value.get(),
                                            target_def, &target_row[col]);
            ValueNode *computed_value = computed.get();
            target_row.setColumnValue(target_def, computed_value);
        } else {
            // plain column: copy the value over from the source row
            ColValue &source_value = (*src_row)[source_col];
            target_row.setColumnValue(target_def, source_value);
        }
    }

    // for aggregate is validated more than needed
    rslt_table->commit_row(target_row);
}
bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, size_t result_cols_cnt) {
int aggregate_funcs = 0;
for (int i = 0; i < node.cols_names->size(); i++) {
SelectColNode * col_node = &node.cols_names->operator[](i);
@ -99,7 +130,7 @@ bool USql::check_for_aggregate_only_functions(SelectFromTableNode &node, int res
return aggregate_funcs > 0;
}
void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const {
void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) {
if (node.cols_names->size() == 1 && node.cols_names->operator[](0).name == "*") {
node.cols_names->clear();
node.cols_names->reserve(table->columns_count());
@ -109,7 +140,7 @@ void USql::expand_asterix_char(SelectFromTableNode &node, Table *table) const {
}
}
void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) const {
void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) {
for (auto& order_node : node) {
if (!order_node.col_name.empty()) {
ColDefNode col_def = table->get_column_def(order_node.col_name);
@ -120,19 +151,19 @@ void USql::setup_order_columns(std::vector<ColOrderNode> &node, Table *table) co
if (order_node.col_index < 0 || order_node.col_index >= table->columns_count())
throw Exception("unknown column in order by clause (" + order_node.col_name + ")");
}
}
}
// Applies DISTINCT to the result table: sorts the rows so that equal rows
// become neighbours and then removes the adjacent duplicates.
// Does nothing when the select statement is not marked distinct.
void USql::execute_distinct(SelectFromTableNode &node, Table *result) {
    if (!node.distinct) return;

    // std::sort requires a strict weak ordering, i.e. the comparator must return
    // false for equivalent elements. The previous ">= 0" returned true for equal
    // rows, which is undefined behaviour and may read outside of the range.
    // NOTE(review): assumes Row::compare returns 0 for equal rows - confirm.
    auto compare_rows = [](const Row &a, const Row &b) { return a.compare(b) > 0; };
    std::sort(result->m_rows.begin(), result->m_rows.end(), compare_rows);

    result->m_rows.erase(std::unique(result->m_rows.begin(), result->m_rows.end()), result->m_rows.end());
}
void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *result) {
void USql::execute_order_by(SelectFromTableNode &node, Table *result) {
if (node.order_by.empty()) return;
auto compare_rows = [&node, &result](const Row &a, const Row &b) {
@ -160,6 +191,21 @@ void USql::execute_offset_limit(OffsetLimitNode &node, Table *result) {
result->m_rows.erase(result->m_rows.begin() + node.limit, result->m_rows.end());
}
// Evaluates the WHERE expression for a single row.
// A true_node always matches; relational and logical operators are delegated
// to their evaluators. Throws Exception for any other node type.
bool USql::eval_where(Node *where, Table *table, Row &row) {
    const auto kind = where->node_type;

    if (kind == NodeType::true_node) {
        return true;
    }
    if (kind == NodeType::relational_operator) {
        // just one condition
        return eval_relational_operator(*static_cast<RelationalOperatorNode *>(where), table, row);
    }
    if (kind == NodeType::logical_operator) {
        return eval_logical_operator(*static_cast<LogicalOperatorNode *>(where), table, row);
    }

    throw Exception("Wrong node type");
}
// Resolves the definition of one selected column by forwarding its value
// expression and its name to get_node_definition().
std::tuple<int, ColDefNode> USql::get_column_definition(Table *table, SelectColNode *select_col_node, int col_order) {
    auto *value_expr = select_col_node->value.get();
    const auto &column_name = select_col_node->name;
    return get_node_definition(table, value_expr, column_name, col_order);
}
@ -218,7 +264,7 @@ std::tuple<int, ColDefNode> USql::get_node_definition(Table *table, Node * node,
auto [left_col_index, left_tbl_col_def] = get_node_definition(table, ari_node->left.get(), col_name, col_order );
auto [right_col_index, right_tbl_col_def] = get_node_definition(table, ari_node->right.get(), col_name, col_order );
ColumnType col_type; // TODO handle varchar and it len
ColumnType col_type; // TODO handle varchar and its len
if (left_tbl_col_def.type==ColumnType::float_type || right_tbl_col_def.type==ColumnType::float_type)
col_type = ColumnType::float_type;
else
@ -249,8 +295,7 @@ std::tuple<int, ColDefNode> USql::get_node_definition(Table *table, Node * node,
std::unique_ptr<Table> USql::execute_insert_into_table(InsertIntoTableNode &node) {
std::unique_ptr<Table> USql::execute_insert_into_table(const InsertIntoTableNode &node) {
// find table
Table *table_def = find_table(node.table_name);
@ -276,45 +321,52 @@ std::unique_ptr<Table> USql::execute_insert_into_table(InsertIntoTableNode &node
std::unique_ptr<Table> USql::execute_delete(DeleteFromTableNode &node) {
std::unique_ptr<Table> USql::execute_delete(const DeleteFromTableNode &node) {
size_t affected_rows = 0;
// find source table
Table *table = find_table(node.table_name);
// execute access plan
auto affected_rows = table->rows_count();
Table::rows_scanner i = get_iterator(table, node.where.get());
while(Row *row = i.next()) {
if (eval_where(node.where.get(), table, *row)) {
row->set_deleted();
table->unindex_row(*row);
table->m_rows.erase(
std::remove_if(table->m_rows.begin(), table->m_rows.end(),
[&node, table](Row &row){return eval_where(node.where.get(), table, row);}),
table->m_rows.end());
affected_rows -= table->rows_count();
affected_rows++;
}
}
return create_stmt_result_table(0, "delete succeeded", affected_rows);
}
std::unique_ptr<Table> USql::execute_update(UpdateTableNode &node) {
std::unique_ptr<Table> USql::execute_update(const UpdateTableNode &node) {
size_t affected_rows = 0;
// find source table
Table *table = find_table(node.table_name);
// execute access plan
int affected_rows = 0;
for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) {
// eval where for row
Table::rows_scanner i = get_iterator(table, node.where.get());
while(Row *row = i.next()) {
if (eval_where(node.where.get(), table, *row)) {
int i = 0;
Row old_row = * row;
int col_idx = 0;
for (const auto& col : node.cols_names) {
// TODO cache it like in select
// PERF cache it like in select
ColDefNode col_def = table->get_column_def(col.col_name);
std::unique_ptr<ValueNode> new_val = eval_arithmetic_operator(col_def.type,
static_cast<ArithmeticalOperatorNode &>(*node.values[i]),
table, *row);
static_cast<ArithmeticalOperatorNode &>(*node.values[col_idx]), table, *row);
usql::Table::validate_column(&col_def, new_val.get());
row->setColumnValue(&col_def, new_val.get());
i++;
col_idx++;
}
table->reindex_row(old_row, *row);
affected_rows++;
// TODO problem here: when the update fails on one row, the changes to the other rows should not be applied either
}
@ -324,20 +376,58 @@ std::unique_ptr<Table> USql::execute_update(UpdateTableNode &node) {
}
bool USql::eval_where(Node *where, Table *table, Row &row) {
switch (where->node_type) {
case NodeType::true_node:
return true;
case NodeType::relational_operator: // just one condition
return eval_relational_operator(*((RelationalOperatorNode *) where), table, row);
case NodeType::logical_operator:
return eval_logical_operator(*((LogicalOperatorNode *) where), table, row);
default:
throw Exception("Wrong node type");
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) const {
// find source table
Table *table = find_table(node.table_name);
// expand *
expand_asterix_char(node, table);
// create result table
std::vector<ColDefNode> result_tbl_col_defs{};
std::vector<int> source_table_col_index{};
for (int i = 0; i < node.cols_names->size(); i++) {
SelectColNode *col_node = &node.cols_names->operator[](i);
auto [src_tbl_col_index, rst_tbl_col_def] = get_column_definition(table, col_node, i);
source_table_col_index.push_back(src_tbl_col_index);
result_tbl_col_defs.push_back(rst_tbl_col_def);
}
return false;
// check for aggregate function
bool is_aggregated = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
// prepare result table structure
auto result = std::make_unique<Table>("result", result_tbl_col_defs);
// replace possible order by col names to col indexes and validate
setup_order_columns(node.order_by, result.get());
// execute access plan
Table::rows_scanner i = get_iterator(table, node.where.get());
while(Row *row = i.next()) {
if (eval_where(node.where.get(), table, *row)) { // put it into row_scanner.next
select_row(node, table, row, result.get(), result_tbl_col_defs, source_table_col_index, is_aggregated);
}
}
execute_distinct(node, result.get());
execute_order_by(node, result.get());
execute_offset_limit(node.offset_limit, result.get());
return result;
}
// Builds the row scanner for a statement: a scanner restricted to the row ids
// delivered by probe_index_scan() when an index can be used, otherwise a
// scanner over the whole table.
Table::rows_scanner USql::get_iterator(Table *table, const Node *where) const {
    auto probe = probe_index_scan(where, table);
    if (!probe.first) {
        return Table::rows_scanner(table);
    }
    return Table::rows_scanner(table, probe.second);
}
} // namespace

168
usql/usql_function.cpp Normal file
View File

@ -0,0 +1,168 @@
#include "usql.h"
#include "exception.h"
#include "ml_date.h"
#include "ml_string.h"
#include <algorithm>
namespace usql {
// to_string(date, format): formats the date value of the first argument with
// date_to_string() using the format string given as the second argument.
std::unique_ptr<ValueNode> USql::to_string_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    long date_value = evaluatedPars[0]->getDateValue();
    std::string pattern = evaluatedPars[1]->getStringValue();

    std::string text = date_to_string(date_value, pattern);
    return std::make_unique<StringValueNode>(text);
}
// to_date(text, format): parses the first argument with string_to_date() using
// the format string given as the second argument.
// The result is returned as an integer node, there is no DateValueNode for now.
std::unique_ptr<ValueNode> USql::to_date_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    std::string text = evaluatedPars[0]->getStringValue();
    std::string pattern = evaluatedPars[1]->getStringValue();

    long parsed = string_to_date(text, pattern);
    return std::make_unique<IntValueNode>(parsed);
}
// date_add(date, quantity, part): passes the date value, the integer quantity
// and the part name to add_to_date() and returns what it computes.
// The result is returned as an integer node, there is no DateValueNode for now.
std::unique_ptr<ValueNode> USql::date_add_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    long base_date = evaluatedPars[0]->getDateValue();
    long amount = evaluatedPars[1]->getIntegerValue();
    std::string unit = evaluatedPars[2]->getStringValue();

    long shifted = add_to_date(base_date, amount, unit);
    return std::make_unique<IntValueNode>(shifted);
}
// upper(text): returns the string value of the first argument with every
// character converted by toupper().
std::unique_ptr<ValueNode> USql::upper_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    std::string text = evaluatedPars[0]->getStringValue();
    for (char &ch : text) {
        // go through unsigned char, toupper() is not defined for negative values
        const unsigned char raw = static_cast<unsigned char>(ch);
        ch = static_cast<char>(static_cast<unsigned char>(toupper(raw)));
    }
    return std::make_unique<StringValueNode>(text);
}
// lower(text): returns the string value of the first argument with every
// character converted by tolower().
std::unique_ptr<ValueNode> USql::lower_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    std::string text = evaluatedPars[0]->getStringValue();
    for (char &ch : text) {
        // go through unsigned char, tolower() is not defined for negative values
        const unsigned char raw = static_cast<unsigned char>(ch);
        ch = static_cast<char>(static_cast<unsigned char>(tolower(raw)));
    }
    return std::make_unique<StringValueNode>(text);
}
// pp(value [, format]): pretty prints a value into a short string.
//
// Non numeric values are returned as their plain string value.
// For int and float values:
//   format "100%"  -> two decimals followed by a percent sign
//   format "%.2f"  -> two decimals
//   otherwise      -> magnitudes from 1e12 / 1e9 / 1e5 upwards are scaled and
//                     suffixed with T / B / M, zero is printed as "0", and
//                     anything else is the string value cut to 10 characters
// Formatted output has trailing spaces removed and is then passed through
// string_padd() with width 10.
//
// Values with a magnitude from 100 000 up to 1 000 000 are printed as a
// fraction of a million (120000 -> 0.12M). They used to be divided by 100 000
// while still carrying the M suffix, which printed 1.20M instead.
std::unique_ptr<ValueNode> USql::pp_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    constexpr int kColumnWidth = 10;
    constexpr double kTrillion = 1000000000000.0;
    constexpr double kBillion = 1000000000.0;
    constexpr double kMillion = 1000000.0;
    constexpr double kSmallestMillionFraction = 100000.0; // printed as 0.10M

    auto &parsed_value = evaluatedPars[0];

    const bool is_numeric = parsed_value->node_type == NodeType::int_value ||
                            parsed_value->node_type == NodeType::float_value;
    if (!is_numeric)
        return std::make_unique<StringValueNode>(parsed_value->getStringValue());

    std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
    char buf[20] {0};
    const double value = parsed_value->getDoubleValue();
    // positive and negative values use the same thresholds
    const double magnitude = value < 0 ? -value : value;

    if (format == "100%")
        std::snprintf(buf, sizeof(buf), "%.2f%%", value);
    else if (format == "%.2f")
        std::snprintf(buf, sizeof(buf), "%.2f", value);
    else if (magnitude >= kTrillion)
        std::snprintf(buf, sizeof(buf), "%7.2fT", value / kTrillion);
    else if (magnitude >= kBillion)
        std::snprintf(buf, sizeof(buf), "%7.2fB", value / kBillion);
    else if (magnitude >= kSmallestMillionFraction)
        std::snprintf(buf, sizeof(buf), "%7.2fM", value / kMillion); // e.g. 0.12M
    else if (value == 0)
        buf[0] = '0';
    else
        return std::make_unique<StringValueNode>(parsed_value->getStringValue().substr(0, kColumnWidth));

    std::string s {buf};
    return std::make_unique<StringValueNode>(string_padd(s.erase(s.find_last_not_of(' ') + 1), kColumnWidth, ' ', false));
}
std::unique_ptr<ValueNode>
USql::max_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node, ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull())
return std::make_unique<IntValueNode>(val);
else
return std::make_unique<IntValueNode>(std::max(val, agg_func_value->getIntegerValue()));
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntegerValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull())
return std::make_unique<DoubleValueNode>(val);
else
return std::make_unique<DoubleValueNode>(std::max(val, agg_func_value->getDoubleValue()));
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
} else if (col_def_node->type == ColumnType::varchar_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getStringValue();
if (agg_func_value->isNull())
return std::make_unique<StringValueNode>(val);
else
return std::make_unique<StringValueNode>(std::max(val, agg_func_value->getStringValue()));
} else {
return std::make_unique<StringValueNode>(agg_func_value->getStringValue());
}
}
throw Exception("unsupported data type for max function");
}
std::unique_ptr<ValueNode>
USql::min_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars, const ColDefNode *col_def_node,
ColValue *agg_func_value) {
if (col_def_node->type == ColumnType::integer_type || col_def_node->type == ColumnType::date_type) {
if (!evaluatedPars[0]->isNull()) {
long val = evaluatedPars[0]->getIntegerValue();
if (agg_func_value->isNull())
return std::make_unique<IntValueNode>(val);
else
return std::make_unique<IntValueNode>(std::min(val, agg_func_value->getIntegerValue()));
} else {
return std::make_unique<IntValueNode>(agg_func_value->getIntegerValue());
}
} else if (col_def_node->type == ColumnType::float_type) {
if (!evaluatedPars[0]->isNull()) {
double val = evaluatedPars[0]->getDoubleValue();
if (agg_func_value->isNull())
return std::make_unique<DoubleValueNode>(val);
else
return std::make_unique<DoubleValueNode>(std::min(val, agg_func_value->getDoubleValue()));
} else {
return std::make_unique<DoubleValueNode>(agg_func_value->getDoubleValue());
}
} else if (col_def_node->type == ColumnType::varchar_type) {
if (!evaluatedPars[0]->isNull()) {
auto val = evaluatedPars[0]->getStringValue();
if (agg_func_value->isNull())
return std::make_unique<StringValueNode>(val);
else
return std::make_unique<StringValueNode>(std::min(val, agg_func_value->getStringValue()));
} else {
return std::make_unique<StringValueNode>(agg_func_value->getStringValue());
}
}
throw Exception("unsupported data type for min function");
}
// Aggregate step for count(): returns 1 when nothing has been accumulated yet
// (agg_func_value is null), otherwise the accumulated count increased by one.
// evaluatedPars is not inspected.
std::unique_ptr<ValueNode> USql::count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars) {
    const long next_count = agg_func_value->isNull() ? 1 : agg_func_value->getIntegerValue() + 1;
    return std::make_unique<IntValueNode>(next_count);
}
} // namespace