indexes WIP
This commit is contained in:
36
debug.h
Normal file
36
debug.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
// SQL statements that debug() in main.cpp executes one after another.
// Declared `inline` (C++17) so that this header-defined variable has a single
// definition program-wide even if the header is included from more than one
// translation unit.
inline std::vector<std::string> c_sql_commands {
    // "create table history_earnings_dates (datetime date, symbol varchar(8), time varchar(18), title varchar(256))",
    // "set 'DATE_FORMAT' = '%Y-%m-%d'",
    // "load into history_earnings_dates '/Users/vaclavt/Development/mlisp_fin/data/history_earnings_dates.csv'"
    // "insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')",
    // "insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')",
    // "delete from history_earnings_dates where symbol='BABA' and datetime=to_date('2021-11-04', '%Y-%m-%d')",
    // "select * from history_earnings_dates"

    "create table sf1 (symbol varchar(8) not null, dimension varchar(3), calendar_date date, date_key date, report_period date, last_updated date, accoci float, assets float, assetsavg float, assetsc float, assetsnc float, assetturnover float, bvps float, capex float, cashneq float, cashnequsd float, cor float, consolinc float, currentratio float, de float, debt float, debtc float, debtnc float, debtusd float, deferredrev float, depamor float, deposits float, divyield float, dps float, ebit float, ebitda float, ebitdamargin float, ebitdausd float, ebitusd float, ebt float, eps float, epsdil float, epsusd float, equity float, equityavg float, equityusd float, ev float, evebit float, evebitda float, fcf float, fcfps float, fxusd float, gp float, grossmargin float, intangibles float, intexp float, invcap float, invcapavg float, inventory float, investments float, investmentsc float, investmentsnc float, liabilities float, liabilitiesc float, liabilitiesnc float, marketcap float, ncf float, ncfbus float, ncfcommon float, ncfdebt float, ncfdiv float, ncff float, ncfi float, ncfinv float, ncfo float, ncfx float, netinc float, netinccmn float, netinccmnusd float, netincdis float, netincnci float, netmargin float, opex float, opinc float, payables float, payoutratio float, pb float, pe float, pe1 float, ppnenet float, prefdivis float, price float, ps float, ps1 float, receivables float, retearn float, revenue float, revenueusd float, rnd float, roa float, roe float, roic float, ros float, sbcomp float, sgna float, sharefactor float, sharesbas float, shareswa float, shareswadil float, sps float, tangibles float, taxassets float, taxexp float, taxliabilities float, tbvps float, workingcapital float)",
    "set 'DATE_FORMAT' = '%Y-%m-%d'",
    "create index sf1_symbol on sf1(symbol)",
    "load into sf1 '/srv/SHARADAR_SF1.csv'",
    // the same query is run twice: first with index scans disabled, then enabled
    "set 'USE_INDEXSCAN' = 'false'",
    "select dimension, to_string(calendar_date, '%d.%m.%Y'), pp(eps, \"%.2f\"), pp(shareswadil), pp(revenue), pp(netinc), pp(cashneq), pp(assets), pp(debt), pp(ncfdebt), pp(roe*100), pp(intangibles), calendar_date from sf1 where symbol = 'MU' and dimension = 'ARQ' order by dimension, calendar_date desc limit 5",
    "set 'USE_INDEXSCAN' = 'true'",
    "select dimension, to_string(calendar_date, '%d.%m.%Y'), pp(eps, \"%.2f\"), pp(shareswadil), pp(revenue), pp(netinc), pp(cashneq), pp(assets), pp(debt), pp(ncfdebt), pp(roe*100), pp(intangibles), calendar_date from sf1 where symbol = 'MU' and dimension = 'ARQ' order by dimension, calendar_date desc limit 5"

    // "create table a (i integer not null, s varchar(64))",
    // "create index a_i on a(i)",
    // "insert into a (i, s) values(1, 'one')",
    // "insert into a (i, s) values(2, 'two')",
    // "insert into a (i, s) values(2, 'second two')",
    // "insert into a (i, s) values(3, 'three')",
    // "set 'USE_INDEXSCAN' = 'true'",
    // "select * from a where 1 = i",
    // "delete from a where i = 2 and s ='two'",
    // "select * from a where i = 2",
    // "update a set i = 5 where i = 2",
    // "select * from a where i = 5",
    // "select max(i) from a where s = 'two'"
};
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include "lexer.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace usql {
|
||||
|
||||
82
index.h
82
index.h
@@ -1,79 +1,77 @@
|
||||
#pragma once
|
||||
|
||||
#include "exception.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
|
||||
namespace usql {
|
||||
|
||||
enum class IndexedDataType {
|
||||
integer,
|
||||
string
|
||||
};
|
||||
|
||||
using rowid_t = size_t; // int is now enough but size_t is correct
|
||||
|
||||
template <typename K>
|
||||
template<typename K>
|
||||
class Index {
|
||||
public:
|
||||
// Creates an empty, non-unique index named `index_name` over column `col_name`.
// The initializer list is written in member declaration order (m_uniq is
// declared first), which is the order the members are actually initialized in.
Index(std::string index_name, std::string col_name, IndexedDataType type) :
    m_uniq(false),
    m_index_name(std::move(index_name)),
    m_column_name(std::move(col_name)),
    m_data_type(type) {}
|
||||
|
||||
void insert(K key, int rowid) {
|
||||
// std::cout << "inserting key: " << key << " val: " << rowid << std::endl;
|
||||
|
||||
// TODO handle uniqueness
|
||||
void insert(K key, rowid_t rowid) {
|
||||
auto search = m_index.find(key);
|
||||
if (search != m_index.end()) {
|
||||
if (m_uniq)
|
||||
throw Exception("Inserting duplicate value into unique index");
|
||||
|
||||
search->second.push_back(rowid);
|
||||
} else {
|
||||
std::vector<int> rowids{rowid};
|
||||
std::vector<rowid_t> rowids{rowid};
|
||||
if (!m_uniq)
|
||||
rowids.reserve(8);
|
||||
m_index[key] = rowids;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update(K old_key, K new_key, int rowid) {
|
||||
// std::cout << "updating key: " << old_key << " to: " << new_key << " val: " << rowid << std::endl;
|
||||
|
||||
// TODO handle uniqueness
|
||||
}
|
||||
|
||||
void remove(K key, int rowid) {
|
||||
// std::cout << "removing key: " << key << " val: " << rowid << std::endl;
|
||||
|
||||
void remove(K key, rowid_t rowid) {
|
||||
auto search = m_index.find(key);
|
||||
if (search != m_index.end()) {
|
||||
search->second.erase(find(search->second.begin(), search->second.end(), rowid));
|
||||
}
|
||||
if (search->second.empty())
|
||||
m_index.erase(search);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> search(K key) {
|
||||
// std::cout << "returning rowids for key: " << key << std::endl;
|
||||
|
||||
std::vector<rowid_t> search(K key) {
|
||||
auto search = m_index.find(key);
|
||||
if (search != m_index.end()) {
|
||||
return search->second;
|
||||
} else {
|
||||
return std::vector<int>{};
|
||||
}
|
||||
return std::vector<rowid_t>{};
|
||||
}
|
||||
}
|
||||
|
||||
void truncate() {
|
||||
// std::cout << "truncating" << std::endl;
|
||||
m_index.clear();
|
||||
}
|
||||
|
||||
void dump() {
|
||||
std::for_each(m_index.begin(), m_index.end(),
|
||||
[](std::pair<K, std::vector<int>> element){
|
||||
K key = element.first;
|
||||
std::vector<int> rowids = element.second;
|
||||
std::cout << "key: " << key << ", rowids count:" << rowids.size() << std::endl;
|
||||
});
|
||||
}
|
||||
// void dump() {
|
||||
// std::for_each(m_index.begin(), m_index.end(),
|
||||
// [](std::pair<K, std::vector<rowid_t>> element){
|
||||
// K key = element.first;
|
||||
// std::vector<rowid_t> rowids = element.second;
|
||||
// std::cout << "key: " << key << ", rowids count:" << rowids.size() << std::endl;
|
||||
// });
|
||||
// }
|
||||
|
||||
[[nodiscard]] const std::string &get_column_name() const {
|
||||
return m_column_name;
|
||||
return m_column_name;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::string &get_index_name() const {
|
||||
@@ -83,14 +81,14 @@ public:
|
||||
[[nodiscard]] IndexedDataType get_data_type() const {
|
||||
return m_data_type;
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_uniq;
|
||||
std::string m_index_name;
|
||||
std::string m_column_name;
|
||||
IndexedDataType m_data_type;
|
||||
|
||||
// DEBUG for debug it is public
|
||||
public:
|
||||
std::map<K, std::vector<int> > m_index;
|
||||
};
|
||||
private:
|
||||
bool m_uniq;
|
||||
std::string m_index_name;
|
||||
std::string m_column_name;
|
||||
IndexedDataType m_data_type;
|
||||
|
||||
std::map<K, std::vector<rowid_t> > m_index;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
44
main.cpp
44
main.cpp
@@ -3,7 +3,8 @@
|
||||
|
||||
#include "linenoise.h"
|
||||
|
||||
// https://dev.to/joaoh82/what-would-sqlite-look-like-if-written-in-rust-part-1-2np4
|
||||
#include "debug.h"
|
||||
|
||||
|
||||
using namespace std::chrono;
|
||||
|
||||
@@ -126,40 +127,9 @@ void repl() {
|
||||
}
|
||||
|
||||
void debug() {
|
||||
std::vector<std::string> sql_commands {
|
||||
// "create table history_earnings_dates (datetime date, symbol varchar(8), time varchar(18), title varchar(256))",
|
||||
// "set 'DATE_FORMAT' = '%Y-%m-%d'",
|
||||
// "load into history_earnings_dates '/Users/vaclavt/Development/mlisp_fin/data/history_earnings_dates.csv'"
|
||||
// "insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')",
|
||||
// "insert into history_earnings_dates (symbol,time,datetime,title) values ('BABA', '07:00:00', '2021-11-04', 'Alibaba Group Holding')",
|
||||
// "delete from history_earnings_dates where symbol='BABA' and datetime=to_date('2021-11-04', '%Y-%m-%d')",
|
||||
// "select * from history_earnings_dates"
|
||||
|
||||
// "create table sf1 (symbol varchar(8), dimension varchar(3), calendar_date date, date_key date, report_period date, last_updated date, accoci float, assets float, assetsavg float, assetsc float, assetsnc float, assetturnover float, bvps float, capex float, cashneq float, cashnequsd float, cor float, consolinc float, currentratio float, de float, debt float, debtc float, debtnc float, debtusd float, deferredrev float, depamor float, deposits float, divyield float, dps float, ebit float, ebitda float, ebitdamargin float, ebitdausd float, ebitusd float, ebt float, eps float, epsdil float, epsusd float, equity float, equityavg float, equityusd float, ev float, evebit float, evebitda float, fcf float, fcfps float, fxusd float, gp float, grossmargin float, intangibles float, intexp float, invcap float, invcapavg float, inventory float, investments float, investmentsc float, investmentsnc float, liabilities float, liabilitiesc float, liabilitiesnc float, marketcap float, ncf float, ncfbus float, ncfcommon float, ncfdebt float, ncfdiv float, ncff float, ncfi float, ncfinv float, ncfo float, ncfx float, netinc float, netinccmn float, netinccmnusd float, netincdis float, netincnci float, netmargin float, opex float, opinc float, payables float, payoutratio float, pb float, pe float, pe1 float, ppnenet float, prefdivis float, price float, ps float, ps1 float, receivables float, retearn float, revenue float, revenueusd float, rnd float, roa float, roe float, roic float, ros float, sbcomp float, sgna float, sharefactor float, sharesbas float, shareswa float, shareswadil float, sps float, tangibles float, taxassets float, taxexp float, taxliabilities float, tbvps float, workingcapital float)",
|
||||
// "set 'DATE_FORMAT' = '%Y-%m-%d'",
|
||||
// "load into sf1 '/srv/SHARADAR_SF1.csv'",
|
||||
// "create index sf1_symbol on sf1(symbol)",
|
||||
// "set 'USE_INDEXSCAN' = 'false'",
|
||||
// "select dimension, to_string(calendar_date, '%d.%m.%Y'), pp(eps, \"%.2f\"), pp(shareswadil), pp(revenue), pp(netinc), pp(cashneq), pp(assets), pp(debt), pp(ncfdebt), pp(roe*100), pp(intangibles), calendar_date from sf1 where symbol = 'MU' and dimension = 'ARQ' order by dimension, calendar_date desc limit 5",
|
||||
// "set 'USE_INDEXSCAN' = 'true'",
|
||||
// "select dimension, to_string(calendar_date, '%d.%m.%Y'), pp(eps, \"%.2f\"), pp(shareswadil), pp(revenue), pp(netinc), pp(cashneq), pp(assets), pp(debt), pp(ncfdebt), pp(roe*100), pp(intangibles), calendar_date from sf1 where symbol = 'MU' and dimension = 'ARQ' order by dimension, calendar_date desc limit 5"
|
||||
|
||||
"create table a (i integer not null, s varchar(64))",
|
||||
"insert into a (i, s) values(1, 'one')",
|
||||
"insert into a (i, s) values(2, 'two')",
|
||||
"insert into a (i, s) values(2, 'second two')",
|
||||
"insert into a (i, s) values(3, 'three')",
|
||||
"create index a_i on a(i)",
|
||||
"create index a_s on a(s)",
|
||||
"set 'USE_INDEXSCAN' = 'true'",
|
||||
"select * from a where 1 = i",
|
||||
"select * from a where i = 2",
|
||||
"select max(i) from a where s = 'two'"
|
||||
};
|
||||
|
||||
usql::USql uSql{};
|
||||
|
||||
for (const auto &command : sql_commands) {
|
||||
for (const auto &command : c_sql_commands) {
|
||||
time_point<high_resolution_clock> start_time = high_resolution_clock::now();
|
||||
auto result = uSql.execute(command);
|
||||
time_point<high_resolution_clock> end_time = high_resolution_clock::now();
|
||||
@@ -177,11 +147,11 @@ void debug() {
|
||||
|
||||
// Entry point: builds with NDEBUG defined start the interactive repl(),
// all other builds run the scripted debug() session.
int main(int argc, char *argv[]) {
#ifdef NDEBUG
    repl();
#else
    debug();
#endif

    return 0;
}
|
||||
|
||||
2
row.cpp
2
row.cpp
@@ -40,7 +40,7 @@ int ColBooleanValue::compare(ColValue &other) {
|
||||
return m_bool == other.getBoolValue() ? 0 : m_bool && !other.getBoolValue() ? -1 : 1; // true first
|
||||
}
|
||||
|
||||
Row::Row(const Row &other) : m_columns(other.m_columns.size()) {
|
||||
Row::Row(const Row &other) : m_columns(other.m_columns.size()), m_visible(other.m_visible) {
|
||||
for (int i = 0; i < other.m_columns.size(); i++) {
|
||||
if (other[i].isNull())
|
||||
continue; // for null NOP
|
||||
|
||||
24
row.h
24
row.h
@@ -36,7 +36,7 @@ namespace usql {
|
||||
|
||||
int compare(ColValue &other) override;
|
||||
|
||||
virtual ~ColNullValue() = default;
|
||||
~ColNullValue() override = default;
|
||||
};
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ namespace usql {
|
||||
|
||||
long m_integer;
|
||||
|
||||
virtual ~ColIntegerValue() = default;
|
||||
~ColIntegerValue() override = default;
|
||||
};
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ namespace usql {
|
||||
|
||||
int compare(ColValue &other) override;
|
||||
|
||||
virtual ~ColDoubleValue() = default;
|
||||
~ColDoubleValue() override = default;
|
||||
|
||||
double m_double;
|
||||
};
|
||||
@@ -109,7 +109,7 @@ namespace usql {
|
||||
|
||||
int compare(ColValue &other) override;
|
||||
|
||||
virtual ~ColDateValue() = default;
|
||||
~ColDateValue() override = default;
|
||||
|
||||
long m_date; // seconds since epoch for now
|
||||
};
|
||||
@@ -127,7 +127,7 @@ namespace usql {
|
||||
|
||||
int compare(ColValue &other) override;
|
||||
|
||||
virtual ~ColBooleanValue() = default;
|
||||
~ColBooleanValue() override = default;
|
||||
|
||||
bool m_bool;
|
||||
};
|
||||
@@ -137,7 +137,7 @@ namespace usql {
|
||||
class Row {
|
||||
|
||||
public:
|
||||
explicit Row(int cols_count) : m_columns(cols_count) {};
|
||||
explicit Row(int cols_count, bool visible) : m_columns(cols_count), m_visible(visible) {};
|
||||
Row(const Row &other);
|
||||
|
||||
Row &operator=(Row other);
|
||||
@@ -169,16 +169,22 @@ namespace usql {
|
||||
return (ColValue &) *std::get_if<ColDateValue>(&m_columns[i]);
|
||||
case 5:
|
||||
return (ColValue &) *std::get_if<ColBooleanValue>(&m_columns[i]);
|
||||
default:
|
||||
throw Exception("should not happen");
|
||||
}
|
||||
throw Exception("should not happen");
|
||||
}
|
||||
|
||||
int compare(const Row &other) const;
|
||||
[[nodiscard]] int compare(const Row &other) const;
|
||||
|
||||
void print(const std::vector<ColDefNode> &col_defs);
|
||||
static int print_get_column_size(const ColDefNode &col_def);
|
||||
|
||||
[[nodiscard]] bool is_visible() const { return m_visible; };
|
||||
void set_visible() { m_visible = true; };
|
||||
void set_deleted() { m_visible = false; };  // hide the row: is_visible() reports false afterwards
|
||||
|
||||
private:
|
||||
// xx std::vector<std::unique_ptr<ColValue>> m_columns;
|
||||
bool m_visible;
|
||||
std::vector<std::variant<ColNullValue, ColIntegerValue, ColDoubleValue, ColStringValue, ColDateValue, ColBooleanValue>> m_columns;
|
||||
};
|
||||
|
||||
|
||||
153
table.cpp
153
table.cpp
@@ -19,9 +19,10 @@ Table::Table(const Table &other) {
|
||||
m_name = other.m_name;
|
||||
m_col_defs = other.m_col_defs;
|
||||
m_rows.reserve(other.m_rows.size());
|
||||
for(const Row& orig_row : other.m_rows) {
|
||||
commit_copy_of_row(orig_row);
|
||||
}
|
||||
|
||||
for(const Row& orig_row : other.m_rows)
|
||||
if (orig_row.is_visible())
|
||||
commit_copy_of_row((Row&)orig_row);
|
||||
}
|
||||
|
||||
ColDefNode Table::get_column_def(const std::string &col_name) {
|
||||
@@ -44,30 +45,39 @@ ColDefNode Table::get_column_def(int col_index) {
|
||||
}
|
||||
|
||||
// Appends a row with one slot per table column to m_rows and returns a
// reference to it. The row starts out not visible; validate_row() marks it
// visible once it passes validation.
Row& Table::create_empty_row() {
    // C++17: emplace_back returns a reference to the element it created
    return m_rows.emplace_back(columns_count(), false);
}
|
||||
|
||||
std::string Table::csv_string() {
|
||||
// header
|
||||
const size_t row_size_est = m_col_defs.size() * 16;
|
||||
|
||||
std::string out_string;
|
||||
out_string.reserve(m_rows.size() * row_size_est);
|
||||
// TODO improve it here https://www.cplusplus.com/reference/string/string/reserve/
|
||||
|
||||
// header
|
||||
for(int i = 0; i < m_col_defs.size(); i++) {
|
||||
if (i > 0) out_string += ",";
|
||||
out_string += m_col_defs[i].name;
|
||||
}
|
||||
|
||||
// rows
|
||||
for (auto & m_row : m_rows) {
|
||||
std::string csv_line{"\n"};
|
||||
for(int i = 0; i < m_col_defs.size(); i++) {
|
||||
if (i > 0) csv_line += ",";
|
||||
for (auto & row : m_rows) {
|
||||
if (row.is_visible()) {
|
||||
std::string csv_line{"\n"};
|
||||
csv_line.reserve(row_size_est);
|
||||
|
||||
auto & col = m_row[i];
|
||||
if (!col.isNull()) {
|
||||
csv_line += col.getStringValue(); // TODO handle enclosing commas etc
|
||||
for (int i = 0; i < m_col_defs.size(); i++) {
|
||||
if (i > 0) csv_line += ",";
|
||||
|
||||
auto &col = row[i];
|
||||
if (!col.isNull()) {
|
||||
csv_line += col.getStringValue(); // TODO handle enclosing commas etc
|
||||
}
|
||||
}
|
||||
out_string += csv_line;
|
||||
}
|
||||
out_string += csv_line;
|
||||
}
|
||||
|
||||
return out_string;
|
||||
@@ -89,18 +99,18 @@ int Table::load_csv_file(const std::string &filename) {
|
||||
int line_size = 128;
|
||||
|
||||
std::ifstream in(filename, std::ifstream::ate | std::ifstream::binary);
|
||||
auto file_size = in.tellg();
|
||||
auto file_size = in.tellg();
|
||||
|
||||
std::ifstream infile(filename);
|
||||
if (infile.good()) {
|
||||
std::string sLine;
|
||||
std::getline(infile, sLine);
|
||||
line_size = (int)sLine.size();
|
||||
line_size = (int)sLine.size() + 1;
|
||||
}
|
||||
infile.close();
|
||||
|
||||
if (file_size > 0) {
|
||||
auto new_size = m_rows.size() + int(file_size / line_size * 1.20);
|
||||
auto new_size = m_rows.size() + int((file_size / line_size) * 1.20);
|
||||
m_rows.reserve(new_size);
|
||||
}
|
||||
|
||||
@@ -116,7 +126,7 @@ void Table::create_row_from_vector(const std::vector<ColDefNode> &colDefs, const
|
||||
Row& new_row = create_empty_row();
|
||||
|
||||
// copy values
|
||||
for (int i = 0; i < std::min<int>(columns_count(), csv_line.size()); i++) {
|
||||
for (size_t i = 0; i < std::min<size_t>(columns_count(), csv_line.size()); i++) {
|
||||
const ColDefNode & col_def = colDefs[i];
|
||||
|
||||
if (csv_line[i].empty()) {
|
||||
@@ -181,16 +191,23 @@ void Table::print() {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void Table::commit_row(const Row &row) {
|
||||
size_t Table::get_rowid(const Row &row) const {
|
||||
const Row* row_addr = (Row*)&row;
|
||||
const Row* begin_addr = &(*m_rows.begin());
|
||||
|
||||
return row_addr - begin_addr;
|
||||
}
|
||||
|
||||
// Validates `row` and registers it in the table's indexes.
// On failure the last row of m_rows is removed and the exception is passed on
// to the caller.
// NOTE(review): this assumes `row` is the most recently created row (as
// returned by create_empty_row) - confirm against callers.
void Table::commit_row(Row &row) {
    try {
        validate_row(row);
        index_row(row);
    } catch (const Exception &) {
        m_rows.pop_back();
        throw;  // rethrow the original exception object rather than a copy of it
    }
}
|
||||
|
||||
void Table::commit_copy_of_row(const Row &row) {
|
||||
void Table::commit_copy_of_row(Row &row) {
|
||||
Row& new_row = create_empty_row();
|
||||
|
||||
for(int i = 0; i < m_col_defs.size(); i++) {
|
||||
@@ -215,6 +232,7 @@ void Table::commit_copy_of_row(const Row &row) {
|
||||
}
|
||||
|
||||
validate_row(new_row);
|
||||
index_row(row);
|
||||
}
|
||||
|
||||
void Table::validate_column(const ColDefNode *col_def, ValueNode *col_val) {
|
||||
@@ -235,26 +253,35 @@ void Table::validate_column(const ColDefNode *col_def, ColValue &col_val) {
|
||||
}
|
||||
}
|
||||
|
||||
void Table::validate_row(const Row &row) {
|
||||
void Table::validate_row(Row &row) {
|
||||
for(int i = 0; i < m_col_defs.size(); i++) {
|
||||
ColDefNode col_def = m_col_defs[i];
|
||||
ColValue &col_val = row[i];
|
||||
|
||||
validate_column(&col_def, col_val);
|
||||
}
|
||||
row.set_visible();
|
||||
}
|
||||
|
||||
// Stores a copy of `index` in this table's list of indexes. Existing rows are
// not indexed here; index_rows() does that.
void Table::create_index(const Index<IndexValue>& index) {
    m_indexes.emplace_back(index);
}
|
||||
|
||||
void Table::drop_index(const std::string &column) {
|
||||
throw Exception("implement me! Table::drop_index(const std::string &column)");
|
||||
bool Table::drop_index(const std::string &index_name) {
|
||||
auto it = std::find_if(m_indexes.begin(), m_indexes.end(),
|
||||
[&index_name](const Index<IndexValue> &idx) {
|
||||
return idx.get_index_name() == index_name;
|
||||
});
|
||||
|
||||
if (it != m_indexes.end()) {
|
||||
m_indexes.erase(it);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Table::index_row(Index<IndexValue> &index, const Row &row, const size_t rowid) {
|
||||
ColDefNode col_def = get_column_def(index.get_column_name());
|
||||
|
||||
void Table::index_row(Index<IndexValue> &index, const ColDefNode &col_def, const Row &row, const size_t rowid) {
|
||||
if (col_def.type==ColumnType::integer_type) {
|
||||
index.insert(row[col_def.order].getIntValue(), rowid);
|
||||
} else if (col_def.type==ColumnType::varchar_type) {
|
||||
@@ -264,22 +291,65 @@ void Table::index_row(Index<IndexValue> &index, const Row &row, const size_t row
|
||||
}
|
||||
}
|
||||
|
||||
void Table::index_row(const Row &row, const size_t rowid) {
|
||||
for (auto &i : m_indexes) {
|
||||
index_row(i, row, rowid);
|
||||
// Removes `rowid` from `index` under the key held by `row` in the column
// described by `col_def`. Only integer and varchar columns are supported;
// any other column type throws Exception.
void Table::unindex_row(Index<IndexValue> &index, const ColDefNode &col_def, const Row &row, const size_t rowid) {
    switch (col_def.type) {
        case ColumnType::integer_type:
            index.remove(row[col_def.order].getIntValue(), rowid);
            break;
        case ColumnType::varchar_type:
            index.remove(row[col_def.order].getStringValue(), rowid);
            break;
        default:
            // message names this function (it previously pointed at index_row)
            throw Exception("implement me! Table::unindex_row(const Row &row)");
    }
}
|
||||
|
||||
// Moves `rowid` inside `index` from the key found in `old_row` to the key
// found in `new_row`, both read from the column described by `col_def`.
void Table::reindex_row(Index<IndexValue> &index, const ColDefNode &col_def, const Row &old_row, const Row &new_row, size_t rowid) {
    // drop the entry for the old key first, then register the new key
    Table::unindex_row(index, col_def, old_row, rowid);
    Table::index_row(index, col_def, new_row, rowid);
}
|
||||
|
||||
|
||||
void Table::index_row(const Row &row) {
|
||||
if (!m_indexes.empty()) {
|
||||
const size_t rowid = get_rowid(row);
|
||||
for (auto &idx : m_indexes) {
|
||||
ColDefNode cDef = get_column_def(idx.get_column_name());
|
||||
index_row(idx, cDef, row, rowid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Table::unindex_row(const Row &row) {
|
||||
if (!m_indexes.empty()) {
|
||||
const size_t rowid = get_rowid(row);
|
||||
for (auto &idx : m_indexes) {
|
||||
ColDefNode cDef = get_column_def(idx.get_column_name());
|
||||
unindex_row(idx, cDef, row, rowid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Table::reindex_row(const Row &old_row, const Row &new_row) {
|
||||
if (!m_indexes.empty()) {
|
||||
const size_t rowid = get_rowid(new_row);
|
||||
for (auto &idx : m_indexes) {
|
||||
ColDefNode cDef = get_column_def(idx.get_column_name());
|
||||
reindex_row(idx, cDef, old_row, new_row, rowid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Table::index_rows(const std::string &index_name) {
|
||||
auto index = get_index(index_name);
|
||||
// TODO handle null pointer
|
||||
|
||||
ColDefNode cDef = get_column_def(index->get_column_name());
|
||||
size_t rowid = 0;
|
||||
for(const Row& r : m_rows) {
|
||||
index_row(*index, r, rowid);
|
||||
index_row(*index, cDef, r, rowid);
|
||||
rowid++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Index<IndexValue> * Table::get_index(const std::string &index_name) {
|
||||
auto it = std::find_if(m_indexes.begin(), m_indexes.end(),
|
||||
[&index_name](const Index<IndexValue> &idx) {
|
||||
@@ -303,4 +373,27 @@ Index<IndexValue> * Table::get_index_for_column(const std::string &col_name) {
|
||||
}
|
||||
|
||||
|
||||
// Advances the scanner and returns a pointer to the next visible row, or
// nullptr once there are no more rows. Rows that are not visible are skipped.
Row *Table::rows_scanner::next() {
    if (!m_use_rowids) {
        // full scan: walk m_rows in storage order
        while (m_fscan_itr != m_table->m_rows.end()) {
            Row &current = *m_fscan_itr;
            ++m_fscan_itr;
            if (current.is_visible())
                return &current;
        }
        return nullptr;
    }

    // rowid scan: visit only the rows listed in m_rowids
    while (m_rowids_idx < m_rowids.size()) {
        Row *candidate = &m_table->m_rows[m_rowids[m_rowids_idx]];
        ++m_rowids_idx;
        if (candidate->is_visible())
            return candidate;
    }
    return nullptr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
43
table.h
43
table.h
@@ -4,6 +4,7 @@
|
||||
#include "parser.h"
|
||||
#include "row.h"
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <iterator> // For std::forward_iterator_tag
|
||||
@@ -26,14 +27,15 @@ struct Table {
|
||||
[[nodiscard]] int columns_count() const { return (int) m_col_defs.size(); };
|
||||
[[nodiscard]] size_t rows_count() const { return m_rows.size(); };
|
||||
|
||||
Row& create_empty_row();
|
||||
void commit_row(const Row &row);
|
||||
void commit_copy_of_row(const Row &row);
|
||||
Row& get_row(int rowid) { return m_rows[rowid]; };
|
||||
[[nodiscard]] size_t get_rowid(const Row &row) const;
|
||||
|
||||
Row &create_empty_row();
|
||||
void commit_row(Row &row);
|
||||
void commit_copy_of_row(Row &row);
|
||||
|
||||
static void validate_column(const ColDefNode *col_def, ValueNode *col_val);
|
||||
static void validate_column(const ColDefNode *col_def, ColValue &col_val);
|
||||
void validate_row(const Row &row);
|
||||
void validate_row(Row &row);
|
||||
|
||||
std::string csv_string();
|
||||
int load_csv_string(const std::string &content);
|
||||
@@ -52,19 +54,36 @@ struct Table {
|
||||
void create_row_from_vector(const std::vector<ColDefNode> &colDefs, const std::vector<std::string> &csv_line);
|
||||
|
||||
void create_index(const Index<IndexValue>& index);
|
||||
void drop_index(const std::string &column);
|
||||
void index_row(Index<IndexValue> &index, const Row &row, const size_t rowid);
|
||||
void index_row(const Row &row, const size_t rowid);
|
||||
bool drop_index(const std::string &index_name);
|
||||
|
||||
static void index_row(Index<IndexValue> &index, const ColDefNode &col_def, const Row &row, size_t rowid);
|
||||
static void unindex_row(Index<IndexValue> &index, const ColDefNode &col_def, const Row &row, size_t rowid);
|
||||
static void reindex_row(Index<IndexValue> &index, const ColDefNode &col_def, const Row &old_row, const Row &new_row, size_t rowid);
|
||||
|
||||
void index_row(const Row &row);
|
||||
void unindex_row(const Row &row);
|
||||
void reindex_row(const Row &old_row, const Row &new_row);
|
||||
|
||||
void index_rows(const std::string &index_name);
|
||||
|
||||
Index<IndexValue> * get_index(const std::string &index_name);
|
||||
Index<IndexValue> * get_index_for_column(const std::string &col_name);
|
||||
std::vector<int> index_search(const std::string &col_name, IndexValue key);
|
||||
|
||||
|
||||
typedef std::vector<Row>::iterator iterator;
|
||||
iterator fs_begin() { return m_rows.begin(); }
|
||||
iterator fs_end() { return m_rows.end(); }
|
||||
struct rows_scanner {
|
||||
explicit rows_scanner(Table *tbl) : m_use_rowids(false), m_table(tbl), m_fscan_itr(tbl->m_rows.begin()) {}
|
||||
rows_scanner(Table *tbl, std::vector<rowid_t> rowids) : m_use_rowids(true), m_table(tbl), m_rowids(std::move(rowids)), m_rowids_idx(0) {}
|
||||
|
||||
Row *next();
|
||||
|
||||
private:
|
||||
bool m_use_rowids;
|
||||
Table * m_table;
|
||||
std::vector<Row>::iterator m_fscan_itr;
|
||||
std::vector<rowid_t> m_rowids; // TODO long here
|
||||
size_t m_rowids_idx{};
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
21
usql.cpp
21
usql.cpp
@@ -66,7 +66,7 @@ bool USql::eval_relational_operator(const RelationalOperatorNode &filter, Table
|
||||
return !all_null;
|
||||
return false;
|
||||
} else if (left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::int_value) {
|
||||
comparator = left_value->getIntegerValue() - right_value->getIntegerValue();
|
||||
comparator = (double)(left_value->getIntegerValue() - right_value->getIntegerValue());
|
||||
} else if ((left_value->node_type == NodeType::int_value && right_value->node_type == NodeType::float_value) ||
|
||||
(left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::int_value) ||
|
||||
(left_value->node_type == NodeType::float_value && right_value->node_type == NodeType::float_value)) {
|
||||
@@ -414,20 +414,27 @@ USql::min_function(const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars,
|
||||
|
||||
// Looks up the table called `name` in m_tables and returns a pointer to it.
// Throws Exception when no table has that name.
Table *USql::find_table(const std::string &name) const {
    // the predicate is only used within this call, so `name` is captured by
    // reference instead of being copied into the closure
    const auto name_matches = [&name](const Table &t) { return t.m_name == name; };

    const auto table_def = std::find_if(std::begin(m_tables), std::end(m_tables), name_matches);
    if (table_def == std::end(m_tables))
        throw Exception("table not found (" + name + ")");

    // this member function is const, so the iterator yields a const Table;
    // callers expect a mutable pointer, hence the const_cast
    return const_cast<Table *>(&*table_def);
}
|
||||
|
||||
void USql::check_table_not_exists(const std::string &name) const {
|
||||
auto name_cmp = [name](const Table& t) { return t.m_name == name; };
|
||||
|
||||
auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp);
|
||||
if (table_def != std::end(m_tables)) {
|
||||
if (table_def != std::end(m_tables))
|
||||
throw Exception("table already exists");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void USql::check_index_not_exists(const std::string &index_name) {
|
||||
for (auto &table : m_tables)
|
||||
if (table.get_index(index_name) != nullptr)
|
||||
throw Exception("index already exists");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
42
usql.h
42
usql.h
@@ -20,19 +20,19 @@ public:
|
||||
private:
|
||||
std::unique_ptr<Table> execute(Node &node);
|
||||
|
||||
std::unique_ptr<Table> execute_create_table(CreateTableNode &node);
|
||||
std::unique_ptr<Table> execute_create_index(CreateIndexNode &node);
|
||||
std::unique_ptr<Table> execute_create_table_as_table(CreateTableAsSelectNode &node);
|
||||
std::unique_ptr<Table> execute_load(LoadIntoTableNode &node);
|
||||
std::unique_ptr<Table> execute_save(SaveTableNode &node);
|
||||
std::unique_ptr<Table> execute_drop(DropTableNode &node);
|
||||
static std::unique_ptr<Table> execute_set(SetNode &node);
|
||||
static std::unique_ptr<Table> execute_show(ShowNode &node);
|
||||
std::unique_ptr<Table> execute_create_table(const CreateTableNode &node);
|
||||
std::unique_ptr<Table> execute_create_index(const CreateIndexNode &node);
|
||||
std::unique_ptr<Table> execute_create_table_as_table(const CreateTableAsSelectNode &node);
|
||||
std::unique_ptr<Table> execute_load(const LoadIntoTableNode &node);
|
||||
std::unique_ptr<Table> execute_save(const SaveTableNode &node);
|
||||
std::unique_ptr<Table> execute_drop(const DropTableNode &node);
|
||||
static std::unique_ptr<Table> execute_set(const SetNode &node);
|
||||
static std::unique_ptr<Table> execute_show(const ShowNode &node);
|
||||
|
||||
std::unique_ptr<Table> execute_insert_into_table(InsertIntoTableNode &node);
|
||||
std::unique_ptr<Table> execute_insert_into_table(const InsertIntoTableNode &node);
|
||||
std::unique_ptr<Table> execute_select(SelectFromTableNode &node) const;
|
||||
std::unique_ptr<Table> execute_delete(DeleteFromTableNode &node);
|
||||
std::unique_ptr<Table> execute_update(UpdateTableNode &node);
|
||||
std::unique_ptr<Table> execute_delete(const DeleteFromTableNode &node);
|
||||
std::unique_ptr<Table> execute_update(const UpdateTableNode &node);
|
||||
|
||||
|
||||
private:
|
||||
@@ -56,13 +56,14 @@ private:
|
||||
[[nodiscard]] Table *find_table(const std::string &name) const;
|
||||
|
||||
void check_table_not_exists(const std::string &name) const;
|
||||
void check_index_not_exists(const std::string &index_name);
|
||||
|
||||
private:
|
||||
Parser m_parser;
|
||||
std::list<Table> m_tables;
|
||||
|
||||
static void execute_distinct(SelectFromTableNode &node, Table *result);
|
||||
static void execute_order_by(SelectFromTableNode &node, Table *table, Table *result);
|
||||
static void execute_order_by(SelectFromTableNode &node, Table *result);
|
||||
static void execute_offset_limit(OffsetLimitNode &node, Table *result);
|
||||
|
||||
static void expand_asterix_char(SelectFromTableNode &node, Table *table) ;
|
||||
@@ -82,15 +83,18 @@ private:
|
||||
|
||||
static std::unique_ptr<ValueNode> count_function(ColValue *agg_func_value, const std::vector<std::unique_ptr<ValueNode>> &evaluatedPars);
|
||||
|
||||
static void evalRowWhere(SelectFromTableNode &where_node,
|
||||
Table *src_table, Row *src_row,
|
||||
Table *rslt_table, Row *rslt_row,
|
||||
const std::vector<ColDefNode> &rslt_tbl_col_defs, const std::vector<int> &src_table_col_index,
|
||||
bool is_aggregated) ;
|
||||
static void eval_where_on_row(SelectFromTableNode &where_node,
|
||||
Table *src_table, Row *src_row,
|
||||
Table *rslt_table, Row *rslt_row,
|
||||
const std::vector<ColDefNode> &rslt_tbl_col_defs, const std::vector<int> &src_table_col_index,
|
||||
bool is_aggregated) ;
|
||||
|
||||
std::pair<bool, std::vector<int>> probe_index_scan(const Node *where, Table *table) const;
|
||||
std::pair<bool, std::vector<int>> look_for_usable_index(const Node *where, Table *table) const;
|
||||
std::pair<bool, std::vector<rowid_t>> probe_index_scan(const Node *where, Table *table) const;
|
||||
std::pair<bool, std::vector<rowid_t>> look_for_usable_index(const Node *where, Table *table) const;
|
||||
bool normalize_where(const Node *node) const;
|
||||
|
||||
Table::rows_scanner get_iterator(Table *table, const Node *where) const;
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
30
usql_ddl.cpp
30
usql_ddl.cpp
@@ -9,7 +9,7 @@ namespace usql {
|
||||
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_create_table(CreateTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_create_table(const CreateTableNode &node) {
|
||||
check_table_not_exists(node.table_name);
|
||||
|
||||
Table table{node.table_name, node.cols_defs};
|
||||
@@ -19,21 +19,23 @@ std::unique_ptr<Table> USql::execute_create_table(CreateTableNode &node) {
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_create_index(CreateIndexNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_create_index(const CreateIndexNode &node) {
|
||||
Table *table_def = find_table(node.table_name); // throws exception if not found
|
||||
ColDefNode col_def = table_def->get_column_def(node.column_name); // throws exception if not found
|
||||
|
||||
if (table_def->get_index(node.index_name) != nullptr) throw Exception("index already exists");
|
||||
check_index_not_exists(node.index_name);
|
||||
if (col_def.null) throw Exception("index on not null supported only");
|
||||
if (table_def->get_index_for_column(node.column_name) != nullptr) throw Exception("column is already indexed");
|
||||
|
||||
IndexedDataType type;
|
||||
if (col_def.type == ColumnType::integer_type) type = IndexedDataType::integer;
|
||||
else if (col_def.type == ColumnType::varchar_type) type = IndexedDataType::string;
|
||||
else throw Exception("creating index on unsupported type");
|
||||
if (col_def.type == ColumnType::integer_type)
|
||||
type = IndexedDataType::integer;
|
||||
else if (col_def.type == ColumnType::varchar_type)
|
||||
type = IndexedDataType::string;
|
||||
else
|
||||
throw Exception("creating index on unsupported type");
|
||||
|
||||
Index<IndexValue> i{node.index_name, node.column_name, type};
|
||||
table_def->create_index(i);
|
||||
table_def->create_index({node.index_name, node.column_name, type});
|
||||
|
||||
table_def->index_rows(node.index_name);
|
||||
|
||||
@@ -41,7 +43,7 @@ std::unique_ptr<Table> USql::execute_create_index(CreateIndexNode &node) {
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_create_table_as_table(CreateTableAsSelectNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_create_table_as_table(const CreateTableAsSelectNode &node) {
|
||||
check_table_not_exists(node.table_name);
|
||||
|
||||
auto select = execute_select((SelectFromTableNode &) *node.select_table);
|
||||
@@ -64,7 +66,7 @@ std::unique_ptr<Table> USql::execute_create_table_as_table(CreateTableAsSelectNo
|
||||
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_drop(DropTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_drop(const DropTableNode &node) {
|
||||
auto name_cmp = [node](const Table& t) { return t.m_name == node.table_name; };
|
||||
|
||||
auto table_def = std::find_if(begin(m_tables), end(m_tables), name_cmp);
|
||||
@@ -76,12 +78,12 @@ std::unique_ptr<Table> USql::execute_drop(DropTableNode &node) {
|
||||
throw Exception("table not found (" + node.table_name + ")");
|
||||
}
|
||||
|
||||
std::unique_ptr<Table> USql::execute_set(SetNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_set(const SetNode &node) {
|
||||
Settings::set_setting(node.name, node.value);
|
||||
return create_stmt_result_table(0, "set succeeded", 1);
|
||||
}
|
||||
|
||||
std::unique_ptr<Table> USql::execute_show(ShowNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_show(const ShowNode &node) {
|
||||
std::string value = Settings::get_setting(node.name);
|
||||
return create_stmt_result_table(0, "show succeeded: " + value, 1);
|
||||
}
|
||||
@@ -106,7 +108,7 @@ std::unique_ptr<Table> USql::create_stmt_result_table(long code, const std::stri
|
||||
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_load(LoadIntoTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_load(const LoadIntoTableNode &node) {
|
||||
// find source table
|
||||
Table *table_def = find_table(node.table_name);
|
||||
|
||||
@@ -122,7 +124,7 @@ std::unique_ptr<Table> USql::execute_load(LoadIntoTableNode &node) {
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_save(SaveTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_save(const SaveTableNode &node) {
|
||||
// find source table
|
||||
Table *table_def = find_table(node.table_name);
|
||||
|
||||
|
||||
203
usql_dml.cpp
203
usql_dml.cpp
@@ -7,64 +7,7 @@
|
||||
namespace usql {
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) const {
|
||||
// find source table
|
||||
Table *table = find_table(node.table_name);
|
||||
|
||||
// expand *
|
||||
expand_asterix_char(node, table);
|
||||
|
||||
// create result table
|
||||
std::vector<ColDefNode> result_tbl_col_defs{};
|
||||
std::vector<int> source_table_col_index{};
|
||||
for (int i = 0; i < node.cols_names->size(); i++) {
|
||||
SelectColNode * col_node = &node.cols_names->operator[](i);
|
||||
auto [src_tbl_col_index, rst_tbl_col_def] = get_column_definition(table, col_node, i);
|
||||
|
||||
source_table_col_index.push_back(src_tbl_col_index);
|
||||
result_tbl_col_defs.push_back(rst_tbl_col_def);
|
||||
}
|
||||
|
||||
// check for aggregate function
|
||||
bool is_aggregated = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
|
||||
|
||||
// prepare result table structure
|
||||
auto result = std::make_unique<Table>("result", result_tbl_col_defs);
|
||||
|
||||
// replace possible order by col names to col indexes and validate
|
||||
setup_order_columns(node.order_by, result.get());
|
||||
|
||||
|
||||
// execute access plan
|
||||
Row* new_row = nullptr;
|
||||
|
||||
// look for index to use
|
||||
auto [use_index, rowids] = probe_index_scan(node.where.get(), table);
|
||||
|
||||
// index scan
|
||||
if (use_index) {
|
||||
for (int & rowid : rowids) {
|
||||
evalRowWhere(node, table, (Row *) &table->get_row(rowid), result.get(), new_row, result_tbl_col_defs, source_table_col_index, is_aggregated);
|
||||
}
|
||||
|
||||
// full scan
|
||||
} else {
|
||||
for (auto row = table->fs_begin(); row != table->fs_end(); ++row) {
|
||||
evalRowWhere(node, table, &(*row), result.get(), new_row, result_tbl_col_defs, source_table_col_index, is_aggregated);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
execute_distinct(node, result.get());
|
||||
|
||||
execute_order_by(node, table, result.get());
|
||||
|
||||
execute_offset_limit(node.offset_limit, result.get());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::pair<bool, std::vector<int>> USql::probe_index_scan(const Node *where, Table *table) const {
|
||||
std::pair<bool, std::vector<rowid_t>> USql::probe_index_scan(const Node *where, Table *table) const {
|
||||
bool indexscan_possible = normalize_where(where);
|
||||
|
||||
if (indexscan_possible && Settings::get_bool_setting("USE_INDEXSCAN")) {
|
||||
@@ -73,10 +16,10 @@ std::pair<bool, std::vector<int>> USql::probe_index_scan(const Node *where, Tabl
|
||||
}
|
||||
|
||||
// no index scan
|
||||
return std::make_pair(false, std::vector<int>{});
|
||||
return std::make_pair(false, std::vector<rowid_t>{});
|
||||
}
|
||||
|
||||
std::pair<bool, std::vector<int>> USql::look_for_usable_index(const Node *where, Table *table) const {
|
||||
std::pair<bool, std::vector<rowid_t>> USql::look_for_usable_index(const Node *where, Table *table) const {
|
||||
if (where->node_type == NodeType::relational_operator) {
|
||||
auto * ron = (RelationalOperatorNode *)where;
|
||||
if (ron->op == RelationalOperatorType::equal) {
|
||||
@@ -87,7 +30,7 @@ std::pair<bool, std::vector<int>> USql::look_for_usable_index(const Node *where,
|
||||
|
||||
Index<IndexValue> * used_index = table->get_index_for_column(col_name);
|
||||
if (used_index != nullptr) {
|
||||
std::vector<int> rowids;
|
||||
std::vector<rowid_t> rowids;
|
||||
|
||||
if (used_index->get_data_type() == IndexedDataType::integer)
|
||||
rowids = used_index->search(((ValueNode *) ron->right.get())->getIntegerValue());
|
||||
@@ -111,7 +54,7 @@ std::pair<bool, std::vector<int>> USql::look_for_usable_index(const Node *where,
|
||||
}
|
||||
|
||||
// no index available
|
||||
return std::make_pair(false, std::vector<int>{});
|
||||
return std::make_pair(false, std::vector<rowid_t>{});
|
||||
}
|
||||
|
||||
bool USql::normalize_where(const Node *node) const {
|
||||
@@ -137,12 +80,12 @@ bool USql::normalize_where(const Node *node) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void USql::evalRowWhere(SelectFromTableNode &where_node,
|
||||
Table *src_table, Row *src_row,
|
||||
Table *rslt_table, Row *rslt_row,
|
||||
const std::vector<ColDefNode> &rslt_tbl_col_defs,
|
||||
const std::vector<int> &src_table_col_index,
|
||||
bool is_aggregated) {
|
||||
void USql::eval_where_on_row(SelectFromTableNode &where_node,
|
||||
Table *src_table, Row *src_row,
|
||||
Table *rslt_table, Row *rslt_row,
|
||||
const std::vector<ColDefNode> &rslt_tbl_col_defs,
|
||||
const std::vector<int> &src_table_col_index,
|
||||
bool is_aggregated) {
|
||||
|
||||
if (eval_where(where_node.where.get(), src_table, *src_row)) {
|
||||
// prepare empty src_row and copy column values
|
||||
@@ -224,7 +167,7 @@ void USql::execute_distinct(SelectFromTableNode &node, Table *result) {
|
||||
result->m_rows.erase(std::unique(result->m_rows.begin(), result->m_rows.end()), result->m_rows.end());
|
||||
}
|
||||
|
||||
void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *result) {
|
||||
void USql::execute_order_by(SelectFromTableNode &node, Table *result) {
|
||||
if (node.order_by.empty()) return;
|
||||
|
||||
auto compare_rows = [&node, &result](const Row &a, const Row &b) {
|
||||
@@ -252,6 +195,21 @@ void USql::execute_offset_limit(OffsetLimitNode &node, Table *result) {
|
||||
result->m_rows.erase(result->m_rows.begin() + node.limit, result->m_rows.end());
|
||||
}
|
||||
|
||||
/**
 * Evaluates a WHERE expression node against a single row.
 *
 * Dispatches on where->node_type:
 *  - true_node           -> always true
 *  - relational_operator -> result of eval_relational_operator
 *  - logical_operator    -> result of eval_logical_operator
 *
 * @param where root node of the condition
 * @param table table passed through to the operator evaluators
 * @param row   row passed through to the operator evaluators
 * @return outcome of the condition for this row
 * @throws Exception with the message "Wrong node type" for any other node type
 */
bool USql::eval_where(Node *where, Table *table, Row &row) {
    const NodeType kind = where->node_type;

    if (kind == NodeType::true_node)
        return true;

    if (kind == NodeType::relational_operator) {
        // just one condition
        auto &relation = *static_cast<RelationalOperatorNode *>(where);
        return eval_relational_operator(relation, table, row);
    }

    if (kind == NodeType::logical_operator) {
        auto &logical = *static_cast<LogicalOperatorNode *>(where);
        return eval_logical_operator(logical, table, row);
    }

    throw Exception("Wrong node type");
}
|
||||
|
||||
/**
 * Resolves the definition of one select-list column by forwarding the
 * column's value node and name to get_node_definition.
 *
 * @param table           table the column is resolved against
 * @param select_col_node select-list entry providing `value` and `name`
 * @param col_order       position of the column, passed through unchanged
 * @return whatever get_node_definition returns for these arguments
 */
std::tuple<int, ColDefNode> USql::get_column_definition(Table *table, SelectColNode *select_col_node, int col_order) {
    auto *value_node = select_col_node->value.get();
    auto &column_name = select_col_node->name;

    return get_node_definition(table, value_node, column_name, col_order);
}
|
||||
@@ -341,8 +299,7 @@ std::tuple<int, ColDefNode> USql::get_node_definition(Table *table, Node * node,
|
||||
|
||||
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_insert_into_table(InsertIntoTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_insert_into_table(const InsertIntoTableNode &node) {
|
||||
// find table
|
||||
Table *table_def = find_table(node.table_name);
|
||||
|
||||
@@ -368,45 +325,54 @@ std::unique_ptr<Table> USql::execute_insert_into_table(InsertIntoTableNode &node
|
||||
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_delete(DeleteFromTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_delete(const DeleteFromTableNode &node) {
|
||||
size_t affected_rows = 0;
|
||||
|
||||
// find source table
|
||||
Table *table = find_table(node.table_name);
|
||||
|
||||
// execute access plan
|
||||
auto affected_rows = table->rows_count();
|
||||
Table::rows_scanner i = get_iterator(table, node.where.get());
|
||||
while(Row *row = i.next()) {
|
||||
bool to_delete = eval_where(node.where.get(), table, *row);
|
||||
if (to_delete) {
|
||||
row->set_deleted();
|
||||
table->unindex_row(*row);
|
||||
|
||||
table->m_rows.erase(
|
||||
std::remove_if(table->m_rows.begin(), table->m_rows.end(),
|
||||
[&node, table](Row &row){return eval_where(node.where.get(), table, row);}),
|
||||
table->m_rows.end());
|
||||
|
||||
affected_rows -= table->rows_count();
|
||||
affected_rows++;
|
||||
}
|
||||
}
|
||||
|
||||
return create_stmt_result_table(0, "delete succeeded", affected_rows);
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<Table> USql::execute_update(UpdateTableNode &node) {
|
||||
std::unique_ptr<Table> USql::execute_update(const UpdateTableNode &node) {
|
||||
size_t affected_rows = 0;
|
||||
|
||||
// find source table
|
||||
Table *table = find_table(node.table_name);
|
||||
|
||||
// execute access plan
|
||||
int affected_rows = 0;
|
||||
for (auto row = begin(table->m_rows); row != end(table->m_rows); ++row) {
|
||||
// eval where for row
|
||||
if (eval_where(node.where.get(), table, *row)) {
|
||||
int i = 0;
|
||||
Table::rows_scanner i = get_iterator(table, node.where.get());
|
||||
while(Row *row = i.next()) {
|
||||
bool to_update = eval_where(node.where.get(), table, *row);
|
||||
if (to_update) {
|
||||
Row old_row = * row;
|
||||
|
||||
int col_idx = 0;
|
||||
for (const auto& col : node.cols_names) {
|
||||
// TODO cache it like in select
|
||||
ColDefNode col_def = table->get_column_def(col.col_name);
|
||||
std::unique_ptr<ValueNode> new_val = eval_arithmetic_operator(col_def.type,
|
||||
static_cast<ArithmeticalOperatorNode &>(*node.values[i]),
|
||||
table, *row);
|
||||
static_cast<ArithmeticalOperatorNode &>(*node.values[col_idx]), table, *row);
|
||||
|
||||
usql::Table::validate_column(&col_def, new_val.get());
|
||||
row->setColumnValue(&col_def, new_val.get());
|
||||
i++;
|
||||
col_idx++;
|
||||
}
|
||||
table->reindex_row(old_row, *row);
|
||||
|
||||
affected_rows++;
|
||||
// TODO the problem here is that when it fails on one row, the other rows should not be processed either
|
||||
}
|
||||
@@ -416,20 +382,59 @@ std::unique_ptr<Table> USql::execute_update(UpdateTableNode &node) {
|
||||
}
|
||||
|
||||
|
||||
bool USql::eval_where(Node *where, Table *table, Row &row) {
|
||||
switch (where->node_type) {
|
||||
case NodeType::true_node:
|
||||
return true;
|
||||
case NodeType::relational_operator: // just one condition
|
||||
return eval_relational_operator(*((RelationalOperatorNode *) where), table, row);
|
||||
case NodeType::logical_operator:
|
||||
return eval_logical_operator(*((LogicalOperatorNode *) where), table, row);
|
||||
default:
|
||||
throw Exception("Wrong node type");
|
||||
std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) const {
|
||||
// find source table
|
||||
Table *table = find_table(node.table_name);
|
||||
|
||||
// expand *
|
||||
expand_asterix_char(node, table);
|
||||
|
||||
// create result table
|
||||
std::vector<ColDefNode> result_tbl_col_defs{};
|
||||
std::vector<int> source_table_col_index{};
|
||||
for (int i = 0; i < node.cols_names->size(); i++) {
|
||||
SelectColNode *col_node = &node.cols_names->operator[](i);
|
||||
auto [src_tbl_col_index, rst_tbl_col_def] = get_column_definition(table, col_node, i);
|
||||
|
||||
source_table_col_index.push_back(src_tbl_col_index);
|
||||
result_tbl_col_defs.push_back(rst_tbl_col_def);
|
||||
}
|
||||
|
||||
return false;
|
||||
// check for aggregate function
|
||||
bool is_aggregated = check_for_aggregate_only_functions(node, result_tbl_col_defs.size());
|
||||
|
||||
// prepare result table structure
|
||||
auto result = std::make_unique<Table>("result", result_tbl_col_defs);
|
||||
|
||||
// replace possible order by col names to col indexes and validate
|
||||
setup_order_columns(node.order_by, result.get());
|
||||
|
||||
// execute access plan
|
||||
Row *new_row = nullptr;
|
||||
|
||||
|
||||
Table::rows_scanner i = get_iterator(table, node.where.get());
|
||||
while(Row *row = i.next()) {
|
||||
eval_where_on_row(node, table, row, result.get(), new_row, result_tbl_col_defs, source_table_col_index, is_aggregated);
|
||||
}
|
||||
|
||||
execute_distinct(node, result.get());
|
||||
|
||||
execute_order_by(node, result.get());
|
||||
|
||||
execute_offset_limit(node.offset_limit, result.get());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates the row scanner used to walk `table` for a given WHERE condition.
 *
 * Asks probe_index_scan whether an index can serve the condition. When it
 * reports true, the scanner is built from the returned row ids; otherwise the
 * scanner is built from the table alone (presumably a scan over all rows;
 * verify against Table::rows_scanner).
 *
 * @param table table to scan
 * @param where WHERE condition handed to probe_index_scan
 * @return scanner over the selected rows of `table`
 */
Table::rows_scanner USql::get_iterator(Table *table, const Node *where) const {
    auto index_probe = probe_index_scan(where, table);

    // first: whether an index scan is to be used, second: the row ids it produced
    if (index_probe.first)
        return Table::rows_scanner(table, index_probe.second);

    return Table::rows_scanner(table);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Reference in New Issue
Block a user