pp function

This commit is contained in:
VaclavT 2021-08-16 13:55:51 +02:00
parent b03462da6a
commit a344f5b62f
6 changed files with 88 additions and 29 deletions

View File

@ -70,7 +70,7 @@ int CsvReader::parseCSV(const std::string &filename, std::vector<ColDefNode> &co
field.clear(); field.clear();
line.clear(); line.clear();
// DEBUG // DEBUG
// if (row_cnt > 1000) break; //if (row_cnt > 100000) break;
// //
} }

View File

@ -127,22 +127,22 @@ void repl() {
void debug() { void debug() {
std::__1::vector<std::string> sql_commands{ std::__1::vector<std::string> sql_commands{
// "set 'DATE_FORMAT' = '%Y-%m-%d'",
// "show 'DATE_FORMAT'",
// "create table ticker ( tablee varchar(5) not null, permaticker integer, ticker varchar(10) not null, name varchar(256) not null, exchange varchar(32), isdelisted boolean, category varchar(32), cusips varchar(256), siccode integer, sicsector varchar(256), sicindustry varchar(256), famasector varchar(256), famaindustry varchar(256), sector varchar(128), industry varchar(128), scalemarketcap varchar(64), scalerevenue varchar(64), relatedtickers varchar(128), currency varchar(3), location varchar(64), lastupdated date, firstadded date, firstpricedate date, lastpricedate date, firstquarter date, lastquarter date, secfilings varchar(256), companysite varchar(256))", // "create table ticker ( tablee varchar(5) not null, permaticker integer, ticker varchar(10) not null, name varchar(256) not null, exchange varchar(32), isdelisted boolean, category varchar(32), cusips varchar(256), siccode integer, sicsector varchar(256), sicindustry varchar(256), famasector varchar(256), famaindustry varchar(256), sector varchar(128), industry varchar(128), scalemarketcap varchar(64), scalerevenue varchar(64), relatedtickers varchar(128), currency varchar(3), location varchar(64), lastupdated date, firstadded date, firstpricedate date, lastpricedate date, firstquarter date, lastquarter date, secfilings varchar(256), companysite varchar(256))",
// "load ticker from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/tickers.csv')", // "load ticker from '/Users/vaclavt/Library/Mobile Documents/com~apple~CloudDocs/Development/usql/tickers.csv')",
// "select * from ticker where ticker = 'WFC' and tablee = 'SF1'", // "select * from ticker where ticker = 'WFC' and tablee = 'SF1'",
// "set 'DATE_FORMAT' = '%Y-%m-%d'", // "create table sf1 ( ticker varchar(8), dimension varchar(3), calendar_date date, date_key date, report_period date, last_updated date, accoci float, assets float, assetsavg float, assetsc float, assetsnc float, assetturnover float, bvps float, capex float, cashneq float, cashnequsd float, cor float, consolinc float, currentratio float, de float, debt float, debtc float, debtnc float, debtusd float, deferredrev float, depamor float, deposits float, divyield float, dps float, ebit float, ebitda float, ebitdamargin float, ebitdausd float, ebitusd float, ebt float, eps float, epsdil float, epsusd float, equity float, equityavg float, equityusd float, ev float, evebit float, evebitda float, fcf float, fcfps float, fxusd float, gp float, grossmargin float, intangibles float, intexp float, invcap float, invcapavg float, inventory float, investments float, investmentsc float, investmentsnc float, liabilities float, liabilitiesc float, liabilitiesnc float, marketcap float, ncf float, ncfbus float, ncfcommon float, ncfdebt float, ncfdiv float, ncff float, ncfi float, ncfinv float, ncfo float, ncfx float, netinc float, netinccmn float, netinccmnusd float, netincdis float, netincnci float, netmargin float, opex float, opinc float, payables float, payoutratio float, pb float, pe float, pe1 float, ppnenet float, prefdivis float, price float, ps float, ps1 float, receivables float, retearn float, revenue float, revenueusd float, rnd float, roa float, roe float, roic float, ros float, sbcomp float, sgna float, sharefactor float, sharesbas float, shareswa float, shareswadil float, sps float, tangibles float, taxassets float, taxexp float, taxliabilities float, tbvps float, workingcapital float)",
// "show 'DATE_FORMAT'", // "load sf1 from '/tmp/sf1.csv'",
"create table sf1 ( ticker varchar(8), dimension varchar(3), calendar_date date, date_key date, report_period date, last_updated date, accoci float, assets float, assetsavg float, assetsc float, assetsnc float, assetturnover float, bvps float, capex float, cashneq float, cashnequsd float, cor float, consolinc float, currentratio float, de float, debt float, debtc float, debtnc float, debtusd float, deferredrev float, depamor float, deposits float, divyield float, dps float, ebit float, ebitda float, ebitdamargin float, ebitdausd float, ebitusd float, ebt float, eps float, epsdil float, epsusd float, equity float, equityavg float, equityusd float, ev float, evebit float, evebitda float, fcf float, fcfps float, fxusd float, gp float, grossmargin float, intangibles float, intexp float, invcap float, invcapavg float, inventory float, investments float, investmentsc float, investmentsnc float, liabilities float, liabilitiesc float, liabilitiesnc float, marketcap float, ncf float, ncfbus float, ncfcommon float, ncfdebt float, ncfdiv float, ncff float, ncfi float, ncfinv float, ncfo float, ncfx float, netinc float, netinccmn float, netinccmnusd float, netincdis float, netincnci float, netmargin float, opex float, opinc float, payables float, payoutratio float, pb float, pe float, pe1 float, ppnenet float, prefdivis float, price float, ps float, ps1 float, receivables float, retearn float, revenue float, revenueusd float, rnd float, roa float, roe float, roic float, ros float, sbcomp float, sgna float, sharefactor float, sharesbas float, shareswa float, shareswadil float, sps float, tangibles float, taxassets float, taxexp float, taxliabilities float, tbvps float, workingcapital float)", // "select ticker, calendar_date from sf1 where calendar_date > to_date('2019-01-01', '%Y-%m-%d') limit 1",
"load sf1 from '/tmp/sf1.csv'",
"select ticker, calendar_date from sf1 where calendar_date > to_date('2019-01-01', '%Y-%m-%d') limit 1",
// "select ticker, dimension, calendar_date, eps, dps from sf1 where (ticker = 'AIG' or ticker = 'AI') and dimension = 'MRY' order by 3 desc", // "select ticker, dimension, calendar_date, eps, dps from sf1 where (ticker = 'AIG' or ticker = 'AI') and dimension = 'MRY' order by 3 desc",
// "select ticker, dimension, calendar_date, eps, dps from sf1 where (ticker = 'AIG' or ticker = 'AI') and (dimension = 'MRY' or dimension = 'MRQ') order by 3 desc", // "select ticker, dimension, calendar_date, eps, dps from sf1 where (ticker = 'AIG' or ticker = 'AI') and (dimension = 'MRY' or dimension = 'MRQ') order by 3 desc",
// "select ticker, dimension, calendar_date, eps, dps from sf1 where (ticker = 'AIG' or ticker = 'WFC') and dimension = 'MRY' order by 3 desc", // "select ticker, dimension, calendar_date, eps, dps, roa*100 as roa, roe*100 as roe, revenue, netinc from sf1 where (ticker = 'AIG' or ticker = 'WFC') and dimension = 'MRY' and calendar_date > to_date('2019-01-01', '%Y-%m-%d') order by 3 desc",
// "create table a (i integer not null, s varchar(64), f float null, d date null, b boolean)", "create table a (i integer not null, s varchar(64), f float null, d date null, b boolean)",
// "insert into a (i, s, b) values(1, upper('one'), 'Y')", "insert into a (i, s, b) values(1, upper('one'), 'Y')",
// "insert into a (i, s, b, f) values(1 + 10000, upper('one'), 'Y', 3.1415)", "insert into a (i, s, b, f) values(1 + 10000, upper('one'), 'Y', 3.1415)",
// "update a set i = i * 100, f = f + 0.01 where i > 1", "update a set i = i * 100, f = f + 0.01 where i > 1",
// "select to_string(i, '%d.%m.%Y %H:%M:%S'), i, s from a where i < to_date('20.12.2019', '%d.%m.%Y')", "select to_string(i, '%d.%m.%Y %H:%M:%S'), pp(i), s from a where i < to_date('20.12.2019', '%d.%m.%Y')",
// "select i + 2 as first, i, s, b, f from a where i >=1 order by 1 desc offset 0 limit 1", // "select i + 2 as first, i, s, b, f from a where i >=1 order by 1 desc offset 0 limit 1",
// "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'", // "update table a set s = 'null string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'",
// "update table a set i = null", // "update table a set i = null",

View File

@ -321,7 +321,7 @@ namespace usql {
} }
std::vector<ColOrderNode> Parser::parse_order_by_clause() { std::vector<ColOrderNode> Parser::parse_order_by_clause() {
std::vector<ColOrderNode> order_cols; std::vector<ColOrderNode> order_cols;
if (m_lexer.tokenType() == TokenType::keyword_order) { if (m_lexer.tokenType() == TokenType::keyword_order) {
@ -329,7 +329,7 @@ namespace usql {
m_lexer.skipToken(TokenType::keyword_by); m_lexer.skipToken(TokenType::keyword_by);
do { do {
int col_index = -1; int col_index = FUNCTION_CALL;
bool asc = true; bool asc = true;
auto token_type = m_lexer.tokenType(); auto token_type = m_lexer.tokenType();

View File

@ -10,6 +10,9 @@
namespace usql { namespace usql {
static const int FUNCTION_CALL = -1;
enum class ColumnType { enum class ColumnType {
integer_type, integer_type,
float_type, float_type,

View File

@ -2,6 +2,7 @@
#include "exception.h" #include "exception.h"
#include "ml_date.h" #include "ml_date.h"
#include <cmath>
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
@ -174,6 +175,7 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
node.cols_names->emplace_back(SelectColNode{std::make_unique<DatabaseValueNode>(col.name), col.name}); node.cols_names->emplace_back(SelectColNode{std::make_unique<DatabaseValueNode>(col.name), col.name});
} }
} }
// TODO further validations/optimizations like translate order by column names to indexes, validate those indexes, eval 1 + 1 etc
// create result table // create result table
@ -199,8 +201,9 @@ std::unique_ptr<Table> USql::execute_select(SelectFromTableNode &node) {
for (auto idx = 0; idx < result->columns_count(); idx++) { for (auto idx = 0; idx < result->columns_count(); idx++) {
auto row_col_index = source_table_col_index[idx]; auto row_col_index = source_table_col_index[idx];
if (row_col_index == -1) { // TODO introduce constant here if (row_col_index == FUNCTION_CALL) {
auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[](idx).value.get()); //auto evaluated_value = eval_value_node(table, *row, node.cols_names->operator[](idx).value.get());
auto evaluated_value = eval_function_value_node(table, *row, node.cols_names->operator[](idx).value.get());
ValueNode *col_value = evaluated_value.get(); ValueNode *col_value = evaluated_value.get();
new_row.setColumnValue(&result_tbl_col_defs[idx], col_value); new_row.setColumnValue(&result_tbl_col_defs[idx], col_value);
@ -277,15 +280,7 @@ std::tuple<int, ColDefNode> USql::get_node_definition(Table *table, Node * node,
} else if (node->node_type == NodeType::function) { } else if (node->node_type == NodeType::function) {
auto func_node = static_cast<FunctionNode *>(node); auto func_node = static_cast<FunctionNode *>(node);
return get_function_node_definition(col_name, col_order, func_node);
if (func_node->function == "to_string") {
ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, 32, true};
return std::make_tuple(-1, col_def);
} else if (func_node->function == "to_date") {
ColDefNode col_def = ColDefNode{col_name, ColumnType::integer_type, col_order, 1, true};
return std::make_tuple(-1, col_def);
}
throw Exception("Unsupported function");
} else if (node->node_type == NodeType::arithmetical_operator) { } else if (node->node_type == NodeType::arithmetical_operator) {
auto ari_node = static_cast<ArithmeticalOperatorNode *>(node); auto ari_node = static_cast<ArithmeticalOperatorNode *>(node);
@ -322,8 +317,6 @@ std::tuple<int, ColDefNode> USql::get_node_definition(Table *table, Node * node,
throw Exception("Unsupported node type"); throw Exception("Unsupported node type");
} }
std::unique_ptr<Table> USql::execute_delete(DeleteFromTableNode &node) { std::unique_ptr<Table> USql::execute_delete(DeleteFromTableNode &node) {
// find source table // find source table
Table *table = find_table(node.table_name); Table *table = find_table(node.table_name);
@ -492,6 +485,26 @@ std::unique_ptr<ValueNode> USql::eval_literal_value_node(Table *table, Row &row,
} }
std::tuple<int, ColDefNode> USql::get_function_node_definition(const std::string &col_name, int col_order, const FunctionNode *func_node) {
if (func_node->function == "to_string") {
ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, 32, true};
return std::make_tuple(-1, col_def);
} else if (func_node->function == "to_date") {
ColDefNode col_def = ColDefNode{col_name, ColumnType::date_type, col_order, 1, true};
return std::make_tuple(-1, col_def);
} else if (func_node->function == "round") {
ColDefNode col_def = ColDefNode{col_name, ColumnType::float_type, col_order, 1, true};
return std::make_tuple(-1, col_def);
} else if (func_node->function == "pp") {
ColDefNode col_def = ColDefNode{col_name, ColumnType::varchar_type, col_order, 10, true};
return std::make_tuple(-1, col_def);
} else if (func_node->function == "upper" || func_node->function == "lower") {
throw Exception("eval size of string");
}
throw Exception("Unsupported function");
}
std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row, Node *node) { std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row, Node *node) {
auto *fnc = static_cast<FunctionNode *>(node); auto *fnc = static_cast<FunctionNode *>(node);
@ -511,7 +524,6 @@ std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row
std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return std::toupper(c); }); std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) -> unsigned char { return std::toupper(c); });
return std::make_unique<StringValueNode>(str); return std::make_unique<StringValueNode>(str);
} }
if (fnc->function == "to_date") { if (fnc->function == "to_date") {
std::string date = evaluatedPars[0]->getStringValue(); std::string date = evaluatedPars[0]->getStringValue();
std::string format = evaluatedPars[1]->getStringValue(); std::string format = evaluatedPars[1]->getStringValue();
@ -524,6 +536,48 @@ std::unique_ptr<ValueNode> USql::eval_function_value_node(Table *table, Row &row
std::string formatted_date = date_to_string(date, format); std::string formatted_date = date_to_string(date, format);
return std::make_unique<StringValueNode>(formatted_date); return std::make_unique<StringValueNode>(formatted_date);
} }
if (fnc->function == "pp") {
    // pp(value [, format]): pretty-prints a numeric argument as text.
    //   - format "100%"                : value with two decimals and a trailing '%'
    //   - |value| >= 1e12 / 1e9 / 1e5  : value scaled down and suffixed with T / B / M
    //   - anything else                : the argument's own string representation
    // Non-numeric arguments are returned as their string representation unchanged.
    auto &parsed_value = evaluatedPars[0];
    const bool is_numeric = parsed_value->node_type == NodeType::int_value || parsed_value->node_type == NodeType::float_value;
    if (!is_numeric)
        return std::make_unique<StringValueNode>(parsed_value->getStringValue());

    std::string format = evaluatedPars.size() > 1 ? evaluatedPars[1]->getStringValue() : "";
    const double value = parsed_value->getDoubleValue();
    const double magnitude = std::fabs(value); // one set of thresholds serves both signs

    // Format into a real character buffer. snprintf returns the number of characters it
    // produced; assigning that int to a std::string (as was done before) stores a single
    // character instead of the formatted text.
    char buf[20];
    int written = -1;
    if (format == "100%")
        // NOTE(review): the value is printed as-is, not multiplied by 100 - confirm that is intended.
        written = std::snprintf(buf, sizeof(buf), "%.2f%%", value);
    else if (magnitude >= 1000000000000.0)
        written = std::snprintf(buf, sizeof(buf), "%7.2fT", value / 1000000000000.0);
    else if (magnitude >= 1000000000.0)
        written = std::snprintf(buf, sizeof(buf), "%7.2fB", value / 1000000000.0);
    else if (magnitude >= 100000.0)
        // Values from 100 thousand up are shown in millions, e.g. 120000 -> "0.12M".
        written = std::snprintf(buf, sizeof(buf), "%7.2fM", value / 1000000.0);
    else
        return std::make_unique<StringValueNode>(parsed_value->getStringValue());

    // On an encoding error fall back to the plain representation. Output longer than the
    // buffer is truncated by snprintf but is still null-terminated.
    if (written < 0)
        return std::make_unique<StringValueNode>(parsed_value->getStringValue());
    return std::make_unique<StringValueNode>(std::string(buf));
}
if (fnc->function == "round") {
    // round(value [, places]): rounds value to the given number of decimal places,
    // halfway cases away from zero (std::round). places defaults to 0 when omitted.
    const double value = evaluatedPars[0]->getDoubleValue();
    int places = 0;
    if (evaluatedPars.size() > 1)
        places = evaluatedPars[1]->getIntegerValue();

    // Scale so the requested digit sits in the units position, round, then scale back.
    const double scale = std::pow(10.0, places);
    const double rounded = std::round(value * scale) / scale;
    return std::make_unique<DoubleValueNode>(rounded);
}
throw Exception("invalid function"); throw Exception("invalid function");
} }

4
usql.h
View File

@ -46,12 +46,14 @@ private:
static std::unique_ptr<ValueNode> eval_arithmetic_operator(ColumnType outType, ArithmeticalOperatorNode &node, Table *table, Row &row) ; static std::unique_ptr<ValueNode> eval_arithmetic_operator(ColumnType outType, ArithmeticalOperatorNode &node, Table *table, Row &row) ;
static std::unique_ptr<Table> create_stmt_result_table(long code, const std::string &text, size_t affected_rows);
static std::tuple<int, ColDefNode> get_column_definition(Table *table, SelectColNode *select_col_node, int col_order) ; static std::tuple<int, ColDefNode> get_column_definition(Table *table, SelectColNode *select_col_node, int col_order) ;
static std::tuple<int, ColDefNode> get_node_definition(Table *table, Node *select_col_node, const std::string & col_name, int col_order) ; static std::tuple<int, ColDefNode> get_node_definition(Table *table, Node *select_col_node, const std::string & col_name, int col_order) ;
static std::tuple<int, ColDefNode> get_function_node_definition(const std::string &col_name, int col_order, const FunctionNode *func_node);
Table *find_table(const std::string &name); Table *find_table(const std::string &name);
void check_table_not_exists(const std::string &name); void check_table_not_exists(const std::string &name);
static std::unique_ptr<Table> create_stmt_result_table(long code, const std::string &text, size_t affected_rows);
private: private:
Parser m_parser; Parser m_parser;