From 50a7993a2e42c49e4024f895400940fd32c696e2 Mon Sep 17 00:00:00 2001
From: VaclavT
Date: Sun, 1 Aug 2021 14:31:19 +0200
Subject: [PATCH] simple distinct added

---
Review notes (text in this section is ignored by git am):
- usql.cpp, execute_distinct: the sort comparator now tests `> 0` instead of
  `>= 0`. std::sort requires a strict weak ordering, i.e. the comparator must
  return false for two equal rows; `>= 0` returned true for them.
- Comments were added to Row::compare and USql::execute_distinct; the hunk
  headers and the diffstat below were adjusted for the extra lines.
- Template argument lists (after std::unique_ptr, std::vector, std::list,
  std::make_unique) appear to be missing from this copy of the patch and are
  left exactly as found - TODO restore from the repository before applying.

 lexer.cpp  |  5 +++++
 lexer.h    |  1 +
 main.cpp   |  1 +
 parser.cpp |  8 +++++++-
 parser.h   |  5 +++--
 row.cpp    | 12 ++++++++++++
 row.h      | 10 ++++------
 usql.cpp   | 18 ++++++++++++++++--
 usql.h     |  2 +-
 9 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/lexer.cpp b/lexer.cpp
index 8f61a60..fc184a4 100644
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -197,6 +197,8 @@ namespace usql {
             return TokenType::keyword_float;
         if (token == "varchar")
             return TokenType::keyword_varchar;
+        if (token == "distinct")
+            return TokenType::keyword_distinct;
         if (token == "or")
             return TokenType::logical_or;
         if (token == "and")
@@ -382,6 +384,9 @@ namespace usql {
             case TokenType::keyword_varchar:
                 txt = "varchar";
                 break;
+            case TokenType::keyword_distinct:
+                txt = "distinct";
+                break;
             case TokenType::int_number:
                 txt = "int number";
                 break;
diff --git a/lexer.h b/lexer.h
index 8a23395..c95299c 100644
--- a/lexer.h
+++ b/lexer.h
@@ -47,6 +47,7 @@ namespace usql {
         keyword_integer,
         keyword_float,
         keyword_varchar,
+        keyword_distinct,
         int_number,
         double_number,
         string_literal,
diff --git a/main.cpp b/main.cpp
index b692f3d..907de07 100644
--- a/main.cpp
+++ b/main.cpp
@@ -18,6 +18,7 @@ int main(int argc, char *argv[]) {
             "insert into a (i, s) values(to_date('20.12.1973', '%d.%m.%Y'), 'six')",
             "save table a into '/tmp/a.csv'",
             "select i, s from a where i > 2 order by 1 desc offset 1 limit 1",
+            "select distinct s from a",
             // "select i, s from a where i = 1",
             // "select i, s from a where s = 'two'",
             // "select i, s from a where i <= 3 and s = 'one'",
diff --git a/parser.cpp b/parser.cpp
index 2faebec..67ce77a 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -208,10 +208,16 @@ namespace usql {
     }
 
     std::unique_ptr Parser::parse_select_from_table() {
+        bool distinct = false;
         auto cols = std::make_unique>();
 
         m_lexer.skipToken(TokenType::keyword_select);
 
+        if (m_lexer.tokenType() == TokenType::keyword_distinct) {
+            distinct = true;
+            m_lexer.skipToken(TokenType::keyword_distinct);
+        }
+
         int i = 1;
         while (m_lexer.tokenType() != TokenType::keyword_from) {
             auto column_value = parse_value();
@@ -240,7 +246,7 @@ namespace usql {
 
         OffsetLimitNode offsetlimit_node = parse_offset_limit_clause();
 
-        return std::make_unique(table_name, std::move(cols), std::move(where_node), orderby_node, offsetlimit_node);
+        return std::make_unique(table_name, std::move(cols), std::move(where_node), orderby_node, offsetlimit_node, distinct);
     }
 
     std::unique_ptr Parser::parse_delete_from_table() {
diff --git a/parser.h b/parser.h
index 69bed40..7ce6478 100644
--- a/parser.h
+++ b/parser.h
@@ -239,9 +239,10 @@ namespace usql {
         std::unique_ptr where;
         std::vector order_by;
         OffsetLimitNode offset_limit;
+        bool distinct;
 
-        SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause, std::vector orderby, OffsetLimitNode offlim) :
-                Node(NodeType::select_from), table_name(name), cols_names(std::move(names)), where(std::move(where_clause)), order_by(orderby), offset_limit(offlim) {}
+        SelectFromTableNode(std::string name, std::unique_ptr> names, std::unique_ptr where_clause, std::vector orderby, OffsetLimitNode offlim, bool distinct_):
+                Node(NodeType::select_from), table_name(name), cols_names(std::move(names)), where(std::move(where_clause)), order_by(orderby), offset_limit(offlim), distinct(distinct_) {}
     };
 
     struct CreateTableAsSelectNode : Node {
diff --git a/row.cpp b/row.cpp
index 71561c2..b796da1 100644
--- a/row.cpp
+++ b/row.cpp
@@ -97,6 +97,18 @@ namespace usql {
         }
     }
 
+    // Three-way comparison of two rows, column by column in index order.
+    // Returns the first non-zero result of comparing a pair of columns, or 0
+    // when every column of this row compares equal to the matching column of
+    // `other`. Assumes `other` has at least as many columns as this row.
+    int Row::compare(const Row & other) const {
+        for (int ci = 0; ci < m_columns.size(); ci++) {
+            int cmp = m_columns[ci]->compare(other.ith_column(ci));
+            if (cmp != 0) return cmp;
+        }
+        return 0;
+    }
+
     void Row::print(const std::vector & col_char_sizes) {
         std::string out{"| "};
 
diff --git a/row.h b/row.h
index edbc641..7df1c31 100644
--- a/row.h
+++ b/row.h
@@ -84,6 +84,7 @@ namespace usql {
         Row(const Row &other);
 
         Row &operator=(Row other);
+        bool operator==(const Row &other) const {return this->compare(other) == 0; };
 
         void setColumnNull(int col_index);
         void setColumnValue(int col_index, long value);
@@ -92,13 +93,10 @@ namespace usql {
         void setColumnValue(ColDefNode *col_def, ColValue *col_value);
         void setColumnValue(ColDefNode *col_def, ValueNode *col_value);
 
-        ColValue &operator[](int i) {
-            return *m_columns[i];
-        }
+        ColValue &operator[](int i) { return *m_columns[i]; }
 
-        ColValue * ith_column(int i) const {
-            return m_columns[i].get();
-        }
+        ColValue * ith_column(int i) const { return m_columns[i].get(); }
+        int compare(const Row &other) const;
 
         void print(const std::vector & col_char_sizes);
 
diff --git a/usql.cpp b/usql.cpp
index c9b6ac3..d947be3 100644
--- a/usql.cpp
+++ b/usql.cpp
@@ -190,15 +190,29 @@ std::unique_ptr USql::execute_select(SelectFromTableNode &node) {
         }
     }
 
-    // order by
+    execute_distinct(node, result.get());
+
     execute_order_by(node, table, result.get());
 
-    // offset & limit
     execute_offset_limit(node.offset_limit, result.get());
 
     return std::move(result);
 }
 
+// Removes duplicate rows from `result` when the query was parsed with DISTINCT;
+// does nothing otherwise. Rows are sorted first so that equal rows become
+// adjacent, then std::unique (which uses Row::operator==) drops the repeats.
+void USql::execute_distinct(SelectFromTableNode &node, Table *result) const {
+    if (!node.distinct) return;
+
+    // std::sort needs a strict weak ordering: the comparator must return false
+    // for equal rows, so test > 0 rather than >= 0.
+    auto compare_rows = [](const Row &a, const Row &b) { return a.compare(b) > 0; };
+    std::sort(result->m_rows.begin(), result->m_rows.end(), compare_rows);
+
+    result->m_rows.erase(std::unique(result->m_rows.begin(), result->m_rows.end()), result->m_rows.end());
+}
+
 void USql::execute_order_by(SelectFromTableNode &node, Table *table, Table *result) const {
     if (node.order_by.size() == 0) return;
 
diff --git a/usql.h b/usql.h
index 2969532..cf9a023 100644
--- a/usql.h
+++ b/usql.h
@@ -55,8 +55,8 @@ private:
     Parser m_parser;
     std::list m_tables;
 
+    void execute_distinct(SelectFromTableNode &node, Table *result) const;
     void execute_order_by(SelectFromTableNode &node, Table *table, Table *result) const;
-
     void execute_offset_limit(OffsetLimitNode &node, Table *result) const;
 };
 