From 29d8eda23886eedcde3fefafdfc7d74d6acaddcd Mon Sep 17 00:00:00 2001 From: VaclavT Date: Wed, 17 Mar 2021 23:58:33 +0100 Subject: [PATCH] fix parsing of negative numbers, less memory, faster --- clib/csvparser.cpp | 90 +++++++++++++++++++++++----------------------- clib/csvparser.h | 9 ++--- debug.lsp | 3 ++ 3 files changed, 53 insertions(+), 49 deletions(-) diff --git a/clib/csvparser.cpp b/clib/csvparser.cpp index 0a491d6..f999d5d 100644 --- a/clib/csvparser.cpp +++ b/clib/csvparser.cpp @@ -17,9 +17,12 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) { bool newLine(false); std::string field; - // PERF optimize it for memory usage and performance - std::vector> parsed_data; // TODO some default size here - std::vector line; + std::vector parsed_data; + // TODO when csvSource is really big use some bigger nr to prevent reallocations + parsed_data.reserve(128); + + std::vector line; + line.reserve(32); std::string::const_iterator aChar = csvSource.begin(); while (aChar != csvSource.end()) { @@ -31,7 +34,7 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) { if (inQuote == true) { field += *aChar; } else { - line.push_back(field); + line.push_back(ivalualize(field)); field.clear(); } } else if (*aChar == line_separator || *aChar == line_separator2) { @@ -39,8 +42,8 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) { field += *aChar; } else { if (newLine == false) { - line.push_back(field); - addLine(line, parsed_data); + line.push_back(ivalualize(field)); + add_line(line, parsed_data); field.clear(); line.clear(); newLine = true; @@ -55,41 +58,15 @@ MlValue CsvParser::parseCSV(const std::string &csvSource) { } if (field.size()) - line.push_back(field); + line.push_back(ivalualize(field)); - addLine(line, parsed_data); + add_line(line, parsed_data); - return ivalualize(parsed_data); + return parsed_data; } -MlValue CsvParser::ivalualize(std::vector > &parsed_data) const { - int rows = parsed_data.size(); - int cols = rows > 0 ? parsed_data[0].size() : 0; - std::vector result; - - if (rows > 0 && cols > 0) { - for (int r = 0; r < rows; r++) { - std::vector row; - for (int c = 0; c < cols; c++) { - std::string value = parsed_data[r][c]; - if (is_string_int(value)) { - row.push_back(MlValue(stoi(value))); - } - if (is_string_float(value)) { - row.push_back(MlValue(std::stod(value))); - } else { - row.push_back(MlValue::string(value)); - } - } - result.push_back(row); - } - } - - return result; -} - -void CsvParser::addLine(const std::vector &line, std::vector > &lines) { +void CsvParser::add_line(const std::vector &line, std::vector &lines) { if (skip_header && !header_skiped) { header_skiped = true; } else { @@ -98,17 +75,40 @@ void CsvParser::addLine(const std::vector &line, std::vector #include #include #include @@ -25,11 +26,11 @@ public: MlValue parseCSV(const std::string &csvSource); private: - void addLine(const std::vector &line, std::vector > &lines); + void add_line(const std::vector &line, std::vector &lines); - MlValue ivalualize(std::vector > &parsed_data) const; + MlValue ivalualize(const std::string &value) const; - bool is_string_int(const std::string &str) const; + bool is_string_int(const std::string &s, long &val) const; - bool is_string_float(const std::string &str) const; + bool is_string_float(const std::string &s, double &val) const; }; diff --git a/debug.lsp b/debug.lsp index aa767d1..89b8c0a 100644 --- a/debug.lsp +++ b/debug.lsp @@ -26,5 +26,8 @@ (benchmark "benchmark range 1000 : " (range 1 1000)) +(define fdx_list (parse-csv (read-file "tests/csv_data.csv"))) +(print fdx_list) + (sleep 1.5) \ No newline at end of file