From b0ff9dd934d88d6d27a6001e209c8d68885ba165 Mon Sep 17 00:00:00 2001
From: VaclavT
Date: Wed, 24 Mar 2021 00:18:27 +0100
Subject: [PATCH] split by regexp and [rl]trim added

---
 CMakeLists.txt    |  2 +-
 Readme.md         |  2 +-
 debug.lsp         | 40 +++++++++++++++++++++++-----------------
 ml.cpp            | 33 +++++++++++++++++++++++++++++++++
 ml_string.cpp     | 30 ++++++++++++++++++++++++++++++
 ml_string.h       |  6 ++++++
 stdlib/stdlib.lsp | 10 ++++++++++
 7 files changed, 104 insertions(+), 19 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 814f216..6ba2a5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,7 +14,7 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-stack_size -Wl,0x1000
 # otool -lV build/ml | grep stack
 
 # set(CMAKE_CXX_FLAGS "-Wall -Wextra")
-set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+# set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 
 include_directories(/usr/local/opt/openssl/include ${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR} )
 
diff --git a/Readme.md b/Readme.md
index 109943d..93d8af0 100644
--- a/Readme.md
+++ b/Readme.md
@@ -5,7 +5,7 @@
 - better error reporting..for example ls_dir on non existing dir should prind `pwd` dir
 
 ### TODO
-- add debug support, at least function call could keep call stack
+- add debug support, at least call stack
 - documentation
 - add url of source/inspiration to clib/*.cpp
 - add stdtest - to test every functionality
diff --git a/debug.lsp b/debug.lsp
index 89b8c0a..166d5f0 100644
--- a/debug.lsp
+++ b/debug.lsp
@@ -1,33 +1,39 @@
+(print (string-split "split me by space" "\\s+"))
+
+
+(print (string-rtrim "abc "))
+(print (string-ltrim " abc"))
+(print (string-trim " abc "))
 
 ;; (print (sprintf "%.2f" (list 1.25)))
 ;; (print (sprintf "%.2f" '(1.23456)))
 ;; (print (sprintf "%d" '(10000000)))
 
-(define q 1.23)
-(print (sprintf "%+.2f%%" (list q)))
+;; (define q 1.23)
+;; (print (sprintf "%+.2f%%" (list q)))
 
-(define q -1.23)
-(print (sprintf "%+.2f%%" (list q)))
+;; (define q -1.23)
+;; (print (sprintf "%+.2f%%" (list q)))
 
 
-(define term-rst-esc "\x1B[0m")
-(define term-red-esc '"\x1B[31m")
-(define term-green-esc "\x1B[32m")
+;; (define term-rst-esc "\x1B[0m")
+;; (define term-red-esc '"\x1B[31m")
+;; (define term-green-esc "\x1B[32m")
 
-(defun term-red (str) (sprintf (+ term-red-esc str term-rst-esc)))
+;; (defun term-red (str) (sprintf (+ term-red-esc str term-rst-esc)))
 
-(print (+ (term-red (sprintf "%.2f" (list 1.11))) " "
-          (term-green (sprintf "%.2f" (list 1.11))) " "
-          (term-blue (sprintf "%.2f" (list 1.11))) " "
-          (term-yellow (sprintf "%.2f" (list 1.11))) " "
-          ))
+;; (print (+ (term-red (sprintf "%.2f" (list 1.11))) " "
+;;           (term-green (sprintf "%.2f" (list 1.11))) " "
+;;           (term-blue (sprintf "%.2f" (list 1.11))) " "
+;;           (term-yellow (sprintf "%.2f" (list 1.11))) " "
+;;           ))
 
 
-(benchmark "benchmark makelist 1000 : " (make-list 1000))
-(benchmark "benchmark range 1000 : " (range 1 1000))
+;; (benchmark "benchmark makelist 1000 : " (make-list 1000))
+;; (benchmark "benchmark range 1000 : " (range 1 1000))
 
-(define fdx_list (parse-csv (read-file "tests/csv_data.csv")))
-(print fdx_list)
+;; (define fdx_list (parse-csv (read-file "tests/csv_data.csv")))
+;; (print fdx_list)
 
 
 (sleep 1.5)
\ No newline at end of file
diff --git a/ml.cpp b/ml.cpp
index fb2e8af..b41fdaa 100644
--- a/ml.cpp
+++ b/ml.cpp
@@ -1526,6 +1526,37 @@ namespace builtin {
         return MlValue((long)regexp_search(args[0].as_string(), args[1].as_string()));
     }
 
+    // Splits the string args[0] by the regular expression args[1] and returns
+    // a list containing the split parts
+    MlValue string_split(std::vector<MlValue> args, MlEnvironment &env) {
+        eval_args(args, env);
+
+        if (args.size() != 2)
+            throw MlError(MlValue("string-split", string_split), env, args.size() > 2 ? TOO_MANY_ARGS : TOO_FEW_ARGS);
+
+        // TODO more efficient
+        std::vector<std::string> elements = regexp_strsplit(args[0].as_string(), args[1].as_string());
+        std::vector<MlValue> result{};
+        result.reserve(elements.size());
+
+        for (const std::string &element : elements)
+            result.push_back(MlValue::string(element));
+
+        return MlValue(result);
+    }
+
+    // Trims the characters listed in args[1] from the string args[0];
+    // args[2] selects the side: "ltrim", "rtrim" or "trim" (both ends)
+    MlValue string_rltrim(std::vector<MlValue> args, MlEnvironment &env) {
+        eval_args(args, env);
+
+        if (args.size() != 3)
+            throw MlError(MlValue("string-rltrim", string_rltrim), env, args.size() > 3 ? TOO_MANY_ARGS : TOO_FEW_ARGS);
+
+        // TODO validate args[2]; any other mode currently returns the string unchanged
+        return MlValue::string(trim(args[0].as_string(), args[1].as_string(), args[2].as_string()));
+    }
+
     MlValue string_pad(std::vector<MlValue> args, MlEnvironment &env) {
         eval_args(args, env);
 
@@ -1803,7 +1834,9 @@ MlValue MlEnvironment::get(const std::string &name) const {
     if (name == "display") return MlValue("display", builtin::display);
     if (name == "string-replace") return MlValue("string-replace", builtin::string_replace);
     if (name == "string-regex?") return MlValue("string-regex?", builtin::string_regex);
+    if (name == "string-split") return MlValue("string-split", builtin::string_split);
     if (name == "string-pad") return MlValue("string-pad", builtin::string_pad);
+    if (name == "string-rltrim") return MlValue("string-rltrim", builtin::string_rltrim);
 
     // Casting operations
     if (name == "int") return MlValue("int", builtin::cast_to_int);
diff --git a/ml_string.cpp b/ml_string.cpp
index cc8ce88..ca0425d 100644
--- a/ml_string.cpp
+++ b/ml_string.cpp
@@ -30,6 +30,36 @@ bool regexp_search(const std::string &where, const std::string &regex_str) {
     return false;
 }
 
+// Splits string_to_split at every match of the regular expression rgx_str
+// and returns the pieces found between the matches
+std::vector<std::string> regexp_strsplit(const std::string &string_to_split, const std::string &rgx_str) {
+    std::vector<std::string> elems;
+
+    std::regex rgx(rgx_str);
+    // submatch index -1 selects the text between matches instead of the matches
+    std::sregex_token_iterator iter(string_to_split.begin(), string_to_split.end(), rgx, -1);
+    std::sregex_token_iterator end;
+
+    for (; iter != end; ++iter)
+        elems.push_back(*iter);
+
+    return elems;
+}
+
+// Removes the characters in chars_to_trim from the start of s ("ltrim"),
+// the end of s ("rtrim") or both ("trim"); any other rltrim leaves s unchanged
+std::string trim(std::string s, const std::string &chars_to_trim, const std::string &rltrim) {
+    if (rltrim == "ltrim" || rltrim == "trim")
+        s.erase(0, s.find_first_not_of(chars_to_trim));
+
+    // when no character is kept find_last_not_of returns npos and npos + 1
+    // wraps around to 0, so the whole string is erased
+    if (rltrim == "rtrim" || rltrim == "trim")
+        s.erase(s.find_last_not_of(chars_to_trim) + 1);
+
+    return s;
+}
+
 std::string string_padd(const std::string &str, int pad_len, char fill_char, bool from_right) {
     int str_len = str.length();
 
diff --git a/ml_string.h b/ml_string.h
index 6bdb454..00c68ab 100644
--- a/ml_string.h
+++ b/ml_string.h
@@ -12,4 +12,10 @@ void replace_substring(std::string &src, const std::string &substr, const std::s
 // Returns true if where contains regex
 bool regexp_search(const std::string &where, const std::string &regex_str);
 
+// Splits string_to_split by the regular expression rgx_str and returns the parts
+std::vector<std::string> regexp_strsplit(const std::string &string_to_split, const std::string &rgx_str);
+
+// Trims chars_to_trim from s; rltrim is "ltrim", "rtrim" or "trim" (both ends)
+std::string trim(std::string s, const std::string &chars_to_trim, const std::string &rltrim);
+
 std::string string_padd(const std::string & str, int pad_len, char fill_char, bool from_right);
\ No newline at end of file
diff --git a/stdlib/stdlib.lsp b/stdlib/stdlib.lsp
index 487b5ed..da263cf 100644
--- a/stdlib/stdlib.lsp
+++ b/stdlib/stdlib.lsp
@@ -24,6 +24,16 @@
 (defun inc (n)
   (+ n 1))
 
+; trim " \n\r\t" characters from the beginning of string
+(defun string-ltrim (str)
+  (string-rltrim str " \n\r\t" "ltrim"))
+; trim " \n\r\t" characters from the end of string
+(defun string-rtrim (str)
+  (string-rltrim str " \n\r\t" "rtrim"))
+; trim " \n\r\t" characters from both ends of string
+(defun string-trim (str)
+  (string-rltrim str " \n\r\t" "trim"))
+
 ; pad string on the end
 (defun string-rpad (str length pad_char)
   (string-pad str length pad_char "rpad"))