split by regexp and [rl]trim added

This commit is contained in:
VaclavT 2021-03-24 00:18:27 +01:00
parent 2c68861451
commit b0ff9dd934
7 changed files with 89 additions and 19 deletions

View File

@ -14,7 +14,7 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-stack_size -Wl,0x1000
# otool -lV build/ml | grep stack
# set(CMAKE_CXX_FLAGS "-Wall -Wextra")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
# set(CMAKE_CXX_FLAGS_RELEASE "-O3")
include_directories(/usr/local/opt/openssl/include ${CMAKE_SOURCE_DIR}/clib ${CMAKE_SOURCE_DIR} )

View File

@ -5,7 +5,7 @@
- better error reporting, e.g. ls_dir on a non-existing dir should print the `pwd` dir
### TODO
- add debug support, at least function call could keep call stack
- add debug support, at least call stack
- documentation
- add url of source/inspiration to clib/*.cpp
- add stdtest - to test every functionality

View File

@ -1,33 +1,39 @@
(print (string-split "split me by space" "\\s+"))
(print (string-rtrim "abc "))
(print (string-ltrim " abc"))
(print (string-trim " abc "))
;; (print (sprintf "%.2f" (list 1.25)))
;; (print (sprintf "%.2f" '(1.23456)))
;; (print (sprintf "%d" '(10000000)))
(define q 1.23)
(print (sprintf "%+.2f%%" (list q)))
;; (define q 1.23)
;; (print (sprintf "%+.2f%%" (list q)))
(define q -1.23)
(print (sprintf "%+.2f%%" (list q)))
;; (define q -1.23)
;; (print (sprintf "%+.2f%%" (list q)))
(define term-rst-esc "\x1B[0m")
(define term-red-esc '"\x1B[31m")
(define term-green-esc "\x1B[32m")
;; (define term-rst-esc "\x1B[0m")
;; (define term-red-esc '"\x1B[31m")
;; (define term-green-esc "\x1B[32m")
(defun term-red (str) (sprintf (+ term-red-esc str term-rst-esc)))
;; (defun term-red (str) (sprintf (+ term-red-esc str term-rst-esc)))
(print (+ (term-red (sprintf "%.2f" (list 1.11))) " "
(term-green (sprintf "%.2f" (list 1.11))) " "
(term-blue (sprintf "%.2f" (list 1.11))) " "
(term-yellow (sprintf "%.2f" (list 1.11))) " "
))
;; (print (+ (term-red (sprintf "%.2f" (list 1.11))) " "
;; (term-green (sprintf "%.2f" (list 1.11))) " "
;; (term-blue (sprintf "%.2f" (list 1.11))) " "
;; (term-yellow (sprintf "%.2f" (list 1.11))) " "
;; ))
(benchmark "benchmark makelist 1000 : " (make-list 1000))
(benchmark "benchmark range 1000 : " (range 1 1000))
;; (benchmark "benchmark makelist 1000 : " (make-list 1000))
;; (benchmark "benchmark range 1000 : " (range 1 1000))
(define fdx_list (parse-csv (read-file "tests/csv_data.csv")))
(print fdx_list)
;; (define fdx_list (parse-csv (read-file "tests/csv_data.csv")))
;; (print fdx_list)
(sleep 1.5)

30
ml.cpp
View File

@ -1526,6 +1526,34 @@ namespace builtin {
return MlValue((long)regexp_search(args[0].as_string(), args[1].as_string()));
}
// Splits a string by a regular expression and returns a list containing the split parts.
// Expects exactly two arguments: the string to split and the regular expression.
MlValue string_split(std::vector<MlValue> args, MlEnvironment &env) {
    eval_args(args, env);

    const bool arity_ok = (args.size() == 2);
    if (!arity_ok)
        throw MlError(MlValue("string-split", string_split), env, args.size() > 2 ? TOO_MANY_ARGS : TOO_FEW_ARGS);

    // TODO more efficient
    std::vector<std::string> pieces = regexp_strsplit(args[0].as_string(), args[1].as_string());

    // wrap every piece into a string value of the resulting list
    std::vector<MlValue> parts;
    for (const std::string &piece : pieces)
        parts.push_back(MlValue::string(piece));

    return MlValue(parts);
}
// Trims the given set of characters from the left, the right or both ends of a string.
// Expects exactly three arguments, all read as strings:
//   1. the string to trim
//   2. the characters to remove (e.g. " \n\r\t")
//   3. the mode, passed through to trim(): "ltrim", "rtrim" or "trim"
MlValue string_rltrim(std::vector<MlValue> args, MlEnvironment &env) {
    eval_args(args, env);

    // the error is reported under "string-rltrim", the name this builtin is looked up by
    if (args.size() != 3)
        throw MlError(MlValue("string-rltrim", string_rltrim), env, args.size() > 3 ? TOO_MANY_ARGS : TOO_FEW_ARGS);

    // TODO validate
    return MlValue::string(trim(args[0].as_string(), args[1].as_string(), args[2].as_string()));
}
MlValue string_pad(std::vector<MlValue> args, MlEnvironment &env) {
eval_args(args, env);
@ -1803,7 +1831,9 @@ MlValue MlEnvironment::get(const std::string &name) const {
if (name == "display") return MlValue("display", builtin::display);
if (name == "string-replace") return MlValue("string-replace", builtin::string_replace);
if (name == "string-regex?") return MlValue("string-regex?", builtin::string_regex);
if (name == "string-split") return MlValue("string-split", builtin::string_split);
if (name == "string-pad") return MlValue("string-pad", builtin::string_pad);
if (name == "string-rltrim") return MlValue("string-rltrim", builtin::string_rltrim);
// Casting operations
if (name == "int") return MlValue("int", builtin::cast_to_int);

View File

@ -30,6 +30,29 @@ bool regexp_search(const std::string &where, const std::string &regex_str) {
return false;
}
// Splits string_to_split on every match of the regular expression rgx_str
// and returns the pieces found between the matches, in order.
// When nothing matches, the whole input is returned as a single element.
std::vector<std::string> regexp_strsplit(const std::string &string_to_split, const std::string &rgx_str) {
    const std::regex delimiter(rgx_str);

    // submatch index -1 makes the iterator yield the text between matches
    std::sregex_token_iterator token(string_to_split.begin(), string_to_split.end(), delimiter, -1);
    const std::sregex_token_iterator last;

    std::vector<std::string> pieces;
    while (token != last) {
        pieces.push_back(token->str());
        ++token;
    }
    return pieces;
}
// Removes characters contained in chars_to_trim from the ends of s and returns the result.
// rltrim selects the side: "ltrim" = left, "rtrim" = right, "trim" = both;
// any other value returns s unchanged.
std::string trim(std::string s, const std::string &chars_to_trim, const std::string &rltrim) {
    const bool both_sides = (rltrim == "trim");
    const bool strip_left = both_sides || rltrim == "ltrim";
    const bool strip_right = both_sides || rltrim == "rtrim";

    if (strip_left) {
        // npos (nothing to keep) erases the whole string
        const std::string::size_type first_kept = s.find_first_not_of(chars_to_trim);
        s.erase(0, first_kept);
    }

    if (strip_right) {
        // npos + 1 wraps around to 0, so a string with nothing to keep is erased completely
        const std::string::size_type last_kept = s.find_last_not_of(chars_to_trim);
        s.erase(last_kept + 1);
    }

    return s;
}
std::string string_padd(const std::string &str, int pad_len, char fill_char, bool from_right) {
int str_len = str.length();

View File

@ -12,4 +12,8 @@ void replace_substring(std::string &src, const std::string &substr, const std::s
// Returns true if where contains regex
bool regexp_search(const std::string &where, const std::string &regex_str);
// Splits string_to_split by the regular expression rgx_str and returns the parts between the matches
std::vector<std::string> regexp_strsplit(const std::string &string_to_split, const std::string &rgx_str);
// Returns s with the characters from chars_to_trim removed from its ends;
// rltrim selects the side: "ltrim" (left), "rtrim" (right) or "trim" (both), other values leave s unchanged
std::string trim(std::string s, const std::string &chars_to_trim, const std::string &rltrim);
std::string string_padd(const std::string & str, int pad_len, char fill_char, bool from_right);

View File

@ -24,6 +24,13 @@
(defun inc (n) (+ n 1))
; trim space, newline, carriage return and tab characters from the beginning of string
(defun string-ltrim (str)
(string-rltrim str " \n\r\t" "ltrim"))
; trim space, newline, carriage return and tab characters from the end of string
(defun string-rtrim (str)
(string-rltrim str " \n\r\t" "rtrim"))
; trim space, newline, carriage return and tab characters from both ends of string
(defun string-trim (str)
(string-rltrim str " \n\r\t" "trim"))
; pad string on the end
(defun string-rpad (str length pad_char)
(string-pad str length pad_char "rpad"))