string regex function added
This commit is contained in:
parent
8d90513a6b
commit
80d7935974
|
|
@ -1,5 +1,10 @@
|
||||||
|
(print (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "match" "ignore"))
|
||||||
|
(print (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "token"))
|
||||||
|
|
||||||
|
|
||||||
;; (include "/usr/local/var/mlisp/ut.lsp")
|
;; (include "/usr/local/var/mlisp/ut.lsp")
|
||||||
;; (ut::define-test "result of (get-localtime-offset)" '(ut::assert-equal 7200 (get-localtime-offset)))
|
;; (ut::define-test "result of (string-regex-list ... \"match\")" (ut::assert-equal '(("<td class=\"xyz\">1</td>" "<td>2</td>")) (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "match" "ignore")))
|
||||||
|
;; (ut::define-test "result of (string-regex-list ... \"token\")" '(ut::assert-equal '(("1") ("2")) (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "token")))
|
||||||
;; (ut::run-tests)
|
;; (ut::run-tests)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
|-b|Skips loading of std lib|
|
|-b|Skips loading of std lib|
|
||||||
|-c code|Runs given code|
|
|-c code|Runs given code|
|
||||||
|-f source_file ..|Executes code in files|
|
|-f source_file ..|Executes code in files|
|
||||||
|
|-run source_file ..|Executes code in file; if the first line of the file is a shebang, it is skipped|
|
||||||
|-i|Runs REPL|
|
|-i|Runs REPL|
|
||||||
|-d|Turns on better stacktrace|
|
|-d|Turns on better stacktrace|
|
||||||
|-p|Prints profile info at the end|
|
|-p|Prints profile info at the end|
|
||||||
|
|
@ -103,10 +104,11 @@
|
||||||
|`start-of-year datetime`||`>>> (start-of-year (str-to-date "2021-05-13 10:32:12" "%Y-%m-%d %H:%M:%S")) => 1609459200`|
|
|`start-of-year datetime`||`>>> (start-of-year (str-to-date "2021-05-13 10:32:12" "%Y-%m-%d %H:%M:%S")) => 1609459200`|
|
||||||
|`end-of-year datetime`||`>>> (end-of-year (str-to-date "2021-05-13 10:32:12" "%Y-%m-%d %H:%M:%S")) => 1640995199`|
|
|`end-of-year datetime`||`>>> (end-of-year (str-to-date "2021-05-13 10:32:12" "%Y-%m-%d %H:%M:%S")) => 1640995199`|
|
||||||
|`(debug ..)`|||
|
|`(debug ..)`|||
|
||||||
|`(display ..)`|||
|
|`(display ..)`|Displays passed parameters|`>>> (display '(1 2 3)) => "(1 2 3)"`|
|
||||||
|`(string-replace source substr replacement)`|Replace a substring with a replacement string in a source string|`>>> (string-replace "abcdefg" "de" "DE") => "abcDEfg"`|
|
|`(string-replace source substr replacement)`|Replace a substring with a replacement string in a source string|`>>> (string-replace "abcdefg" "de" "DE") => "abcDEfg"`|
|
||||||
|`(string-replace-re source substr replacement)`|Replace a substring regex with a replacement string in a source string|`>>> (string-replace-re "there is a subsequence in the string" "\\b(sub)([^ ]*)" "sub-$2") => "there is a sub-sequence in the string"`|
|
|`(string-replace-re source substr replacement)`|Replace a substring regex with a replacement string in a source string|`>>> (string-replace-re "there is a subsequence in the string" "\\b(sub)([^ ]*)" "sub-$2") => "there is a sub-sequence in the string"`|
|
||||||
|`(string-regex? where regex)`| Returns true if where contains regex|`>>> (string-regex? "aba123cdefg" "[0-9]+") => 1`|
|
|`(string-regex? where regex)`| Returns true if where contains regex|`>>> (string-regex? "aba123cdefg" "[0-9]+") => 1`|
|
||||||
|
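|`(string-regex-list where regex [mode] [ignorecase])`|Returns a list of the substrings of where that are matched by regex. With mode "match" (the default) the result holds a single list of all whole matches; with mode "token" it holds one list of captured groups per match. Pass "ignore" as ignorecase to request case-insensitive matching|`>>> (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "match" "ignore") => (("<td class=\"xyz\">1</td>" "<td>2</td>"))` `>>> (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "token") => (("1") ("2"))`|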
|
||||||
|`(string-pad str len char rpad_lpad)`|||
|
|`(string-pad str len char rpad_lpad)`|||
|
||||||
|`(string-lpad str len char)`|Pad string from start with char to length len|`>>> (string-lpad "0" 10 "x") => "xxxxxxxxx0"`|
|
|`(string-lpad str len char)`|Pad string from start with char to length len|`>>> (string-lpad "0" 10 "x") => "xxxxxxxxx0"`|
|
||||||
|`(string-rpad str len char)`|Pad string from right with char to length len|`>>> (string-rpad "0" 10 "x") => "0xxxxxxxxx"`|
|
|`(string-rpad str len char)`|Pad string from right with char to length len|`>>> (string-rpad "0" 10 "x") => "0xxxxxxxxx"`|
|
||||||
|
|
|
||||||
27
ml.cpp
27
ml.cpp
|
|
@ -1660,6 +1660,32 @@ MlValue string_regex(std::vector<MlValue> args, MlEnvironment &env) {
|
||||||
return MlValue(regexp_search(args[0].as_string(), args[1].as_string()));
|
return MlValue(regexp_search(args[0].as_string(), args[1].as_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns found substrings of a regex
|
||||||
|
MlValue string_regex_list(std::vector<MlValue> args, MlEnvironment &env) {
|
||||||
|
eval_args(args, env);
|
||||||
|
|
||||||
|
bool match_mode = true;
|
||||||
|
bool ignore_case = false;
|
||||||
|
|
||||||
|
if (args.size() < 2 && args.size() > 4)
|
||||||
|
throw MlError(MlValue("string-regex-list", string_regex_list), env, args.size() > 4 ? TOO_MANY_ARGS : TOO_FEW_ARGS);
|
||||||
|
|
||||||
|
if (args.size() >= 3) match_mode = args[2].as_string() == "match";
|
||||||
|
if (args.size() == 4) ignore_case = args[3].as_string() == "ignore";
|
||||||
|
|
||||||
|
auto found_matches = regexp_search2(args[0].as_string(), args[1].as_string(), match_mode, ignore_case);
|
||||||
|
std::vector<MlValue> list;
|
||||||
|
for(auto &l : found_matches) {
|
||||||
|
std::vector<MlValue> sublist;
|
||||||
|
for(auto &v : l) {
|
||||||
|
sublist.push_back(MlValue::string(v));
|
||||||
|
}
|
||||||
|
list.push_back(sublist);
|
||||||
|
}
|
||||||
|
return MlValue(list);
|
||||||
|
}
|
||||||
|
|
||||||
// Splits string by regexp and returns list containing splited parts
|
// Splits string by regexp and returns list containing splited parts
|
||||||
MlValue string_split(std::vector<MlValue> args, MlEnvironment &env) {
|
MlValue string_split(std::vector<MlValue> args, MlEnvironment &env) {
|
||||||
eval_args(args, env);
|
eval_args(args, env);
|
||||||
|
|
@ -2087,6 +2113,7 @@ MlValue MlEnvironment::get(const std::string &name) const {
|
||||||
if (name == "string-replace") return MlValue("string-replace", builtin::string_replace);
|
if (name == "string-replace") return MlValue("string-replace", builtin::string_replace);
|
||||||
if (name == "string-replace-re") return MlValue("string-replace-re", builtin::string_replace_re);
|
if (name == "string-replace-re") return MlValue("string-replace-re", builtin::string_replace_re);
|
||||||
if (name == "string-regex?") return MlValue("string-regex?", builtin::string_regex);
|
if (name == "string-regex?") return MlValue("string-regex?", builtin::string_regex);
|
||||||
|
if (name == "string-regex-list") return MlValue("string-regex-list", builtin::string_regex_list);
|
||||||
if (name == "string-split") return MlValue("string-split", builtin::string_split);
|
if (name == "string-split") return MlValue("string-split", builtin::string_split);
|
||||||
if (name == "string-pad") return MlValue("string-pad", builtin::string_pad);
|
if (name == "string-pad") return MlValue("string-pad", builtin::string_pad);
|
||||||
if (name == "string-rltrim") return MlValue("string-rltrim", builtin::string_rltrim);
|
if (name == "string-rltrim") return MlValue("string-rltrim", builtin::string_rltrim);
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,55 @@ std::vector<std::string> regexp_strsplit(const std::string &string_to_split, con
|
||||||
return elems;
|
return elems;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds the std::regex syntax flags shared by the helpers below: ECMAScript
// grammar (needed for lazy quantifiers such as ".*?"), plus icase when the
// search is not case sensitive.
static std::regex_constants::syntax_option_type regexp_syntax_flags(bool case_sensitive) {
    return case_sensitive ? std::regex::ECMAScript
                          : (std::regex::ECMAScript | std::regex::icase);
}

// Returns every non-overlapping whole match of reg_ex found in str, in order of
// appearance.
//
//   str             text to search
//   reg_ex          regular expression (ECMAScript grammar)
//   case_sensitive  false makes the search case-insensitive
//
// Throws std::regex_error when reg_ex is not a valid expression.
std::vector<std::string> regexp_match(const std::string &str, const std::string &reg_ex, bool case_sensitive) {
    const std::regex pars_regex(reg_ex, regexp_syntax_flags(case_sensitive));

    std::vector<std::string> matches;
    for (std::sregex_iterator it(str.begin(), str.end(), pars_regex), last; it != last; ++it) {
        matches.push_back(it->str());
    }

    return matches;
}

// Returns the captured groups of every match of reg_ex found in str.
//
// Each match contributes one inner vector: its capture groups 1..n, or the
// whole match when the expression has no capture groups. A group that did not
// take part in the match is reported as an empty string.
//
//   str             text to search
//   reg_ex          regular expression (ECMAScript grammar)
//   case_sensitive  false makes the search case-insensitive
//
// Throws std::regex_error when reg_ex is not a valid expression.
std::vector<std::vector<std::string>> regexp_tokens(const std::string &str, const std::string &reg_ex, bool case_sensitive) {
    const std::regex exp(reg_ex, regexp_syntax_flags(case_sensitive));

    std::vector<std::vector<std::string>> captured_groups;

    // std::sregex_iterator is used instead of a hand-rolled regex_search loop:
    // it always makes progress after a zero-length match (a manual
    // "start += position + length" loop spins forever on e.g. "a*"), and it
    // keeps anchors such as ^ and \b correct for the second and later matches.
    for (std::sregex_iterator it(str.begin(), str.end(), exp), last; it != last; ++it) {
        const std::smatch &res = *it;

        std::vector<std::string> captured_subgroups;
        if (res.size() == 1) { // no subgroups, report the whole match
            captured_subgroups.push_back(res[0].str());
        } else {
            for (size_t i = 1; i < res.size(); ++i) { // [0] is whole match
                captured_subgroups.push_back(res[i].str());
            }
        }
        captured_groups.push_back(captured_subgroups);
    }

    return captured_groups;
}

// Searches string_to_split for rgx_str and returns the found substrings.
//
//   match_mode   true:  the result has exactly one inner vector holding every
//                       whole match (see regexp_match)
//                false: the result has one inner vector per match holding its
//                       captured groups (see regexp_tokens)
//   ignore_case  true makes the search case-insensitive
//
// Throws std::regex_error when rgx_str is not a valid expression.
std::vector<std::vector<std::string>> regexp_search2(const std::string &string_to_split, const std::string &rgx_str, bool match_mode, bool ignore_case) {
    // The helpers take the opposite flag ("case_sensitive"), so invert it here.
    const bool case_sensitive = !ignore_case;

    if (match_mode) {
        std::vector<std::vector<std::string>> matches{regexp_match(string_to_split, rgx_str, case_sensitive)};
        return matches;
    }

    return regexp_tokens(string_to_split, rgx_str, case_sensitive);
}
|
||||||
|
|
||||||
std::string string_lucase(std::string s, const std::string &strcase) {
|
std::string string_lucase(std::string s, const std::string &strcase) {
|
||||||
if (strcase == "upper")
|
if (strcase == "upper")
|
||||||
std::transform(s.begin(), s.end(),s.begin(), ::toupper);
|
std::transform(s.begin(), s.end(),s.begin(), ::toupper);
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,8 @@ std::string replace_substring_regexp(const std::string &src, const std::string &
|
||||||
|
|
||||||
// Returns true if where contains regex
bool regexp_search(const std::string &where, const std::string &regex_str);
|
||||||
|
// Returns list of substrings of where that are matched by regex_str.
//   match_mode  true:  one inner list holding all whole matches
//               false: one inner list per match holding its captured groups
//   ignore_case true when the caller requests case-insensitive matching
std::vector<std::vector<std::string>> regexp_search2(const std::string &where, const std::string &regex_str, bool match_mode, bool ignore_case);
|
||||||
|
|
||||||
std::vector<std::string> regexp_strsplit(const std::string &string_to_split, const std::string &rgx_str);
|
std::vector<std::string> regexp_strsplit(const std::string &string_to_split, const std::string &rgx_str);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -62,18 +62,21 @@
|
||||||
(ut::define-test "result of (string-rtrim \"abc \")" '(ut::assert-equal "abc" (string-rtrim "abc ")))
|
(ut::define-test "result of (string-rtrim \"abc \")" '(ut::assert-equal "abc" (string-rtrim "abc ")))
|
||||||
(ut::define-test "result of (string-ltrim \" abc\")" '(ut::assert-equal "abc" (string-ltrim " abc")))
|
(ut::define-test "result of (string-ltrim \" abc\")" '(ut::assert-equal "abc" (string-ltrim " abc")))
|
||||||
(ut::define-test "result of (string-trim \" abc \")" '(ut::assert-equal "abc" (string-trim " abc ")))
|
(ut::define-test "result of (string-trim \" abc \")" '(ut::assert-equal "abc" (string-trim " abc ")))
|
||||||
(ut::define-test "result of (string-regex? \"test.lsp\" \"^.*\.l(i)?sp$\")" '(ut::assert-true (string-regex? "test.lsp" "^.*\.l(i)?sp$")))
|
|
||||||
|
|
||||||
(ut::define-test "result of (string-split \"split me by space\" \"\\s+\")" '(ut::assert-equal '("split" "me" "by" "space") (string-split "split me by space" "\\s+")))
|
|
||||||
(ut::define-test "result of (string-upcase \"abcABCD\")" '(ut::assert-equal "ABCABCD" (string-upcase "abcABCD")))
|
(ut::define-test "result of (string-upcase \"abcABCD\")" '(ut::assert-equal "ABCABCD" (string-upcase "abcABCD")))
|
||||||
(ut::define-test "result of (string-downcase \"abcABCD\")" '(ut::assert-equal "abcabcd" (string-downcase "abcABCD")))
|
(ut::define-test "result of (string-downcase \"abcABCD\")" '(ut::assert-equal "abcabcd" (string-downcase "abcABCD")))
|
||||||
(ut::define-test "result of (string-len \"abcdef\")" '(ut::assert-equal 6 (string-len "abcdef")))
|
(ut::define-test "result of (string-len \"abcdef\")" '(ut::assert-equal 6 (string-len "abcdef")))
|
||||||
|
|
||||||
|
(ut::define-test "result of (string-split \"split me by space\" \"\\s+\")" '(ut::assert-equal '("split" "me" "by" "space") (string-split "split me by space" "\\s+")))
|
||||||
|
|
||||||
|
(ut::define-test "result of (string-regex? \"test.lsp\" \"^.*\.l(i)?sp$\")" '(ut::assert-true (string-regex? "test.lsp" "^.*\.l(i)?sp$")))
|
||||||
|
(ut::define-test "result of (string-regex-list ... \"match\")" '(ut::assert-equal '(("<td class=\"xyz\">1</td>" "<td>2</td>")) (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "match" "ignore")))
|
||||||
|
(ut::define-test "result of (string-regex-list ... \"token\")" '(ut::assert-equal '(("1") ("2")) (string-regex-list "<td class=\"xyz\">1</td><td>2</td>" "<td.*?>(.*?)</td>" "token")))
|
||||||
|
|
||||||
(ut::define-test "result of (string-replace \"abcdef\" \"de\" \"DE\")" '(ut::assert-equal "abcDEfg" (string-replace "abcdefg" "de" "DE")))
|
(ut::define-test "result of (string-replace \"abcdef\" \"de\" \"DE\")" '(ut::assert-equal "abcDEfg" (string-replace "abcdefg" "de" "DE")))
|
||||||
(ut::define-test "result of (string-replace-re \"there is a subsequence in the string\" \"\\b(sub)([^ ]*)\" \"sub-$2\")" '(ut::assert-equal "there is a sub-sequence in the string" (string-replace-re "there is a subsequence in the string" "\\b(sub)([^ ]*)" "sub-$2")))
|
(ut::define-test "result of (string-replace-re \"there is a subsequence in the string\" \"\\b(sub)([^ ]*)\" \"sub-$2\")" '(ut::assert-equal "there is a sub-sequence in the string" (string-replace-re "there is a subsequence in the string" "\\b(sub)([^ ]*)" "sub-$2")))
|
||||||
(ut::define-test "result of (string-replace-re \"XX<script>there is a subsequence in the string</script>YY<script>bbb</script>ZZ\" \"<script>(.*?)</script>\" \"\")" '(ut::assert-equal "XXYYZZ" (string-replace-re "XX<script>there is a subsequence in the string</script>YY<script>bbb</script>ZZ" "<script>(.*?)</script>" "")))
|
(ut::define-test "result of (string-replace-re \"XX<script>there is a subsequence in the string</script>YY<script>bbb</script>ZZ\" \"<script>(.*?)</script>\" \"\")" '(ut::assert-equal "XXYYZZ" (string-replace-re "XX<script>there is a subsequence in the string</script>YY<script>bbb</script>ZZ" "<script>(.*?)</script>" "")))
|
||||||
|
|
||||||
|
|
||||||
(ut::define-test "result of (string-substr \"ABCDEF\")" '(ut::assert-equal "ABCDEF" (string-substr "ABCDEF")))
|
(ut::define-test "result of (string-substr \"ABCDEF\")" '(ut::assert-equal "ABCDEF" (string-substr "ABCDEF")))
|
||||||
(ut::define-test "result of (string-substr \"ABCDEF\" 1)" '(ut::assert-equal "BCDEF" (string-substr "ABCDEF" 1)))
|
(ut::define-test "result of (string-substr \"ABCDEF\" 1)" '(ut::assert-equal "BCDEF" (string-substr "ABCDEF" 1)))
|
||||||
(ut::define-test "result of (string-substr \"ABCDEF\" 2 3)" '(ut::assert-equal "CDE" (string-substr "ABCDEF" 2 3)))
|
(ut::define-test "result of (string-substr \"ABCDEF\" 2 3)" '(ut::assert-equal "CDE" (string-substr "ABCDEF" 2 3)))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue