From b97cc6aac17d207fa6d8fbd550372d9c4263b31c Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Mon, 30 May 2022 15:49:18 +0200 Subject: [PATCH] lib: rework uc_index() implementation - Fix segfault on passing string haystack with non-string needle argument - Perform strict equality tests against array haystacks - Make string searches binary safe - Improve left index string search performance - Improve right index array search performance - Add missing test coverage for index() and rindex() Signed-off-by: Jo-Philipp Wich --- lib.c | 59 +++++++++++++++++++------- tests/custom/03_stdlib/58_index | 46 ++++++++++++++++++++ tests/custom/03_stdlib/59_rindex | 46 ++++++++++++++++++++ tests/custom/04_bugs/38_index_segfault | 28 ++++++++++++ 4 files changed, 164 insertions(+), 15 deletions(-) create mode 100644 tests/custom/03_stdlib/58_index create mode 100644 tests/custom/03_stdlib/59_rindex create mode 100644 tests/custom/04_bugs/38_index_segfault diff --git a/lib.c b/lib.c index f0263c27..67939726 100644 --- a/lib.c +++ b/lib.c @@ -285,39 +285,68 @@ uc_length(uc_vm_t *vm, size_t nargs) } } +static int +uc_uniq_ucv_equal(const void *k1, const void *k2); + static uc_value_t * uc_index(uc_vm_t *vm, size_t nargs, bool right) { uc_value_t *stack = uc_fn_arg(0); uc_value_t *needle = uc_fn_arg(1); const char *sstr, *nstr, *p; - size_t arridx, len; + size_t arridx, slen, nlen; ssize_t ret = -1; switch (ucv_type(stack)) { case UC_ARRAY: - for (arridx = 0, len = ucv_array_length(stack); arridx < len; arridx++) { - if (ucv_compare(I_EQ, ucv_array_get(stack, arridx), needle, NULL)) { - ret = (ssize_t)arridx; - - if (!right) + if (right) { + for (arridx = ucv_array_length(stack); arridx > 0; arridx--) { + if (uc_uniq_ucv_equal(ucv_array_get(stack, arridx - 1), needle)) { + ret = (ssize_t)(arridx - 1); + break; + } + } + } + else { + for (arridx = 0, slen = ucv_array_length(stack); arridx < slen; arridx++) { + if (uc_uniq_ucv_equal(ucv_array_get(stack, arridx), needle)) { + ret = 
(ssize_t)arridx; break; + } } } return ucv_int64_new(ret); case UC_STRING: - sstr = ucv_string_get(stack); - nstr = needle ? ucv_string_get(needle) : NULL; - len = needle ? strlen(nstr) : 0; - - for (p = sstr; *p && len; p++) { - if (!strncmp(p, nstr, len)) { - ret = (ssize_t)(p - sstr); + if (ucv_type(needle) == UC_STRING) { + sstr = ucv_string_get(stack); + slen = ucv_string_length(stack); + nstr = ucv_string_get(needle); + nlen = ucv_string_length(needle); + + if (slen == nlen) { + if (memcmp(sstr, nstr, nlen) == 0) + ret = 0; + } + else if (slen > nlen) { + if (right) { + p = sstr + slen - nlen; + + do { + if (memcmp(p, nstr, nlen) == 0) { + ret = (ssize_t)(p - sstr); + break; + } + } + while (p != sstr && p--); /* step left only after offset 0 has been tested; never moves p before sstr */ + } + else { + p = (const char *)memmem(sstr, slen, nstr, nlen); - if (!right) - break; + if (p) + ret = (ssize_t)(p - sstr); + } } } diff --git a/tests/custom/03_stdlib/58_index b/tests/custom/03_stdlib/58_index new file mode 100644 index 00000000..fd197f49 --- /dev/null +++ b/tests/custom/03_stdlib/58_index @@ -0,0 +1,46 @@ +The `index()` function locates an element within a given array or a substring +position within a given string, depending on the type of arguments given. + +Returns `null` if the given haystack argument is neither an array nor a string, +returns `-1` if the element was not found within the array or the substring was +not found within the string. + +Returns the first found index position in all other cases. 
+ +-- Testcase -- +{% + let o = {}; + + printf("%.J\n", [ + index([ 1, 2, "abc", 3, "abc", 1, 2 ], "abc"), // should return 2 + index([ 1, 2, 3 ], 4), // should return -1 + index([ [], {} ], {}), // should return -1 (strict equality) + index([ [], o ], o), // should return 1 (strict equality) + + index("foobarfoobarfoobar", "arf"), // should return 4 + index("test", "hello"), // should return -1 + index("test", "test"), // should return 0 (needle = haystack length special case) + index("test", ""), // should return 0 (zero length needle special case) + index("", ""), // should return 0 (zero length special case) + + index({ test: true }, true), // should return null + index(1234, 3), // should return null + ]); +%} +-- End -- + +-- Expect stdout -- +[ + 2, + -1, + -1, + 1, + 4, + -1, + 0, + 0, + 0, + null, + null +] +-- End -- diff --git a/tests/custom/03_stdlib/59_rindex b/tests/custom/03_stdlib/59_rindex new file mode 100644 index 00000000..ef352ffd --- /dev/null +++ b/tests/custom/03_stdlib/59_rindex @@ -0,0 +1,46 @@ +The `rindex()` function locates an element within a given array or a substring +position within a given string, depending on the type of arguments given. + +Returns `null` if the given haystack argument is neither an array nor a string, +returns `-1` if the element was not found within the array or the substring was +not found within the string. + +Returns the last found index position in all other cases. 
+ +-- Testcase -- +{% + let o = {}; + + printf("%.J\n", [ + rindex([ 1, 2, "abc", 3, "abc", 1, 2 ], "abc"), // should return 4 + rindex([ 1, 2, 3 ], 4), // should return -1 + rindex([ [], {} ], {}), // should return -1 (strict equality) + rindex([ [], o ], o), // should return 1 (strict equality) + + rindex("foobarfoobarfoobar", "arf"), // should return 10 + rindex("test", "hello"), // should return -1 + rindex("test", "test"), // should return 0 (needle = haystack length special case) + rindex("test", ""), // should return 4 (zero length needle special case) + rindex("", ""), // should return 0 (zero length special case) + + rindex({ test: true }, true), // should return null + rindex(1234, 3), // should return null + ]); +%} +-- End -- + +-- Expect stdout -- +[ + 4, + -1, + -1, + 1, + 10, + -1, + 0, + 4, + 0, + null, + null +] +-- End -- diff --git a/tests/custom/04_bugs/38_index_segfault b/tests/custom/04_bugs/38_index_segfault new file mode 100644 index 00000000..e29b99f7 --- /dev/null +++ b/tests/custom/04_bugs/38_index_segfault @@ -0,0 +1,28 @@ +When index() or rindex() was invoked with a string haystack and a non- +string needle argument, a segmentation fault occurred due to an internal +strlen() invocation on a NULL pointer. + +-- Testcase -- +print(index("abc", []), "\n") +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +-1 +-- End -- + + +-- Testcase -- +print(rindex("abc", []), "\n") +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +-1 +-- End --