Skip to content

Commit af56cd8

Browse files
galpeter authored and dbatyai committed
Make the string trim method unicode tolerant.
Fixes issue #426
JerryScript-DCO-1.0-Signed-off-by: Peter Gal <pgal.u-szeged@partner.samsung.com>
1 parent 5733690 commit af56cd8

File tree

2 files changed

+25
-16
lines changed

2 files changed

+25
-16
lines changed

jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,24 +2009,25 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
20092009

20102010
/* 3 */
20112011
const lit_utf8_size_t size = ecma_string_get_size (original_string_p);
2012-
const ecma_length_t length = ecma_string_get_size (original_string_p);
20132012

20142013
/* Workaround: avoid repeated call of ecma_string_get_char_at_pos() because its overhead */
20152014
lit_utf8_byte_t *original_utf8_str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (size + 1,
20162015
MEM_HEAP_ALLOC_SHORT_TERM);
20172016
ecma_string_to_utf8_string (original_string_p, original_utf8_str_p, (ssize_t) size);
20182017

2018+
const ecma_length_t length = lit_utf8_string_length (original_utf8_str_p, size);
2019+
2020+
lit_utf8_iterator_t iter = lit_utf8_iterator_create (original_utf8_str_p, size);
2021+
20192022
uint32_t prefix = 0, postfix = 0;
20202023
uint32_t new_len = 0;
20212024

2022-
while (prefix < length)
2025+
while (!lit_utf8_iterator_is_eos (&iter))
20232026
{
2024-
ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
2025-
size,
2026-
prefix);
2027+
ecma_char_t current_char = lit_utf8_iterator_read_next (&iter);
20272028

2028-
if (lit_char_is_white_space (next_char)
2029-
|| lit_char_is_line_terminator (next_char))
2029+
if (lit_char_is_white_space (current_char)
2030+
|| lit_char_is_line_terminator (current_char))
20302031
{
20312032
prefix++;
20322033
}
@@ -2036,13 +2037,13 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
20362037
}
20372038
}
20382039

2039-
while (postfix < length - prefix)
2040+
lit_utf8_iterator_seek_eos (&iter);
2041+
while (!lit_utf8_iterator_is_bos (&iter))
20402042
{
2041-
ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
2042-
size,
2043-
length - postfix - 1);
2044-
if (lit_char_is_white_space (next_char)
2045-
|| lit_char_is_line_terminator (next_char))
2043+
ecma_char_t current_char = lit_utf8_iterator_read_prev (&iter);
2044+
2045+
if (lit_char_is_white_space (current_char)
2046+
|| lit_char_is_line_terminator (current_char))
20462047
{
20472048
postfix++;
20482049
}
@@ -2051,8 +2052,7 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
20512052
break;
20522053
}
20532054
}
2054-
2055-
new_len = prefix < size ? size - prefix - postfix : 0;
2055+
new_len = prefix < length ? length - prefix - postfix : 0;
20562056

20572057
ecma_string_t *new_str_p = ecma_string_substr (original_string_p, prefix, prefix + new_len);
20582058

tests/jerry/string-prototype-trim.js

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,13 @@ assert(" ".trim() === "");
6666

6767
assert("".trim() === "");
6868

69-
// FIXME: add unicode tests when unicode support available
69+
assert("\uf389".trim() === "\uf389");
70+
assert(String.prototype.trim.call('\uf389') === "\uf389");
71+
assert("\u20291\u00D0".trim() === "1\u00D0");
72+
assert("\u20291\u00A0".trim() === "1");
73+
74+
assert("\u0009\u000B\u000C\u0020\u00A01".trim() === "1");
75+
assert("\u000A\u000D\u2028\u202911".trim() === "11");
76+
77+
assert("\u0009\u000B\u000C\u0020\u00A01\u0009\u000B\u000C\u0020\u00A0".trim() === "1");
78+
assert("\u000A\u000D\u2028\u202911\u000A\u000D\u2028\u2029".trim() === "11");

0 commit comments

Comments
 (0)