@@ -414,7 +414,7 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
414414 lit_utf8_size_t string_size) /* *< string size */
415415{
416416 JERRY_ASSERT (string_p != NULL || string_size == 0 );
417- JERRY_ASSERT (lit_is_utf8_string_valid (string_p, string_size));
417+ JERRY_ASSERT (lit_is_cesu8_string_valid (string_p, string_size));
418418
419419 lit_magic_string_id_t magic_string_id;
420420 if (lit_is_utf8_string_magic (string_p, string_size, &magic_string_id))
@@ -444,7 +444,7 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
444444} /* ecma_new_ecma_string_from_utf8 */
445445
446446/**
447- * Allocate new ecma-string and fill it with utf-8 character which represents specified code unit
447+ * Allocate new ecma-string and fill it with cesu-8 character which represents specified code unit
448448 *
449449 * @return pointer to ecma-string descriptor
450450 */
@@ -627,14 +627,7 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
627627 jerry_fatal (ERR_OUT_OF_MEMORY);
628628 }
629629
630- ecma_char_t str1_last_code_unit = ecma_string_get_char_at_pos (string1_p, ecma_string_get_length (string1_p) - 1 );
631- ecma_char_t str2_first_code_unit = ecma_string_get_char_at_pos (string2_p, 0 );
632-
633- bool is_surrogate_pair_sliced = (lit_is_code_unit_high_surrogate (str1_last_code_unit)
634- && lit_is_code_unit_low_surrogate (str2_first_code_unit));
635-
636- lit_utf8_size_t buffer_size = str1_size + str2_size - (lit_utf8_size_t ) (is_surrogate_pair_sliced ?
637- LIT_UTF8_CESU8_SURROGATE_SIZE_DIF : 0 );
630+ lit_utf8_size_t buffer_size = str1_size + str2_size;
638631
639632 lit_utf8_byte_t *str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (buffer_size, MEM_HEAP_ALLOC_SHORT_TERM);
640633
@@ -643,23 +636,9 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
643636 bytes_copied1 = ecma_string_to_utf8_string (string1_p, str_p, (ssize_t ) str1_size);
644637 JERRY_ASSERT (bytes_copied1 > 0 );
645638
646- if (!is_surrogate_pair_sliced)
647- {
648- bytes_copied2 = ecma_string_to_utf8_string (string2_p, str_p + str1_size, (ssize_t ) str2_size);
649- JERRY_ASSERT (bytes_copied2 > 0 );
650- }
651- else
652- {
653- bytes_copied2 = ecma_string_to_utf8_string (string2_p,
654- str_p + str1_size - LIT_UTF8_MAX_BYTES_IN_CODE_UNIT + 1 ,
655- (ssize_t ) buffer_size - bytes_copied1
656- + LIT_UTF8_MAX_BYTES_IN_CODE_UNIT);
657- JERRY_ASSERT (bytes_copied2 > 0 );
639+ bytes_copied2 = ecma_string_to_utf8_string (string2_p, str_p + str1_size, (ssize_t ) str2_size);
640+ JERRY_ASSERT (bytes_copied2 > 0 );
658641
659- lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (str1_last_code_unit,
660- str2_first_code_unit);
661- lit_code_point_to_utf8 (surrogate_code_point, str_p + str1_size - LIT_UTF8_MAX_BYTES_IN_CODE_UNIT);
662- }
663642 ecma_string_t *str_concat_p = ecma_new_ecma_string_from_utf8 (str_p, buffer_size);
664643
665644 mem_heap_free_block ((void *) str_p);
@@ -955,7 +934,7 @@ ecma_string_get_array_index (const ecma_string_t *str_p, /**< ecma-string */
955934} /* ecma_string_is_array_index */
956935
957936/**
958- * Convert ecma-string's contents to a utf-8 string and put it to the buffer.
937+ * Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
959938 *
960939 * @return number of bytes, actually copied to the buffer - if string's content was copied successfully;
961940 * otherwise (in case size of buffer is insufficient) - negative number, which is calculated
@@ -1018,7 +997,6 @@ ecma_string_to_utf8_string (const ecma_string_t *string_desc_p, /**< ecma-string
1018997
1019998 break ;
1020999 }
1021-
10221000 case ECMA_STRING_CONTAINER_MAGIC_STRING:
10231001 {
10241002 const lit_magic_string_id_t id = string_desc_p->u .magic_string_id ;
@@ -1491,7 +1469,7 @@ ecma_string_get_char_at_pos (const ecma_string_t *string_p, /**< ecma-string */
14911469 ssize_t sz = ecma_string_to_utf8_string (string_p, utf8_str_p, (ssize_t ) buffer_size);
14921470 JERRY_ASSERT (sz > 0 );
14931471
1494- ch = lit_utf8_string_code_unit_at (utf8_str_p, buffer_size, index);;
1472+ ch = lit_utf8_string_code_unit_at (utf8_str_p, buffer_size, index);
14951473
14961474 MEM_FINALIZE_LOCAL_ARRAY (utf8_str_p);
14971475
@@ -1682,10 +1660,7 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
16821660 JERRY_ASSERT (end_pos <= string_length);
16831661#endif
16841662
1685- const ecma_length_t span = (start_pos > end_pos) ? 0 : end_pos - start_pos;
1686- const lit_utf8_size_t utf8_str_size = LIT_UTF8_MAX_BYTES_IN_CODE_UNIT * span;
1687-
1688- if (utf8_str_size)
1663+ if (start_pos < end_pos)
16891664 {
16901665 /* *
16911666 * I. Dump original string to plain buffer
@@ -1701,20 +1676,22 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
17011676 /* *
17021677 * II. Extract substring
17031678 */
1704- MEM_DEFINE_LOCAL_ARRAY (utf8_substr_buffer, utf8_str_size, lit_utf8_byte_t );
1679+ lit_utf8_byte_t *start_p = utf8_str_p;
1680+ end_pos -= start_pos;
17051681
1706- lit_utf8_size_t utf8_substr_buffer_offset = 0 ;
1707- for (ecma_length_t idx = 0 ; idx < span; idx++)
1682+ while (start_pos--)
17081683 {
1709- ecma_char_t code_unit = lit_utf8_string_code_unit_at (utf8_str_p, buffer_size, start_pos + idx);
1684+ start_p += lit_get_unicode_char_size_by_utf8_first_byte (*start_p);
1685+ }
17101686
1711- JERRY_ASSERT (utf8_str_size >= utf8_substr_buffer_offset + LIT_UTF8_MAX_BYTES_IN_CODE_UNIT);
1712- utf8_substr_buffer_offset += lit_code_unit_to_utf8 (code_unit, utf8_substr_buffer + utf8_substr_buffer_offset);
1687+ lit_utf8_byte_t *end_p = start_p;
1688+ while (end_pos--)
1689+ {
1690+ end_p += lit_get_unicode_char_size_by_utf8_first_byte (*end_p);
17131691 }
17141692
1715- ecma_string_p = ecma_new_ecma_string_from_utf8 (utf8_substr_buffer, utf8_substr_buffer_offset );
1693+ ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p, ( lit_utf8_size_t ) (end_p - start_p) );
17161694
1717- MEM_FINALIZE_LOCAL_ARRAY (utf8_substr_buffer);
17181695 MEM_FINALIZE_LOCAL_ARRAY (utf8_str_p);
17191696
17201697 return ecma_string_p;
@@ -1746,47 +1723,47 @@ ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string
17461723 ssize_t sz = ecma_string_to_utf8_string (string_p, utf8_str_p, (ssize_t ) buffer_size);
17471724 JERRY_ASSERT (sz >= 0 );
17481725
1749- lit_utf8_iterator_t front = lit_utf8_iterator_create (utf8_str_p, buffer_size);
1750-
1751- lit_utf8_iterator_t back = lit_utf8_iterator_create (utf8_str_p, buffer_size);
1752- lit_utf8_iterator_seek_eos (&back);
1753-
1754- lit_utf8_iterator_pos_t start = lit_utf8_iterator_get_pos (&back);
1755- lit_utf8_iterator_pos_t end = lit_utf8_iterator_get_pos (&front);
1756-
1757- ecma_char_t current;
1726+ ecma_char_t ch;
1727+ lit_utf8_size_t read_size;
1728+ lit_utf8_byte_t *nonws_start_p = utf8_str_p + buffer_size;
1729+ lit_utf8_byte_t *current_p = utf8_str_p;
17581730
17591731 /* Trim front. */
1760- while (! lit_utf8_iterator_is_eos (&front) )
1732+ while (current_p < nonws_start_p )
17611733 {
1762- current = lit_utf8_iterator_read_next (&front);
1763- if (!lit_char_is_white_space (current)
1764- && !lit_char_is_line_terminator (current))
1734+ read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
1735+
1736+ if (!lit_char_is_white_space (ch)
1737+ && !lit_char_is_line_terminator (ch))
17651738 {
1766- lit_utf8_iterator_decr (&front);
1767- start = lit_utf8_iterator_get_pos (&front);
1739+ nonws_start_p = current_p;
17681740 break ;
17691741 }
1742+
1743+ current_p += read_size;
17701744 }
17711745
1746+ current_p = utf8_str_p + buffer_size;
1747+
17721748 /* Trim back. */
1773- while (! lit_utf8_iterator_is_bos (&back) )
1749+ while (current_p > utf8_str_p )
17741750 {
1775- current = lit_utf8_iterator_read_prev (&back);
1776- if (!lit_char_is_white_space (current)
1777- && !lit_char_is_line_terminator (current))
1751+ read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
1752+
1753+ if (!lit_char_is_white_space (ch)
1754+ && !lit_char_is_line_terminator (ch))
17781755 {
1779- lit_utf8_iterator_incr (&back);
1780- end = lit_utf8_iterator_get_pos (&back);
17811756 break ;
17821757 }
1758+
1759+ current_p -= read_size;
17831760 }
17841761
17851762 /* Construct new string. */
1786- if (end. offset > start. offset )
1763+ if (current_p > nonws_start_p )
17871764 {
1788- ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p + start. offset ,
1789- (lit_utf8_size_t ) (end. offset - start. offset ));
1765+ ret_string_p = ecma_new_ecma_string_from_utf8 (nonws_start_p ,
1766+ (lit_utf8_size_t ) (current_p - nonws_start_p ));
17901767 }
17911768 else
17921769 {
0 commit comments