@@ -215,6 +215,120 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
215215 return string_desc_p ;
216216} /* ecma_new_ecma_string_from_utf8 */
217217
218+ /**
219+ * Allocate a new ecma-string and initialize it from the utf8 string argument.
220+ * All 4-bytes long unicode sequences are converted into two 3-bytes long sequences.
221+ *
222+ * @return pointer to ecma-string descriptor
223+ */
224+ ecma_string_t *
225+ ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t * string_p , /**< utf-8 string */
226+ lit_utf8_size_t string_size ) /**< utf-8 string size */
227+ {
228+ JERRY_ASSERT (string_p != NULL || string_size == 0 );
229+
230+ ecma_string_t * string_desc_p = NULL ;
231+
232+ ecma_length_t string_length = 0 ;
233+ lit_utf8_size_t converted_string_size = 0 ;
234+ lit_utf8_size_t pos = 0 ;
235+
236+ /* Calculate the required length and size information of the converted cesu-8 encoded string */
237+ while (pos < string_size )
238+ {
239+ if ((string_p [pos ] & LIT_UTF8_1_BYTE_MASK ) == LIT_UTF8_1_BYTE_MARKER )
240+ {
241+ pos ++ ;
242+ }
243+ else if ((string_p [pos ] & LIT_UTF8_2_BYTE_MASK ) == LIT_UTF8_2_BYTE_MARKER )
244+ {
245+ pos += 2 ;
246+ }
247+ else if ((string_p [pos ] & LIT_UTF8_3_BYTE_MASK ) == LIT_UTF8_3_BYTE_MARKER )
248+ {
249+ pos += 3 ;
250+ }
251+ else
252+ {
253+ JERRY_ASSERT ((string_p [pos ] & LIT_UTF8_4_BYTE_MASK ) == LIT_UTF8_4_BYTE_MARKER );
254+ pos += 4 ;
255+ converted_string_size += 2 ;
256+ }
257+
258+ string_length ++ ;
259+ }
260+
261+ JERRY_ASSERT (pos == string_size );
262+
263+ if (converted_string_size == 0 )
264+ {
265+ return ecma_new_ecma_string_from_utf8 (string_p , string_size );
266+ }
267+ else
268+ {
269+ converted_string_size += string_size ;
270+
271+ JERRY_ASSERT (lit_is_utf8_string_valid (string_p , string_size ));
272+
273+ lit_utf8_byte_t * data_p ;
274+
275+ if (likely (string_size <= UINT16_MAX ))
276+ {
277+ string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t ) + converted_string_size );
278+
279+ string_desc_p -> refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE ;
280+ string_desc_p -> u .common_field = 0 ;
281+ string_desc_p -> u .utf8_string .size = (uint16_t ) converted_string_size ;
282+ string_desc_p -> u .utf8_string .length = (uint16_t ) string_length ;
283+
284+ data_p = (lit_utf8_byte_t * ) (string_desc_p + 1 );
285+ }
286+ else
287+ {
288+ string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t ) + converted_string_size );
289+
290+ string_desc_p -> refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE ;
291+ string_desc_p -> u .common_field = 0 ;
292+ string_desc_p -> u .long_utf8_string_size = converted_string_size ;
293+
294+ ecma_long_string_t * long_string_desc_p = (ecma_long_string_t * ) string_desc_p ;
295+ long_string_desc_p -> long_utf8_string_length = string_length ;
296+
297+ data_p = (lit_utf8_byte_t * ) (long_string_desc_p + 1 );
298+ }
299+
300+ pos = 0 ;
301+
302+ while (pos < string_size )
303+ {
304+ if ((string_p [pos ] & LIT_UTF8_4_BYTE_MASK ) == LIT_UTF8_4_BYTE_MARKER )
305+ {
306+ /* Processing 4 byte unicode sequence. Always converted to two 3 byte long sequence. */
307+ uint32_t character = ((((uint32_t ) string_p [pos ++ ]) & 0x7 ) << 18 );
308+ character |= ((((uint32_t ) string_p [pos ++ ]) & LIT_UTF8_LAST_6_BITS_MASK ) << 12 );
309+ character |= ((((uint32_t ) string_p [pos ++ ]) & LIT_UTF8_LAST_6_BITS_MASK ) << 6 );
310+ character |= (((uint32_t ) string_p [pos ++ ]) & LIT_UTF8_LAST_6_BITS_MASK );
311+
312+ JERRY_ASSERT (character >= 0x10000 );
313+ character -= 0x10000 ;
314+
315+ data_p += lit_char_to_utf8_bytes (data_p , (ecma_char_t ) (0xd800 | (character >> 10 )));
316+ data_p += lit_char_to_utf8_bytes (data_p , (ecma_char_t ) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK )));
317+ }
318+ else
319+ {
320+ * data_p ++ = string_p [pos ++ ];
321+ }
322+ }
323+
324+ JERRY_ASSERT (pos == string_size );
325+
326+ string_desc_p -> hash = lit_utf8_string_calc_hash (data_p , converted_string_size );
327+ }
328+
329+ return string_desc_p ;
330+ } /* ecma_new_ecma_string_from_utf8_converted_to_cesu8 */
331+
218332/**
219333 * Allocate new ecma-string and fill it with cesu-8 character which represents specified code unit
220334 *
0 commit comments