@@ -636,10 +636,56 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
636636 return build_string (string , stringEnd , intern , symbolize );
637637}
638638
/*
 * Positions threaded through json_string_unescape() and its backslash
 * scanner while an escaped JSON string is decoded.
 */
struct _json_unescape_cursor {
    /* First source byte that has not yet been copied to the output. */
    const char *p;
    /* Source scan position: the backslash last located, then advanced by
     * the caller while it decodes the escape sequence. */
    const char *pe;
    /* Next write position in the output buffer. */
    char *buffer;
};

typedef struct _json_unescape_cursor JSON_UnescapeCursor;
644+
645+ static ALWAYS_INLINE () int json_copy_and_find_next_backslash (JSON_UnescapeCursor * cursor , const char * stringEnd )
646+ {
647+ if (cursor -> pe >= stringEnd ) {
648+ return 0 ;
649+ }
650+
651+ const char * p = cursor -> p ;
652+
653+ #ifdef HAVE_SIMD_NEON
654+ while (p + sizeof (uint8x16_t ) <= stringEnd ) {
655+ uint8x16_t chunk = vld1q_u8 ((const unsigned char * )p );
656+ vst1q_u8 ((unsigned char * )cursor -> buffer , chunk );
657+ uint8x16_t has_backslash = vceqq_u8 (chunk , vdupq_n_u8 ('\\' ));
658+ uint64_t mask = neon_match_mask (has_backslash );
659+ if (mask ) {
660+ uint32_t index = trailing_zeros64 (mask ) >> 2 ;
661+ cursor -> buffer += index ;
662+ cursor -> p = p + index ;
663+ cursor -> pe = p + index ;
664+ return 1 ;
665+ }
666+ p += sizeof (uint8x16_t );
667+ cursor -> buffer += sizeof (uint8x16_t );
668+ cursor -> p = p ;
669+ cursor -> pe = p ;
670+ }
671+ #endif
672+
673+ cursor -> pe = memchr (p , '\\' , stringEnd - p );
674+ if (cursor -> pe ) {
675+ if (cursor -> pe > p ) {
676+ MEMCPY (cursor -> buffer , p , char , cursor -> pe - p );
677+ cursor -> buffer += cursor -> pe - p ;
678+ cursor -> p = cursor -> pe ;
679+ }
680+ return 1 ;
681+ }
682+ return 0 ;
683+ }
684+
639685static VALUE json_string_unescape (JSON_ParserState * state , const char * string , const char * stringEnd , bool is_name , bool intern , bool symbolize )
640686{
641687 size_t bufferSize = stringEnd - string ;
642- const char * p = string , * pe = string , * unescape , * bufferStart ;
688+ const char * unescape , * bufferStart ;
643689 char * buffer ;
644690 int unescape_len ;
645691 char buf [4 ];
@@ -649,14 +695,15 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
649695 buffer = RSTRING_PTR (result );
650696 bufferStart = buffer ;
651697
652- while (pe < stringEnd && (pe = memchr (pe , '\\' , stringEnd - pe ))) {
698+ JSON_UnescapeCursor cursor ;
699+ cursor .p = string ;
700+ cursor .pe = string ;
701+ cursor .buffer = buffer ;
702+
703+ while (json_copy_and_find_next_backslash (& cursor , stringEnd )) {
653704 unescape = (char * ) "?" ;
654705 unescape_len = 1 ;
655- if (pe > p ) {
656- MEMCPY (buffer , p , char , pe - p );
657- buffer += pe - p ;
658- }
659- switch (* ++ pe ) {
706+ switch (* ++ cursor .pe ) {
660707 case 'n' :
661708 unescape = (char * ) "\n" ;
662709 break ;
@@ -679,11 +726,11 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
679726 unescape = (char * ) "\f" ;
680727 break ;
681728 case 'u' :
682- if (pe > stringEnd - 5 ) {
683- raise_parse_error_at ("incomplete unicode character escape sequence at %s" , state , p );
729+ if (cursor . pe > stringEnd - 5 ) {
730+ raise_parse_error_at ("incomplete unicode character escape sequence at %s" , state , cursor . p );
684731 } else {
685- uint32_t ch = unescape_unicode (state , (unsigned char * ) ++ pe );
686- pe += 3 ;
732+ uint32_t ch = unescape_unicode (state , (unsigned char * ) ++ cursor . pe );
733+ cursor . pe += 3 ;
687734 /* To handle values above U+FFFF, we take a sequence of
688735 * \uXXXX escapes in the U+D800..U+DBFF then
689736 * U+DC00..U+DFFF ranges, take the low 10 bits from each
@@ -695,22 +742,22 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
695742 * Area".
696743 */
697744 if ((ch & 0xFC00 ) == 0xD800 ) {
698- pe ++ ;
699- if (pe > stringEnd - 6 ) {
700- raise_parse_error_at ("incomplete surrogate pair at %s" , state , p );
745+ cursor . pe ++ ;
746+ if (cursor . pe > stringEnd - 6 ) {
747+ raise_parse_error_at ("incomplete surrogate pair at %s" , state , cursor . p );
701748 }
702- if (pe [0 ] == '\\' && pe [1 ] == 'u' ) {
703- uint32_t sur = unescape_unicode (state , (unsigned char * ) pe + 2 );
749+ if (cursor . pe [0 ] == '\\' && cursor . pe [1 ] == 'u' ) {
750+ uint32_t sur = unescape_unicode (state , (unsigned char * ) cursor . pe + 2 );
704751
705752 if ((sur & 0xFC00 ) != 0xDC00 ) {
706- raise_parse_error_at ("invalid surrogate pair at %s" , state , p );
753+ raise_parse_error_at ("invalid surrogate pair at %s" , state , cursor . p );
707754 }
708755
709756 ch = (((ch & 0x3F ) << 10 ) | ((((ch >> 6 ) & 0xF ) + 1 ) << 16 )
710757 | (sur & 0x3FF ));
711- pe += 5 ;
758+ cursor . pe += 5 ;
712759 } else {
713- raise_parse_error_at ("incomplete surrogate pair at %s" , state , p );
760+ raise_parse_error_at ("incomplete surrogate pair at %s" , state , cursor . p );
714761 break ;
715762 }
716763 }
@@ -719,19 +766,19 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
719766 }
720767 break ;
721768 default :
722- p = pe ;
769+ cursor . p = cursor . pe ;
723770 continue ;
724771 }
725- MEMCPY (buffer , unescape , char , unescape_len );
726- buffer += unescape_len ;
727- p = ++ pe ;
772+ MEMCPY (cursor . buffer , unescape , char , unescape_len );
773+ cursor . buffer += unescape_len ;
774+ cursor . p = ++ cursor . pe ;
728775 }
729776
730- if (stringEnd > p ) {
731- MEMCPY (buffer , p , char , stringEnd - p );
732- buffer += stringEnd - p ;
777+ if (stringEnd > cursor . p ) {
778+ MEMCPY (cursor . buffer , cursor . p , char , stringEnd - cursor . p );
779+ cursor . buffer += stringEnd - cursor . p ;
733780 }
734- rb_str_set_len (result , buffer - bufferStart );
781+ rb_str_set_len (result , cursor . buffer - bufferStart );
735782
736783 if (symbolize ) {
737784 result = rb_str_intern (result );
0 commit comments