@@ -642,9 +642,9 @@ class StubGenerator: public StubCodeGenerator {
642642 return start;
643643 }
644644
645- // The inner part of zero_words(). This is the bulk operation,
646- // zeroing words in blocks, using DC ZVA.
647- // The caller is responsible for zeroing the last few words.
645+ // The inner part of zero_words(). This is the bulk operation,
646+ // zeroing words in blocks, possibly using DC ZVA to do it. The
647+ // caller is responsible for zeroing the last few words.
648648 //
649649 // Inputs:
650650 // r10: the HeapWord-aligned base address of an array to zero.
@@ -653,8 +653,10 @@ class StubGenerator: public StubCodeGenerator {
653653 // Returns r10 and r11, adjusted for the caller to clear.
654654 // r10: the base address of the tail of words left to clear.
655655 // r11: the number of words in the tail.
656- // r11 < MAX2(zva_length * 2, (int)BlockZeroingLowLimit)
656+ // r11 < MacroAssembler::zero_words_block_size.
657+
657658 address generate_zero_blocks () {
659+ Label done;
658660 Label base_aligned;
659661
660662 Register base = r10, cnt = r11;
@@ -664,34 +666,51 @@ class StubGenerator: public StubCodeGenerator {
664666 StubCodeMark mark (this , stub_id);
665667 address start = __ pc ();
666668
667- assert (UseBlockZeroing, " only work when UseBlockZeroing is true" );
669+ if (UseBlockZeroing) {
670+ int zva_length = VM_Version::zva_length ();
668671
669- int zva_length = VM_Version::zva_length ();
672+ // Ensure ZVA length can be divided by 16. This is required by
673+ // the subsequent operations.
674+ assert (zva_length % 16 == 0 , " Unexpected ZVA Length" );
670675
671- // Ensure ZVA length can be divided by 16. This is required by
672- // the subsequent operations.
673- assert (zva_length % 16 == 0 , " Unexpected ZVA Length" );
676+ __ tbz (base, 3 , base_aligned);
677+ __ str (zr, Address (__ post (base, 8 )));
678+ __ sub (cnt, cnt, 1 );
679+ __ bind (base_aligned);
674680
675- __ tbz (base, 3 , base_aligned);
676- __ str (zr, Address (__ post (base, 8 )));
677- __ sub (cnt, cnt, 1 );
678- __ bind (base_aligned);
681+ // Ensure count >= zva_length * 2 so that it still deserves a zva after
682+ // alignment.
683+ Label small;
684+ int low_limit = MAX2 (zva_length * 2 , (int )BlockZeroingLowLimit);
685+ __ subs (rscratch1, cnt, low_limit >> 3 );
686+ __ br (Assembler::LT, small);
687+ __ zero_dcache_blocks (base, cnt);
688+ __ bind (small);
689+ }
679690
680- // Ensure count >= zva_length * 2 so that it still deserves a zva after
681- // alignment.
682- Label small;
683- int low_limit = MAX2 (zva_length * 2 , (int )BlockZeroingLowLimit);
684- __ subs (rscratch1, cnt, low_limit >> 3 );
685- __ br (Assembler::LT, small);
686- __ zero_dcache_blocks (base, cnt);
687- __ bind (small);
691+ {
692+ // Zero the remaining words in whole blocks of
693+ // MacroAssembler::zero_words_block_size words. Each loop iteration
694+ // is unrolled into `unroll` STP instructions, two words per STP.
695+ const int unroll = MacroAssembler::zero_words_block_size / 2 ;
696+ // Clear the remaining blocks.
697+ Label loop;
698+ __ subs (cnt, cnt, unroll * 2 );
699+ __ br (Assembler::LT, done);
700+ __ bind (loop);
701+ for (int i = 0 ; i < unroll; i++)
702+ __ stp (zr, zr, __ post (base, 16 ));
703+ __ subs (cnt, cnt, unroll * 2 );
704+ __ br (Assembler::GE, loop);
705+ __ bind (done);
706+ __ add (cnt, cnt, unroll * 2 );
707+ }
688708
689709 __ ret (lr);
690710
691711 return start;
692712 }
693713
694-
695714 typedef enum {
696715 copy_forwards = 1 ,
697716 copy_backwards = -1
0 commit comments