@@ -4491,13 +4491,21 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len,
44914491// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
44924492void C2_MacroAssembler::arrays_equals (bool is_array_equ, Register ary1, Register ary2,
44934493 Register limit, Register result, Register chr,
4494- XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
4494+ XMMRegister vec1, XMMRegister vec2, bool is_char,
4495+ KRegister mask, bool expand_ary2) {
4496+ // for expand_ary2, limit is the (smaller) size of the second array.
44954497 ShortBranchVerifier sbv (this );
44964498 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
44974499
4500+ assert ((!expand_ary2) || ((expand_ary2) && (UseAVX == 2 )),
4501+ " Expansion only implemented for AVX2" );
4502+
44984503 int length_offset = arrayOopDesc::length_offset_in_bytes ();
44994504 int base_offset = arrayOopDesc::base_offset_in_bytes (is_char ? T_CHAR : T_BYTE);
45004505
4506+ Address::ScaleFactor scaleFactor = expand_ary2 ? Address::times_2 : Address::times_1;
4507+ int scaleIncr = expand_ary2 ? 8 : 16 ;
4508+
45014509 if (is_array_equ) {
45024510 // Check the input args
45034511 cmpoop (ary1, ary2);
@@ -4533,14 +4541,20 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
45334541
45344542 if (UseAVX >= 2 ) {
45354543 // With AVX2, use 32-byte vector compare
4536- Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
4544+ Label COMPARE_WIDE_VECTORS, COMPARE_WIDE_VECTORS_16, COMPARE_TAIL, COMPARE_TAIL_16 ;
45374545
45384546 // Compare 32-byte vectors
4539- andl (result, 0x0000001f ); // tail count (in bytes)
4540- andl (limit, 0xffffffe0 ); // vector count (in bytes)
4541- jcc (Assembler::zero, COMPARE_TAIL);
4547+ if (expand_ary2) {
4548+ andl (result, 0x0000000f ); // tail count (in bytes)
4549+ andl (limit, 0xfffffff0 ); // vector count (in bytes)
4550+ jcc (Assembler::zero, COMPARE_TAIL);
4551+ } else {
4552+ andl (result, 0x0000001f ); // tail count (in bytes)
4553+ andl (limit, 0xffffffe0 ); // vector count (in bytes)
4554+ jcc (Assembler::zero, COMPARE_TAIL_16);
4555+ }
45424556
4543- lea (ary1, Address (ary1, limit, Address::times_1 ));
4557+ lea (ary1, Address (ary1, limit, scaleFactor ));
45444558 lea (ary2, Address (ary2, limit, Address::times_1));
45454559 negptr (limit);
45464560
@@ -4583,25 +4597,59 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
45834597 }// if (VM_Version::supports_avx512vlbw())
45844598#endif // _LP64
45854599 bind (COMPARE_WIDE_VECTORS);
4586- vmovdqu (vec1, Address (ary1, limit, Address::times_1));
4587- vmovdqu (vec2, Address (ary2, limit, Address::times_1));
4600+ vmovdqu (vec1, Address (ary1, limit, scaleFactor));
4601+ if (expand_ary2) {
4602+ vpmovzxbw (vec2, Address (ary2, limit, Address::times_1), Assembler::AVX_256bit);
4603+ } else {
4604+ vmovdqu (vec2, Address (ary2, limit, Address::times_1));
4605+ }
45884606 vpxor (vec1, vec2);
45894607
45904608 vptest (vec1, vec1);
45914609 jcc (Assembler::notZero, FALSE_LABEL);
4592- addptr (limit, 32 );
4610+ addptr (limit, scaleIncr * 2 );
45934611 jcc (Assembler::notZero, COMPARE_WIDE_VECTORS);
45944612
45954613 testl (result, result);
45964614 jcc (Assembler::zero, TRUE_LABEL);
45974615
4598- vmovdqu (vec1, Address (ary1, result, Address::times_1, -32 ));
4599- vmovdqu (vec2, Address (ary2, result, Address::times_1, -32 ));
4616+ vmovdqu (vec1, Address (ary1, result, scaleFactor, -32 ));
4617+ if (expand_ary2) {
4618+ vpmovzxbw (vec2, Address (ary2, result, Address::times_1, -16 ), Assembler::AVX_256bit);
4619+ } else {
4620+ vmovdqu (vec2, Address (ary2, result, Address::times_1, -32 ));
4621+ }
46004622 vpxor (vec1, vec2);
46014623
46024624 vptest (vec1, vec1);
4603- jccb (Assembler::notZero, FALSE_LABEL);
4604- jmpb (TRUE_LABEL);
4625+ jcc (Assembler::notZero, FALSE_LABEL);
4626+ jmp (TRUE_LABEL);
4627+
4628+ bind (COMPARE_TAIL_16); // limit is zero
4629+ movl (limit, result);
4630+
4631+ // Compare 16-byte chunks
4632+ andl (result, 0x0000000f ); // tail count (in bytes)
4633+ andl (limit, 0xfffffff0 ); // vector count (in bytes)
4634+ jcc (Assembler::zero, COMPARE_TAIL);
4635+
4636+ lea (ary1, Address (ary1, limit, scaleFactor));
4637+ lea (ary2, Address (ary2, limit, Address::times_1));
4638+ negptr (limit);
4639+
4640+ bind (COMPARE_WIDE_VECTORS_16);
4641+ movdqu (vec1, Address (ary1, limit, scaleFactor));
4642+ if (expand_ary2) {
4643+ vpmovzxbw (vec2, Address (ary2, limit, Address::times_1), Assembler::AVX_128bit);
4644+ } else {
4645+ movdqu (vec2, Address (ary2, limit, Address::times_1));
4646+ }
4647+ pxor (vec1, vec2);
4648+
4649+ ptest (vec1, vec1);
4650+ jcc (Assembler::notZero, FALSE_LABEL);
4651+ addptr (limit, scaleIncr);
4652+ jcc (Assembler::notZero, COMPARE_WIDE_VECTORS_16);
46054653
46064654 bind (COMPARE_TAIL); // limit is zero
46074655 movl (limit, result);
@@ -4646,19 +4694,34 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
46464694 }
46474695
46484696 // Compare 4-byte vectors
4649- andl (limit, 0xfffffffc ); // vector count (in bytes)
4650- jccb (Assembler::zero, COMPARE_CHAR);
4697+ if (expand_ary2) {
4698+ testl (result, result);
4699+ jccb (Assembler::zero, TRUE_LABEL);
4700+ } else {
4701+ andl (limit, 0xfffffffc ); // vector count (in bytes)
4702+ jccb (Assembler::zero, COMPARE_CHAR);
4703+ }
46514704
4652- lea (ary1, Address (ary1, limit, Address::times_1 ));
4705+ lea (ary1, Address (ary1, limit, scaleFactor ));
46534706 lea (ary2, Address (ary2, limit, Address::times_1));
46544707 negptr (limit);
46554708
46564709 bind (COMPARE_VECTORS);
4657- movl (chr, Address (ary1, limit, Address::times_1));
4658- cmpl (chr, Address (ary2, limit, Address::times_1));
4659- jccb (Assembler::notEqual, FALSE_LABEL);
4660- addptr (limit, 4 );
4661- jcc (Assembler::notZero, COMPARE_VECTORS);
4710+ if (expand_ary2) {
4711+ // There are no "vector" operations for bytes to shorts
4712+ movzbl (chr, Address (ary2, limit, Address::times_1));
4713+ cmpw (Address (ary1, limit, Address::times_2), chr);
4714+ jccb (Assembler::notEqual, FALSE_LABEL);
4715+ addptr (limit, 1 );
4716+ jcc (Assembler::notZero, COMPARE_VECTORS);
4717+ jmp (TRUE_LABEL);
4718+ } else {
4719+ movl (chr, Address (ary1, limit, Address::times_1));
4720+ cmpl (chr, Address (ary2, limit, Address::times_1));
4721+ jccb (Assembler::notEqual, FALSE_LABEL);
4722+ addptr (limit, 4 );
4723+ jcc (Assembler::notZero, COMPARE_VECTORS);
4724+ }
46624725
46634726 // Compare trailing char (final 2 bytes), if any
46644727 bind (COMPARE_CHAR);
0 commit comments