Skip to content
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c7f0e28
8284960: Integration of JEP 426: Vector API (Fourth Incubator)
Apr 26, 2022
fe4a064
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
Apr 26, 2022
4a5bea7
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
Apr 28, 2022
c87593e
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
Apr 28, 2022
fe74c7f
8284960: AARCH64 backend changes.
Apr 28, 2022
39f8ef3
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
Apr 28, 2022
667a201
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
Apr 29, 2022
b682bc3
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
May 4, 2022
8700dc5
8284960: Integrating changes from panama-vector (Add @since 19 tags).
May 4, 2022
731787e
8284960: Correcting a typo.
May 4, 2022
b021e08
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
May 9, 2022
adf205f
8284960: Review comments resolution.
May 12, 2022
df7eb90
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
May 16, 2022
0b7f84b
8284960: Adding --enable-preview in vectorAPI benchmarks.
May 17, 2022
823ba8c
8284960: Changes to enable jdk.incubator.vector to be treated as prev…
May 19, 2022
311f323
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
May 19, 2022
17a0e38
8284960: Integrating incremental patches.
May 19, 2022
a2c9673
8284960: Review comments resolved.
May 24, 2022
d17f99c
Merge branch 'master' of http://github.com/openjdk/jdk into JDK-8284960
May 24, 2022
0f6e158
8284960: Post merge cleanups.
May 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2434,6 +2434,14 @@ const bool Matcher::match_rule_supported(int opcode) {
ret_value = false;
}
break;
case Op_PopCountI:
case Op_PopCountL:
case Op_PopCountVI:
case Op_PopCountVL:
if (!UsePopCountInstruction) {
ret_value = false;
}
break;
}

return ret_value; // Per default match rules are supported.
Expand Down Expand Up @@ -2475,6 +2483,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_CompressV:
case Op_CompressM:
case Op_ExpandV:
return false;
default:
break;
Expand Down Expand Up @@ -8642,7 +8653,6 @@ instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
//

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
Expand All @@ -8653,6 +8663,7 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
"addv $tmp, $tmp\t# vector (8B)\n\t"
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
assert(UsePopCountInstruction, "unsupported");
__ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
__ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
Expand All @@ -8664,7 +8675,6 @@ instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
%}

instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI (LoadI mem)));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
Expand All @@ -8674,6 +8684,7 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
"addv $tmp, $tmp\t# vector (8B)\n\t"
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
assert(UsePopCountInstruction, "unsupported");
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
Expand All @@ -8687,7 +8698,6 @@ instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
Expand All @@ -8697,6 +8707,7 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
"addv $tmp, $tmp\t# vector (8B)\n\t"
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
assert(UsePopCountInstruction, "unsupported");
__ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
__ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
__ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
Expand All @@ -8707,7 +8718,6 @@ instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
%}

instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL (LoadL mem)));
effect(TEMP tmp);
ins_cost(INSN_COST * 13);
Expand All @@ -8717,6 +8727,7 @@ instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
"addv $tmp, $tmp\t# vector (8B)\n\t"
"mov $dst, $tmp\t# vector (1D)" %}
ins_encode %{
assert(UsePopCountInstruction, "unsupported");
FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
Expand Down
214 changes: 197 additions & 17 deletions src/hotspot/cpu/aarch64/aarch64_neon.ad
Original file line number Diff line number Diff line change
Expand Up @@ -5683,39 +5683,91 @@ instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
ins_pipe(vdop_fp128);
%}

instruct vpopcount4I(vecX dst, vecX src) %{
predicate(UsePopCountInstruction && n->as_Vector()->length() == 4);
instruct vpopcountID(vecD dst, vecD src) %{
predicate(n->as_Vector()->length_in_bytes() < 16);
match(Set dst (PopCountVI src));
format %{
"cnt $dst, $src\t# vector (16B)\n\t"
"uaddlp $dst, $dst\t# vector (16B)\n\t"
"uaddlp $dst, $dst\t# vector (8H)"
ins_cost(3 * INSN_COST);
format %{ "vpopcountI $dst, $src\t# vector (8B/4H/2S)" %}
ins_encode %{
assert(UsePopCountInstruction, "unsupported");
BasicType bt = Matcher::vector_element_basic_type(this);
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($src$$reg));
if (bt == T_SHORT || bt == T_INT) {
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
as_FloatRegister($dst$$reg));
}
if (bt == T_INT) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it hard to reason about the code in its current form.

Maybe make the second if (bt == T_INT) nested and move it under if (bt == T_SHORT || bt == T_INT)?

__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
as_FloatRegister($dst$$reg));
}
%}
ins_pipe(pipe_class_default);
%}

// Population count per element for a 16-byte vector (PopCountVI).
// cnt produces a bit count per byte; every following uaddlp adds adjacent
// lanes pairwise into lanes of twice the width, so short elements need one
// widening step and int elements need two. Byte elements need none.
instruct vpopcountIX(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (PopCountVI src));
  ins_cost(3 * INSN_COST);
  format %{ "vpopcountI $dst, $src\t# vector (16B/8H/4S)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ cnt(dst_reg, __ T16B, src_reg);
    if (elem_bt == T_SHORT || elem_bt == T_INT) {
      // bytes -> halfwords
      __ uaddlp(dst_reg, __ T16B, dst_reg);
      if (elem_bt == T_INT) {
        // halfwords -> words
        __ uaddlp(dst_reg, __ T8H, dst_reg);
      }
    }
  %}
  ins_pipe(pipe_class_default);
%}

// PopCountVL whose result element type is T_INT. This is the shape produced
// by auto-vectorization; once the Vector API and auto-vectorization agree on
// the type definition, this rule can be merged with the "vpopcountLX" rule.
//
// The counts are accumulated byte -> halfword -> word -> doubleword with
// three uaddlp steps, then xtn narrows the two 64-bit counts to 2S so the
// result fits the vecD destination.
instruct vpopcountLD(vecD dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() < 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (PopCountVL src));
  ins_cost(5 * INSN_COST);
  format %{ "vpopcountL $dst, $src\t# vector (2S)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ cnt(dst_reg, __ T16B, src_reg);
    __ uaddlp(dst_reg, __ T16B, dst_reg);
    __ uaddlp(dst_reg, __ T8H, dst_reg);
    __ uaddlp(dst_reg, __ T4S, dst_reg);
    __ xtn(dst_reg, __ T2S, dst_reg, __ T2D);
  %}
  ins_pipe(pipe_class_default);
%}

instruct vpopcount2I(vecD dst, vecD src) %{
predicate(UsePopCountInstruction && n->as_Vector()->length() == 2);
match(Set dst (PopCountVI src));
format %{
"cnt $dst, $src\t# vector (8B)\n\t"
"uaddlp $dst, $dst\t# vector (8B)\n\t"
"uaddlp $dst, $dst\t# vector (4H)"
%}
instruct vpopcountLX(vecX dst, vecX src) %{
predicate(n->as_Vector()->length_in_bytes() == 16 &&
n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst (PopCountVL src));
ins_cost(4 * INSN_COST);
format %{ "vpopcountL $dst, $src\t# vector (2D)" %}
ins_encode %{
__ cnt(as_FloatRegister($dst$$reg), __ T8B,
assert(UsePopCountInstruction, "unsupported");
__ cnt(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($src$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
__ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
__ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
as_FloatRegister($dst$$reg));
__ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
Expand Down Expand Up @@ -5921,3 +5973,131 @@ instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
%}
ins_pipe(pipe_slow);
%}

//------------------------- CountLeadingZerosV -----------------------------

// CountLeadingZerosV for 8-byte vectors: a single vector clz whose
// arrangement is derived from the element size in bytes.
instruct countLeadingZerosVD(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (CountLeadingZerosV src));
  ins_cost(INSN_COST);
  format %{ "countLeadingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    unsigned elem_bytes = (unsigned)type2aelembytes(elem_bt);
    // NOTE(review): the 'false' argument presumably selects the 64-bit
    // (non-Q) arrangement - confirm against esize2arrangement.
    Assembler::SIMD_Arrangement arrangement = __ esize2arrangement(elem_bytes, false);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ clz(dst_reg, arrangement, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// CountLeadingZerosV for 16-byte vectors.
// Byte/short/int elements use one vector clz. T_LONG elements are handled
// lane by lane through rscratch1 with the scalar clz instead (the NEON
// vector clz has no 2D arrangement).
instruct countLeadingZerosVX(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (CountLeadingZerosV src));
  ins_cost(INSN_COST);
  format %{ "countLeadingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    unsigned elem_bytes = (unsigned)type2aelembytes(elem_bt);
    Assembler::SIMD_Arrangement arrangement = __ esize2arrangement(elem_bytes, true);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (elem_bt == T_LONG) {
      // Extract each 64-bit lane, count in a general register, insert back.
      for (int lane = 0; lane < 2; lane++) {
        __ umov(rscratch1, src_reg, __ D, lane);
        __ clz(rscratch1, rscratch1);
        __ mov(dst_reg, __ D, lane, rscratch1);
      }
    } else {
      __ clz(dst_reg, arrangement, src_reg);
    }
  %}
  ins_pipe(pipe_slow);
%}

//------------------------- CountTrailingZerosV ----------------------------

// CountTrailingZerosV for 8-byte vectors: reverse the bits of every element
// into dst, then count leading zeros of the reversed value in place.
instruct countTrailingZerosVD(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (CountTrailingZerosV src));
  ins_cost(3 * INSN_COST);
  format %{ "countTrailingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    unsigned elem_bytes = (unsigned)type2aelembytes(elem_bt);
    Assembler::SIMD_Arrangement arrangement = __ esize2arrangement(elem_bytes, false);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ neon_reverse_bits(dst_reg, src_reg, elem_bt, false);
    __ clz(dst_reg, arrangement, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}

// CountTrailingZerosV for 16-byte vectors: bit-reverse every element into
// dst, then count leading zeros of the reversed value. T_LONG elements take
// the scalar path, one 64-bit lane at a time through rscratch1; all other
// element types use a single vector clz.
instruct countTrailingZerosVX(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (CountTrailingZerosV src));
  ins_cost(3 * INSN_COST);
  format %{ "countTrailingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    unsigned elem_bytes = (unsigned)type2aelembytes(elem_bt);
    Assembler::SIMD_Arrangement arrangement = __ esize2arrangement(elem_bytes, true);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ neon_reverse_bits(dst_reg, src_reg, elem_bt, true);
    if (elem_bt == T_LONG) {
      // dst already holds the reversed lanes; process each one in place.
      for (int lane = 0; lane < 2; lane++) {
        __ umov(rscratch1, dst_reg, __ D, lane);
        __ clz(rscratch1, rscratch1);
        __ mov(dst_reg, __ D, lane, rscratch1);
      }
    } else {
      __ clz(dst_reg, arrangement, dst_reg);
    }
  %}
  ins_pipe(pipe_slow);
%}

//------------------------------ ReverseV -----------------------------------

// ReverseV for 8-byte vectors; the work is delegated to neon_reverse_bits,
// parameterized by the vector's element type.
instruct vreverseD(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (ReverseV src));
  ins_cost(2 * INSN_COST);
  format %{ "ReverseV $dst, $src\t# vector (D)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // NOTE(review): trailing 'false' presumably means "not a 128-bit
    // vector" - confirm against neon_reverse_bits.
    __ neon_reverse_bits(dst_reg, src_reg, elem_bt, false);
  %}
  ins_pipe(pipe_slow);
%}

// ReverseV for 16-byte vectors; the work is delegated to neon_reverse_bits,
// parameterized by the vector's element type.
instruct vreverseX(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (ReverseV src));
  ins_cost(2 * INSN_COST);
  format %{ "ReverseV $dst, $src\t# vector (X)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // NOTE(review): trailing 'true' presumably selects the 128-bit
    // form - confirm against neon_reverse_bits.
    __ neon_reverse_bits(dst_reg, src_reg, elem_bt, true);
  %}
  ins_pipe(pipe_slow);
%}

//---------------------------- ReverseBytesV --------------------------------

// ReverseBytesV for 8-byte vectors; the work is delegated to
// neon_reverse_bytes, parameterized by the vector's element type.
instruct vreverseBytesD(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (ReverseBytesV src));
  ins_cost(INSN_COST);
  format %{ "ReverseBytesV $dst, $src\t# vector (D)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // NOTE(review): trailing 'false' presumably means "not a 128-bit
    // vector" - confirm against neon_reverse_bytes.
    __ neon_reverse_bytes(dst_reg, src_reg, elem_bt, false);
  %}
  ins_pipe(pipe_slow);
%}

// ReverseBytesV for 16-byte vectors; the work is delegated to
// neon_reverse_bytes, parameterized by the vector's element type.
instruct vreverseBytesX(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (ReverseBytesV src));
  ins_cost(INSN_COST);
  format %{ "ReverseBytesV $dst, $src\t# vector (X)" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // NOTE(review): trailing 'true' presumably selects the 128-bit
    // form - confirm against neon_reverse_bytes.
    __ neon_reverse_bytes(dst_reg, src_reg, elem_bt, true);
  %}
  ins_pipe(pipe_slow);
%}
Loading