Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
dc6858f
8370230: Bad copyright in NMTPrintMallocSiteOfCorruptedMemory.java af…
Oct 20, 2025
257bb2b
8370079: Re-enable vmTestbase/gc/vector/CircularListLow and LinearLis…
albertnetymk Oct 20, 2025
a1be297
8369854: (ch) Refine specification of behavior of {Gathering,Writable…
Oct 20, 2025
a1302e5
8365625: Can't change accelerator colors in Windows L&F
aivanov-jdk Oct 20, 2025
8145cfa
8352637: Enhance bytecode verification
Apr 17, 2025
d9dad57
8356294: Enhance Path Factories
JoeWang-Java Jun 26, 2025
c448505
8359454: Enhance String handling
rgiulietti Jul 3, 2025
e1d1fa9
8360937: Enhance certificate handling
seanjmullan Jul 9, 2025
c781a2f
8370136: Support async execution of jpackage tests
Oct 21, 2025
0522cf2
8370123: Minor jpackage refactoring
Oct 21, 2025
2de8d58
8366333: AArch64: Enhance SVE subword type implementation of vector c…
erifan Oct 21, 2025
eee2908
8370257: Remove ProblemListed tests from ProblemList.txt
Oct 21, 2025
207fe55
8369902: C2 SuperWord: wrong result because filterin NaN instead of z…
eme64 Oct 21, 2025
634746a
8369898: C2 SuperWord: assert(has_ctrl(i)) failed: should be control,…
eme64 Oct 21, 2025
2aa0efd
8370126: Improve jpackage signing testing
Oct 21, 2025
b6b0f05
8370262: Add jdk/javadoc/doccheck/checks/jdkCheckLinks.java to the Pr…
Oct 21, 2025
430041d
8367499: Refactor exhaustiveness computation from Flow into a separat…
lahodaj Oct 21, 2025
ec13c28
8366829: Add java.time.Duration constants MIN and MAX
pavelrappo Oct 21, 2025
2be273f
8346005: Parallel: Incorrect page size calculation with UseLargePages
albertnetymk Oct 21, 2025
a0c4124
8370078: Remove unnecessary argument in ContiguousSpace::initialize
albertnetymk Oct 21, 2025
ea7186a
8020207: jconsole fails connecting over SSL using service:jmx:rmi://.…
GennadiyKrivoshein Oct 21, 2025
d4c0239
8370240: [PPC64] jhsdb jstack cannot handle continuation stub
dbriemann Oct 21, 2025
517d543
8370234: Remove CardTableBarrierSet::write_region
albertnetymk Oct 21, 2025
2af4d20
8370031: Make RegMask copy constructor explicit and replace RegMask o…
Oct 21, 2025
0529a58
8370326: Parallel: Remove unused ParCompactionManager::push
albertnetymk Oct 21, 2025
b77b910
8369349: Add missing CPE headers
Oct 21, 2025
9a88d7f
8369211: AArch64: Devirtualize class RelocActions
Oct 21, 2025
d55e1b4
8370222: Wrong output for a command in jlink man page
ammbra Oct 21, 2025
b68fa43
Merge
Oct 21, 2025
43e036b
8366424: Missing type profiling in generated Record Object methods
liach Oct 21, 2025
aab3fc5
8370207: Test sun/misc/SunMiscSignalTest.java crashes after JDK-8369631
Oct 21, 2025
cac2519
8356578: Test --mac-entitlements
Oct 21, 2025
ed153ee
8369032: Add test to ensure serialized ICC_Profile stores only necess…
mrserb Oct 21, 2025
94c0611
8370122: jpackage test lib improvements
Oct 22, 2025
70e7861
8370248: AOTMapLogger should check if pointer is in AOTMetaspace
iklam Oct 22, 2025
eff4b11
8369322: Implement native stack printing for Windows-AArch64
swesonga Oct 22, 2025
8d9b2fa
8365072: Refactor tests to use PEM API (Phase 2)
myankelev Oct 22, 2025
27c83c7
8370225: RISC-V: cleanup verify_xxx in interp_masm_riscv.hpp
Oct 22, 2025
6bf3581
8369946: Bytecode rewriting causes Java heap corruption on PPC
TheRealMDoerr Oct 22, 2025
bdfd5e8
8367690: C2: Unneeded branch in reduce_phi
Oct 22, 2025
b8d3c90
8370229: Remove unused method declarations after JDK-8322630
fandreuz Oct 22, 2025
6010457
8370378: Some compiler tests inadvertently exclude particular platforms
TobiHartmann Oct 22, 2025
763d425
8368781: PerfMemory - make issues more transparent
MBaesken Oct 22, 2025
cbbb0a8
8367976: Validate and clamp jdk.httpclient.bufsize
vy Oct 22, 2025
65b3239
8370242: JFR: Clear event reference eagerly when using EventStream
egahlin Oct 22, 2025
92e380c
8361451: Test vmTestbase/metaspace/stressHierarchy/stressHierarchy012…
coleenp Oct 22, 2025
afba636
8369991: Thread blocking during JFR emergency dump must be in safepoi…
Oct 22, 2025
a925461
8370442: Compilation error in jpackage EntitlementsTest test
Oct 22, 2025
d8ebe38
8370377: Avoid resolving constant pool entries during preimage genera…
Oct 22, 2025
4377e7c
8367008: Algorithm identifiers for HmacSHA* should always have NULL a…
koushikthirupattur Oct 22, 2025
45e145f
8359057: AbstractInterpreter::is_not_reached returns incorrectly with…
Oct 22, 2025
2a8cbd9
8359472: JVM crashes when attaching a dynamic agent before JVMTI_PHAS…
fandreuz Oct 22, 2025
0744db8
8367002: Missing compiled exception handler for "recursive" exception
dean-long Oct 22, 2025
be18e7e
8064922: [macos] Test javax/swing/JTabbedPane/4624207/bug4624207.java…
Oct 22, 2025
3e20a93
8370156: Fix jpackage IconTest
Oct 22, 2025
ffcb158
8320677: Printer tests use invalid '@run main/manual=yesno
anass-baya Oct 23, 2025
027aea9
8370325: G1: Disallow GC for TLAB allocation
Oct 23, 2025
389e5a2
8212084
tschatzl Oct 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1266,20 +1266,20 @@ source %{
// adlc register classes to make AArch64 rheapbase (r27) and rfp (r29)
// registers conditionally reserved.

_ANY_REG32_mask = _ALL_REG32_mask;
_ANY_REG32_mask.assignFrom(_ALL_REG32_mask);
_ANY_REG32_mask.remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));

_ANY_REG_mask = _ALL_REG_mask;
_ANY_REG_mask.assignFrom(_ALL_REG_mask);

_PTR_REG_mask = _ALL_REG_mask;
_PTR_REG_mask.assignFrom(_ALL_REG_mask);

_NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
_NO_SPECIAL_REG32_mask.assignFrom(_ALL_REG32_mask);
_NO_SPECIAL_REG32_mask.subtract(_NON_ALLOCATABLE_REG32_mask);

_NO_SPECIAL_REG_mask = _ALL_REG_mask;
_NO_SPECIAL_REG_mask.assignFrom(_ALL_REG_mask);
_NO_SPECIAL_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);

_NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
_NO_SPECIAL_PTR_REG_mask.assignFrom(_ALL_REG_mask);
_NO_SPECIAL_PTR_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);

// r27 is not allocatable when compressed oops is on and heapbase is not
Expand All @@ -1297,7 +1297,7 @@ source %{
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
}

_NO_SPECIAL_NO_RFP_PTR_REG_mask = _NO_SPECIAL_PTR_REG_mask;
_NO_SPECIAL_NO_RFP_PTR_REG_mask.assignFrom(_NO_SPECIAL_PTR_REG_mask);
_NO_SPECIAL_NO_RFP_PTR_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
}

Expand Down Expand Up @@ -2545,27 +2545,27 @@ bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
return false;
}

RegMask Matcher::divI_proj_mask() {
const RegMask& Matcher::divI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
const RegMask& Matcher::modI_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
const RegMask& Matcher::divL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
const RegMask& Matcher::modL_proj_mask() {
ShouldNotReachHere();
return RegMask();
return RegMask::EMPTY;
}

bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
Expand Down
20 changes: 11 additions & 9 deletions src/hotspot/cpu/aarch64/aarch64_vector.ad
Original file line number Diff line number Diff line change
Expand Up @@ -7081,29 +7081,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
%}

instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

instruct vcompressS(vReg dst, vReg src, pReg pg,
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
length_in_bytes);
%}
ins_pipe(pipe_slow);
%}
Expand Down
20 changes: 11 additions & 9 deletions src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
Original file line number Diff line number Diff line change
Expand Up @@ -5069,29 +5069,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
%}

instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}

instruct vcompressS(vReg dst, vReg src, pReg pg,
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
match(Set dst (CompressV src pg));
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
ins_encode %{
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
length_in_bytes);
%}
ins_pipe(pipe_slow);
%}
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3486,6 +3486,7 @@ template<typename R, typename... Rx>
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
INSN(sve_splice,0b00000101, 0b101100100); // splice two vectors under predicate control, destructive
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
INSN(sve_umax, 0b00000100, 0b001001000); // unsigned maximum vectors
Expand Down
149 changes: 76 additions & 73 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2203,114 +2203,117 @@ void C2_MacroAssembler::sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t l
// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
// Any remaining elements of dst will be filled with zero.
// Clobbers: rscratch1
// Preserves: src, mask
// Preserves: mask, vzr
void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
PRegister pgtmp) {
FloatRegister vzr, FloatRegister vtmp,
PRegister pgtmp, unsigned vector_length_in_bytes) {
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(dst, src, vtmp1, vtmp2);
// When called by sve_compress_byte, src and vtmp may be the same register.
assert_different_registers(dst, src, vzr);
assert_different_registers(dst, vtmp, vzr);
assert_different_registers(mask, pgtmp);

// Example input: src = 8888 7777 6666 5555 4444 3333 2222 1111
// mask = 0001 0000 0000 0001 0001 0000 0001 0001
// Expected result: dst = 0000 0000 0000 8888 5555 4444 2222 1111
sve_dup(vtmp2, H, 0);
// high <-- low
// Example input: src = hh gg ff ee dd cc bb aa, one character is 8 bits.
// mask = 01 00 00 01 01 00 01 01, one character is 1 bit.
// Expected result: dst = 00 00 00 hh ee dd bb aa

// Extend lowest half to type INT.
// dst = 00004444 00003333 00002222 00001111
// dst = 00dd 00cc 00bb 00aa
sve_uunpklo(dst, S, src);
// pgtmp = 00000001 00000000 00000001 00000001
// pgtmp = 0001 0000 0001 0001
sve_punpklo(pgtmp, mask);
// Pack the active INT-sized elements to the right,
// and fill the remaining elements with zero.
// dst = 00000000 00004444 00002222 00001111
// dst = 0000 00dd 00bb 00aa
sve_compact(dst, S, dst, pgtmp);
// Narrow the result back to type SHORT.
// dst = 0000 0000 0000 0000 0000 4444 2222 1111
sve_uzp1(dst, H, dst, vtmp2);
// dst = 00 00 00 00 00 dd bb aa
sve_uzp1(dst, H, dst, vzr);

// Return if the vector length is no more than MaxVectorSize/2, since the
// highest half is invalid.
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
return;
}

// Count the active elements of lowest half.
// rscratch1 = 3
sve_cntp(rscratch1, S, ptrue, pgtmp);

// Repeat for the highest half.
// pgtmp = 00000001 00000000 00000000 00000001
// pgtmp = 0001 0000 0000 0001
sve_punpkhi(pgtmp, mask);
// vtmp1 = 00008888 00007777 00006666 00005555
sve_uunpkhi(vtmp1, S, src);
// vtmp1 = 00000000 00000000 00008888 00005555
sve_compact(vtmp1, S, vtmp1, pgtmp);
// vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
sve_uzp1(vtmp1, H, vtmp1, vtmp2);

// Compressed low: dst = 0000 0000 0000 0000 0000 4444 2222 1111
// Compressed high: vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
// TRUE_CNT is the number of active elements in the compressed low.
neg(rscratch1, rscratch1);
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
sve_index(vtmp2, H, rscratch1, 1);
// vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
sve_tbl(vtmp1, H, vtmp1, vtmp2);

// Combine the compressed high(after shifted) with the compressed low.
// dst = 0000 0000 0000 8888 5555 4444 2222 1111
sve_orr(dst, dst, vtmp1);
// vtmp = 00hh 00gg 00ff 00ee
sve_uunpkhi(vtmp, S, src);
// vtmp = 0000 0000 00hh 00ee
sve_compact(vtmp, S, vtmp, pgtmp);
// vtmp = 00 00 00 00 00 00 hh ee
sve_uzp1(vtmp, H, vtmp, vzr);

// pgtmp = 00 00 00 00 00 01 01 01
sve_whilelt(pgtmp, H, zr, rscratch1);
// Compressed low: dst = 00 00 00 00 00 dd bb aa
// Compressed high: vtmp = 00 00 00 00 00 00 hh ee
// Combine the compressed low with the compressed high:
// dst = 00 00 00 hh ee dd bb aa
sve_splice(dst, H, pgtmp, vtmp);
}

// Clobbers: rscratch1, rscratch2
// Preserves: src, mask
void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4,
PRegister ptmp, PRegister pgtmp) {
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes) {
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3);
assert_different_registers(mask, ptmp, pgtmp);
// Example input: src = 88 77 66 55 44 33 22 11
// mask = 01 00 00 01 01 00 01 01
// Expected result: dst = 00 00 00 88 55 44 22 11
// high <-- low
// Example input: src = q p n m l k j i h g f e d c b a, one character is 8 bits.
// mask = 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 1, one character is 1 bit.
// Expected result: dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
FloatRegister vzr = vtmp3;
sve_dup(vzr, B, 0);

sve_dup(vtmp4, B, 0);
// Extend lowest half to type SHORT.
// vtmp1 = 0044 0033 0022 0011
// vtmp1 = 0h 0g 0f 0e 0d 0c 0b 0a
sve_uunpklo(vtmp1, H, src);
// ptmp = 0001 0000 0001 0001
// ptmp = 00 01 00 00 00 01 00 01
sve_punpklo(ptmp, mask);
// Count the active elements of lowest half.
// rscratch2 = 3
sve_cntp(rscratch2, H, ptrue, ptmp);
// Pack the active SHORT-sized elements to the right,
// and fill the remaining elements with zero.
// dst = 0000 0044 0022 0011
sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
// dst = 00 00 00 00 00 0g 0c 0a
unsigned extended_size = vector_length_in_bytes << 1;
sve_compress_short(dst, vtmp1, ptmp, vzr, vtmp2, pgtmp, extended_size > MaxVectorSize ? MaxVectorSize : extended_size);
// Narrow the result back to type BYTE.
// dst = 00 00 00 00 00 44 22 11
sve_uzp1(dst, B, dst, vtmp4);
// dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
sve_uzp1(dst, B, dst, vzr);

// Return if the vector length is no more than MaxVectorSize/2, since the
// highest half is invalid.
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
return;
}
// Count the active elements of lowest half.
// rscratch2 = 3
sve_cntp(rscratch2, H, ptrue, ptmp);

// Repeat for the highest half.
// ptmp = 0001 0000 0000 0001
// ptmp = 00 01 00 00 00 00 00 01
sve_punpkhi(ptmp, mask);
// vtmp1 = 0088 0077 0066 0055
// vtmp2 = 0q 0p 0n 0m 0l 0k 0j 0i
sve_uunpkhi(vtmp2, H, src);
// vtmp1 = 0000 0000 0088 0055
sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);

sve_dup(vtmp4, B, 0);
// vtmp1 = 00 00 00 00 00 00 88 55
sve_uzp1(vtmp1, B, vtmp1, vtmp4);

// Compressed low: dst = 00 00 00 00 00 44 22 11
// Compressed high: vtmp1 = 00 00 00 00 00 00 88 55
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
// TRUE_CNT is the number of active elements in the compressed low.
neg(rscratch2, rscratch2);
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
sve_index(vtmp2, B, rscratch2, 1);
// vtmp1 = 00 00 00 88 55 00 00 00
sve_tbl(vtmp1, B, vtmp1, vtmp2);
// Combine the compressed high(after shifted) with the compressed low.
// dst = 00 00 00 88 55 44 22 11
sve_orr(dst, dst, vtmp1);
// vtmp1 = 00 00 00 00 00 00 0p 0i
sve_compress_short(vtmp1, vtmp2, ptmp, vzr, vtmp2, pgtmp, extended_size - MaxVectorSize);
// vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
sve_uzp1(vtmp1, B, vtmp1, vzr);

// ptmp = 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1
sve_whilelt(ptmp, B, zr, rscratch2);
// Compressed low: dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
// Compressed high: vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
// Combine the compressed low with the compressed high:
// dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
sve_splice(dst, B, ptmp, vtmp1);
}

void C2_MacroAssembler::neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
Expand Down
9 changes: 4 additions & 5 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,12 @@
// lowest-numbered elements of dst. Any remaining elements of dst will
// be filled with zero.
void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4,
PRegister ptmp, PRegister pgtmp);
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes);

void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
FloatRegister vtmp1, FloatRegister vtmp2,
PRegister pgtmp);
FloatRegister vzr, FloatRegister vtmp,
PRegister pgtmp, unsigned vector_length_in_bytes);

void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);

Expand Down
Loading