3 changes: 3 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64.ad
@@ -2007,6 +2007,9 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
 
   if (bottom_type()->isa_vect() && !bottom_type()->isa_vectmask()) {
     uint ireg = ideal_reg();
+    DEBUG_ONLY(int algm = MIN2(RegMask::num_registers(ireg), (int)Matcher::stack_alignment_in_slots()) * VMRegImpl::stack_slot_size);
+    assert((src_lo_rc != rc_stack) || is_aligned(src_offset, algm), "unaligned vector spill sp offset %d (src)", src_offset);
+    assert((dst_lo_rc != rc_stack) || is_aligned(dst_offset, algm), "unaligned vector spill sp offset %d (dst)", dst_offset);
     if (ireg == Op_VecA && masm) {
       int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
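For context, the granule checked by the new asserts is the smaller of the spilled register set's size and the stack alignment, converted to bytes. A standalone sketch of that check with assumed example values (4-byte stack slots, a 128-bit vector occupying 4 slots, 16-byte stack alignment), not the real HotSpot accessors:

#include <algorithm>
#include <cassert>

int main() {
  const int stack_slot_size       = 4;  // assumed: VMRegImpl::stack_slot_size
  const int vector_slots          = 4;  // assumed: RegMask::num_registers() for a 128-bit vector
  const int stack_alignment_slots = 4;  // assumed: Matcher::stack_alignment_in_slots()

  // Alignment granule in bytes, mirroring the DEBUG_ONLY(algm) computation above.
  int algm = std::min(vector_slots, stack_alignment_slots) * stack_slot_size;  // 16

  // What the asserts require: a stack-based spill offset is a multiple of the granule.
  int src_offset = 32;
  assert(src_offset % algm == 0);
  return 0;
}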
70 changes: 42 additions & 28 deletions src/hotspot/cpu/ppc/ppc.ad
@@ -1795,69 +1795,77 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
     return size; // Self copy, no move.
 
   if (bottom_type()->isa_vect() != nullptr && ideal_reg() == Op_VecX) {
+    int src_offset = ra_->reg2offset(src_lo);
+    int dst_offset = ra_->reg2offset(dst_lo);
+    DEBUG_ONLY(int algm = MIN2(RegMask::num_registers(ideal_reg()), (int)Matcher::stack_alignment_in_slots()) * VMRegImpl::stack_slot_size);
+    assert((src_lo_rc != rc_stack) || is_aligned(src_offset, algm), "unaligned vector spill sp offset %d (src)", src_offset);
+    assert((dst_lo_rc != rc_stack) || is_aligned(dst_offset, algm), "unaligned vector spill sp offset %d (dst)", dst_offset);
     // Memory->Memory Spill.
     if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
-      int src_offset = ra_->reg2offset(src_lo);
-      int dst_offset = ra_->reg2offset(dst_lo);
       if (masm) {
         __ ld(R0, src_offset, R1_SP);
         __ std(R0, dst_offset, R1_SP);
         __ ld(R0, src_offset+8, R1_SP);
         __ std(R0, dst_offset+8, R1_SP);
       }
       size += 16;
+#ifndef PRODUCT
+      if (st != nullptr) {
+        st->print("%-7s [R1_SP + #%d] -> [R1_SP + #%d] \t// vector spill copy", "SPILL", src_offset, dst_offset);
+      }
+#endif // !PRODUCT
     }
     // VectorRegister->Memory Spill.
     else if (src_lo_rc == rc_vec && dst_lo_rc == rc_stack) {
       VectorSRegister Rsrc = as_VectorRegister(Matcher::_regEncode[src_lo]).to_vsr();
-      int dst_offset = ra_->reg2offset(dst_lo);
       if (PowerArchitecturePPC64 >= 9) {
-        if (is_aligned(dst_offset, 16)) {
-          if (masm) {
-            __ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16_Power9
-          }
-          size += 4;
-        } else {
-          // Other alignment can be used by Vector API (VectorPayload in rearrangeOp,
-          // observed with VectorRearrangeTest.java on Power9).
-          if (masm) {
-            __ addi(R0, R1_SP, dst_offset);
-            __ stxvx(Rsrc, R0); // matches storeV16_Power9 (regarding element ordering)
-          }
-          size += 8;
+        if (masm) {
+          __ stxv(Rsrc, dst_offset, R1_SP); // matches storeV16_Power9
         }
+        size += 4;
       } else {
         if (masm) {
           __ addi(R0, R1_SP, dst_offset);
           __ stxvd2x(Rsrc, R0); // matches storeV16_Power8
         }
         size += 8;
       }
+#ifndef PRODUCT
+      if (st != nullptr) {
+        if (PowerArchitecturePPC64 >= 9) {
+          st->print("%-7s %s, [R1_SP + #%d] \t// vector spill copy", "STXV", Matcher::regName[src_lo], dst_offset);
+        } else {
+          st->print("%-7s %s, R1_SP, %d \t// vector spill copy", "ADDI", Matcher::regName[src_lo], dst_offset);
+          st->print("%-7s %s, [R1_SP] \t// vector spill copy", "STXVD2X", Matcher::regName[src_lo]);
+        }
+      }
+#endif // !PRODUCT
     }
     // Memory->VectorRegister Spill.
     else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vec) {
       VectorSRegister Rdst = as_VectorRegister(Matcher::_regEncode[dst_lo]).to_vsr();
-      int src_offset = ra_->reg2offset(src_lo);
       if (PowerArchitecturePPC64 >= 9) {
-        if (is_aligned(src_offset, 16)) {
-          if (masm) {
-            __ lxv(Rdst, src_offset, R1_SP);
-          }
-          size += 4;
-        } else {
-          if (masm) {
-            __ addi(R0, R1_SP, src_offset);
-            __ lxvx(Rdst, R0);
-          }
-          size += 8;
+        if (masm) {
+          __ lxv(Rdst, src_offset, R1_SP);
         }
+        size += 4;
       } else {
         if (masm) {
           __ addi(R0, R1_SP, src_offset);
           __ lxvd2x(Rdst, R0);
         }
         size += 8;
       }
+#ifndef PRODUCT
+      if (st != nullptr) {
+        if (PowerArchitecturePPC64 >= 9) {
+          st->print("%-7s %s, [R1_SP + #%d] \t// vector spill copy", "LXV", Matcher::regName[dst_lo], src_offset);
+        } else {
+          st->print("%-7s %s, R1_SP, %d \t// vector spill copy", "ADDI", Matcher::regName[src_lo], src_offset);
+          st->print("%-7s %s, [R1_SP] \t// vector spill copy", "LXVD2X", Matcher::regName[dst_lo]);
+        }
+      }
+#endif // !PRODUCT
     }
     // VectorRegister->VectorRegister.
     else if (src_lo_rc == rc_vec && dst_lo_rc == rc_vec) {
@@ -1867,6 +1875,12 @@ uint MachSpillCopyNode::implementation(C2_MacroAssembler *masm, PhaseRegAlloc *r
         __ xxlor(Rdst, Rsrc, Rsrc);
       }
       size += 4;
+#ifndef PRODUCT
+      if (st != nullptr) {
+        st->print("%-7s %s, %s, %s\t// vector spill copy",
+                  "XXLOR", Matcher::regName[dst_lo], Matcher::regName[src_lo], Matcher::regName[src_lo]);
+      }
+#endif // !PRODUCT
     }
     else {
       ShouldNotReachHere(); // No VR spill.
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/chaitin.hpp
@@ -143,7 +143,7 @@ class LRG : public ResourceObj {
 
 private:
   // Number of registers this live range uses when it colors
-  uint16_t _num_regs;           // 2 for Longs and Doubles, 1 for all else
+  uint16_t _num_regs;           // byte size of the value divided by 4
                                 // except _num_regs is kill count for fat_proj
 
   // For scalable register, num_regs may not be the actual physical register size.
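In other words, _num_regs now scales with the value's size in 32-bit slots instead of only distinguishing singles from pairs. A minimal illustration, assuming 4-byte stack slots:

#include <cassert>

// Assumed: a value of byte_size bytes occupies byte_size / 4 registers/slots.
static int num_regs_for(int byte_size) { return byte_size / 4; }

int main() {
  assert(num_regs_for(4)  == 1);  // int/float: 1
  assert(num_regs_for(8)  == 2);  // long/double: 2 (the old "2 for Longs and Doubles")
  assert(num_regs_for(16) == 4);  // 128-bit vector (VecX): a set of 4
  return 0;
}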
13 changes: 6 additions & 7 deletions src/hotspot/share/opto/matcher.cpp
@@ -283,13 +283,12 @@ void Matcher::match( ) {
     _parm_regs[i].set_pair(reg2, reg1);
   }
 
-  // Finally, make sure the incoming arguments take up an even number of
-  // words, in case the arguments or locals need to contain doubleword stack
-  // slots. The rest of the system assumes that stack slot pairs (in
-  // particular, in the spill area) which look aligned will in fact be
-  // aligned relative to the stack pointer in the target machine. Double
-  // stack slots will always be allocated aligned.
-  _new_SP = OptoReg::Name(align_up(_in_arg_limit, (int)RegMask::SlotsPerLong));
+  // Allocated register sets are aligned to their size. Offsets to the stack
+  // pointer have to be aligned to the size of the access. For this, _new_SP is
+  // aligned to the size of the largest register set, with the stack alignment as
+  // an upper limit and a minimum of SlotsPerLong (2).
+  int vector_alignment = MIN2(C->max_vector_size(), stack_alignment_in_bytes()) / VMRegImpl::stack_slot_size;
+  _new_SP = OptoReg::Name(align_up(_in_arg_limit, MAX2((int)RegMask::SlotsPerLong, vector_alignment)));
 
   // Compute highest outgoing stack argument as
   // _new_SP + out_preserve_stack_slots + max(outgoing argument size).
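A worked example of the new rounding, using assumed values rather than the real Matcher accessors: 16-byte vectors, a 16-byte stack alignment, and 4-byte slots give an alignment of 4 slots, whereas the old code always rounded to SlotsPerLong (2):

#include <algorithm>
#include <cassert>

// Round x up to a multiple of a (a is a power of two here).
static int align_up(int x, int a) { return (x + a - 1) & -a; }

int main() {
  const int slots_per_long  = 2;   // RegMask::SlotsPerLong
  const int stack_slot_size = 4;   // assumed: VMRegImpl::stack_slot_size
  const int max_vector_size = 16;  // assumed: C->max_vector_size() in bytes (VecX)
  const int stack_alignment = 16;  // assumed: stack_alignment_in_bytes()

  int vector_alignment = std::min(max_vector_size, stack_alignment) / stack_slot_size;  // 4 slots
  int in_arg_limit = 9;            // assumed incoming-argument limit, in slots
  int new_sp = align_up(in_arg_limit, std::max(slots_per_long, vector_alignment));
  assert(new_sp == 12);            // the old rounding to SlotsPerLong would give 10,
                                   // leaving 16-byte spill slots only 8-byte aligned
  return 0;
}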
16 changes: 6 additions & 10 deletions src/hotspot/share/opto/regmask.hpp
@@ -354,16 +354,12 @@
   }
 
   // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
-  // Also, consider the maximum alignment size for a normally allocated
-  // value. Since we allocate register pairs but not register quads (at
-  // present), this alignment is SlotsPerLong (== 2). A normally
-  // aligned allocated register is either a single register, or a pair
-  // of adjacent registers, the lower-numbered being even.
-  // See also is_aligned_Pairs() below, and the padding added before
-  // Matcher::_new_SP to keep allocated pairs aligned properly.
-  // If we ever go to quad-word allocations, SlotsPerQuad will become
-  // the controlling alignment constraint. Note that this alignment
-  // requirement is internal to the allocator, and independent of any
+  // We allocate single registers for 32 bit values and register pairs for 64
+  // bit values. The number of registers allocated for vectors matches their size. E.g. for 128 bit
+  // vectors (VecX) we allocate a set of 4 registers. Allocated sets are adjacent and aligned.
+  // See RegMask::find_first_set(), is_aligned_pairs(), is_aligned_sets(), and the padding added before
+  // Matcher::_new_SP to keep allocated pairs and sets aligned properly.
+  // Note that this alignment requirement is internal to the allocator, and independent of any
   // particular platform.
   enum { SlotsPerLong = 2,
          SlotsPerVecA = 4,
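A small sketch of what "adjacent and aligned" means for an allocated set, in the spirit of the is_aligned_pairs()/is_aligned_sets() checks referenced above (the helper below is an illustration, not the RegMask API):

#include <cassert>

// Assumed model: a set of `size` adjacent slots starting at slot index `first`
// is aligned when `first` is a multiple of the set size (a power of two).
static bool is_aligned_set(int first, int size) { return first % size == 0; }

int main() {
  assert(is_aligned_set(6, 2));    // SlotsPerLong == 2: pairs start on even slots
  assert(is_aligned_set(8, 4));    // a VecX set of 4 slots starts on a multiple of 4
  assert(!is_aligned_set(6, 4));   // ...so slot 6 cannot start a VecX set
  return 0;
}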
6 changes: 6 additions & 0 deletions test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -1257,6 +1257,12 @@ public class IRNode {
         machOnly(MEM_TO_REG_SPILL_COPY, "MemToRegSpillCopy");
     }
 
+    public static final String MEM_TO_REG_SPILL_COPY_TYPE = COMPOSITE_PREFIX + "MEM_TO_REG_SPILL_COPY_TYPE" + POSTFIX;
+    static {
+        String regex = START + "MemToRegSpillCopy" + MID + IS_REPLACED + ".*" + END;
+        machOnly(MEM_TO_REG_SPILL_COPY_TYPE, regex);
+    }
+
     public static final String MIN = PREFIX + "MIN" + POSTFIX;
     static {
         beforeMatchingNameRegex(MIN, "Min(I|L)");
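For reference, the composite placeholder takes the spill type as a user-supplied string at the use site; the new test below applies it like this (excerpt; the substitution of IS_REPLACED with "vectorx" is handled by the IR framework):

    @Test
    @IR(counts = {IRNode.MEM_TO_REG_SPILL_COPY_TYPE, "vectorx", "> 0"},
        phase = {CompilePhase.FINAL_CODE})
    static long test16ByteSpilling(long l1, long l2, long l3, long l4, long l5, long l6, long l7, long l8,
                                   long l9 /* odd stack arg */) { /* ... */ }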
109 changes: 109 additions & 0 deletions test/hotspot/jtreg/compiler/vectorapi/TestVectorSpilling.java
@@ -0,0 +1,109 @@
/*
* Copyright (c) 2025 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

package compiler.vectorapi;

import compiler.lib.ir_framework.*;

import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;

import jdk.test.lib.Asserts;

/**
* @test
* @bug 8370473
* @library /test/lib /
* @summary Test alignment of vector spill slots. It should match the vector size.
* @modules jdk.incubator.vector
* @requires vm.opt.final.MaxVectorSize == null | vm.opt.final.MaxVectorSize >= 16
*
* @run driver compiler.vectorapi.TestVectorSpilling
*/

public class TestVectorSpilling {

private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_128;
private static int LENGTH = 1024;

private static int[] ia1;
private static int[] ia2;
private static int[] ir;

public static void main(String[] args) {
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
}

static class LData {
// Reading from a volatile field prevents CSE optimization
static volatile long vF = 1042;

long l1, l2, l3, l4, l5, l6, l7, l8;
public LData() {
l1 = vF; l2 = vF; l3 = vF; l4 = vF; l5 = vF; l6 = vF; l7 = vF; l8 = vF;
}
public long sum() {
return l1 + l2 + l3 + l4 + l5 + l6 + l7 + l8;
}
}


@Run(test = "test16ByteSpilling")
static void test16ByteSpilling_runner() {
test16ByteSpilling(1, 2, 3, 4, 5, 6, 7, 8, 9);
}

@Test
@IR(counts = {IRNode.MEM_TO_REG_SPILL_COPY_TYPE, "vectorx", "> 0"},
phase = {CompilePhase.FINAL_CODE})
static long test16ByteSpilling(long l1, long l2, long l3, long l4, long l5, long l6, long l7, long l8,
long l9 /* odd stack arg */) {
// To be scalar replaced and spilled to stack
LData d1 = new LData();
LData d2 = new LData();
LData d3 = new LData();

for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
IntVector a1v = IntVector.fromArray(I_SPECIES, ia1, i);
IntVector a2v = IntVector.fromArray(I_SPECIES, ia2, i);
int scalar = spillPoint();
a1v.add(a2v)
.add(scalar).intoArray(ir, i);
}

return l1 + l2 + l3 + l4 + l5 + l6 + l7 + l8 + l9 + d1.sum() + d2.sum() + d3.sum();
}

@DontInline
static int spillPoint() {
return 42;
}

static {
ia1 = new int[LENGTH];
ia2 = new int[LENGTH];
ir = new int[LENGTH];
}

}