diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 2f2bee6e22f0f..4326851abe672 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2745,10 +2745,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } if (index == -1) { - /* If we get an out-of-range offset it is a bug in the compiler, - so we assert here. */ - assert(Address::offset_ok_for_immed(disp, exact_log2(size_in_memory)), "c2 compiler bug"); - /* Fix up any out-of-range offsets. */ + // Fix up any out-of-range offsets. assert_different_registers(rscratch1, base); Address addr = Address(base, disp); addr = __ legitimize_address(addr, size_in_memory, rscratch1); @@ -3348,7 +3345,11 @@ encode %{ int scale = $mem$$scale; int disp = $mem$$disp; if (index == -1) { - __ prfm(Address(base, disp), PSTL1KEEP); + // Fix up any out-of-range offsets. + assert_different_registers(rscratch1, base); + Address addr = Address(base, disp); + addr = __ legitimize_address(addr, 8, rscratch1); + __ prfm(addr, PSTL1KEEP); } else { Register index_reg = as_Register(index); if (disp == 0) { @@ -4229,9 +4230,9 @@ operand immIOffset16() interface(CONST_INTER); %} -operand immLoffset() +operand immLOffset() %{ - predicate(Address::offset_ok_for_immed(n->get_long(), 0)); + predicate(n->get_long() >= -256 && n->get_long() <= 65520); match(ConL); op_cost(0); @@ -5341,6 +5342,34 @@ operand indOffL16(iRegP reg, immLoffset16 off) %} %} +operand indirectX2P(iRegL reg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(CastX2P reg); + op_cost(0); + format %{ "[$reg]\t# long -> ptr" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffX2P(iRegL reg, immLOffset off) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (CastX2P reg) off); + op_cost(0); + format %{ "[$reg, $off]\t# long -> ptr" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xffffffff); + scale(0x0); + disp($off); + %} +%} + operand indirectN(iRegN reg) %{ predicate(CompressedOops::shift() == 0); @@ -5431,7 +5460,7 @@ operand indOffIN(iRegN reg, immIOffset off) %} %} -operand indOffLN(iRegN reg, immLoffset off) +operand indOffLN(iRegN reg, immLOffset off) %{ predicate(CompressedOops::shift() == 0); constraint(ALLOC_IN_RC(ptr_reg)); @@ -5664,6 +5693,17 @@ operand iRegL2I(iRegL reg) %{ interface(REG_INTER) %} +operand iRegL2P(iRegL reg) %{ + + op_cost(0); + + match(CastX2P reg); + + format %{ "l2p($reg)" %} + + interface(REG_INTER) +%} + opclass vmem2(indirect, indIndex, indOffI2, indOffL2); opclass vmem4(indirect, indIndex, indOffI4, indOffL4); opclass vmem8(indirect, indIndex, indOffI8, indOffL8); @@ -5680,21 +5720,21 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16); // instruction defs. we can turn a memory op into an Address opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1, - indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN); + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P); opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2, - indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN); + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P); opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4, - indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN); + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8, - indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN); + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); // All of the memory operands. For the pipeline description. opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8, - indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN); + indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P); // iRegIorL2I is used for src inputs in rules for 32 bit int (I) @@ -5711,6 +5751,7 @@ opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indInde // movw is actually redundant but its not too costly. opclass iRegIorL2I(iRegI, iRegL2I); +opclass iRegPorL2P(iRegP, iRegL2P); //----------PIPELINE----------------------------------------------------------- // Rules which define the behavior of the target architectures pipeline. @@ -9811,7 +9852,7 @@ instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{ %} // Pointer Addition -instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ +instruct addP_reg_reg(iRegPNoSp dst, iRegPorL2P src1, iRegL src2) %{ match(Set dst (AddP src1 src2)); ins_cost(INSN_COST); @@ -9826,7 +9867,7 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ ins_pipe(ialu_reg_reg); %} -instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ +instruct addP_reg_reg_ext(iRegPNoSp dst, iRegPorL2P src1, iRegIorL2I src2) %{ match(Set dst (AddP src1 (ConvI2L src2))); ins_cost(1.9 * INSN_COST); @@ -9841,7 +9882,7 @@ instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ ins_pipe(ialu_reg_reg); %} -instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{ +instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegPorL2P src1, iRegL src2, immIScale scale) %{ match(Set dst (AddP src1 (LShiftL src2 scale))); ins_cost(1.9 * INSN_COST); @@ -9856,7 +9897,7 @@ instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale ins_pipe(ialu_reg_reg_shift); %} -instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{ +instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegPorL2P src1, iRegIorL2I src2, immIScale scale) %{ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale))); ins_cost(1.9 * INSN_COST); @@ -9889,7 +9930,7 @@ instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{ // Pointer Immediate Addition // n.b. this needs to be more expensive than using an indirect memory // operand -instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{ +instruct addP_reg_imm(iRegPNoSp dst, iRegPorL2P src1, immLAddSub src2) %{ match(Set dst (AddP src1 src2)); ins_cost(INSN_COST); diff --git a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad index c7c7165affb57..6e401724baa82 100644 --- a/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -62,7 +62,13 @@ instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr) format %{ "ldr $dst, $mem" %} ins_encode %{ - const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + if (ref_addr.getMode() == Address::base_plus_offset) { + // Fix up any out-of-range offsets. + assert_different_registers(rscratch1, as_Register($mem$$base)); + assert_different_registers(rscratch1, $dst$$Register); + ref_addr = __ legitimize_address(ref_addr, 8, rscratch1); + } __ ldr($dst$$Register, ref_addr); x_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, barrier_data()); %} diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad index 92181e2b6b908..56d4538477920 100644 --- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad +++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -111,7 +111,13 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr) format %{ "ldr $dst, $mem" %} ins_encode %{ - const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + if (ref_addr.getMode() == Address::base_plus_offset) { + // Fix up any out-of-range offsets. + assert_different_registers(rscratch2, as_Register($mem$$base)); + assert_different_registers(rscratch2, $dst$$Register); + ref_addr = __ legitimize_address(ref_addr, 8, rscratch2); + } __ ldr($dst$$Register, ref_addr); z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1); %} diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp index 57361313f8533..39b804f7e5c96 100644 --- a/src/hotspot/share/opto/machnode.cpp +++ b/src/hotspot/share/opto/machnode.cpp @@ -395,7 +395,14 @@ const class TypePtr *MachNode::adr_type() const { // 32-bit unscaled narrow oop can be the base of any address expression t = t->make_ptr(); } - if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) { + + if (t->isa_intptr_t() && +#if !defined(AARCH64) + // AArch64 supports the addressing mode: + // [base, 0], in which [base] is converted from a long value + offset != 0 && +#endif + offset != Type::OffsetBot) { // We cannot assert that the offset does not look oop-ish here. // Depending on the heap layout the cardmark base could land // inside some oopish region. It definitely does for Win2K. diff --git a/test/hotspot/jtreg/compiler/c2/TestCastX2P.java b/test/hotspot/jtreg/compiler/c2/TestCastX2P.java new file mode 100644 index 0000000000000..6591692f0cc61 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestCastX2P.java @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2024, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.c2; + +import jdk.internal.misc.Unsafe; +import jdk.test.lib.Asserts; + +/** + * @test TestCastX2P + * @summary AArch64: remove extra register copy when converting from long to pointer. + * @bug 8336245 + * @library /test/lib + * @modules java.base/jdk.internal.misc + * @run main/othervm -XX:-TieredCompilation compiler.c2.TestCastX2P + */ + +public class TestCastX2P { + + public static final int LEN = 2040; + + static final Unsafe UNSAFE = Unsafe.getUnsafe(); + + public static long lseed = 0xbeef; + public static int iseed = 0xbeef; + public static short sseed = (short) (0xef); + public static byte bseed = (byte) (0xe); + + public static long off1 = 16; + public static long off2 = 32; + public static long off3 = 64; + + public static class TestLong { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 10_000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putLong(address+i, lseed); + } + } + + UNSAFE.putLong(address + off1 + 1030, lseed); + UNSAFE.putLong(address + 1023, lseed); + UNSAFE.putLong(address + off2 + 1001, lseed); + } + } + + public static class TestLongIndirect { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 1000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putLong(address+i, lseed); + } + } + + UNSAFE.putLong(address + off1, lseed); + UNSAFE.putLong(address + off1 + off2, lseed); + UNSAFE.putLong(address + off3, lseed); + } + } + + public static class TestInt { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 10_000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putInt(address+i, iseed); + } + } + + UNSAFE.putInt(address + off1 + 274, iseed); + UNSAFE.putInt(address + 278, iseed); + UNSAFE.putInt(address + off2 + 282, iseed); + } + } + + public static class TestIntIndirect { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 1000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putInt(address+i, iseed); + } + } + + UNSAFE.putInt(address + off1, iseed); + UNSAFE.putInt(address + off1 + off2, iseed); + UNSAFE.putInt(address + off3, iseed); + } + } + + public static class TestShort { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 10_000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putShort(address+i, sseed); + } + } + + UNSAFE.putShort(address + off1 + 257, sseed); + UNSAFE.putShort(address + 277, sseed); + UNSAFE.putShort(address + off2 + 283, sseed); + } + } + + public static class TestShortIndirect { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 1000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putShort(address+i, sseed); + } + } + + UNSAFE.putShort(address + off1, sseed); + UNSAFE.putShort(address + off1 + off2, sseed); + UNSAFE.putShort(address + off3, sseed); + } + } + + public static class TestByte { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 10_000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putByte(address+i, bseed); + } + } + + UNSAFE.putByte(address + off1 + 257, bseed); + UNSAFE.putByte(address + 277, bseed); + UNSAFE.putByte(address + off2 + 283, bseed); + } + } + + public static class TestByteIndirect { + + private static long address = UNSAFE.allocateMemory(LEN); + + static { + for (int k = 0; k < 1000; k++) { + for (int i = 0; i < LEN/2; i++) { + UNSAFE.putByte(address+i, bseed); + } + } + + UNSAFE.putByte(address + off1, bseed); + UNSAFE.putByte(address + off1 + off2, bseed); + UNSAFE.putByte(address + off3, bseed); + } + } + + static void test() { + TestLong t1 = new TestLong(); + Asserts.assertEquals(UNSAFE.getLong(t1.address + off1 + 1030), lseed, "put long failed!"); + Asserts.assertEquals(UNSAFE.getLong(t1.address + 1023), lseed, "put long failed!"); + Asserts.assertEquals(UNSAFE.getLong(t1.address + off2 + 1001), lseed, "put long failed!"); + + TestLongIndirect t2 = new TestLongIndirect(); + Asserts.assertEquals(UNSAFE.getLong(t2.address + off1), lseed, "put long failed!"); + Asserts.assertEquals(UNSAFE.getLong(t2.address + off1 + off2), lseed, "put long failed!"); + Asserts.assertEquals(UNSAFE.getLong(t2.address + off3), lseed, "put long failed!"); + + TestInt t3 = new TestInt(); + Asserts.assertEquals(UNSAFE.getInt(t3.address + off1 + 274), iseed, "put int failed!"); + Asserts.assertEquals(UNSAFE.getInt(t3.address + 278), iseed, "put int failed!"); + Asserts.assertEquals(UNSAFE.getInt(t3.address + off2 + 282), iseed, "put int failed!"); + + TestIntIndirect t4 = new TestIntIndirect(); + Asserts.assertEquals(UNSAFE.getInt(t4.address + off1), iseed, "put int failed!"); + Asserts.assertEquals(UNSAFE.getInt(t4.address + off1 + off2), iseed, "put int failed!"); + Asserts.assertEquals(UNSAFE.getInt(t4.address + off3), iseed, "put int failed!"); + + TestShort t5 = new TestShort(); + Asserts.assertEquals(UNSAFE.getShort(t5.address + off1 + 257), sseed, "put short failed!"); + Asserts.assertEquals(UNSAFE.getShort(t5.address + 277), sseed, "put short failed!"); + Asserts.assertEquals(UNSAFE.getShort(t5.address + off2 + 283), sseed, "put short failed!"); + + TestShortIndirect t6 = new TestShortIndirect(); + Asserts.assertEquals(UNSAFE.getShort(t6.address + off1), sseed, "put short failed!"); + Asserts.assertEquals(UNSAFE.getShort(t6.address + off1 + off2), sseed, "put short failed!"); + Asserts.assertEquals(UNSAFE.getShort(t6.address + off3), sseed, "put short failed!"); + + TestByte t7 = new TestByte(); + Asserts.assertEquals(UNSAFE.getByte(t7.address + off1 + 257), bseed, "put byte failed!"); + Asserts.assertEquals(UNSAFE.getByte(t7.address + 277), bseed, "put byte failed!"); + Asserts.assertEquals(UNSAFE.getByte(t7.address + off2 + 283), bseed, "put byte failed!"); + + TestByteIndirect t8 = new TestByteIndirect(); + Asserts.assertEquals(UNSAFE.getByte(t8.address + off1), bseed, "put byte failed!"); + Asserts.assertEquals(UNSAFE.getByte(t8.address + off1 + off2), bseed, "put byte failed!"); + Asserts.assertEquals(UNSAFE.getByte(t8.address + off3), bseed, "put byte failed!"); + } + + public static void main(String[] strArr) { + test(); + } +}