Skip to content

Commit eaf59e0

Browse files
committed
8256215: Shenandoah: re-organize saving/restoring machine state in assembler code
Reviewed-by: zgu
Backport-of: a97aedf
1 parent f700d37 commit eaf59e0

File tree

1 file changed

+81
-42
lines changed

1 file changed

+81
-42
lines changed

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Lines changed: 81 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -46,28 +46,75 @@
4646

4747
address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
4848

49-
static void save_xmm_registers(MacroAssembler* masm) {
50-
__ subptr(rsp, 64);
51-
__ movdbl(Address(rsp, 0), xmm0);
52-
__ movdbl(Address(rsp, 8), xmm1);
53-
__ movdbl(Address(rsp, 16), xmm2);
54-
__ movdbl(Address(rsp, 24), xmm3);
55-
__ movdbl(Address(rsp, 32), xmm4);
56-
__ movdbl(Address(rsp, 40), xmm5);
57-
__ movdbl(Address(rsp, 48), xmm6);
58-
__ movdbl(Address(rsp, 56), xmm7);
49+
// Emits the instructions that save machine state. Callers in this file pair
// each call with restore_machine_state() using the same flag values.
//
// masm       - assembler used for emission (the `__` prefix presumably expands
//              to `masm->`; its definition is outside this view).
// handle_gpr - when true, emits push_IU_state() first.
// handle_fp  - when true, saves floating-point state: xmm0..xmm7 are stored
//              into a newly reserved stack area when UseSSE >= 1, otherwise
//              push_FPU_state() is emitted.
//
// Order matters: the IU state is pushed before the FP area is reserved, so the
// restore side has to release the FP area before popping the IU state.
static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
50+
if (handle_gpr) {
51+
__ push_IU_state();
52+
}
53+
54+
if (handle_fp) {
55+
// Some paths can be reached from the c2i adapter with live fp arguments in registers.
56+
LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));
57+
58+
// UseSSE >= 2: each register is stored with movdbl into its own slot.
if (UseSSE >= 2) {
59+
// One slot is two words on LP64 and four words otherwise (presumably
// 16 bytes in both cases, assuming wordSize is 8 resp. 4 -- TODO confirm).
const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);
60+
// Reserve eight slots below rsp, one per register xmm0..xmm7.
__ subptr(rsp, xmm_size * 8);
61+
__ movdbl(Address(rsp, xmm_size * 0), xmm0);
62+
__ movdbl(Address(rsp, xmm_size * 1), xmm1);
63+
__ movdbl(Address(rsp, xmm_size * 2), xmm2);
64+
__ movdbl(Address(rsp, xmm_size * 3), xmm3);
65+
__ movdbl(Address(rsp, xmm_size * 4), xmm4);
66+
__ movdbl(Address(rsp, xmm_size * 5), xmm5);
67+
__ movdbl(Address(rsp, xmm_size * 6), xmm6);
68+
__ movdbl(Address(rsp, xmm_size * 7), xmm7);
69+
// UseSSE == 1: same layout idea, but registers are stored with movflt and
// a slot is one word on LP64, two words otherwise.
} else if (UseSSE >= 1) {
70+
const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);
71+
__ subptr(rsp, xmm_size * 8);
72+
__ movflt(Address(rsp, xmm_size * 0), xmm0);
73+
__ movflt(Address(rsp, xmm_size * 1), xmm1);
74+
__ movflt(Address(rsp, xmm_size * 2), xmm2);
75+
__ movflt(Address(rsp, xmm_size * 3), xmm3);
76+
__ movflt(Address(rsp, xmm_size * 4), xmm4);
77+
__ movflt(Address(rsp, xmm_size * 5), xmm5);
78+
__ movflt(Address(rsp, xmm_size * 6), xmm6);
79+
__ movflt(Address(rsp, xmm_size * 7), xmm7);
80+
// UseSSE == 0: no xmm stores are emitted; push_FPU_state() is used instead.
} else {
81+
__ push_FPU_state();
82+
}
83+
}
5984
}
6085

61-
static void restore_xmm_registers(MacroAssembler* masm) {
62-
__ movdbl(xmm0, Address(rsp, 0));
63-
__ movdbl(xmm1, Address(rsp, 8));
64-
__ movdbl(xmm2, Address(rsp, 16));
65-
__ movdbl(xmm3, Address(rsp, 24));
66-
__ movdbl(xmm4, Address(rsp, 32));
67-
__ movdbl(xmm5, Address(rsp, 40));
68-
__ movdbl(xmm6, Address(rsp, 48));
69-
__ movdbl(xmm7, Address(rsp, 56));
70-
__ addptr(rsp, 64);
86+
// Emits the instructions that undo save_machine_state(). It must be called
// with the same handle_gpr / handle_fp values as the matching save.
//
// masm       - assembler used for emission (the `__` prefix presumably expands
//              to `masm->`; its definition is outside this view).
// handle_gpr - when true, emits pop_IU_state() last.
// handle_fp  - when true, restores floating-point state: xmm0..xmm7 are
//              reloaded from the stack area and the area is released when
//              UseSSE >= 1, otherwise pop_FPU_state() is emitted.
//
// The FP state is handled before the IU state, the reverse of the save order,
// because the FP area sits on top of the pushed IU state on the stack.
static void restore_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
87+
if (handle_fp) {
88+
// UseSSE >= 2: reload with movdbl using the same slot size as the save side.
if (UseSSE >= 2) {
89+
const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);
90+
__ movdbl(xmm0, Address(rsp, xmm_size * 0));
91+
__ movdbl(xmm1, Address(rsp, xmm_size * 1));
92+
__ movdbl(xmm2, Address(rsp, xmm_size * 2));
93+
__ movdbl(xmm3, Address(rsp, xmm_size * 3));
94+
__ movdbl(xmm4, Address(rsp, xmm_size * 4));
95+
__ movdbl(xmm5, Address(rsp, xmm_size * 5));
96+
__ movdbl(xmm6, Address(rsp, xmm_size * 6));
97+
__ movdbl(xmm7, Address(rsp, xmm_size * 7));
98+
// Release the eight slots only after every register has been reloaded.
__ addptr(rsp, xmm_size * 8);
99+
// UseSSE == 1: reload with movflt using the smaller slot size.
} else if (UseSSE >= 1) {
100+
const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);
101+
__ movflt(xmm0, Address(rsp, xmm_size * 0));
102+
__ movflt(xmm1, Address(rsp, xmm_size * 1));
103+
__ movflt(xmm2, Address(rsp, xmm_size * 2));
104+
__ movflt(xmm3, Address(rsp, xmm_size * 3));
105+
__ movflt(xmm4, Address(rsp, xmm_size * 4));
106+
__ movflt(xmm5, Address(rsp, xmm_size * 5));
107+
__ movflt(xmm6, Address(rsp, xmm_size * 6));
108+
__ movflt(xmm7, Address(rsp, xmm_size * 7));
109+
__ addptr(rsp, xmm_size * 8);
110+
// UseSSE == 0: counterpart of push_FPU_state() on the save side.
} else {
111+
__ pop_FPU_state();
112+
}
113+
}
114+
115+
// Counterpart of push_IU_state(); emitted last because it was pushed first.
if (handle_gpr) {
116+
__ pop_IU_state();
117+
}
71118
}
72119

73120
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -112,7 +159,7 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
112159
__ testb(gc_state, flags);
113160
__ jcc(Assembler::zero, done);
114161

115-
__ pusha(); // push registers
162+
save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);
116163

117164
#ifdef _LP64
118165
assert(src == rdi, "expected");
@@ -128,7 +175,8 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
128175
src, dst, count);
129176
}
130177

131-
__ popa();
178+
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);
179+
132180
__ bind(done);
133181
NOT_LP64(__ pop(thread);)
134182
}
@@ -289,7 +337,9 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
289337

290338
Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
291339
__ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
292-
__ jccb(Assembler::zero, done);
340+
__ jcc(Assembler::zero, done);
341+
342+
save_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);
293343

294344
// Use rsi for src address
295345
const Register src_addr = rsi;
@@ -314,9 +364,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
314364
__ lea(src_addr, src);
315365
}
316366

317-
save_xmm_registers(masm);
318367
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
319-
restore_xmm_registers(masm);
320368

321369
if (need_addr_setup) {
322370
if (dst != rax) {
@@ -329,6 +377,8 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
329377
__ pop(rax);
330378
}
331379

380+
restore_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);
381+
332382
__ bind(done);
333383

334384
#ifndef _LP64
@@ -348,12 +398,7 @@ void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Regist
348398
if (dst == noreg) return;
349399

350400
if (ShenandoahIUBarrier) {
351-
// The set of registers to be saved+restored is the same as in the write-barrier above.
352-
// Those are the commonly used registers in the interpreter.
353-
__ pusha();
354-
// __ push_callee_saved_registers();
355-
__ subptr(rsp, 2 * Interpreter::stackElementSize);
356-
__ movdbl(Address(rsp, 0), xmm0);
401+
save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
357402

358403
#ifdef _LP64
359404
Register thread = r15_thread;
@@ -370,10 +415,8 @@ void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Regist
370415
assert_different_registers(dst, tmp, thread);
371416

372417
satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
373-
__ movdbl(xmm0, Address(rsp, 0));
374-
__ addptr(rsp, 2 * Interpreter::stackElementSize);
375-
//__ pop_callee_saved_registers();
376-
__ popa();
418+
419+
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
377420
}
378421
}
379422

@@ -448,11 +491,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
448491

449492
// 3: apply keep-alive barrier if needed
450493
if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
451-
__ push_IU_state();
452-
// That path can be reached from the c2i adapter with live fp
453-
// arguments in registers.
454-
LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));
455-
save_xmm_registers(masm);
494+
save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
456495

457496
Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
458497
assert_different_registers(dst, tmp1, tmp_thread);
@@ -469,8 +508,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
469508
tmp1 /* tmp */,
470509
true /* tosca_live */,
471510
true /* expand_call */);
472-
restore_xmm_registers(masm);
473-
__ pop_IU_state();
511+
512+
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
474513
}
475514
}
476515

0 commit comments

Comments
 (0)