Skip to content

Commit a97aedf

Browse files
committed
8256215: Shenandoah: re-organize saving/restoring machine state in assembler code
Reviewed-by: rkennke, zgu
1 parent 316d52c commit a97aedf

File tree

2 files changed

+105
-49
lines changed

2 files changed

+105
-49
lines changed

src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp

Lines changed: 82 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -43,28 +43,75 @@
4343

4444
#define __ masm->
4545

// Removed helper (diff lines marked "-"): reserved 64 bytes of stack and stored
// xmm0..xmm7 with movdbl into eight consecutive 8-byte slots.
46-
static void save_xmm_registers(MacroAssembler* masm) {
47-
__ subptr(rsp, 64);
48-
__ movdbl(Address(rsp, 0), xmm0);
49-
__ movdbl(Address(rsp, 8), xmm1);
50-
__ movdbl(Address(rsp, 16), xmm2);
51-
__ movdbl(Address(rsp, 24), xmm3);
52-
__ movdbl(Address(rsp, 32), xmm4);
53-
__ movdbl(Address(rsp, 40), xmm5);
54-
__ movdbl(Address(rsp, 48), xmm6);
55-
__ movdbl(Address(rsp, 56), xmm7);
// Added helper (diff lines marked "+"): emits code that saves machine state on the stack.
//   masm       - assembler the instructions are emitted into (via the `__` macro)
//   handle_gpr - when true, emit push_IU_state()
//   handle_fp  - when true, spill xmm0..xmm7, or emit push_FPU_state() when UseSSE is below 1
// The integer state is pushed first and the fp state second; restore_machine_state()
// pops them in the opposite order, so both calls must be given the same flags.
46+
static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
47+
if (handle_gpr) {
48+
__ push_IU_state();
49+
}
50+
51+
if (handle_fp) {
52+
// Some paths can be reached from the c2i adapter with live fp arguments in registers.
53+
LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));
54+
// UseSSE >= 2: store each of xmm0..xmm7 with movdbl, one slot of xmm_size bytes per register.
55+
if (UseSSE >= 2) {
// Slot stride: wordSize * 2 under LP64_ONLY, wordSize * 4 under NOT_LP64.
56+
const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);
57+
__ subptr(rsp, xmm_size * 8);
58+
__ movdbl(Address(rsp, xmm_size * 0), xmm0);
59+
__ movdbl(Address(rsp, xmm_size * 1), xmm1);
60+
__ movdbl(Address(rsp, xmm_size * 2), xmm2);
61+
__ movdbl(Address(rsp, xmm_size * 3), xmm3);
62+
__ movdbl(Address(rsp, xmm_size * 4), xmm4);
63+
__ movdbl(Address(rsp, xmm_size * 5), xmm5);
64+
__ movdbl(Address(rsp, xmm_size * 6), xmm6);
65+
__ movdbl(Address(rsp, xmm_size * 7), xmm7);
// UseSSE >= 1 (but below 2): same layout, stored with movflt and a slot stride half as large.
66+
} else if (UseSSE >= 1) {
67+
const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);
68+
__ subptr(rsp, xmm_size * 8);
69+
__ movflt(Address(rsp, xmm_size * 0), xmm0);
70+
__ movflt(Address(rsp, xmm_size * 1), xmm1);
71+
__ movflt(Address(rsp, xmm_size * 2), xmm2);
72+
__ movflt(Address(rsp, xmm_size * 3), xmm3);
73+
__ movflt(Address(rsp, xmm_size * 4), xmm4);
74+
__ movflt(Address(rsp, xmm_size * 5), xmm5);
75+
__ movflt(Address(rsp, xmm_size * 6), xmm6);
76+
__ movflt(Address(rsp, xmm_size * 7), xmm7);
// Neither SSE branch applies: fall back to push_FPU_state().
77+
} else {
78+
__ push_FPU_state();
79+
}
80+
}
5681
}
5782

// Removed helper (diff lines marked "-"): reloaded xmm0..xmm7 with movdbl from eight
// consecutive 8-byte stack slots and then released the 64 bytes.
58-
static void restore_xmm_registers(MacroAssembler* masm) {
59-
__ movdbl(xmm0, Address(rsp, 0));
60-
__ movdbl(xmm1, Address(rsp, 8));
61-
__ movdbl(xmm2, Address(rsp, 16));
62-
__ movdbl(xmm3, Address(rsp, 24));
63-
__ movdbl(xmm4, Address(rsp, 32));
64-
__ movdbl(xmm5, Address(rsp, 40));
65-
__ movdbl(xmm6, Address(rsp, 48));
66-
__ movdbl(xmm7, Address(rsp, 56));
67-
__ addptr(rsp, 64);
// Added helper (diff lines marked "+"): emits code that restores the state saved by
// save_machine_state().
//   masm       - assembler the instructions are emitted into (via the `__` macro)
//   handle_gpr - when true, emit pop_IU_state()
//   handle_fp  - when true, reload xmm0..xmm7, or emit pop_FPU_state() when UseSSE is below 1
// The fp state is restored first and the integer state last, the reverse of the save
// order, so the flags must match those given to save_machine_state().
83+
static void restore_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
84+
if (handle_fp) {
// UseSSE >= 2: reload each register with movdbl, then release the eight slots.
85+
if (UseSSE >= 2) {
// Slot stride uses the same expression as the matching branch of save_machine_state().
86+
const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);
87+
__ movdbl(xmm0, Address(rsp, xmm_size * 0));
88+
__ movdbl(xmm1, Address(rsp, xmm_size * 1));
89+
__ movdbl(xmm2, Address(rsp, xmm_size * 2));
90+
__ movdbl(xmm3, Address(rsp, xmm_size * 3));
91+
__ movdbl(xmm4, Address(rsp, xmm_size * 4));
92+
__ movdbl(xmm5, Address(rsp, xmm_size * 5));
93+
__ movdbl(xmm6, Address(rsp, xmm_size * 6));
94+
__ movdbl(xmm7, Address(rsp, xmm_size * 7));
95+
__ addptr(rsp, xmm_size * 8);
// UseSSE >= 1 (but below 2): reload with movflt using the smaller slot stride.
96+
} else if (UseSSE >= 1) {
97+
const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);
98+
__ movflt(xmm0, Address(rsp, xmm_size * 0));
99+
__ movflt(xmm1, Address(rsp, xmm_size * 1));
100+
__ movflt(xmm2, Address(rsp, xmm_size * 2));
101+
__ movflt(xmm3, Address(rsp, xmm_size * 3));
102+
__ movflt(xmm4, Address(rsp, xmm_size * 4));
103+
__ movflt(xmm5, Address(rsp, xmm_size * 5));
104+
__ movflt(xmm6, Address(rsp, xmm_size * 6));
105+
__ movflt(xmm7, Address(rsp, xmm_size * 7));
106+
__ addptr(rsp, xmm_size * 8);
// Neither SSE branch applies: fall back to pop_FPU_state().
107+
} else {
108+
__ pop_FPU_state();
109+
}
110+
}
111+
112+
if (handle_gpr) {
113+
__ pop_IU_state();
114+
}
68115
}
69116

70117
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -109,7 +156,7 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
109156
__ testb(gc_state, flags);
110157
__ jcc(Assembler::zero, done);
111158

112-
__ pusha(); // push registers
159+
save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);
113160

114161
#ifdef _LP64
115162
assert(src == rdi, "expected");
@@ -125,7 +172,8 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
125172
src, dst, count);
126173
}
127174

128-
__ popa();
175+
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);
176+
129177
__ bind(done);
130178
NOT_LP64(__ pop(thread);)
131179
}
@@ -329,6 +377,10 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
329377
__ jcc(Assembler::zero, not_cset);
330378
}
331379

380+
save_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);
381+
382+
// The rest is saved with the optimized path
383+
332384
uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);
333385
__ subptr(rsp, num_saved_regs * wordSize);
334386
uint slot = num_saved_regs;
@@ -362,7 +414,6 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
362414
__ movptr(arg0, dst);
363415
}
364416

365-
save_xmm_registers(masm);
366417
if (is_strong) {
367418
if (is_narrow) {
368419
__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), arg0, arg1);
@@ -380,7 +431,6 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
380431
assert(!is_narrow, "phantom access cannot be narrow");
381432
__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), arg0, arg1);
382433
}
383-
restore_xmm_registers(masm);
384434

385435
#ifdef _LP64
386436
__ movptr(r11, Address(rsp, (slot++) * wordSize));
@@ -401,6 +451,8 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
401451
assert(slot == num_saved_regs, "must use all slots");
402452
__ addptr(rsp, num_saved_regs * wordSize);
403453

454+
restore_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);
455+
404456
__ bind(not_cset);
405457

406458
if (is_strong) {
@@ -429,12 +481,7 @@ void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Regist
429481
if (dst == noreg) return;
430482

431483
if (ShenandoahIUBarrier) {
432-
// The set of registers to be saved+restored is the same as in the write-barrier above.
433-
// Those are the commonly used registers in the interpreter.
434-
__ pusha();
435-
// __ push_callee_saved_registers();
436-
__ subptr(rsp, 2 * Interpreter::stackElementSize);
437-
__ movdbl(Address(rsp, 0), xmm0);
484+
save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
438485

439486
#ifdef _LP64
440487
Register thread = r15_thread;
@@ -451,10 +498,8 @@ void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Regist
451498
assert_different_registers(dst, tmp, thread);
452499

453500
satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);
454-
__ movdbl(xmm0, Address(rsp, 0));
455-
__ addptr(rsp, 2 * Interpreter::stackElementSize);
456-
//__ pop_callee_saved_registers();
457-
__ popa();
501+
502+
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
458503
}
459504
}
460505

@@ -519,11 +564,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
519564

520565
// 3: apply keep-alive barrier if needed
521566
if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
522-
__ push_IU_state();
523-
// That path can be reached from the c2i adapter with live fp
524-
// arguments in registers.
525-
LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));
526-
save_xmm_registers(masm);
567+
save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
527568

528569
Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
529570
assert_different_registers(dst, tmp1, tmp_thread);
@@ -540,8 +581,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
540581
tmp1 /* tmp */,
541582
true /* tosca_live */,
542583
true /* expand_call */);
543-
restore_xmm_registers(masm);
544-
__ pop_IU_state();
584+
585+
restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);
545586
}
546587
}
547588

src/hotspot/cpu/x86/stubGenerator_x86_32.cpp

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3669,17 +3669,32 @@ class StubGenerator: public StubCodeGenerator {
36693669
__ pusha();
36703670

36713671
// xmm0 and xmm1 may be used for passing float/double arguments
3672-
const int xmm_size = wordSize * 4;
3673-
const int xmm_spill_size = xmm_size * 2;
3674-
__ subptr(rsp, xmm_spill_size);
3675-
__ movdqu(Address(rsp, xmm_size * 1), xmm1);
3676-
__ movdqu(Address(rsp, xmm_size * 0), xmm0);
3672+
3673+
if (UseSSE >= 2) {
3674+
const int xmm_size = wordSize * 4;
3675+
__ subptr(rsp, xmm_size * 2);
3676+
__ movdbl(Address(rsp, xmm_size * 1), xmm1);
3677+
__ movdbl(Address(rsp, xmm_size * 0), xmm0);
3678+
} else if (UseSSE >= 1) {
3679+
const int xmm_size = wordSize * 2;
3680+
__ subptr(rsp, xmm_size * 2);
3681+
__ movflt(Address(rsp, xmm_size * 1), xmm1);
3682+
__ movflt(Address(rsp, xmm_size * 0), xmm0);
3683+
}
36773684

36783685
__ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), rbx);
36793686

3680-
__ movdqu(xmm0, Address(rsp, xmm_size * 0));
3681-
__ movdqu(xmm1, Address(rsp, xmm_size * 1));
3682-
__ addptr(rsp, xmm_spill_size);
3687+
if (UseSSE >= 2) {
3688+
const int xmm_size = wordSize * 4;
3689+
__ movdbl(xmm0, Address(rsp, xmm_size * 0));
3690+
__ movdbl(xmm1, Address(rsp, xmm_size * 1));
3691+
__ addptr(rsp, xmm_size * 2);
3692+
} else if (UseSSE >= 1) {
3693+
const int xmm_size = wordSize * 2;
3694+
__ movflt(xmm0, Address(rsp, xmm_size * 0));
3695+
__ movflt(xmm1, Address(rsp, xmm_size * 1));
3696+
__ addptr(rsp, xmm_size * 2);
3697+
}
36833698

36843699
__ cmpl(rax, 1); // 1 means deoptimize
36853700
__ jcc(Assembler::equal, deoptimize_label);

0 commit comments

Comments
 (0)