4646
4747address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL ;
4848
49- static void save_xmm_registers (MacroAssembler* masm) {
50- __ subptr (rsp, 64 );
51- __ movdbl (Address (rsp, 0 ), xmm0);
52- __ movdbl (Address (rsp, 8 ), xmm1);
53- __ movdbl (Address (rsp, 16 ), xmm2);
54- __ movdbl (Address (rsp, 24 ), xmm3);
55- __ movdbl (Address (rsp, 32 ), xmm4);
56- __ movdbl (Address (rsp, 40 ), xmm5);
57- __ movdbl (Address (rsp, 48 ), xmm6);
58- __ movdbl (Address (rsp, 56 ), xmm7);
49+ static void save_machine_state (MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
50+ if (handle_gpr) {
51+ __ push_IU_state ();
52+ }
53+
54+ if (handle_fp) {
55+ // Some paths can be reached from the c2i adapter with live fp arguments in registers.
56+ LP64_ONLY (assert (Argument::n_float_register_parameters_j == 8 , " 8 fp registers to save at java call" ));
57+
58+ if (UseSSE >= 2 ) {
59+ const int xmm_size = wordSize * LP64_ONLY (2 ) NOT_LP64 (4 );
60+ __ subptr (rsp, xmm_size * 8 );
61+ __ movdbl (Address (rsp, xmm_size * 0 ), xmm0);
62+ __ movdbl (Address (rsp, xmm_size * 1 ), xmm1);
63+ __ movdbl (Address (rsp, xmm_size * 2 ), xmm2);
64+ __ movdbl (Address (rsp, xmm_size * 3 ), xmm3);
65+ __ movdbl (Address (rsp, xmm_size * 4 ), xmm4);
66+ __ movdbl (Address (rsp, xmm_size * 5 ), xmm5);
67+ __ movdbl (Address (rsp, xmm_size * 6 ), xmm6);
68+ __ movdbl (Address (rsp, xmm_size * 7 ), xmm7);
69+ } else if (UseSSE >= 1 ) {
70+ const int xmm_size = wordSize * LP64_ONLY (1 ) NOT_LP64 (2 );
71+ __ subptr (rsp, xmm_size * 8 );
72+ __ movflt (Address (rsp, xmm_size * 0 ), xmm0);
73+ __ movflt (Address (rsp, xmm_size * 1 ), xmm1);
74+ __ movflt (Address (rsp, xmm_size * 2 ), xmm2);
75+ __ movflt (Address (rsp, xmm_size * 3 ), xmm3);
76+ __ movflt (Address (rsp, xmm_size * 4 ), xmm4);
77+ __ movflt (Address (rsp, xmm_size * 5 ), xmm5);
78+ __ movflt (Address (rsp, xmm_size * 6 ), xmm6);
79+ __ movflt (Address (rsp, xmm_size * 7 ), xmm7);
80+ } else {
81+ __ push_FPU_state ();
82+ }
83+ }
5984}
6085
61- static void restore_xmm_registers (MacroAssembler* masm) {
62- __ movdbl (xmm0, Address (rsp, 0 ));
63- __ movdbl (xmm1, Address (rsp, 8 ));
64- __ movdbl (xmm2, Address (rsp, 16 ));
65- __ movdbl (xmm3, Address (rsp, 24 ));
66- __ movdbl (xmm4, Address (rsp, 32 ));
67- __ movdbl (xmm5, Address (rsp, 40 ));
68- __ movdbl (xmm6, Address (rsp, 48 ));
69- __ movdbl (xmm7, Address (rsp, 56 ));
70- __ addptr (rsp, 64 );
86+ static void restore_machine_state (MacroAssembler* masm, bool handle_gpr, bool handle_fp) {
87+ if (handle_fp) {
88+ if (UseSSE >= 2 ) {
89+ const int xmm_size = wordSize * LP64_ONLY (2 ) NOT_LP64 (4 );
90+ __ movdbl (xmm0, Address (rsp, xmm_size * 0 ));
91+ __ movdbl (xmm1, Address (rsp, xmm_size * 1 ));
92+ __ movdbl (xmm2, Address (rsp, xmm_size * 2 ));
93+ __ movdbl (xmm3, Address (rsp, xmm_size * 3 ));
94+ __ movdbl (xmm4, Address (rsp, xmm_size * 4 ));
95+ __ movdbl (xmm5, Address (rsp, xmm_size * 5 ));
96+ __ movdbl (xmm6, Address (rsp, xmm_size * 6 ));
97+ __ movdbl (xmm7, Address (rsp, xmm_size * 7 ));
98+ __ addptr (rsp, xmm_size * 8 );
99+ } else if (UseSSE >= 1 ) {
100+ const int xmm_size = wordSize * LP64_ONLY (1 ) NOT_LP64 (2 );
101+ __ movflt (xmm0, Address (rsp, xmm_size * 0 ));
102+ __ movflt (xmm1, Address (rsp, xmm_size * 1 ));
103+ __ movflt (xmm2, Address (rsp, xmm_size * 2 ));
104+ __ movflt (xmm3, Address (rsp, xmm_size * 3 ));
105+ __ movflt (xmm4, Address (rsp, xmm_size * 4 ));
106+ __ movflt (xmm5, Address (rsp, xmm_size * 5 ));
107+ __ movflt (xmm6, Address (rsp, xmm_size * 6 ));
108+ __ movflt (xmm7, Address (rsp, xmm_size * 7 ));
109+ __ addptr (rsp, xmm_size * 8 );
110+ } else {
111+ __ pop_FPU_state ();
112+ }
113+ }
114+
115+ if (handle_gpr) {
116+ __ pop_IU_state ();
117+ }
71118}
72119
73120void ShenandoahBarrierSetAssembler::arraycopy_prologue (MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@@ -112,7 +159,7 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
112159 __ testb (gc_state, flags);
113160 __ jcc (Assembler::zero, done);
114161
115- __ pusha (); // push registers
162+ save_machine_state (masm, /* handle_gpr = */ true , /* handle_fp = */ false );
116163
117164#ifdef _LP64
118165 assert (src == rdi, " expected" );
@@ -128,7 +175,8 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec
128175 src, dst, count);
129176 }
130177
131- __ popa ();
178+ restore_machine_state (masm, /* handle_gpr = */ true , /* handle_fp = */ false );
179+
132180 __ bind (done);
133181 NOT_LP64 (__ pop (thread);)
134182 }
@@ -289,7 +337,9 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
289337
290338 Address gc_state (thread, in_bytes (ShenandoahThreadLocalData::gc_state_offset ()));
291339 __ testb (gc_state, ShenandoahHeap::HAS_FORWARDED);
292- __ jccb (Assembler::zero, done);
340+ __ jcc (Assembler::zero, done);
341+
342+ save_machine_state (masm, /* handle_gpr = */ false , /* handle_fp = */ true );
293343
294344 // Use rsi for src address
295345 const Register src_addr = rsi;
@@ -314,9 +364,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
314364 __ lea (src_addr, src);
315365 }
316366
317- save_xmm_registers (masm);
318367 __ call (RuntimeAddress (CAST_FROM_FN_PTR (address, ShenandoahBarrierSetAssembler::shenandoah_lrb ())));
319- restore_xmm_registers (masm);
320368
321369 if (need_addr_setup) {
322370 if (dst != rax) {
@@ -329,6 +377,8 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
329377 __ pop (rax);
330378 }
331379
380+ restore_machine_state (masm, /* handle_gpr = */ false , /* handle_fp = */ true );
381+
332382 __ bind (done);
333383
334384#ifndef _LP64
@@ -348,12 +398,7 @@ void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Regist
348398 if (dst == noreg) return ;
349399
350400 if (ShenandoahIUBarrier) {
351- // The set of registers to be saved+restored is the same as in the write-barrier above.
352- // Those are the commonly used registers in the interpreter.
353- __ pusha ();
354- // __ push_callee_saved_registers();
355- __ subptr (rsp, 2 * Interpreter::stackElementSize);
356- __ movdbl (Address (rsp, 0 ), xmm0);
401+ save_machine_state (masm, /* handle_gpr = */ true , /* handle_fp = */ true );
357402
358403#ifdef _LP64
359404 Register thread = r15_thread;
@@ -370,10 +415,8 @@ void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Regist
370415 assert_different_registers (dst, tmp, thread);
371416
372417 satb_write_barrier_pre (masm, noreg, dst, thread, tmp, true , false );
373- __ movdbl (xmm0, Address (rsp, 0 ));
374- __ addptr (rsp, 2 * Interpreter::stackElementSize);
375- // __ pop_callee_saved_registers();
376- __ popa ();
418+
419+ restore_machine_state (masm, /* handle_gpr = */ true , /* handle_fp = */ true );
377420 }
378421}
379422
@@ -448,11 +491,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
448491
449492 // 3: apply keep-alive barrier if needed
450493 if (ShenandoahBarrierSet::need_keep_alive_barrier (decorators, type)) {
451- __ push_IU_state ();
452- // That path can be reached from the c2i adapter with live fp
453- // arguments in registers.
454- LP64_ONLY (assert (Argument::n_float_register_parameters_j == 8 , " 8 fp registers to save at java call" ));
455- save_xmm_registers (masm);
494+ save_machine_state (masm, /* handle_gpr = */ true , /* handle_fp = */ true );
456495
457496 Register thread = NOT_LP64 (tmp_thread) LP64_ONLY (r15_thread);
458497 assert_different_registers (dst, tmp1, tmp_thread);
@@ -469,8 +508,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
469508 tmp1 /* tmp */ ,
470509 true /* tosca_live */ ,
471510 true /* expand_call */ );
472- restore_xmm_registers (masm);
473- __ pop_IU_state ( );
511+
512+ restore_machine_state (masm, /* handle_gpr = */ true , /* handle_fp = */ true );
474513 }
475514}
476515
0 commit comments