diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 1dd8b854bc..5f54e6e512 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -1028,6 +1028,18 @@ replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). +% Rewrite references to Reg1 as Reg2 in both Args and ArgsRegs (one-way: entries naming Reg2 are left unchanged) +exchange_reg(Args, ArgsRegs, Reg1, Reg2) -> + NewArgs = replace_reg(Args, Reg1, Reg2), + NewArgsRegs = lists:map( + fun + (R) when R =:= Reg1 -> Reg2; + (R) -> R + end, + ArgsRegs + ), + {NewArgs, NewArgsRegs}. + set_args0([], [], [], _AvailGP, Acc) -> list_to_binary(lists:reverse(Acc)); set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> @@ -1056,19 +1068,23 @@ set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); set_args0( [Arg | ArgsT], - [_ArgReg | ArgsRegs], + [ArgReg | ArgsRegs], [ParamReg | ParamRegs], - [Avail | AvailGPT] = AvailGP, + AvailGP, Acc ) -> - J = set_args1(Arg, ParamReg), case lists:member(ParamReg, ArgsRegs) of false -> + % Normal case: ParamReg is free, just move Arg to ParamReg + J = set_args1(Arg, ParamReg), set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); true -> - I = jit_x86_64_asm:movq(ParamReg, Avail), - NewArgsT = replace_reg(ArgsT, ParamReg, Avail), - set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, [J, I | Acc]) + % ParamReg is occupied by another argument that will go elsewhere + % Use xchg to swap ArgReg and ParamReg + % After xchg, the value from Arg (which was in ArgReg) is now in ParamReg + I = jit_x86_64_asm:xchgq(ArgReg, ParamReg), + {NewArgsT, NewArgsRegs} = exchange_reg(ArgsT, ArgsRegs, ParamReg, ArgReg), + set_args0(NewArgsT, NewArgsRegs, ParamRegs, AvailGP, [I | Acc]) end. 
set_args1(Reg, Reg) -> diff --git a/libs/jit/src/jit_x86_64_asm.erl b/libs/jit/src/jit_x86_64_asm.erl index 1c4565da36..fb5687e870 100644 --- a/libs/jit/src/jit_x86_64_asm.erl +++ b/libs/jit/src/jit_x86_64_asm.erl @@ -56,7 +56,8 @@ popq/1, jmpq/1, retq/0, - cmpb/2 + cmpb/2, + xchgq/2 ]). -define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). @@ -566,3 +567,25 @@ jmpq({Reg}) -> retq() -> <<16#C3>>. + +%% XCHG r64, r64: Exchange two 64-bit registers +%% Encoding: REX.W + 87 /r +xchgq(rax, rax) -> + % NOP + <<16#90>>; +xchgq(rax, Reg) when is_atom(Reg) -> + % Special short encoding for XCHG rax, r64 + % For low registers: REX.W + 0x90 + reg + % For high registers: REX.W + REX.B + 0x90 + reg (need REX.B to access r8-r11) + case x86_64_x_reg(Reg) of + {0, Index} -> <<16#48, (16#90 + Index)>>; + {1, Index} -> <<16#49, (16#90 + Index)>> + end; +xchgq(Reg, rax) when is_atom(Reg) -> + % XCHG is commutative + xchgq(rax, Reg); +xchgq(RegA, RegB) when is_atom(RegA), is_atom(RegB) -> + % General form: REX.W + 87 /r + {REX_R, MODRM_REG} = x86_64_x_reg(RegA), + {REX_B, MODRM_RM} = x86_64_x_reg(RegB), + <<(16#48 bor (REX_R bsl 2) bor REX_B), 16#87, 3:2, MODRM_REG:3, MODRM_RM:3>>. diff --git a/tests/libs/jit/jit_x86_64_asm_tests.erl b/tests/libs/jit/jit_x86_64_asm_tests.erl index b15805922c..65f3899bd7 100644 --- a/tests/libs/jit/jit_x86_64_asm_tests.erl +++ b/tests/libs/jit/jit_x86_64_asm_tests.erl @@ -957,3 +957,38 @@ retq_test_() -> [ ?_assertAsmEqual(<<16#c3>>, "retq", jit_x86_64_asm:retq()) ]. 
+ +xchgq_test_() -> + [ + ?_assertAsmEqual(<<16#90>>, "xchg %rax,%rax", jit_x86_64_asm:xchgq(rax, rax)), + ?_assertAsmEqual(<<16#48, 16#91>>, "xchg %rax,%rcx", jit_x86_64_asm:xchgq(rax, rcx)), + ?_assertAsmEqual(<<16#48, 16#92>>, "xchg %rax,%rdx", jit_x86_64_asm:xchgq(rax, rdx)), + ?_assertAsmEqual(<<16#48, 16#96>>, "xchg %rax,%rsi", jit_x86_64_asm:xchgq(rax, rsi)), + ?_assertAsmEqual(<<16#48, 16#97>>, "xchg %rax,%rdi", jit_x86_64_asm:xchgq(rax, rdi)), + ?_assertAsmEqual(<<16#49, 16#90>>, "xchg %rax,%r8", jit_x86_64_asm:xchgq(rax, r8)), + ?_assertAsmEqual(<<16#49, 16#91>>, "xchg %rax,%r9", jit_x86_64_asm:xchgq(rax, r9)), + ?_assertAsmEqual(<<16#49, 16#92>>, "xchg %rax,%r10", jit_x86_64_asm:xchgq(rax, r10)), + ?_assertAsmEqual(<<16#49, 16#93>>, "xchg %rax,%r11", jit_x86_64_asm:xchgq(rax, r11)), + + % xchg reg, rax - commutative, should use same short encoding + ?_assertAsmEqual(<<16#48, 16#91>>, "xchg %rcx,%rax", jit_x86_64_asm:xchgq(rcx, rax)), + ?_assertAsmEqual(<<16#48, 16#92>>, "xchg %rdx,%rax", jit_x86_64_asm:xchgq(rdx, rax)), + ?_assertAsmEqual(<<16#49, 16#91>>, "xchg %r9,%rax", jit_x86_64_asm:xchgq(r9, rax)), + + % xchg reg, reg - general form (REX.W + 0x87 /r) + ?_assertAsmEqual( + <<16#48, 16#87, 16#D1>>, "xchg %rdx,%rcx", jit_x86_64_asm:xchgq(rdx, rcx) + ), + ?_assertAsmEqual( + <<16#48, 16#87, 16#F2>>, "xchg %rsi,%rdx", jit_x86_64_asm:xchgq(rsi, rdx) + ), + ?_assertAsmEqual( + <<16#4C, 16#87, 16#C1>>, "xchg %r8,%rcx", jit_x86_64_asm:xchgq(r8, rcx) + ), + ?_assertAsmEqual( + <<16#4D, 16#87, 16#C8>>, "xchg %r9,%r8", jit_x86_64_asm:xchgq(r9, r8) + ), + ?_assertAsmEqual( + <<16#4D, 16#87, 16#D1>>, "xchg %r10,%r9", jit_x86_64_asm:xchgq(r10, r9) + ) + ]. diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 56889b5598..abdb0d6773 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -163,6 +163,60 @@ call_primitive_extended_regs_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+call_primitive_few_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, rax} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, r11} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, r10} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, r9} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, r8} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, rcx} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + + CreatedBin = rax, + Offset = r11, + SrcReg = r8, + SizeValue = r9, + FlagsValue = rcx, + + {State7, r8} = ?BACKEND:call_primitive(State6, ?PRIM_BITSTRING_INSERT_INTEGER, [ + CreatedBin, Offset, {free, SrcReg}, SizeValue, {free, FlagsValue} + ]), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 4c 8b 5f 38 mov 0x38(%rdi),%r11\n" + " 8: 4c 8b 57 40 mov 0x40(%rdi),%r10\n" + " c: 4c 8b 4f 48 mov 0x48(%rdi),%r9\n" + " 10: 4c 8b 47 50 mov 0x50(%rdi),%r8\n" + " 14: 48 8b 4f 58 mov 0x58(%rdi),%rcx\n" + " 18: 57 push %rdi\n" + " 19: 56 push %rsi\n" + " 1a: 52 push %rdx\n" + " 1b: 41 51 push %r9\n" + " 1d: 41 52 push %r10\n" + " 1f: 41 53 push %r11\n" + " 21: 50 push %rax\n" + " 22: 48 8b 92 c8 01 00 00 mov 0x1c8(%rdx),%rdx\n" + " 29: 52 push %rdx\n" + " 2a: 48 89 c7 mov %rax,%rdi\n" + " 2d: 4c 89 de mov %r11,%rsi\n" + " 30: 4c 89 c2 mov %r8,%rdx\n" + " 33: 4c 87 c9 xchg %r9,%rcx\n" + " 36: 4d 89 c8 mov %r9,%r8\n" + " 39: 58 pop %rax\n" + " 3a: ff d0 callq *%rax\n" + " 3c: 49 89 c0 mov %rax,%r8\n" + " 3f: 58 pop %rax\n" + " 40: 41 5b pop %r11\n" + " 42: 41 5a pop %r10\n" + " 44: 41 59 pop %r9\n" + " 46: 5a pop %rdx\n" + " 47: 5e pop %rsi\n" + " 48: 5f pop %rdi" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),