Skip to content

Commit

Permalink
Implement x86 simd basics
Browse files Browse the repository at this point in the history
Signed-off-by: Zoltan Herczeg <zherczeg.u-szeged@partner.samsung.com>
  • Loading branch information
Zoltan Herczeg authored and clover2123 committed Oct 31, 2023
1 parent e877219 commit 91093ae
Show file tree
Hide file tree
Showing 7 changed files with 2,050 additions and 44 deletions.
6 changes: 4 additions & 2 deletions src/jit/Backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ struct CompileContext {
sljit_emit_op1(compiler, mov_op, (arg), (argw), (source_reg), 0); \
}

#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#define HAS_SIMD

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
Expand Down Expand Up @@ -370,7 +370,9 @@ static void emitStoreImmediateParams(sljit_compiler* compiler, Instruction* inst
#include "TableInl.h"
#include "TryCatchInl.h"

#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#include "SimdX86Inl.h"
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
#include "SimdArm64Inl.h"
#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
#include "SimdArm32Inl.h"
Expand Down
4 changes: 4 additions & 0 deletions src/jit/MemoryInl.h
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,11 @@ static void emitLoadLaneSIMD(sljit_compiler* compiler, Instruction* instr)
break;
default:
ASSERT(instr->opcode() == ByteCode::V128Load64LaneOpcode);
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
simdType = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
#else /* !SLJIT_32BIT_ARCHITECTURE */
simdType = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64;
#endif /* !SLJIT_32BIT_ARCHITECTURE */
size = 8;
break;
}
Expand Down
5 changes: 3 additions & 2 deletions src/jit/SimdArm32Inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,11 @@ enum ExtendType : uint32_t {
using unaryCallbackFunction = std::add_pointer<void(void*, void*)>::type;
using binaryCallbackFunction = std::add_pointer<void(void*, void*, void*)>::type;

void setArgs(Operand* operand, JITArg& arg) {
void setArgs(Operand* operand, JITArg& arg)
{
if (operand->item != nullptr && operand->item->asInstruction()->opcode() == ByteCode::Const128Opcode) {
arg.arg = SLJIT_MEM0();
arg.argw = (sljit_sw)reinterpret_cast<Const128*>(operand->item->asInstruction()->byteCode())->value();
arg.argw = (sljit_sw) reinterpret_cast<Const128*>(operand->item->asInstruction()->byteCode())->value();
} else {
arg.set(operand);
}
Expand Down
10 changes: 6 additions & 4 deletions src/jit/SimdArm64Inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -465,10 +465,12 @@ static void emitUnarySIMD(sljit_compiler* compiler, Instruction* instr)
break;
}

if (!isDSTNormalRegister) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | type, dst, args[1].arg, args[1].argw);
} else {
sljit_emit_op1(compiler, SLJIT_MOV, args[1].arg, args[1].argw, dst, 0);
if (SLJIT_IS_MEM(args[1].arg)) {
if (!isDSTNormalRegister) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | type, dst, args[1].arg, args[1].argw);
} else {
sljit_emit_op1(compiler, SLJIT_MOV32, args[1].arg, args[1].argw, dst, 0);
}
}
}

Expand Down
103 changes: 68 additions & 35 deletions src/jit/SimdInl.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,49 @@ static void emitExtractLaneSIMD(sljit_compiler* compiler, Instruction* instr)
}

JITArg args[2];
simdOperandToArg(compiler, operands + 0, args[0], type & ~(SLJIT_SIMD_LANE_SIGNED | SLJIT_32), SLJIT_FR0);
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
JITArgPair dstArgPair;

if (type == (SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64)) {
dstArgPair.set(operands + 1);

if (SLJIT_IS_MEM(dstArgPair.arg1)) {
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
args[1].arg = dstArgPair.arg1;
args[1].argw = dstArgPair.arg1w - WORD_LOW_OFFSET;
}
} else {
args[1].set(operands + 1);
}
#else /* !SLJIT_32BIT_ARCHITECTURE */
args[1].set(operands + 1);
#endif /* SLJIT_32BIT_ARCHITECTURE */

simdOperandToArg(compiler, operands + 0, args[0], type & ~(SLJIT_SIMD_LANE_SIGNED | SLJIT_32), SLJIT_FR0);

if (type & SLJIT_SIMD_FLOAT) {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, args[0].arg, index, args[1].arg, args[1].argw);
return;
}

#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
if (type == (SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64)) {
index <<= 1;
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;

sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, args[0].arg, index, dstArgPair.arg1, dstArgPair.arg1w);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, args[0].arg, index + 1, dstArgPair.arg2, dstArgPair.arg2w);
return;
}
#endif /* SLJIT_32BIT_ARCHITECTURE */

sljit_s32 dstReg = GET_TARGET_REG(args[1].arg, SLJIT_R0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, args[0].arg, index, dstReg, 0);

if (SLJIT_IS_MEM(args[1].arg)) {
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
sljit_s32 op = (type & SLJIT_32) ? SLJIT_MOV32 : SLJIT_MOV;
sljit_emit_op1(compiler, op, args[1].arg, args[1].argw, dstReg, 0);
#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
if (type == (SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64)) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8, index == 0 ? dstReg : getHighRegister(dstReg), args[1].arg, args[1].argw);
} else {
sljit_emit_op1(compiler, SLJIT_MOV, args[1].arg, args[1].argw, dstReg, 0);
}
#else /* !SLJIT_64BIT_ARCHITECTURE && !SLJIT_CONFIG_ARM_32 */
sljit_s32 op = SLJIT_MOV;
sljit_emit_op1(compiler, SLJIT_MOV, args[1].arg, args[1].argw, dstReg, 0);
Expand Down Expand Up @@ -134,38 +156,34 @@ static void emitReplaceLaneSIMD(sljit_compiler* compiler, Instruction* instr)
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | (type & ~SLJIT_32), dstReg, args[0].arg, 0);
}


#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
if (type & SLJIT_SIMD_FLOAT) {
floatOperandToArg(compiler, operands + 1, args[1], SLJIT_FR1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, dstReg, index, args[1].arg, args[1].argw);
} else if (!((operands + 1)->item == nullptr || (operands + 1)->item->group() != Instruction::Immediate) && (operands + 1)->item->asInstruction()->opcode() == ByteCode::Const64Opcode) {
uint64_t value64 = reinterpret_cast<Const64*>(instr->byteCode())->value();
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, static_cast<sljit_sw>(value64));
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, static_cast<sljit_sw>(value64 >> 32));
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
} else if (type == (SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64)) {
JITArgPair srcArgPair(operands + 1);

ASSERT(index < 2);
if (SLJIT_IS_MEM(srcArgPair.arg1)) {
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;

sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_32, dstReg, (index * 2), SLJIT_R0, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_32, dstReg, (index * 2) + 1, SLJIT_R1, 0);
} else {
args[1].set(operands + 1);

if (type == (SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64)) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8, index == 0 ? dstReg : getHighRegister(dstReg), args[1].arg, args[1].argw);
args[1].arg = srcArgPair.arg1;
args[1].argw = srcArgPair.arg1w - WORD_LOW_OFFSET;
} else {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, dstReg, index, args[1].arg, args[1].argw);
index <<= 1;
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_32;

sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, dstReg, index, srcArgPair.arg1, srcArgPair.arg1w);
index++;

args[1].arg = srcArgPair.arg2;
args[1].argw = srcArgPair.arg2w;
}
}
#else /* !SLJIT_CONFIG_ARM_32 */
if (type & SLJIT_SIMD_FLOAT) {
floatOperandToArg(compiler, operands + 1, args[1], SLJIT_FR1);
#endif /* SLJIT_32BIT_ARCHITECTURE */
} else {
args[1].set(operands + 1);
}

sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, dstReg, index, args[1].arg, args[1].argw);
#endif /* SLJIT_CONFIG_ARM_32 */

if (SLJIT_IS_MEM(args[2].arg)) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | (type & ~SLJIT_32), dstReg, args[2].arg, args[2].argw);
}
Expand Down Expand Up @@ -199,19 +217,34 @@ static void emitSplatSIMD(sljit_compiler* compiler, Instruction* instr)
break;
}

JITArg args[2] = { operands + 0, operands + 1 };
JITArg args[2];
args[1].set(operands + 1);

sljit_s32 dstReg = GET_TARGET_REG(args[1].arg, SLJIT_FR0);

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
if (type == (SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64)) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8, SLJIT_FR0, args[0].arg, args[0].argw);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8, SLJIT_FR1, SLJIT_FR0, 0);
JITArgPair srcArgPair(operands + 1);

if (SLJIT_IS_MEM(srcArgPair.arg1)) {
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_replicate(compiler, type, dstReg, srcArgPair.arg1, srcArgPair.arg1w - WORD_LOW_OFFSET);
} else {
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, dstReg, 0, srcArgPair.arg1, srcArgPair.arg1w);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, dstReg, 1, srcArgPair.arg2, srcArgPair.arg2w);

type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_lane_replicate(compiler, type, dstReg, dstReg, 0);
}
} else {
#endif /* SLJIT_CONFIG_ARM_32 */
#endif /* SLJIT_32BIT_ARCHITECTURE */
args[0].set(operands);
sljit_emit_simd_replicate(compiler, type, dstReg, args[0].arg, args[0].argw);
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
}
#endif /* SLJIT_CONFIG_ARM_32 */
#endif /* SLJIT_32BIT_ARCHITECTURE */

if (SLJIT_IS_MEM(args[1].arg)) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, dstReg, args[1].arg, args[1].argw);
}
Expand Down
Loading

0 comments on commit 91093ae

Please sign in to comment.