Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update WebAssembly SIMD opcodes #56

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 15 additions & 17 deletions clang/include/clang/Basic/BuiltinsWebAssembly.def
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,20 @@ TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64_f64, "LLid", "nc", "nontrappi
TARGET_BUILTIN(__builtin_wasm_swizzle_v8x16, "V16cV16cV16c", "nc", "unimplemented-simd128")

TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i8x16, "iV16cIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i8x16, "iV16cIi", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i8x16, "iV16cIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i16x8, "iV8sIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i16x8, "iV8sIi", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i16x8, "iV8sIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_i32x4, "iV4iIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_i64x2, "LLiV2LLiIi", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_i64x2, "LLiV2LLiIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_f32x4, "fV4fIi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_f64x2, "dV2dIi", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_extract_lane_f64x2, "dV2dIi", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_replace_lane_i8x16, "V16cV16cIii", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_i16x8, "V8sV8sIii", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_i32x4, "V4iV4iIii", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_i64x2, "V2LLiV2LLiIiLLi", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_i64x2, "V2LLiV2LLiIiLLi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_f32x4, "V4fV4fIif", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_f64x2, "V2dV2dIid", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_replace_lane_f64x2, "V2dV2dIid", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_add_saturate_s_i8x16, "V16cV16cV16c", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_add_saturate_u_i8x16, "V16cV16cV16c", "nc", "simd128")
Expand All @@ -98,8 +98,8 @@ TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i8x16, "V16cV16cV16c", "nc", "simd1
TARGET_BUILTIN(__builtin_wasm_sub_saturate_s_i16x8, "V8sV8sV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i16x8, "V8sV8sV8s", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, "V16cV16cV16c", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8sV8sV8s", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, "V16cV16cV16c", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8sV8sV8s", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128")

Expand All @@ -113,27 +113,25 @@ TARGET_BUILTIN(__builtin_wasm_all_true_i32x4, "iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_all_true_i64x2, "iV2LLi", "nc", "unimplemented-simd128")

TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_qfma_f32x4, "V4fV4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_qfms_f32x4, "V4fV4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_qfma_f32x4, "V4fV4fV4fV4f", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_qfms_f32x4, "V4fV4fV4fV4f", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_qfma_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128")

TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128")

TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128")
Expand Down
6 changes: 2 additions & 4 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14462,8 +14462,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
Expand All @@ -14474,8 +14473,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
Expand Down
12 changes: 0 additions & 12 deletions clang/test/CodeGen/builtins-wasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,18 +519,6 @@ i32x4 trunc_saturate_u_i32x4_f32x4(f32x4 f) {
// WEBASSEMBLY-NEXT: ret
}

i64x2 trunc_saturate_s_i64x2_f64x2(f64x2 f) {
return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(f);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.trunc.saturate.signed.v2i64.v2f64(<2 x double> %f)
// WEBASSEMBLY-NEXT: ret
}

i64x2 trunc_saturate_u_i64x2_f64x2(f64x2 f) {
return __builtin_wasm_trunc_saturate_u_i64x2_f64x2(f);
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.trunc.saturate.unsigned.v2i64.v2f64(<2 x double> %f)
// WEBASSEMBLY-NEXT: ret
}

i8x16 narrow_s_i8x16_i16x8(i16x8 low, i16x8 high) {
return __builtin_wasm_narrow_s_i8x16_i16x8(low, high);
// WEBASSEMBLY: call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(
Expand Down
152 changes: 65 additions & 87 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
}
if (Subtarget->hasUnimplementedSIMD128()) {
addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
}
Expand Down Expand Up @@ -116,10 +114,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::i32, MVT::i64})
setOperationAction(Op, T, Expand);
if (Subtarget->hasSIMD128())
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
if (Subtarget->hasUnimplementedSIMD128())
setOperationAction(Op, MVT::v2i64, Expand);
}

// SIMD-specific configuration
Expand All @@ -130,83 +126,63 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, T, Legal);

// Custom lower BUILD_VECTORs to minimize number of replace_lanes
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
if (Subtarget->hasUnimplementedSIMD128())
for (auto T : {MVT::v2i64, MVT::v2f64})
setOperationAction(ISD::BUILD_VECTOR, T, Custom);

// We have custom shuffle lowering to expose the shuffle mask
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
if (Subtarget->hasUnimplementedSIMD128())
for (auto T: {MVT::v2i64, MVT::v2f64})
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

// Custom lowering since wasm shifts must have a scalar shift amount
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Custom);
if (Subtarget->hasUnimplementedSIMD128())
setOperationAction(Op, MVT::v2i64, Custom);
}

// Custom lower lane accesses to expand out variable indices
for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
setOperationAction(Op, T, Custom);
if (Subtarget->hasUnimplementedSIMD128())
for (auto T : {MVT::v2i64, MVT::v2f64})
setOperationAction(Op, T, Custom);
}

// There is no i64x2.mul instruction
// TODO: Actually, there is now. Implement it.
setOperationAction(ISD::MUL, MVT::v2i64, Expand);

// There are no vector select instructions
for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64})
setOperationAction(Op, T, Expand);
if (Subtarget->hasUnimplementedSIMD128())
for (auto T : {MVT::v2i64, MVT::v2f64})
setOperationAction(Op, T, Expand);
}

// Expand integer operations supported for scalars but not SIMD
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
if (Subtarget->hasUnimplementedSIMD128())
setOperationAction(Op, MVT::v2i64, Expand);
}

// But we do have integer min and max operations
if (Subtarget->hasUnimplementedSIMD128()) {
for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Legal);
}
for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Legal);

// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
setOperationAction(Op, MVT::v4f32, Expand);
if (Subtarget->hasUnimplementedSIMD128())
setOperationAction(Op, MVT::v2f64, Expand);
}
ISD::FEXP, ISD::FEXP2, ISD::FRINT})
for (auto T : {MVT::v4f32, MVT::v2f64})
setOperationAction(Op, T, Expand);

// Expand operations not supported for i64x2 vectors
if (Subtarget->hasUnimplementedSIMD128())
for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

// Expand additional SIMD ops that V8 hasn't implemented yet
if (!Subtarget->hasUnimplementedSIMD128()) {
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
}
for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

// 64x2 conversions are not in the spec
for (auto Op :
{ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
for (auto T : {MVT::v2i64, MVT::v2f64})
setOperationAction(Op, T, Expand);
}

// As a special case, these operators use the type to mean the type to
Expand Down Expand Up @@ -1270,39 +1246,42 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
// If sign extension operations are disabled, allow sext_inreg only if operand
// is a vector extract. SIMD does not depend on sign extension operations, but
// allowing sext_inreg in this context lets us have simple patterns to select
// extract_lane_s instructions. Expanding sext_inreg everywhere would be
// simpler in this file, but would necessitate large and brittle patterns to
// undo the expansion and select extract_lane_s instructions.
// is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
// extension operations, but allowing sext_inreg in this context lets us have
// simple patterns to select extract_lane_s instructions. Expanding sext_inreg
// everywhere would be simpler in this file, but would necessitate large and
// brittle patterns to undo the expansion and select extract_lane_s
// instructions.
assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
const SDValue &Extract = Op.getOperand(0);
MVT VecT = Extract.getOperand(0).getSimpleValueType();
MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
->getVT()
.getSimpleVT();
MVT ExtractedVecT =
MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
if (ExtractedVecT == VecT)
return Op;
// Bitcast vector to appropriate type to ensure ISel pattern coverage
const SDValue &Index = Extract.getOperand(1);
unsigned IndexVal =
static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
unsigned Scale =
ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
assert(Scale > 1);
SDValue NewIndex =
DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
SDValue NewExtract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
NewExtract, Op.getOperand(1));
}
// Otherwise expand
return SDValue();
if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();

const SDValue &Extract = Op.getOperand(0);
MVT VecT = Extract.getOperand(0).getSimpleValueType();
if (VecT.getVectorElementType().getSizeInBits() > 32)
return SDValue();
MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
->getVT()
.getSimpleVT();
MVT ExtractedVecT =
MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
if (ExtractedVecT == VecT)
return Op;

// Bitcast vector to appropriate type to ensure ISel pattern coverage
const SDValue &Index = Extract.getOperand(1);
unsigned IndexVal =
static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
unsigned Scale =
ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
assert(Scale > 1);
SDValue NewIndex =
DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
SDValue NewExtract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
Op.getOperand(1));
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Expand Down Expand Up @@ -1502,7 +1481,6 @@ SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
// expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes
// (which return i64x2 results) as well. So instead we manually unroll i64x2
// comparisons here.
assert(Subtarget->hasUnimplementedSIMD128());
assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
SmallVector<SDValue, 2> LHS, RHS;
DAG.ExtractVectorElements(Op->getOperand(0), LHS);
Expand Down
Loading