diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 01269b92a45d..670afb0081bc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -4322,7 +4322,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     llvm_unreachable("NEON::BI__builtin_neon_vsraq_n_v NYI");
   case NEON::BI__builtin_neon_vrsra_n_v:
   case NEON::BI__builtin_neon_vrsraq_n_v: {
-    llvm_unreachable("NEON::BI__builtin_neon_vrsraq_n_v NYI");
+    llvm::SmallVector<mlir::Value> tmpOps = {Ops[1], Ops[2]};
+    // The LLVM intrinsic expects a negative shift amount for a right shift,
+    // so the shift amount vector type has to be signed.
+    cir::VectorType shiftAmtVecTy =
+        usgn ? getSignChangedVectorType(builder, vTy) : vTy;
+    mlir::Value tmp =
+        emitNeonCall(builder, {vTy, shiftAmtVecTy}, tmpOps,
+                     usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
+                     getLoc(E->getExprLoc()), false,
+                     1 /* shift amount is args[1] */, true /* right shift */);
+    Ops[0] = builder.createBitcast(Ops[0], vTy);
+    return builder.createBinop(Ops[0], cir::BinOpKind::Add, tmp);
   }
   case NEON::BI__builtin_neon_vld1_v:
   case NEON::BI__builtin_neon_vld1q_v: {
diff --git a/clang/test/CIR/CodeGen/AArch64/neon.c b/clang/test/CIR/CodeGen/AArch64/neon.c
index 3ddaea58f163..19f5e2e30658 100644
--- a/clang/test/CIR/CodeGen/AArch64/neon.c
+++ b/clang/test/CIR/CodeGen/AArch64/neon.c
@@ -6105,157 +6105,261 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
   // LLVM: ret <2 x i64> [[VRSHR_N1]]
 }
 
-// NYI-LABEL: @test_vrsra_n_s8(
-// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
-// NYI: ret <8 x i8> [[TMP0]]
-// int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
-//   return vrsra_n_s8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// NYI: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i16> [[TMP3]]
-// int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
-//   return vrsra_n_s16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// NYI: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i32> [[TMP3]]
-// int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
-//   return vrsra_n_s32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s8(
-// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
-// NYI: ret <16 x i8> [[TMP0]]
-// int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
-//   return vrsraq_n_s8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// NYI: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <8 x i16> [[TMP3]]
-// int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
-//   return vrsraq_n_s16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// NYI: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i32> [[TMP3]]
-// int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
-//   return vrsraq_n_s32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s64(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// NYI: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i64> [[TMP3]]
-// int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
-//   return vrsraq_n_s64(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_u8(
-// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
-// NYI: ret <8 x i8> [[TMP0]]
-// uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
-//   return vrsra_n_u8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_u16(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// NYI: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i16> [[TMP3]]
-// uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
-//   return vrsra_n_u16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_u32(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// NYI: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i32> [[TMP3]]
-// uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
-//   return vrsra_n_u32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u8(
-// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
-// NYI: ret <16 x i8> [[TMP0]]
-// uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
-//   return vrsraq_n_u8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u16(
-// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// NYI: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <8 x i16> [[TMP3]]
-// uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
-//   return vrsraq_n_u16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u32(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// NYI: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i32> [[TMP3]]
-// uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
-//   return vrsraq_n_u32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u64(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// NYI: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i64> [[TMP3]]
-// uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
-//   return vrsraq_n_u64(a, b, 3);
-// }
+int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
+  return vrsra_n_s8(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s8
+  // LLVM: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %1, <8 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <8 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <8 x i8> [[TMP0]]
+}
+
+int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
+  return vrsra_n_s16(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s16
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i16> [[TMP3]]
+}
+
+int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
+  return vrsra_n_s32(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s32
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i32> [[TMP3]]
+}
+
+int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
+  return vrsraq_n_s8(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s8
+  // LLVM: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %1, <16 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <16 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <16 x i8> [[TMP0]]
+}
+
+int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
+  return vrsraq_n_s16(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s16
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <8 x i16> [[TMP3]]
+}
+
+int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
+  return vrsraq_n_s32(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s32
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i32> [[TMP3]]
+}
+
+int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
+  return vrsraq_n_s64(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s64
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i64> [[TMP3]]
+}
+
+uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
+  return vrsra_n_u8(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u8
+  // LLVM: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %1, <8 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <8 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <8 x i8> [[TMP0]]
+}
+
+uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
+  return vrsra_n_u16(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u16
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i16> [[TMP3]]
+}
+
+uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
+  return vrsra_n_u32(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u32
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i32> [[TMP3]]
+}
+
+uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
+  return vrsraq_n_u8(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u8
+  // LLVM: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %1, <16 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <16 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <16 x i8> [[TMP0]]
+}
+
+uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
+  return vrsraq_n_u16(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u16
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <8 x i16> [[TMP3]]
+}
+
+uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
+  return vrsraq_n_u32(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u32
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i32> [[TMP3]]
+}
+
+uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
+  return vrsraq_n_u64(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u64
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i64> [[TMP3]]
+}
 
 // NYI-LABEL: @test_vsri_n_s8(
 // NYI: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
@@ -14974,17 +15078,25 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) {
 //   return (int64_t)vrsrad_n_s64(a, b, 63);
 // }
 
-// NYI-LABEL: @test_vrsra_n_s64(
-// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// NYI: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <1 x i64> [[TMP3]]
-// int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
-//   return vrsra_n_s64(a, b, 1);
-// }
+int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
+  return vrsra_n_s64(a, b, 1);
+
+  // CIR-LABEL: vrsra_n_s64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s64
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <1 x i64> [[TMP3]]
+}
 
 // NYI-LABEL: @test_vrsrad_n_u64(
 // NYI: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
@@ -14994,17 +15106,25 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) {
 //   return (uint64_t)vrsrad_n_u64(a, b, 63);
 // }
 
-// NYI-LABEL: @test_vrsra_n_u64(
-// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// NYI: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <1 x i64> [[TMP3]]
-// uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
-//   return vrsra_n_u64(a, b, 1);
-// }
+uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
+  return vrsra_n_u64(a, b, 1);
+
+  // CIR-LABEL: vrsra_n_u64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u64
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <1 x i64> [[TMP3]]
+}
 
 // NYI-LABEL: @test_vshld_n_s64(
 // NYI: [[SHLD_N:%.*]] = shl i64 %a, 1
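Note on the lowering: srshl/urshl with a negated shift amount performs a rounding shift right, which is why the splat constants in the checks above are -3 (or -1) rather than the literal shift amount. For reference, a minimal scalar model of the per-lane vrsra_n semantics (an illustrative sketch, not part of the patch; the helper name is hypothetical):

#include <stdint.h>

// Per-lane model of vrsra_n_s8(a, b, n): rounding shift right of b by n,
// accumulated into a. The int16_t widening keeps the rounding add
// (b + (1 << (n - 1))) from overflowing before the shift.
static inline int8_t rsra_lane_s8(int8_t a, int8_t b, int n) {
  int16_t rounded = (int16_t)(b + (1 << (n - 1))); // add the rounding bias
  int8_t shifted = (int8_t)(rounded >> n);         // arithmetic shift right
  return (int8_t)(a + shifted);                    // accumulate (wraps mod 2^8)
}

For example, rsra_lane_s8(10, -3, 3) returns 10, since the rounding right shift of -3 by 3 is (-3 + 4) >> 3 = 0.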