diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 01269b92a45d..670afb0081bc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -4322,7 +4322,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     llvm_unreachable("NEON::BI__builtin_neon_vsraq_n_v NYI");
   case NEON::BI__builtin_neon_vrsra_n_v:
   case NEON::BI__builtin_neon_vrsraq_n_v: {
-    llvm_unreachable("NEON::BI__builtin_neon_vrsraq_n_v NYI");
+    llvm::SmallVector<mlir::Value> tmpOps = {Ops[1], Ops[2]};
+    // The LLVM intrinsic expects a negative shift amount for a right shift,
+    // so the shift amount vector type has to be signed.
+    cir::VectorType shiftAmtVecTy =
+        usgn ? getSignChangedVectorType(builder, vTy) : vTy;
+    mlir::Value tmp =
+        emitNeonCall(builder, {vTy, shiftAmtVecTy}, tmpOps,
+                     usgn ? "aarch64.neon.urshl" : "aarch64.neon.srshl", vTy,
+                     getLoc(E->getExprLoc()), false,
+                     1 /* shift amount is args[1] */, true /* right shift */);
+    Ops[0] = builder.createBitcast(Ops[0], vTy);
+    return builder.createBinop(Ops[0], cir::BinOpKind::Add, tmp);
   }
   case NEON::BI__builtin_neon_vld1_v:
   case NEON::BI__builtin_neon_vld1q_v: {
diff --git a/clang/test/CIR/CodeGen/AArch64/neon.c b/clang/test/CIR/CodeGen/AArch64/neon.c
index 3ddaea58f163..19f5e2e30658 100644
--- a/clang/test/CIR/CodeGen/AArch64/neon.c
+++ b/clang/test/CIR/CodeGen/AArch64/neon.c
@@ -6105,157 +6105,261 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
   // LLVM: ret <2 x i64> [[VRSHR_N1]]
 }
 
-// NYI-LABEL: @test_vrsra_n_s8(
-// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
-// NYI: ret <8 x i8> [[TMP0]]
-// int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
-//   return vrsra_n_s8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// NYI: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i16> [[TMP3]]
-// int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
-//   return vrsra_n_s16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// NYI: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i32> [[TMP3]]
-// int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
-//   return vrsra_n_s32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s8(
-// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
-// NYI: ret <16 x i8> [[TMP0]]
-// int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
-//   return vrsraq_n_s8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s16(
-// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// NYI: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <8 x i16> [[TMP3]]
-// int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
-//   return vrsraq_n_s16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s32(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// NYI: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i32> [[TMP3]]
-// int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
-//   return vrsraq_n_s32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_s64(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// NYI: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i64> [[TMP3]]
-// int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
-//   return vrsraq_n_s64(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_u8(
-// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
-// NYI: ret <8 x i8> [[TMP0]]
-// uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
-//   return vrsra_n_u8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_u16(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// NYI: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i16> [[TMP3]]
-// uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
-//   return vrsra_n_u16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsra_n_u32(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// NYI: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i32> [[TMP3]]
-// uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
-//   return vrsra_n_u32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u8(
-// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
-// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
-// NYI: ret <16 x i8> [[TMP0]]
-// uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
-//   return vrsraq_n_u8(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u16(
-// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// NYI: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <8 x i16> [[TMP3]]
-// uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
-//   return vrsraq_n_u16(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u32(
-// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// NYI: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <4 x i32> [[TMP3]]
-// uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
-//   return vrsraq_n_u32(a, b, 3);
-// }
-
-// NYI-LABEL: @test_vrsraq_n_u64(
-// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
-// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// NYI: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <2 x i64> [[TMP3]]
-// uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
-//   return vrsraq_n_u64(a, b, 3);
-// }
+int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
+  return vrsra_n_s8(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s8
+  // LLVM: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %1, <8 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <8 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <8 x i8> [[TMP0]]
+}
+
+int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
+  return vrsra_n_s16(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s16
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i16> [[TMP3]]
+}
+
+int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
+  return vrsra_n_s32(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s32
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i32> [[TMP3]]
+}
+
+int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
+  return vrsraq_n_s8(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s8
+  // LLVM: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %1, <16 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <16 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <16 x i8> [[TMP0]]
+}
+
+int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
+  return vrsraq_n_s16(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s16
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <8 x i16> [[TMP3]]
+}
+
+int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
+  return vrsraq_n_s32(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s32
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i32> [[TMP3]]
+}
+
+int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
+  return vrsraq_n_s64(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_s64
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i64> [[TMP3]]
+}
+
+uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
+  return vrsra_n_u8(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u8
+  // LLVM: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %1, <8 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <8 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <8 x i8> [[TMP0]]
+}
+
+uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
+  return vrsra_n_u16(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u16
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i16> [[TMP3]]
+}
+
+uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
+  return vrsra_n_u32(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u32
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i32> [[TMP3]]
+}
+
+uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
+  return vrsraq_n_u8(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: cir.binop(add, {{%.*}}, [[VRSHR_N]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u8
+  // LLVM: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %1, <16 x i8> splat (i8 -3))
+  // LLVM: [[TMP0:%.*]] = add <16 x i8> %0, [[VRSHR_N]]
+  // LLVM: ret <16 x i8> [[TMP0]]
+}
+
+uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
+  return vrsraq_n_u16(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u16
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+  // LLVM: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <8 x i16> [[TMP3]]
+}
+
+uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
+  return vrsraq_n_u32(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u32
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <4 x i32> [[TMP3]]
+}
+
+uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
+  return vrsraq_n_u64(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsraq_n_u64
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> %0 to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> %1 to <16 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
+  // LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <2 x i64> [[TMP3]]
+}
 
 // NYI-LABEL: @test_vsri_n_s8(
 // NYI: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
@@ -14974,17 +15078,25 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) {
 //   return (int64_t)vrsrad_n_s64(a, b, 63);
 // }
 
-// NYI-LABEL: @test_vrsra_n_s64(
-// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// NYI: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <1 x i64> [[TMP3]]
-// int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
-//   return vrsra_n_s64(a, b, 1);
-// }
+int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
+  return vrsra_n_s64(a, b, 1);
+
+  // CIR-LABEL: vrsra_n_s64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_s64
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <1 x i64> [[TMP3]]
+}
 
 // NYI-LABEL: @test_vrsrad_n_u64(
 // NYI: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
@@ -14994,17 +15106,25 @@ void test_vst1q_s64(int64_t *a, int64x2_t b) {
 //   return (uint64_t)vrsrad_n_u64(a, b, 63);
 // }
 
-// NYI-LABEL: @test_vrsra_n_u64(
-// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
-// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// NYI: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
-// NYI: ret <1 x i64> [[TMP3]]
-// uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
-//   return vrsra_n_u64(a, b, 1);
-// }
+uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
+  return vrsra_n_u64(a, b, 1);
+
+  // CIR-LABEL: vrsra_n_u64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" [[VRSHR_N]], [[splat]] : (!cir.vector, !cir.vector) -> !cir.vector
+  // CIR: [[TMP2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector), !cir.vector
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector
+
+  // LLVM-LABEL: test_vrsra_n_u64
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> %0 to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> %1 to <8 x i8>
+  // LLVM: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+  // LLVM: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
+  // LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+  // LLVM: ret <1 x i64> [[TMP3]]
+}
 
 // NYI-LABEL: @test_vshld_n_s64(
 // NYI: [[SHLD_N:%.*]] = shl i64 %a, 1
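Note on the lowering: srshl/urshl with a negated shift amount performs a rounding shift right, which is why the splat constants in the checks above are -3 (or -1) rather than the literal shift amount. For reference, a minimal scalar model of the per-lane vrsra_n semantics (an illustrative sketch, not part of the patch; the helper name is hypothetical):

#include <stdint.h>

// Per-lane model of vrsra_n_s8(a, b, n): rounding shift right of b by n,
// accumulated into a. The int16_t widening keeps the rounding add
// (b + (1 << (n - 1))) from overflowing before the shift.
static inline int8_t rsra_lane_s8(int8_t a, int8_t b, int n) {
  int16_t rounded = (int16_t)(b + (1 << (n - 1))); // add the rounding bias
  int8_t shifted = (int8_t)(rounded >> n);         // arithmetic shift right
  return (int8_t)(a + shifted);                    // accumulate (wraps mod 2^8)
}

For example, rsra_lane_s8(10, -3, 3) returns 10, since the rounding right shift of -3 by 3 is (-3 + 4) >> 3 = 0.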