[CIR] Implement __builtin_ia32_cmpnleps/cmpnlepd

badumbatish · badumbatish · commit 21aab62d454f · 2025-09-05T13:09:57.000-07:00
- Add `isOrdered` UnitAttr to CIR_VecCmpOp for controlling FP comparison semantics
- Add a verifier to CIR_VecCmpOp to ensure isOrdered is only used with
floating-point vector types
- Implement the createVecCompare, getCIRIntOrFloatBitWidth, and getVectorFCmpIR helpers.
- Update LLVM lowering to emit ordered vs unordered fcmp
predicates (olt/ult, etc.); a floating-point vec.cmp without `ordered` now
lowers to the unordered predicate for lt/le/gt/ge
- Add clang/test/CIR/CodeGen/builtin-fcmp-sse.c test.
- Update clang/test/CIR/Lowering/vec-cmp.cir to reflect ordered semantics of VecCmpOp
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -89,6 +89,19 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::IntType::get(getContext(), N, false);
   }
 
+  /// Return the bit width of a CIR integer or floating-point type.
+  ///
+  /// \p eltTy must implement cir::IntTypeInterface or cir::FPTypeInterface;
+  /// any other type is a caller bug and hits llvm_unreachable.
+  unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) const {
+    if (auto intType = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
+      return intType.getWidth();
+    if (auto floatType = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
+      return floatType.getWidth();
+
+    llvm_unreachable("Wrong type passed in or Non-CIR type passed in");
+  }
+
   cir::IntType getSIntNTy(int N) {
     return cir::IntType::get(getContext(), N, true);
   }
@@ -188,6 +196,24 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
   }
 
+  /// Create an element-wise vector comparison of \p lhs and \p rhs.
+  ///
+  /// The result is a vector with as many elements as \p lhs whose element type
+  /// is a signed integer of the same bit width as the operand element type.
+  /// \p isOrdered is forwarded to the op; the VecCmpOp verifier rejects it
+  /// when set on vectors whose elements are not floating point.
+  cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
+                                 mlir::Value lhs, mlir::Value rhs,
+                                 bool isOrdered) {
+    auto vecTy = mlir::cast<cir::VectorType>(lhs.getType());
+    cir::IntType integralTy =
+        getSIntNTy(getCIRIntOrFloatBitWidth(vecTy.getElementType()));
+    auto integralVecTy =
+        cir::VectorType::get(getContext(), integralTy, vecTy.getSize());
+    return cir::VecCmpOp::create(*this, loc, integralVecTy, kind, lhs, rhs,
+                                 isOrdered);
+  }
+
   mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
     return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
   }
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3533,17 +3533,22 @@ def CIR_VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> {
   let arguments = (ins
     CIR_CmpOpKind:$kind,
     CIR_VectorType:$lhs,
-    CIR_VectorType:$rhs
+    CIR_VectorType:$rhs,
+    // Floating-point only: when set, lt/le/gt/ge lower to the ordered fcmp
+    // predicates (olt/ole/ogt/oge); when absent, to ult/ule/ugt/uge.
+    UnitAttr:$isOrdered
   );
 
   let results = (outs CIR_VectorType:$result);
 
   let assemblyFormat = [{
-    `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,`
+    `(` $kind `,` $lhs `,` $rhs (`,` `ordered` $isOrdered^)? `)` `:`
+    qualified(type($lhs)) `,`
     qualified(type($result)) attr-dict
   }];
 
   let hasFolder = 1;
+  let hasVerifier = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -183,6 +183,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
     Ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, E));
   }
 
+  // Builds the vector compare of Ops[0] and Ops[1] for the cmp*ps/pd builtins.
+  // TODO: Add isSignaling boolean once emitConstrainedFPCall implemented
+  auto getVectorFCmpIR = [this, &Ops, &E](cir::CmpOpKind pred,
+                                          bool isOrdered) -> mlir::Value {
+    assert(!cir::MissingFeatures::CGFPOptionsRAII());
+    assert(!cir::MissingFeatures::emitConstrainedFPCall());
+    return builder.createVecCompare(getLoc(E->getExprLoc()), pred, Ops[0],
+                                    Ops[1], isOrdered);
+  };
+
   switch (BuiltinID) {
   default:
     return nullptr;
@@ -1411,7 +1421,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
     llvm_unreachable("cmpnltps NYI");
   case X86::BI__builtin_ia32_cmpnleps:
   case X86::BI__builtin_ia32_cmpnlepd:
-    llvm_unreachable("cmpnleps NYI");
+    return getVectorFCmpIR(cir::CmpOpKind::gt, /*isOrdered=*/false);
   case X86::BI__builtin_ia32_cmpordps:
   case X86::BI__builtin_ia32_cmpordpd:
     llvm_unreachable("cmpordps NYI");
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1321,6 +1321,18 @@ OpFoldResult cir::VecCmpOp::fold(FoldAdaptor adaptor) {
       getType(), mlir::ArrayAttr::get(getContext(), elements));
 }
 
+LogicalResult cir::VecCmpOp::verify() {
+  // The `ordered` flag only has meaning for floating-point comparisons, so
+  // reject it on vectors whose elements are not floating point.
+  if (!getIsOrdered())
+    return success();
+  auto operandTy = mlir::cast<cir::VectorType>(getLhs().getType());
+  if (mlir::isa<cir::FPTypeInterface>(operandTy.getElementType()))
+    return success();
+  return emitOpError("only floating point types can elect to be either "
+                     "ordered or unordered");
+}
+
 //===----------------------------------------------------------------------===//
 // VecExtractOp
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -116,7 +116,8 @@ mlir::LLVM::ICmpPredicate convertCmpKindToICmpPredicate(cir::CmpOpKind kind,
 
 /// Convert from a CIR comparison kind to an LLVM IR floating-point comparison
 /// kind.
-mlir::LLVM::FCmpPredicate convertCmpKindToFCmpPredicate(cir::CmpOpKind kind) {
+mlir::LLVM::FCmpPredicate convertCmpKindToFCmpPredicate(cir::CmpOpKind kind,
+                                                        bool isOrdered) {
   using CIR = cir::CmpOpKind;
   using LLVMFCmp = mlir::LLVM::FCmpPredicate;
   switch (kind) {
@@ -125,13 +126,13 @@ mlir::LLVM::FCmpPredicate convertCmpKindToFCmpPredicate(cir::CmpOpKind kind) {
   case CIR::ne:
     return LLVMFCmp::une;
   case CIR::lt:
-    return LLVMFCmp::olt;
+    return isOrdered ? LLVMFCmp::olt : LLVMFCmp::ult;
   case CIR::le:
-    return LLVMFCmp::ole;
+    return isOrdered ? LLVMFCmp::ole : LLVMFCmp::ule;
   case CIR::gt:
-    return LLVMFCmp::ogt;
+    return isOrdered ? LLVMFCmp::ogt : LLVMFCmp::ugt;
   case CIR::ge:
-    return LLVMFCmp::oge;
+    return isOrdered ? LLVMFCmp::oge : LLVMFCmp::uge;
   }
   llvm_unreachable("Unknown CmpOpKind");
 }
@@ -2069,7 +2070,8 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite(
         adaptor.getLhs(), adaptor.getRhs());
   } else if (mlir::isa<cir::FPTypeInterface>(elementType)) {
     bitResult = rewriter.create<mlir::LLVM::FCmpOp>(
-        op.getLoc(), convertCmpKindToFCmpPredicate(op.getKind()),
+        op.getLoc(),
+        convertCmpKindToFCmpPredicate(op.getKind(), op.getIsOrdered()),
         adaptor.getLhs(), adaptor.getRhs());
   } else {
     return op.emitError() << "unsupported type for VecCmpOp: " << elementType;
@@ -3226,7 +3228,8 @@ mlir::LogicalResult CIRToLLVMCmpOpLowering::matchAndRewrite(
     rewriter.replaceOpWithNewOp<mlir::LLVM::ICmpOp>(
         cmpOp, kind, adaptor.getLhs(), adaptor.getRhs());
   } else if (mlir::isa<cir::FPTypeInterface>(type)) {
-    auto kind = convertCmpKindToFCmpPredicate(cmpOp.getKind());
+    auto kind =
+        convertCmpKindToFCmpPredicate(cmpOp.getKind(), /*isOrdered=*/true);
     rewriter.replaceOpWithNewOp<mlir::LLVM::FCmpOp>(
         cmpOp, kind, adaptor.getLhs(), adaptor.getRhs());
   } else {
diff --git a/clang/test/CIR/CodeGen/builtin-fcmp-sse.c b/clang/test/CIR/CodeGen/builtin-fcmp-sse.c
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s --check-prefix=OG
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+
+
+__m128 test_cmpnleps(__m128 A, __m128 B) {
+  // "Not less-or-equal" must lower to an unordered greater-than (fcmp ugt).
+  // CIR-LABEL: @test_cmpnleps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(gt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast(bitcast, [[ALLOCA:%.*]] : !cir.ptr<!cir.vector<!cir.float x 4>>), !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: cir.store [[CMP]], [[CAST]] :  !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnleps
+  // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OG-LABEL: test_cmpnleps
+  // OG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // OG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnleps(A, B);
+}
+
+
+__m128 test_cmpnlepd(__m128 A, __m128 B) {
+  // NOTE(review): __m128 operands are compared as <2 x double>; use __m128d?
+  // CIR-LABEL: @test_cmpnlepd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(gt, [[A:%.*]], [[B:%.*]]) :  !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast(bitcast, [[CMP]] :  !cir.vector<!s64i x 2>), !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnlepd
+  // LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OG-LABEL: test_cmpnlepd
+  // OG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // OG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x float>
+  // OG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnlepd(A, B);
+}
diff --git a/clang/test/CIR/CodeGen/vectype-ext.cpp b/clang/test/CIR/CodeGen/vectype-ext.cpp
@@ -318,19 +318,19 @@ void vector_double_test(int x, double y) {
   // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64>
   vl2 q = a < b;
   // CIR: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
-  // LLVM: %[[#RES:]] = fcmp olt <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM: %[[#RES:]] = fcmp ult <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
   // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64>
   vl2 r = a > b;
   // CIR: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
-  // LLVM: %[[#RES:]] = fcmp ogt <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM: %[[#RES:]] = fcmp ugt <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
   // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64>
   vl2 s = a <= b;
   // CIR: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
-  // LLVM: %[[#RES:]] = fcmp ole <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM: %[[#RES:]] = fcmp ule <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
   // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64>
   vl2 t = a >= b;
   // CIR: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
-  // LLVM: %[[#RES:]] = fcmp oge <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM: %[[#RES:]] = fcmp uge <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
   // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64>
 
   // __builtin_convertvector
diff --git a/clang/test/CIR/Lowering/vec-cmp.cir b/clang/test/CIR/Lowering/vec-cmp.cir
@@ -14,3 +14,13 @@ cir.func @vec_cmp(%0: !cir.vector<!s16i x 16>, %1: !cir.vector<!s16i x 16>) -> (
 // MLIR-NEXT: %{{[0-9]+}} = llvm.icmp "slt" %arg0, %arg1 : vector<16xi16>
 // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : vector<16xi1> to i16
 // MLIR-NEXT: llvm.return
+
+// An `ordered` floating-point vec.cmp lowers to the ordered predicate (olt).
+cir.func @vec_fcmp_ordered(%0: !cir.vector<!cir.float x 4>, %1: !cir.vector<!cir.float x 4>) -> () {
+  %2 = cir.vec.cmp(lt, %0, %1, ordered) : !cir.vector<!cir.float x 4>, !cir.vector<!cir.int<u, 1> x 4>
+  cir.return
+}
+
+// MLIR: llvm.func @vec_fcmp_ordered
+// MLIR-NEXT: %{{[0-9]+}} = llvm.fcmp "olt" %arg0, %arg1 : vector<4xf32>
+// MLIR-NEXT: llvm.return
diff --git a/clang/test/CIR/Lowering/vectype.cpp b/clang/test/CIR/Lowering/vectype.cpp
@@ -321,25 +321,25 @@ void vector_double_test(int x, double y) {
   vll2 q = a < b;
   // CHECK: %[[#T68:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
   // CHECK: %[[#T69:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
-  // CHECK: %[[#T70:]] = llvm.fcmp "olt" %[[#T68]], %[[#T69]] : vector<2xf64>
+  // CHECK: %[[#T70:]] = llvm.fcmp "ult" %[[#T68]], %[[#T69]] : vector<2xf64>
   // CHECK: %[[#T71:]] = llvm.sext %[[#T70]] : vector<2xi1> to vector<2xi64>
   // CHECK: llvm.store %[[#T71]], %[[#Tq:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
   vll2 r = a > b;
   // CHECK: %[[#T72:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
   // CHECK: %[[#T73:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
-  // CHECK: %[[#T74:]] = llvm.fcmp "ogt" %[[#T72]], %[[#T73]] : vector<2xf64>
+  // CHECK: %[[#T74:]] = llvm.fcmp "ugt" %[[#T72]], %[[#T73]] : vector<2xf64>
   // CHECK: %[[#T75:]] = llvm.sext %[[#T74]] : vector<2xi1> to vector<2xi64>
   // CHECK: llvm.store %[[#T75]], %[[#Tr:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
   vll2 s = a <= b;
   // CHECK: %[[#T76:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
   // CHECK: %[[#T77:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
-  // CHECK: %[[#T78:]] = llvm.fcmp "ole" %[[#T76]], %[[#T77]] : vector<2xf64>
+  // CHECK: %[[#T78:]] = llvm.fcmp "ule" %[[#T76]], %[[#T77]] : vector<2xf64>
   // CHECK: %[[#T79:]] = llvm.sext %[[#T78]] : vector<2xi1> to vector<2xi64>
   // CHECK: llvm.store %[[#T79]], %[[#Ts:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
   vll2 t = a >= b;
   // CHECK: %[[#T80:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
   // CHECK: %[[#T81:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
-  // CHECK: %[[#T82:]] = llvm.fcmp "oge" %[[#T80]], %[[#T81]] : vector<2xf64>
+  // CHECK: %[[#T82:]] = llvm.fcmp "uge" %[[#T80]], %[[#T81]] : vector<2xf64>
   // CHECK: %[[#T83:]] = llvm.sext %[[#T82]] : vector<2xi1> to vector<2xi64>
   // CHECK: llvm.store %[[#T83]], %[[#Tt:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr