@@ -173,16 +173,20 @@ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
173173 AND (SM_86, PTX72))
174174TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
175175 AND (SM_86, PTX72))
176- TARGET_BUILTIN(__nvvm_fmin_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
177- TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
178- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
179- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
176+ TARGET_BUILTIN(__nvvm_fmin_bf16, " yyy" , " " , AND(SM_80, PTX70))
177+ TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
178+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
179+ TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
180+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
181+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " yyy" , " " ,
180182 AND (SM_86, PTX72))
181- TARGET_BUILTIN(__nvvm_fmin_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
182- TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
183- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
183+ TARGET_BUILTIN(__nvvm_fmin_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
184+ TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
185+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
186+ TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
187+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
184188 AND (SM_86, PTX72))
185- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
189+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
186190 AND (SM_86, PTX72))
187191BUILTIN(__nvvm_fmin_f, " fff" , " " )
188192BUILTIN(__nvvm_fmin_ftz_f, " fff" , " " )
@@ -215,16 +219,20 @@ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
215219 AND (SM_86, PTX72))
216220TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
217221 AND (SM_86, PTX72))
218- TARGET_BUILTIN(__nvvm_fmax_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
219- TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
220- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
221- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
222+ TARGET_BUILTIN(__nvvm_fmax_bf16, " yyy" , " " , AND(SM_80, PTX70))
223+ TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
224+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
225+ TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
226+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
227+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " yyy" , " " ,
222228 AND (SM_86, PTX72))
223- TARGET_BUILTIN(__nvvm_fmax_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
224- TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
225- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
229+ TARGET_BUILTIN(__nvvm_fmax_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
230+ TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
231+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
232+ TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
233+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
226234 AND (SM_86, PTX72))
227- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
235+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
228236 AND (SM_86, PTX72))
229237BUILTIN(__nvvm_fmax_f, " fff" , " " )
230238BUILTIN(__nvvm_fmax_ftz_f, " fff" , " " )
@@ -352,10 +360,10 @@ TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
352360TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_53, PTX42))
353361TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
354362TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
355- TARGET_BUILTIN(__nvvm_fma_rn_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
356- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
357- TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
358- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
363+ TARGET_BUILTIN(__nvvm_fma_rn_bf16, " yyyy " , " " , AND(SM_80, PTX70))
364+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " yyyy " , " " , AND(SM_80, PTX70))
365+ TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
366+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
359367BUILTIN(__nvvm_fma_rn_ftz_f, " ffff" , " " )
360368BUILTIN(__nvvm_fma_rn_f, " ffff" , " " )
361369BUILTIN(__nvvm_fma_rz_ftz_f, " ffff" , " " )
@@ -543,20 +551,20 @@ BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
543551BUILTIN(__nvvm_f2h_rn_ftz, " Usf" , " " )
544552BUILTIN(__nvvm_f2h_rn, " Usf" , " " )
545553
546- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " ZUiff " , " " , AND(SM_80,PTX70))
547- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " ZUiff " , " " , AND(SM_80,PTX70))
548- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " ZUiff " , " " , AND(SM_80,PTX70))
549- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " ZUiff " , " " , AND(SM_80,PTX70))
554+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " V2yff " , " " , AND(SM_80,PTX70))
555+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " V2yff " , " " , AND(SM_80,PTX70))
556+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " V2yff " , " " , AND(SM_80,PTX70))
557+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " V2yff " , " " , AND(SM_80,PTX70))
550558
551559TARGET_BUILTIN(__nvvm_ff2f16x2_rn, " V2hff" , " " , AND(SM_80,PTX70))
552560TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, " V2hff" , " " , AND(SM_80,PTX70))
553561TARGET_BUILTIN(__nvvm_ff2f16x2_rz, " V2hff" , " " , AND(SM_80,PTX70))
554562TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, " V2hff" , " " , AND(SM_80,PTX70))
555563
556- TARGET_BUILTIN(__nvvm_f2bf16_rn, " ZUsf " , " " , AND(SM_80,PTX70))
557- TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " ZUsf " , " " , AND(SM_80,PTX70))
558- TARGET_BUILTIN(__nvvm_f2bf16_rz, " ZUsf " , " " , AND(SM_80,PTX70))
559- TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " ZUsf " , " " , AND(SM_80,PTX70))
564+ TARGET_BUILTIN(__nvvm_f2bf16_rn, " yf " , " " , AND(SM_80,PTX70))
565+ TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " yf " , " " , AND(SM_80,PTX70))
566+ TARGET_BUILTIN(__nvvm_f2bf16_rz, " yf " , " " , AND(SM_80,PTX70))
567+ TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " yf " , " " , AND(SM_80,PTX70))
560568
561569TARGET_BUILTIN(__nvvm_f2tf32_rna, " ZUif" , " " , AND(SM_80,PTX70))
562570
@@ -1024,10 +1032,10 @@ TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
10241032
10251033
10261034// bf16, bf16x2 abs, neg
1027- TARGET_BUILTIN(__nvvm_abs_bf16, " UsUs " , " " , AND(SM_80,PTX70))
1028- TARGET_BUILTIN(__nvvm_abs_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
1029- TARGET_BUILTIN(__nvvm_neg_bf16, " UsUs " , " " , AND(SM_80,PTX70))
1030- TARGET_BUILTIN(__nvvm_neg_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
1035+ TARGET_BUILTIN(__nvvm_abs_bf16, " yy " , " " , AND(SM_80,PTX70))
1036+ TARGET_BUILTIN(__nvvm_abs_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
1037+ TARGET_BUILTIN(__nvvm_neg_bf16, " yy " , " " , AND(SM_80,PTX70))
1038+ TARGET_BUILTIN(__nvvm_neg_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
10311039
10321040TARGET_BUILTIN(__nvvm_mapa, " v*v*i" , " " , AND(SM_90, PTX78))
10331041TARGET_BUILTIN(__nvvm_mapa_shared_cluster, " v*3v*3i" , " " , AND(SM_90, PTX78))
0 commit comments