|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2 |
| -; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s |
| 2 | +; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN |
| 3 | +; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN |
| 4 | + |
3 | 5 |
|
4 | 6 | define void @test(ptr %p, ptr noalias %s) {
|
5 | 7 | ; CHECK-LABEL: @test(
|
@@ -308,3 +310,267 @@ entry:
|
308 | 310 | ret void
|
309 | 311 | }
|
310 | 312 |
|
| 313 | + |
; test_bf16: eight scalar `fsub fast bfloat` operations. Each one subtracts a
; value loaded from %p at ascending indices 0, 4, ..., 28 from a value loaded
; at descending indices 30, 26, ..., 2, and stores the result to consecutive
; elements 0..7 of %s.
; Expected output per the CHECK lines below:
;   NO-ZVFHMIN-ZVFBFMIN: the function is left fully scalar.
;   ZVFHMIN-ZVFBFMIN: the loads become two llvm.experimental.vp.strided.load
;     calls (stride operands 8 and -8) feeding one <8 x bfloat> fsub and one
;     vector store.
; NOTE(review): the scalar stores are marked `align 4` although adjacent
; bfloat elements of %s are presumably 2 bytes apart -- confirm whether
; `align 2` was intended.
| 314 | +define void @test_bf16(ptr %p, ptr noalias %s) { |
| 315 | +; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16( |
| 316 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| 317 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0 |
| 318 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 |
| 319 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30 |
| 320 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4 |
| 321 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]] |
| 322 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0 |
| 323 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4 |
| 324 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4 |
| 325 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4 |
| 326 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26 |
| 327 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4 |
| 328 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]] |
| 329 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1 |
| 330 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4 |
| 331 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8 |
| 332 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4 |
| 333 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22 |
| 334 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4 |
| 335 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]] |
| 336 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2 |
| 337 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4 |
| 338 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12 |
| 339 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4 |
| 340 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18 |
| 341 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4 |
| 342 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]] |
| 343 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3 |
| 344 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4 |
| 345 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16 |
| 346 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4 |
| 347 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14 |
| 348 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4 |
| 349 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]] |
| 350 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4 |
| 351 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4 |
| 352 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20 |
| 353 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4 |
| 354 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10 |
| 355 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4 |
| 356 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]] |
| 357 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5 |
| 358 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4 |
| 359 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24 |
| 360 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4 |
| 361 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6 |
| 362 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4 |
| 363 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]] |
| 364 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6 |
| 365 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4 |
| 366 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28 |
| 367 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4 |
| 368 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2 |
| 369 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4 |
| 370 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]] |
| 371 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7 |
| 372 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4 |
| 373 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| 374 | +; |
| 375 | +; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16( |
| 376 | +; ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| 377 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0 |
| 378 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30 |
| 379 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0 |
| 380 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8) |
| 381 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8) |
| 382 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]] |
| 383 | +; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4 |
| 384 | +; ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| 385 | +; |
| 386 | +entry: |
| 387 | + %arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0 |
| 388 | + %i = load bfloat, ptr %arrayidx, align 4 |
| 389 | + %arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30 |
| 390 | + %i1 = load bfloat, ptr %arrayidx1, align 4 |
| 391 | + %add = fsub fast bfloat %i1, %i |
| 392 | + %arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0 |
| 393 | + store bfloat %add, ptr %arrayidx2, align 4 |
| 394 | + %arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4 |
| 395 | + %i2 = load bfloat, ptr %arrayidx4, align 4 |
| 396 | + %arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26 |
| 397 | + %i3 = load bfloat, ptr %arrayidx6, align 4 |
| 398 | + %add7 = fsub fast bfloat %i3, %i2 |
| 399 | + %arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1 |
| 400 | + store bfloat %add7, ptr %arrayidx9, align 4 |
| 401 | + %arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8 |
| 402 | + %i4 = load bfloat, ptr %arrayidx11, align 4 |
| 403 | + %arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22 |
| 404 | + %i5 = load bfloat, ptr %arrayidx13, align 4 |
| 405 | + %add14 = fsub fast bfloat %i5, %i4 |
| 406 | + %arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2 |
| 407 | + store bfloat %add14, ptr %arrayidx16, align 4 |
| 408 | + %arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12 |
| 409 | + %i6 = load bfloat, ptr %arrayidx18, align 4 |
| 410 | + %arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18 |
| 411 | + %i7 = load bfloat, ptr %arrayidx20, align 4 |
| 412 | + %add21 = fsub fast bfloat %i7, %i6 |
| 413 | + %arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3 |
| 414 | + store bfloat %add21, ptr %arrayidx23, align 4 |
| 415 | + %arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16 |
| 416 | + %i8 = load bfloat, ptr %arrayidx25, align 4 |
| 417 | + %arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14 |
| 418 | + %i9 = load bfloat, ptr %arrayidx27, align 4 |
| 419 | + %add28 = fsub fast bfloat %i9, %i8 |
| 420 | + %arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4 |
| 421 | + store bfloat %add28, ptr %arrayidx30, align 4 |
| 422 | + %arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20 |
| 423 | + %i10 = load bfloat, ptr %arrayidx32, align 4 |
| 424 | + %arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10 |
| 425 | + %i11 = load bfloat, ptr %arrayidx34, align 4 |
| 426 | + %add35 = fsub fast bfloat %i11, %i10 |
| 427 | + %arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5 |
| 428 | + store bfloat %add35, ptr %arrayidx37, align 4 |
| 429 | + %arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24 |
| 430 | + %i12 = load bfloat, ptr %arrayidx39, align 4 |
| 431 | + %arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6 |
| 432 | + %i13 = load bfloat, ptr %arrayidx41, align 4 |
| 433 | + %add42 = fsub fast bfloat %i13, %i12 |
| 434 | + %arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6 |
| 435 | + store bfloat %add42, ptr %arrayidx44, align 4 |
| 436 | + %arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28 |
| 437 | + %i14 = load bfloat, ptr %arrayidx46, align 4 |
| 438 | + %arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2 |
| 439 | + %i15 = load bfloat, ptr %arrayidx48, align 4 |
| 440 | + %add49 = fsub fast bfloat %i15, %i14 |
| 441 | + %arrayidx51 = getelementptr inbounds bfloat, ptr %s, i64 7 |
| 442 | + store bfloat %add49, ptr %arrayidx51, align 4 |
| 443 | + ret void |
| 444 | +} |
| 445 | + |
; test_f16: eight scalar `fsub fast half` operations. Each one subtracts a
; value loaded from %p at ascending indices 0, 4, ..., 28 from a value loaded
; at descending indices 30, 26, ..., 2, and stores the result to consecutive
; elements 0..7 of %s.
; Expected output per the CHECK lines below:
;   NO-ZVFHMIN-ZVFBFMIN: the function is left fully scalar.
;   ZVFHMIN-ZVFBFMIN: the loads become two llvm.experimental.vp.strided.load
;     calls (stride operands 8 and -8) feeding one <8 x half> fsub and one
;     vector store.
; NOTE(review): the scalar stores are marked `align 4` although adjacent half
; elements of %s are presumably 2 bytes apart -- confirm whether `align 2`
; was intended.
| 446 | +define void @test_f16(ptr %p, ptr noalias %s) { |
| 447 | +; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16( |
| 448 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| 449 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0 |
| 450 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4 |
| 451 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30 |
| 452 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4 |
| 453 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast half [[I1]], [[I]] |
| 454 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0 |
| 455 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD]], ptr [[ARRAYIDX2]], align 4 |
| 456 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4 |
| 457 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4 |
| 458 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26 |
| 459 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4 |
| 460 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]] |
| 461 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1 |
| 462 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD7]], ptr [[ARRAYIDX9]], align 4 |
| 463 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8 |
| 464 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4 |
| 465 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22 |
| 466 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4 |
| 467 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]] |
| 468 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2 |
| 469 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD14]], ptr [[ARRAYIDX16]], align 4 |
| 470 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12 |
| 471 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4 |
| 472 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18 |
| 473 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4 |
| 474 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]] |
| 475 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3 |
| 476 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD21]], ptr [[ARRAYIDX23]], align 4 |
| 477 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16 |
| 478 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4 |
| 479 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14 |
| 480 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4 |
| 481 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]] |
| 482 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4 |
| 483 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD28]], ptr [[ARRAYIDX30]], align 4 |
| 484 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20 |
| 485 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4 |
| 486 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10 |
| 487 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4 |
| 488 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]] |
| 489 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5 |
| 490 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD35]], ptr [[ARRAYIDX37]], align 4 |
| 491 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24 |
| 492 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4 |
| 493 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6 |
| 494 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4 |
| 495 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]] |
| 496 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6 |
| 497 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD42]], ptr [[ARRAYIDX44]], align 4 |
| 498 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28 |
| 499 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4 |
| 500 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2 |
| 501 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4 |
| 502 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]] |
| 503 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7 |
| 504 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD49]], ptr [[ARRAYIDX51]], align 4 |
| 505 | +; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| 506 | +; |
| 507 | +; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16( |
| 508 | +; ZVFHMIN-ZVFBFMIN-NEXT: entry: |
| 509 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0 |
| 510 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30 |
| 511 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0 |
| 512 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8) |
| 513 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8) |
| 514 | +; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]] |
| 515 | +; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4 |
| 516 | +; ZVFHMIN-ZVFBFMIN-NEXT: ret void |
| 517 | +; |
| 518 | +entry: |
| 519 | + %arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0 |
| 520 | + %i = load half, ptr %arrayidx, align 4 |
| 521 | + %arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30 |
| 522 | + %i1 = load half, ptr %arrayidx1, align 4 |
| 523 | + %add = fsub fast half %i1, %i |
| 524 | + %arrayidx2 = getelementptr inbounds half, ptr %s, i64 0 |
| 525 | + store half %add, ptr %arrayidx2, align 4 |
| 526 | + %arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4 |
| 527 | + %i2 = load half, ptr %arrayidx4, align 4 |
| 528 | + %arrayidx6 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 26 |
| 529 | + %i3 = load half, ptr %arrayidx6, align 4 |
| 530 | + %add7 = fsub fast half %i3, %i2 |
| 531 | + %arrayidx9 = getelementptr inbounds half, ptr %s, i64 1 |
| 532 | + store half %add7, ptr %arrayidx9, align 4 |
| 533 | + %arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8 |
| 534 | + %i4 = load half, ptr %arrayidx11, align 4 |
| 535 | + %arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22 |
| 536 | + %i5 = load half, ptr %arrayidx13, align 4 |
| 537 | + %add14 = fsub fast half %i5, %i4 |
| 538 | + %arrayidx16 = getelementptr inbounds half, ptr %s, i64 2 |
| 539 | + store half %add14, ptr %arrayidx16, align 4 |
| 540 | + %arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12 |
| 541 | + %i6 = load half, ptr %arrayidx18, align 4 |
| 542 | + %arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18 |
| 543 | + %i7 = load half, ptr %arrayidx20, align 4 |
| 544 | + %add21 = fsub fast half %i7, %i6 |
| 545 | + %arrayidx23 = getelementptr inbounds half, ptr %s, i64 3 |
| 546 | + store half %add21, ptr %arrayidx23, align 4 |
| 547 | + %arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16 |
| 548 | + %i8 = load half, ptr %arrayidx25, align 4 |
| 549 | + %arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14 |
| 550 | + %i9 = load half, ptr %arrayidx27, align 4 |
| 551 | + %add28 = fsub fast half %i9, %i8 |
| 552 | + %arrayidx30 = getelementptr inbounds half, ptr %s, i64 4 |
| 553 | + store half %add28, ptr %arrayidx30, align 4 |
| 554 | + %arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20 |
| 555 | + %i10 = load half, ptr %arrayidx32, align 4 |
| 556 | + %arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10 |
| 557 | + %i11 = load half, ptr %arrayidx34, align 4 |
| 558 | + %add35 = fsub fast half %i11, %i10 |
| 559 | + %arrayidx37 = getelementptr inbounds half, ptr %s, i64 5 |
| 560 | + store half %add35, ptr %arrayidx37, align 4 |
| 561 | + %arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24 |
| 562 | + %i12 = load half, ptr %arrayidx39, align 4 |
| 563 | + %arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6 |
| 564 | + %i13 = load half, ptr %arrayidx41, align 4 |
| 565 | + %add42 = fsub fast half %i13, %i12 |
| 566 | + %arrayidx44 = getelementptr inbounds half, ptr %s, i64 6 |
| 567 | + store half %add42, ptr %arrayidx44, align 4 |
| 568 | + %arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28 |
| 569 | + %i14 = load half, ptr %arrayidx46, align 4 |
| 570 | + %arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2 |
| 571 | + %i15 = load half, ptr %arrayidx48, align 4 |
| 572 | + %add49 = fsub fast half %i15, %i14 |
| 573 | + %arrayidx51 = getelementptr inbounds half, ptr %s, i64 7 |
| 574 | + store half %add49, ptr %arrayidx51, align 4 |
| 575 | + ret void |
| 576 | +} |
0 commit comments