Commit 343a810

[RISCV] Allow f16/bf16 with zvfhmin/zvfbfmin as legal strided access (llvm#115264)
This is also split off from the zvfhmin/zvfbfmin isLegalElementTypeForRVV work. Enabling this will cause SLP and RISCVGatherScatterLowering to emit @llvm.experimental.vp.strided.{load,store} intrinsics; codegen support for these intrinsics was added in llvm#109387 and llvm#114750.
Parent: 9b058bb
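As a quick illustration of what SLP can now emit for these element types, here is a minimal hypothetical IR sketch (the function name and operand values are invented for this example; the test diff below shows the actual output): a VP strided load of eight bfloat elements with a byte stride of 8, an all-true mask, and an explicit vector length of 8.

define <8 x bfloat> @strided_load_bf16_sketch(ptr %p) {
  ; Loads p[0], p[4], p[8], ..., p[28]: bfloat is 2 bytes wide, so a
  ; byte stride of 8 picks every fourth element. The all-true mask and
  ; an explicit vector length (EVL) of 8 make all lanes active.
  %v = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 %p, i64 8, <8 x i1> splat (i1 true), i32 8)
  ret <8 x bfloat> %v
}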

File tree: 2 files changed (+271, -2 lines)


Diff for: llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4, -1)
@@ -21554,7 +21554,10 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
     return false;
 
   EVT ScalarType = DataType.getScalarType();
-  if (!isLegalElementTypeForRVV(ScalarType))
+  // TODO: Move bf16/f16 support into isLegalElementTypeForRVV
+  if (!(isLegalElementTypeForRVV(ScalarType) ||
+        (ScalarType == MVT::bf16 && Subtarget.hasVInstructionsBF16Minimal()) ||
+        (ScalarType == MVT::f16 && Subtarget.hasVInstructionsF16Minimal())))
     return false;
 
   if (!Subtarget.enableUnalignedVectorMem() &&

Diff for: llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll (+267, -1)
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN
+; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN
+
 
 define void @test(ptr %p, ptr noalias %s) {
 ; CHECK-LABEL: @test(
@@ -308,3 +310,267 @@ entry:
   ret void
 }
 
+
+define void @test_bf16(ptr %p, ptr noalias %s) {
+; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry:
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void
+;
+; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
+; ZVFHMIN-ZVFBFMIN-NEXT: entry:
+; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
+; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
+; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
+; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
+; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
+; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]]
+; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4
+; ZVFHMIN-ZVFBFMIN-NEXT: ret void
+;
+entry:
+  %arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0
+  %i = load bfloat, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30
+  %i1 = load bfloat, ptr %arrayidx1, align 4
+  %add = fsub fast bfloat %i1, %i
+  %arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0
+  store bfloat %add, ptr %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4
+  %i2 = load bfloat, ptr %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26
+  %i3 = load bfloat, ptr %arrayidx6, align 4
+  %add7 = fsub fast bfloat %i3, %i2
+  %arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1
+  store bfloat %add7, ptr %arrayidx9, align 4
+  %arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8
+  %i4 = load bfloat, ptr %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22
+  %i5 = load bfloat, ptr %arrayidx13, align 4
+  %add14 = fsub fast bfloat %i5, %i4
+  %arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2
+  store bfloat %add14, ptr %arrayidx16, align 4
+  %arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12
+  %i6 = load bfloat, ptr %arrayidx18, align 4
+  %arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18
+  %i7 = load bfloat, ptr %arrayidx20, align 4
+  %add21 = fsub fast bfloat %i7, %i6
+  %arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3
+  store bfloat %add21, ptr %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16
+  %i8 = load bfloat, ptr %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14
+  %i9 = load bfloat, ptr %arrayidx27, align 4
+  %add28 = fsub fast bfloat %i9, %i8
+  %arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4
+  store bfloat %add28, ptr %arrayidx30, align 4
+  %arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20
+  %i10 = load bfloat, ptr %arrayidx32, align 4
+  %arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10
+  %i11 = load bfloat, ptr %arrayidx34, align 4
+  %add35 = fsub fast bfloat %i11, %i10
+  %arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5
+  store bfloat %add35, ptr %arrayidx37, align 4
+  %arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24
+  %i12 = load bfloat, ptr %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6
+  %i13 = load bfloat, ptr %arrayidx41, align 4
+  %add42 = fsub fast bfloat %i13, %i12
+  %arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6
+  store bfloat %add42, ptr %arrayidx44, align 4
+  %arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28
+  %i14 = load bfloat, ptr %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2
+  %i15 = load bfloat, ptr %arrayidx48, align 4
+  %add49 = fsub fast bfloat %i15, %i14
+  %arrayidx51 = getelementptr inbounds bfloat, ptr %s, i64 7
+  store bfloat %add49, ptr %arrayidx51, align 4
+  ret void
+}
+
+define void @test_f16(ptr %p, ptr noalias %s) {
+; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry:
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast half [[I1]], [[I]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD]], ptr [[ARRAYIDX2]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD7]], ptr [[ARRAYIDX9]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD14]], ptr [[ARRAYIDX16]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD21]], ptr [[ARRAYIDX23]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD28]], ptr [[ARRAYIDX30]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD35]], ptr [[ARRAYIDX37]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD42]], ptr [[ARRAYIDX44]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]]
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD49]], ptr [[ARRAYIDX51]], align 4
+; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void
+;
+; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
+; ZVFHMIN-ZVFBFMIN-NEXT: entry:
+; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
+; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
+; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
+; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
+; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
+; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]]
+; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4
+; ZVFHMIN-ZVFBFMIN-NEXT: ret void
+;
+entry:
+  %arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0
+  %i = load half, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30
+  %i1 = load half, ptr %arrayidx1, align 4
+  %add = fsub fast half %i1, %i
+  %arrayidx2 = getelementptr inbounds half, ptr %s, i64 0
+  store half %add, ptr %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4
+  %i2 = load half, ptr %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 26
+  %i3 = load half, ptr %arrayidx6, align 4
+  %add7 = fsub fast half %i3, %i2
+  %arrayidx9 = getelementptr inbounds half, ptr %s, i64 1
+  store half %add7, ptr %arrayidx9, align 4
+  %arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8
+  %i4 = load half, ptr %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22
+  %i5 = load half, ptr %arrayidx13, align 4
+  %add14 = fsub fast half %i5, %i4
+  %arrayidx16 = getelementptr inbounds half, ptr %s, i64 2
+  store half %add14, ptr %arrayidx16, align 4
+  %arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12
+  %i6 = load half, ptr %arrayidx18, align 4
+  %arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18
+  %i7 = load half, ptr %arrayidx20, align 4
+  %add21 = fsub fast half %i7, %i6
+  %arrayidx23 = getelementptr inbounds half, ptr %s, i64 3
+  store half %add21, ptr %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16
+  %i8 = load half, ptr %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14
+  %i9 = load half, ptr %arrayidx27, align 4
+  %add28 = fsub fast half %i9, %i8
+  %arrayidx30 = getelementptr inbounds half, ptr %s, i64 4
+  store half %add28, ptr %arrayidx30, align 4
+  %arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20
+  %i10 = load half, ptr %arrayidx32, align 4
+  %arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10
+  %i11 = load half, ptr %arrayidx34, align 4
+  %add35 = fsub fast half %i11, %i10
+  %arrayidx37 = getelementptr inbounds half, ptr %s, i64 5
+  store half %add35, ptr %arrayidx37, align 4
+  %arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24
+  %i12 = load half, ptr %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6
+  %i13 = load half, ptr %arrayidx41, align 4
+  %add42 = fsub fast half %i13, %i12
+  %arrayidx44 = getelementptr inbounds half, ptr %s, i64 6
+  store half %add42, ptr %arrayidx44, align 4
+  %arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28
+  %i14 = load half, ptr %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2
+  %i15 = load half, ptr %arrayidx48, align 4
+  %add49 = fsub fast half %i15, %i14
+  %arrayidx51 = getelementptr inbounds half, ptr %s, i64 7
+  store half %add49, ptr %arrayidx51, align 4
+  ret void
+}
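The tests above exercise only the load intrinsic; the commit message also names @llvm.experimental.vp.strided.store. For reference, a minimal hypothetical sketch of the store form (function name and operand values invented for this example; not part of this diff):

define void @strided_store_bf16_sketch(<8 x bfloat> %v, ptr %p) {
  ; Stores the 8 lanes of %v to p[0], p[4], ..., p[28] using the same
  ; 8-byte stride, all-true mask, and explicit vector length of 8 as
  ; the strided loads in the tests above.
  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i64(<8 x bfloat> %v, ptr align 4 %p, i64 8, <8 x i1> splat (i1 true), i32 8)
  ret void
}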
