@@ -1474,6 +1474,51 @@ declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32
14741474
14751475declare <4 x i32 > @llvm.amdgcn.raw.tbuffer.load.v4i32 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1
14761476
; Extracts element 3 (the last lane) of a <4 x half> raw.tbuffer.load result.
; The CHECK lines expect the v4f16 call and the extractelement to be left as written.
; NOTE(review): the extracted value is named %elt1 although the index is 3; the
; CHECK lines match that name, so any rename must update both places.
1477+ ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16(
1478+ ; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1479+ ; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 3
1480+ ; CHECK-NEXT: ret half %elt1
1481+ define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1482+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 )
1483+ %elt1 = extractelement <4 x half > %data , i32 3
1484+ ret half %elt1
1485+ }
1486+
; Extracts element 2 of a <4 x half> raw.tbuffer.load result.
; The CHECK lines currently expect the v4f16 call to be left as written (see FIXME below).
; NOTE(review): the extracted value is named %elt1 although the index is 2; the
; CHECK lines match that name, so any rename must update both places.
1487+ ; FIXME: Enable load shortening when full support for v3f16 has been added (should expect call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16).
1488+ ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16(
1489+ ; CHECK-NEXT: %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1490+ ; CHECK-NEXT: %elt1 = extractelement <4 x half> %data, i32 2
1491+ ; CHECK-NEXT: ret half %elt1
1492+ define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1493+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 )
1494+ %elt1 = extractelement <4 x half > %data , i32 2
1495+ ret half %elt1
1496+ }
1497+
; Extracts element 1 of a <4 x half> raw.tbuffer.load result.
; The CHECK lines expect the call to be narrowed to the <2 x half> (v2f16) variant
; with the same operands, followed by an extractelement of index 1 from that result.
1498+ ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16(
1499+ ; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1500+ ; CHECK-NEXT: %elt1 = extractelement <2 x half> %data, i32 1
1501+ ; CHECK-NEXT: ret half %elt1
1502+ define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1503+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 )
1504+ %elt1 = extractelement <4 x half > %data , i32 1
1505+ ret half %elt1
1506+ }
1507+
; Extracts element 0 of a <4 x half> raw.tbuffer.load result.
; The CHECK lines expect the call to be replaced by the scalar half (f16) variant
; with the same operands, and its result returned directly with no extractelement.
; NOTE(review): the input names the extracted value %elt1 although the index is 0;
; the expected output returns %data, so that name does not appear in the CHECK lines.
1508+ ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16(
1509+ ; CHECK-NEXT: %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
1510+ ; CHECK-NEXT: ret half %data
1511+ define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16 (<4 x i32 > inreg %rsrc , i32 %arg0 , i32 inreg %arg1 ) #0 {
1512+ %data = call <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 > %rsrc , i32 %arg0 , i32 %arg1 , i32 78 , i32 0 )
1513+ %elt1 = extractelement <4 x half > %data , i32 0
1514+ ret half %elt1
1515+ }
1516+
; Declarations of the half-precision raw.tbuffer.load variants (scalar, v2, v3, v4).
; The test inputs in this group call only the v4f16 variant; f16 and v2f16 appear in
; the expected (CHECK) output, and v3f16 is referenced only by the FIXME on the elt2 test.
1517+ declare half @llvm.amdgcn.raw.tbuffer.load.f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1
1518+ declare <2 x half > @llvm.amdgcn.raw.tbuffer.load.v2f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1
1519+ declare <3 x half > @llvm.amdgcn.raw.tbuffer.load.v3f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1
1520+ declare <4 x half > @llvm.amdgcn.raw.tbuffer.load.v4f16 (<4 x i32 >, i32 , i32 , i32 , i32 ) #1
1521+
14771522; --------------------------------------------------------------------
14781523; llvm.amdgcn.struct.tbuffer.load
14791524; --------------------------------------------------------------------
0 commit comments