; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s

; Check that a call featuring a scalable-vector byval argument fed by a memcpy
; doesn't crash the compiler. It previously assumed the byval type's size could
; be represented as a known constant amount.
define void @byval_caller(i8 *%P) {
; CHECK-LABEL: @byval_caller(
; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A]], i8* align 4 [[P:%.*]], i64 8, i1 false)
; CHECK-NEXT: [[VA:%.*]] = bitcast i8* [[A]] to <vscale x 1 x i8>*
; CHECK-NEXT: call void @byval_callee(<vscale x 1 x i8>* byval(<vscale x 1 x i8>) align 1 [[VA]])
; CHECK-NEXT: ret void
;
  %a = alloca i8
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false)
  %va = bitcast i8* %a to <vscale x 1 x i8>*
  call void @byval_callee(<vscale x 1 x i8>* align 1 byval(<vscale x 1 x i8>) %va)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4, i8* align 4, i64, i1)
declare void @byval_callee(<vscale x 1 x i8>* align 1 byval(<vscale x 1 x i8>))

; Check that two scalable-vector stores (overlapping, with a constant offset)
; do not crash the compiler when checked whether or not they can be merged into
; a single memset. There was previously an assumption that the stored values'
; sizes could be represented by a known constant amount.
define void @merge_stores_both_scalable(<vscale x 1 x i8>* %ptr) {
; CHECK-LABEL: @merge_stores_both_scalable(
; CHECK-NEXT: store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR:%.*]], align 1
; CHECK-NEXT: [[PTRI8:%.*]] = bitcast <vscale x 1 x i8>* [[PTR]] to i8*
; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, i8* [[PTRI8]], i64 1
; CHECK-NEXT: [[PTR_NEXT_2:%.*]] = bitcast i8* [[PTR_NEXT]] to <vscale x 1 x i8>*
; CHECK-NEXT: store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR_NEXT_2]], align 1
; CHECK-NEXT: ret void
;
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr
  %ptri8 = bitcast <vscale x 1 x i8>* %ptr to i8*
  %ptr.next = getelementptr i8, i8* %ptri8, i64 1
  %ptr.next.2 = bitcast i8* %ptr.next to <vscale x 1 x i8>*
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr.next.2
  ret void
}

; As above, but where the base is scalable but the subsequent store(s) are not.
define void @merge_stores_first_scalable(<vscale x 1 x i8>* %ptr) {
; CHECK-LABEL: @merge_stores_first_scalable(
; CHECK-NEXT: store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR:%.*]], align 1
; CHECK-NEXT: [[PTRI8:%.*]] = bitcast <vscale x 1 x i8>* [[PTR]] to i8*
; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, i8* [[PTRI8]], i64 1
; CHECK-NEXT: store i8 0, i8* [[PTR_NEXT]], align 1
; CHECK-NEXT: ret void
;
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr
  %ptri8 = bitcast <vscale x 1 x i8>* %ptr to i8*
  %ptr.next = getelementptr i8, i8* %ptri8, i64 1
  store i8 zeroinitializer, i8* %ptr.next
  ret void
}

; As above, but where the base is not scalable but the subsequent store(s) are.
define void @merge_stores_second_scalable(i8* %ptr) {
; CHECK-LABEL: @merge_stores_second_scalable(
; CHECK-NEXT: store i8 0, i8* [[PTR:%.*]], align 1
; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, i8* [[PTR]], i64 1
; CHECK-NEXT: [[PTR_NEXT_2:%.*]] = bitcast i8* [[PTR_NEXT]] to <vscale x 1 x i8>*
; CHECK-NEXT: store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR_NEXT_2]], align 1
; CHECK-NEXT: ret void
;
  store i8 zeroinitializer, i8* %ptr
  %ptr.next = getelementptr i8, i8* %ptr, i64 1
  %ptr.next.2 = bitcast i8* %ptr.next to <vscale x 1 x i8>*
  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr.next.2
  ret void
}

; Check that the call-slot optimization doesn't crash when encountering scalable types.
define void @callslotoptzn(<vscale x 4 x float> %val, <vscale x 4 x float>* %out) {
; CHECK-LABEL: @callslotoptzn(
; CHECK-NEXT: [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT: [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
; CHECK-NEXT: [[BALLOC:%.*]] = getelementptr inbounds <vscale x 4 x float>, <vscale x 4 x float>* [[ALLOC]], i64 0, i64 0
; CHECK-NEXT: [[STRIDE:%.*]] = getelementptr inbounds float, float* [[BALLOC]], <vscale x 4 x i32> [[IDX]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x float*> [[STRIDE]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: [[LI:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[ALLOC]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[LI]], <vscale x 4 x float>* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
  %alloc = alloca <vscale x 4 x float>, align 16
  %idx = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
  %balloc = getelementptr inbounds <vscale x 4 x float>, <vscale x 4 x float>* %alloc, i64 0, i64 0
  %stride = getelementptr inbounds float, float* %balloc, <vscale x 4 x i32> %idx
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %stride, i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
  %li = load <vscale x 4 x float>, <vscale x 4 x float>* %alloc, align 4
  store <vscale x 4 x float> %li, <vscale x 4 x float>* %out, align 4
  ret void
}

declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> , <vscale x 4 x float*> , i32, <vscale x 4 x i1>)
0 commit comments