diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index b985b6c79d..a6f2cefcfe 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -3530,14 +3530,14 @@ static bool ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC, V->replaceAllUsesWith(Src); } else { // Replace Constant with a non-Constant. - IRBuilder<> Builder(MC); + IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(MC)); ReplaceConstantWithInst(C, Src, Builder); } } else { // Try convert special pattern for cbuffer which copy array of float4 to // array of float. if (!tryToReplaceCBVec4ArrayToScalarArray(V, TyV, Src, TySrc, MC, DL)) { - IRBuilder<> Builder(MC); + IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(MC)); Src = Builder.CreateBitCast(Src, V->getType()); ReplaceConstantWithInst(C, Src, Builder); } @@ -6582,6 +6582,7 @@ bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca( // Store initializer is exist. if (GV->hasInitializer() && !isa(GV->getInitializer()) && !bIsObjectTy) { // Do not zerio-initialize object allocas + Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(F)); Builder.CreateStore(GV->getInitializer(), GV); } } diff --git a/tools/clang/test/DXC/scalarrepl-param-hlsl-const-to-local-and-back.hlsl b/tools/clang/test/DXC/scalarrepl-param-hlsl-const-to-local-and-back.hlsl new file mode 100644 index 0000000000..1e950c6920 --- /dev/null +++ b/tools/clang/test/DXC/scalarrepl-param-hlsl-const-to-local-and-back.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -T ps_6_2 %s | FileCheck %s + +// Validate that copying from static array to local, then back to static +// array does not crash the compiler. This was resulting in an invalid +// ReplaceConstantWithInst from ScalarReplAggregatesHLSL, which would +// result in referenced deleted instruction in a later pass. + +// This test is expected to fail with "error: Loop must have a break." 
+// XFAIL: * + +static int arr1[10] = (int[10])0; +static int arr2[10] = (int[10])0; +static float result = 0; +ByteAddressBuffer buff : register(t0); + +void foo() { + int i = 0; + if (buff.Load(0u)) { + return; + } + arr2[i] = arr1[i]; + result = float(arr1[0]); +} + +struct tint_symbol { + float4 value : SV_Target0; +}; + +float main_inner() { + foo(); + bool cond = false; + while (true) { + if (cond) { break; } + } + int arr1_copy[10] = arr1; // constant to local + arr1 = arr1_copy; // local to constant + foo(); + return result; +} + +tint_symbol main() { + float inner_result = main_inner(); + tint_symbol wrapper_result = (tint_symbol)0; + wrapper_result.value.x = inner_result; + return wrapper_result; +} diff --git a/tools/clang/test/DXC/scalarrepl-param-hlsl-const-to-local-and-back.ll b/tools/clang/test/DXC/scalarrepl-param-hlsl-const-to-local-and-back.ll new file mode 100644 index 0000000000..6ca08ab3a9 --- /dev/null +++ b/tools/clang/test/DXC/scalarrepl-param-hlsl-const-to-local-and-back.ll @@ -0,0 +1,253 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; The first memcpy, from arr1 to arr1_copy.i, should be replaced by a series of 10 loads and stores, +; while the second memcpy, from arr1_copy.i back to arr1, should be removed: +; %19 = bitcast [10 x i32]* %arr1_copy.i to i8*, !dbg !33 ; line:25 col:23 +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* %19, i8* bitcast ([10 x i32]* @arr1 to i8*), i64 40, i32 1, i1 false) #0, !dbg !33 ; line:25 col:23 +; %20 = bitcast [10 x i32]* %arr1_copy.i to i8*, !dbg !34 ; line:26 col:10 +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast ([10 x i32]* @arr1 to i8*), i8* %20, i64 40, i32 1, i1 false) #0, !dbg !34 ; line:26 col:10 +; store i32 0, i32* %i.i.1.i, align 4, !dbg !35, !tbaa !12 ; line:7 col:7 + +; CHECK: [[DEST0:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 0 +; CHECK-NEXT: [[SRC0:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds 
([10 x i32], [10 x i32]* @arr1, i32 0, i32 0) +; CHECK-NEXT: store i32 [[SRC0]], i32* [[DEST0]] +; CHECK-NEXT: [[DEST1:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 1 +; CHECK-NEXT: [[SRC1:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 1) +; CHECK-NEXT: store i32 [[SRC1]], i32* [[DEST1]] +; CHECK-NEXT: [[DEST2:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 2 +; CHECK-NEXT: [[SRC2:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 2) +; CHECK-NEXT: store i32 [[SRC2]], i32* [[DEST2]] +; CHECK-NEXT: [[DEST3:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 3 +; CHECK-NEXT: [[SRC3:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 3) +; CHECK-NEXT: store i32 [[SRC3]], i32* [[DEST3]] +; CHECK-NEXT: [[DEST4:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 4 +; CHECK-NEXT: [[SRC4:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 4) +; CHECK-NEXT: store i32 [[SRC4]], i32* [[DEST4]] +; CHECK-NEXT: [[DEST5:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 5 +; CHECK-NEXT: [[SRC5:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 5) +; CHECK-NEXT: store i32 [[SRC5]], i32* [[DEST5]] +; CHECK-NEXT: [[DEST6:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 6 +; CHECK-NEXT: [[SRC6:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 6) +; CHECK-NEXT: store i32 [[SRC6]], i32* [[DEST6]]
+; CHECK-NEXT: [[DEST7:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 7 +; CHECK-NEXT: [[SRC7:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 7) +; CHECK-NEXT: store i32 [[SRC7]], i32* [[DEST7]] +; CHECK-NEXT: [[DEST8:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 8 +; CHECK-NEXT: [[SRC8:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 8) +; CHECK-NEXT: store i32 [[SRC8]], i32* [[DEST8]] +; CHECK-NEXT: [[DEST9:%[a-z0-9\.]+]] = getelementptr inbounds [10 x i32], [10 x i32]* %arr1_copy.i, i32 0, i32 9 +; CHECK-NEXT: [[SRC9:%[a-z0-9\.]+]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @arr1, i32 0, i32 9) +; CHECK-NEXT: store i32 [[SRC9]], i32* [[DEST9]] + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; buff texture byte r/o T0 t0 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.ByteAddressBuffer = type { i32 } +%ConstantBuffer = type opaque +%struct.tint_symbol = type { <4 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?buff@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@arr1 = internal global [10 x i32] zeroinitializer, align 4 +@arr2 = internal global [10 x i32] zeroinitializer, align 4 +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @main(%struct.tint_symbol* noalias sret %agg.result) #0 { +
%1 = alloca float + store float 0.000000e+00, float* %1 + %i.i.1.i = alloca i32, align 4 + %i.i.i = alloca i32, align 4 + %cond.i = alloca i32, align 4 + %arr1_copy.i = alloca [10 x i32], align 4 + %inner_result = alloca float, align 4 + %wrapper_result = alloca %struct.tint_symbol, align 4 + store i32 0, i32* %i.i.i, align 4, !dbg !23, !tbaa !31 ; line:7 col:7 + %2 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?buff@@3UByteAddressBuffer@@A", !dbg !35 ; line:8 col:7 + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %2) #0, !dbg !35 ; line:8 col:7 + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) #0, !dbg !35 ; line:8 col:7 + %5 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %4, i32 0) #0, !dbg !35 ; line:8 col:7 + %6 = icmp ne i32 %5, 0, !dbg !35 ; line:8 col:7 + br i1 %6, label %"\01?foo@@YAXXZ.exit.i", label %7, !dbg !35 ; line:8 col:7 + +;