From e2046b5000e1a6e104121a8022d33f4f181f5d03 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 27 Nov 2024 18:44:38 +0000 Subject: [PATCH 1/7] update llc tc affected --- .../CostModel/PowerPC/load-to-trunc.ll | 4 +- llvm/test/CodeGen/PowerPC/ctrloop-sh.ll | 58 ++++++++++--------- llvm/test/CodeGen/PowerPC/pr59074.ll | 29 +++++----- ...lar-shift-by-byte-multiple-legalization.ll | 48 +++++++-------- .../PowerPC/wide-scalar-shift-legalization.ll | 33 ++++++----- .../AtomicExpand/PowerPC/cmpxchg.ll | 6 +- 6 files changed, 92 insertions(+), 86 deletions(-) diff --git a/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll b/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll index 57a6e98cfb4ee6..b78d121ff4f389 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/load-to-trunc.ll @@ -7,7 +7,7 @@ ; Check that cost is 1 for unusual load to register sized load. define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) { ; CHECK-LABEL: 'loadUnusualIntegerWithTrunc' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc ; @@ -18,7 +18,7 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) { define i128 @loadUnusualInteger(ptr %ptr) { ; CHECK-LABEL: 'loadUnusualInteger' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out ; %out = load i128, ptr %ptr diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll 
b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll index 72de456cba395b..19f86f9d1af6ff 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll @@ -16,39 +16,41 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-NEXT: addi 7, 1, 16 ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 8, 0(4) ; CHECK-NEXT: lwz 9, 4(4) +; CHECK-NEXT: lwz 8, 0(4) ; CHECK-NEXT: lwz 10, 8(4) ; CHECK-NEXT: lwz 11, 12(4) ; CHECK-NEXT: lwz 12, 12(5) +; CHECK-NEXT: stw 9, 20(1) +; CHECK-NEXT: mr 9, 7 ; CHECK-NEXT: stw 6, 44(1) +; CHECK-NEXT: rlwimi 9, 12, 29, 28, 29 ; CHECK-NEXT: stw 6, 40(1) ; CHECK-NEXT: stw 6, 36(1) ; CHECK-NEXT: stw 6, 32(1) ; CHECK-NEXT: stw 11, 28(1) ; CHECK-NEXT: stw 10, 24(1) ; CHECK-NEXT: clrlwi 10, 12, 27 -; CHECK-NEXT: stw 9, 20(1) ; CHECK-NEXT: stw 8, 16(1) -; CHECK-NEXT: rlwinm 8, 12, 29, 28, 29 -; CHECK-NEXT: lwzux 9, 8, 7 -; CHECK-NEXT: subfic 12, 10, 32 -; CHECK-NEXT: lwz 11, 8(8) -; CHECK-NEXT: slw 9, 9, 10 -; CHECK-NEXT: lwz 0, 4(8) -; CHECK-NEXT: lwz 8, 12(8) -; CHECK-NEXT: srw 30, 11, 12 -; CHECK-NEXT: slw 29, 0, 10 -; CHECK-NEXT: srw 0, 0, 12 -; CHECK-NEXT: srw 12, 8, 12 -; CHECK-NEXT: slw 11, 11, 10 +; CHECK-NEXT: rlwinm 12, 12, 29, 28, 29 +; CHECK-NEXT: lwz 8, 8(9) +; CHECK-NEXT: subfic 0, 10, 32 +; CHECK-NEXT: lwz 11, 4(9) +; CHECK-NEXT: lwz 9, 12(9) +; CHECK-NEXT: srw 30, 8, 0 +; CHECK-NEXT: lwzx 12, 7, 12 +; CHECK-NEXT: slw 29, 11, 10 +; CHECK-NEXT: srw 11, 11, 0 +; CHECK-NEXT: srw 0, 9, 0 ; CHECK-NEXT: slw 8, 8, 10 -; CHECK-NEXT: stw 8, 12(3) -; CHECK-NEXT: or 8, 11, 12 +; CHECK-NEXT: slw 12, 12, 10 +; CHECK-NEXT: or 8, 8, 0 ; CHECK-NEXT: stw 8, 8(3) -; CHECK-NEXT: or 8, 9, 0 +; CHECK-NEXT: or 8, 12, 11 +; CHECK-NEXT: slw 9, 9, 10 ; CHECK-NEXT: stw 8, 0(3) ; CHECK-NEXT: or 8, 29, 30 +; CHECK-NEXT: stw 9, 12(3) ; CHECK-NEXT: stw 8, 4(3) ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.end @@ -77,11 +79,11 @@ for.end: ; preds = %for.body define void @foo2(ptr %a, ptr 
readonly %b, ptr readonly %c) #0 { ; CHECK-LABEL: foo2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; CHECK-NEXT: stwu 1, -64(1) +; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; CHECK-NEXT: li 6, 2048 ; CHECK-NEXT: mtctr 6 -; CHECK-NEXT: addi 6, 1, 24 +; CHECK-NEXT: addi 6, 1, 32 ; CHECK-NEXT: .LBB1_1: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: lwz 7, 0(4) @@ -89,18 +91,18 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-NEXT: lwz 11, 12(5) ; CHECK-NEXT: lwz 9, 8(4) ; CHECK-NEXT: lwz 10, 12(4) -; CHECK-NEXT: stw 8, 28(1) +; CHECK-NEXT: stw 8, 36(1) ; CHECK-NEXT: rlwinm 8, 11, 29, 28, 29 -; CHECK-NEXT: stw 7, 24(1) +; CHECK-NEXT: stw 7, 32(1) ; CHECK-NEXT: srawi 7, 7, 31 -; CHECK-NEXT: stw 10, 36(1) +; CHECK-NEXT: stw 10, 44(1) ; CHECK-NEXT: clrlwi 10, 11, 27 -; CHECK-NEXT: stw 9, 32(1) +; CHECK-NEXT: stw 9, 40(1) ; CHECK-NEXT: subfic 12, 10, 32 +; CHECK-NEXT: stw 7, 28(1) +; CHECK-NEXT: stw 7, 24(1) ; CHECK-NEXT: stw 7, 20(1) ; CHECK-NEXT: stw 7, 16(1) -; CHECK-NEXT: stw 7, 12(1) -; CHECK-NEXT: stw 7, 8(1) ; CHECK-NEXT: sub 7, 6, 8 ; CHECK-NEXT: lwz 8, 4(7) ; CHECK-NEXT: lwz 9, 0(7) @@ -122,8 +124,8 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-NEXT: stw 7, 4(3) ; CHECK-NEXT: bdnz .LBB1_1 ; CHECK-NEXT: # %bb.2: # %for.end -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: blr entry: br label %for.body diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll index 6264b9f22876cc..ba21fed9f5abd4 100644 --- a/llvm/test/CodeGen/PowerPC/pr59074.ll +++ b/llvm/test/CodeGen/PowerPC/pr59074.ll @@ -38,26 +38,27 @@ define void @pr59074(ptr %0) { ; LE32-NEXT: stw 7, 40(1) ; LE32-NEXT: stw 7, 36(1) ; LE32-NEXT: stw 8, 16(1) -; LE32-NEXT: rlwinm 9, 4, 29, 28, 29 +; LE32-NEXT: rlwinm 7, 4, 29, 28, 29 ; 
LE32-NEXT: stxvd2x 0, 0, 5 +; LE32-NEXT: lwzx 5, 6, 7 +; LE32-NEXT: rlwimi 6, 4, 29, 28, 29 ; LE32-NEXT: clrlwi 4, 4, 27 -; LE32-NEXT: lwzux 5, 9, 6 -; LE32-NEXT: lwz 6, 8(9) -; LE32-NEXT: lwz 7, 4(9) -; LE32-NEXT: lwz 8, 12(9) +; LE32-NEXT: lwz 7, 8(6) +; LE32-NEXT: lwz 8, 4(6) +; LE32-NEXT: lwz 6, 12(6) ; LE32-NEXT: xori 9, 4, 31 ; LE32-NEXT: subfic 11, 4, 32 ; LE32-NEXT: srw 5, 5, 4 -; LE32-NEXT: slwi 10, 6, 1 -; LE32-NEXT: srw 6, 6, 4 +; LE32-NEXT: slwi 10, 7, 1 +; LE32-NEXT: srw 12, 8, 4 +; LE32-NEXT: slw 8, 8, 11 +; LE32-NEXT: srw 7, 7, 4 +; LE32-NEXT: slw 11, 6, 11 +; LE32-NEXT: srw 4, 6, 4 ; LE32-NEXT: slw 9, 10, 9 -; LE32-NEXT: srw 10, 7, 4 -; LE32-NEXT: slw 7, 7, 11 -; LE32-NEXT: slw 11, 8, 11 -; LE32-NEXT: srw 4, 8, 4 -; LE32-NEXT: or 5, 7, 5 -; LE32-NEXT: or 6, 11, 6 -; LE32-NEXT: or 7, 10, 9 +; LE32-NEXT: or 5, 8, 5 +; LE32-NEXT: or 6, 11, 7 +; LE32-NEXT: or 7, 12, 9 ; LE32-NEXT: stw 4, 12(3) ; LE32-NEXT: stw 6, 8(3) ; LE32-NEXT: stw 5, 0(3) diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll index 12976e838f3ca6..6bd5cd23dcec8c 100644 --- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -405,25 +405,26 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; LE-32BIT-NEXT: stw 9, 24(1) ; LE-32BIT-NEXT: rlwinm 4, 4, 3, 27, 28 ; LE-32BIT-NEXT: stw 8, 20(1) -; LE-32BIT-NEXT: subfic 8, 4, 32 +; LE-32BIT-NEXT: subfic 9, 4, 32 ; LE-32BIT-NEXT: stw 7, 16(1) -; LE-32BIT-NEXT: lwzux 3, 6, 3 -; LE-32BIT-NEXT: lwz 9, 4(6) -; LE-32BIT-NEXT: slw 3, 3, 4 -; LE-32BIT-NEXT: lwz 7, 8(6) -; LE-32BIT-NEXT: lwz 6, 12(6) -; LE-32BIT-NEXT: slw 11, 9, 4 -; LE-32BIT-NEXT: srw 9, 9, 8 -; LE-32BIT-NEXT: srw 10, 7, 8 -; LE-32BIT-NEXT: srw 8, 6, 8 +; LE-32BIT-NEXT: lwzx 7, 3, 6 +; LE-32BIT-NEXT: rlwimi 3, 6, 0, 28, 29 +; 
LE-32BIT-NEXT: lwz 6, 8(3) +; LE-32BIT-NEXT: lwz 8, 4(3) ; LE-32BIT-NEXT: slw 7, 7, 4 -; LE-32BIT-NEXT: slw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 3, 9 -; LE-32BIT-NEXT: stw 4, 12(5) -; LE-32BIT-NEXT: or 4, 7, 8 +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: srw 10, 6, 9 +; LE-32BIT-NEXT: slw 11, 8, 4 +; LE-32BIT-NEXT: srw 8, 8, 9 +; LE-32BIT-NEXT: srw 9, 3, 9 +; LE-32BIT-NEXT: slw 6, 6, 4 +; LE-32BIT-NEXT: slw 3, 3, 4 +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: or 3, 6, 9 +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: or 3, 7, 8 ; LE-32BIT-NEXT: stw 3, 0(5) ; LE-32BIT-NEXT: or 3, 11, 10 -; LE-32BIT-NEXT: stw 4, 8(5) ; LE-32BIT-NEXT: stw 3, 4(5) ; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr @@ -483,22 +484,23 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; LE-32BIT-NEXT: lwz 4, 12(4) ; LE-32BIT-NEXT: stw 6, 44(1) ; LE-32BIT-NEXT: stw 6, 40(1) -; LE-32BIT-NEXT: rlwinm 4, 4, 2, 28, 29 ; LE-32BIT-NEXT: stw 6, 36(1) ; LE-32BIT-NEXT: stw 6, 32(1) +; LE-32BIT-NEXT: rlwinm 6, 4, 2, 28, 29 ; LE-32BIT-NEXT: stw 3, 28(1) ; LE-32BIT-NEXT: addi 3, 1, 16 ; LE-32BIT-NEXT: stw 9, 24(1) ; LE-32BIT-NEXT: stw 8, 20(1) ; LE-32BIT-NEXT: stw 7, 16(1) -; LE-32BIT-NEXT: lwzux 3, 4, 3 -; LE-32BIT-NEXT: lwz 6, 4(4) -; LE-32BIT-NEXT: lwz 7, 12(4) -; LE-32BIT-NEXT: lwz 4, 8(4) -; LE-32BIT-NEXT: stw 3, 0(5) -; LE-32BIT-NEXT: stw 4, 8(5) +; LE-32BIT-NEXT: lwzx 6, 3, 6 +; LE-32BIT-NEXT: rlwimi 3, 4, 2, 28, 29 +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 3, 8(3) +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 3, 8(5) ; LE-32BIT-NEXT: stw 7, 12(5) -; LE-32BIT-NEXT: stw 6, 4(5) +; LE-32BIT-NEXT: stw 4, 4(5) ; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll index e1731ddfd92875..5fe7fca5b6e41d 100644 --- 
a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll @@ -304,27 +304,28 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; LE-32BIT-NEXT: stw 3, 28(1) ; LE-32BIT-NEXT: addi 3, 1, 16 ; LE-32BIT-NEXT: stw 9, 24(1) -; LE-32BIT-NEXT: clrlwi 4, 4, 27 ; LE-32BIT-NEXT: stw 8, 20(1) -; LE-32BIT-NEXT: subfic 8, 4, 32 ; LE-32BIT-NEXT: stw 7, 16(1) -; LE-32BIT-NEXT: lwzux 3, 6, 3 -; LE-32BIT-NEXT: lwz 9, 4(6) -; LE-32BIT-NEXT: slw 3, 3, 4 -; LE-32BIT-NEXT: lwz 7, 8(6) -; LE-32BIT-NEXT: lwz 6, 12(6) -; LE-32BIT-NEXT: slw 11, 9, 4 -; LE-32BIT-NEXT: srw 9, 9, 8 -; LE-32BIT-NEXT: srw 10, 7, 8 -; LE-32BIT-NEXT: srw 8, 6, 8 +; LE-32BIT-NEXT: lwzx 6, 3, 6 +; LE-32BIT-NEXT: rlwimi 3, 4, 29, 28, 29 +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: clrlwi 4, 4, 27 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: subfic 9, 4, 32 +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: srw 10, 7, 9 +; LE-32BIT-NEXT: slw 11, 8, 4 +; LE-32BIT-NEXT: srw 8, 8, 9 +; LE-32BIT-NEXT: srw 9, 3, 9 ; LE-32BIT-NEXT: slw 7, 7, 4 -; LE-32BIT-NEXT: slw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 3, 9 -; LE-32BIT-NEXT: stw 4, 12(5) -; LE-32BIT-NEXT: or 4, 7, 8 +; LE-32BIT-NEXT: slw 3, 3, 4 +; LE-32BIT-NEXT: slw 6, 6, 4 +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: or 3, 7, 9 +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: or 3, 6, 8 ; LE-32BIT-NEXT: stw 3, 0(5) ; LE-32BIT-NEXT: or 3, 11, 10 -; LE-32BIT-NEXT: stw 4, 8(5) ; LE-32BIT-NEXT: stw 3, 4(5) ; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll index b94023b97a2950..cc51a00db415ca 100644 --- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll +++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll @@ -30,11 +30,11 @@ define i1 @test_cmpxchg_seq_cst(ptr %addr, i128 %desire, i128 %new) { ; ; PWR7-LABEL: @test_cmpxchg_seq_cst( ; PWR7-NEXT: 
entry: -; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 8 +; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 16 ; PWR7-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP0]]) -; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 8 +; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 16 ; PWR7-NEXT: [[TMP1:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[TMP0]], i128 [[NEW:%.*]], i32 5, i32 5) -; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8 +; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 ; PWR7-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP0]]) ; PWR7-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP2]], 0 ; PWR7-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 [[TMP1]], 1 From c6897cabe4be9a7ca933e723a4834e99b6936147 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 27 Nov 2024 22:16:29 +0000 Subject: [PATCH 2/7] additional tc updates --- clang/test/CodeGen/target-data.c | 22 +-- llvm/test/CodeGen/PowerPC/all-atomics.ll | 130 +++++++++--------- .../Bitcode/DataLayoutUpgradeTest.cpp | 2 +- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index cb89fad941c832..3e11f6ae183b6b 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -88,7 +88,7 @@ // RUN: %clang_cc1 -triple powerpc64-lv2 -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PS3 -// PS3: target datalayout = "E-m:e-p:32:32-Fi64-i64:64-n32:64" +// PS3: target datalayout = "E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64" // RUN: %clang_cc1 -triple i686-nacl -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=I686-NACL @@ -120,43 +120,43 @@ // RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC -// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32" +// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-i128:128-n32" // RUN: 
%clang_cc1 -triple powerpcle-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPCLE -// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-n32" +// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-i128:128-n32" // RUN: %clang_cc1 -triple powerpc64-freebsd -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64-FREEBSD -// PPC64-FREEBSD: target datalayout = "E-m:e-Fn32-i64:64-n32:64" +// PPC64-FREEBSD: target datalayout = "E-m:e-Fn32-i64:64-i128:128-n32:64" // RUN: %clang_cc1 -triple powerpc64le-freebsd -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-FREEBSD -// PPC64LE-FREEBSD: target datalayout = "e-m:e-Fn32-i64:64-n32:64" +// PPC64LE-FREEBSD: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64" // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64-LINUX -// PPC64-LINUX: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" +// PPC64-LINUX: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu future %s | \ // RUN: FileCheck %s -check-prefix=PPC64-FUTURE -// PPC64-FUTURE: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" +// PPC64-FUTURE: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu pwr10 %s | \ // RUN: FileCheck %s -check-prefix=PPC64-P10 -// PPC64-P10: target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" +// PPC64-P10: target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-LINUX -// PPC64LE-LINUX: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512" +// PPC64LE-LINUX: target datalayout = 
"e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu future %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-FUTURE -// PPC64LE-FUTURE: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512" +// PPC64LE-FUTURE: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu pwr10 %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-P10 -// PPC64LE-P10: target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512" +// PPC64LE-P10: target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll index 531e559ea7309c..40b96dab94b9ff 100644 --- a/llvm/test/CodeGen/PowerPC/all-atomics.ll +++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll @@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-LABEL: test_op_ignore: ; AIX32: # %bb.0: # %entry ; AIX32-NEXT: mflr 0 -; AIX32-NEXT: stwu 1, -160(1) +; AIX32-NEXT: stwu 1, -176(1) ; AIX32-NEXT: lwz 3, L..C0(2) # @sc -; AIX32-NEXT: stw 0, 168(1) +; AIX32-NEXT: stw 0, 184(1) ; AIX32-NEXT: rlwinm 4, 3, 3, 27, 28 -; AIX32-NEXT: stw 15, 92(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 26, 136(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 28, 144(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 15, 108(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 26, 152(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 28, 160(1) # 4-byte Folded Spill ; AIX32-NEXT: li 15, 1 ; AIX32-NEXT: rlwinm 28, 3, 0, 0, 29 ; AIX32-NEXT: li 3, 255 ; AIX32-NEXT: xori 26, 4, 24 -; AIX32-NEXT: stw 16, 96(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 17, 100(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 18, 104(1) # 
4-byte Folded Spill -; AIX32-NEXT: stw 19, 108(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 20, 112(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 21, 116(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 22, 120(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 23, 124(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 24, 128(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 25, 132(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 27, 140(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 29, 148(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 30, 152(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 31, 156(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 16, 112(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 17, 116(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 18, 120(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 19, 124(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 20, 128(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 21, 132(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 22, 136(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 23, 140(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 24, 144(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 25, 148(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 27, 156(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 29, 164(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 30, 168(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 31, 172(1) # 4-byte Folded Spill ; AIX32-NEXT: sync ; AIX32-NEXT: slw 29, 15, 26 ; AIX32-NEXT: slw 3, 3, 26 @@ -906,8 +906,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: bl .__atomic_fetch_xor_8[PR] ; AIX32-NEXT: nop ; AIX32-NEXT: lwz 31, L..C8(2) # @u128 -; AIX32-NEXT: addi 30, 1, 72 -; AIX32-NEXT: addi 29, 1, 56 +; AIX32-NEXT: addi 30, 1, 80 +; AIX32-NEXT: addi 29, 1, 64 ; AIX32-NEXT: lwz 5, 12(31) ; AIX32-NEXT: lwz 4, 8(31) ; AIX32-NEXT: lwz 6, 4(31) @@ -916,32 +916,32 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: L..BB0_49: # %atomicrmw.start2 ; AIX32-NEXT: # ; AIX32-NEXT: xori 3, 5, 1 -; AIX32-NEXT: stw 7, 72(1) -; AIX32-NEXT: 
stw 7, 56(1) +; AIX32-NEXT: stw 7, 80(1) +; AIX32-NEXT: stw 7, 64(1) ; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: stw 3, 68(1) +; AIX32-NEXT: stw 3, 76(1) ; AIX32-NEXT: li 3, 16 ; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: stw 6, 76(1) -; AIX32-NEXT: stw 4, 80(1) -; AIX32-NEXT: stw 5, 84(1) -; AIX32-NEXT: stw 4, 64(1) -; AIX32-NEXT: stw 6, 60(1) +; AIX32-NEXT: stw 6, 84(1) +; AIX32-NEXT: stw 4, 88(1) +; AIX32-NEXT: stw 5, 92(1) +; AIX32-NEXT: stw 4, 72(1) +; AIX32-NEXT: stw 6, 68(1) ; AIX32-NEXT: mr 4, 31 ; AIX32-NEXT: mr 5, 30 ; AIX32-NEXT: mr 6, 29 ; AIX32-NEXT: bl .__atomic_compare_exchange[PR] ; AIX32-NEXT: nop -; AIX32-NEXT: lwz 5, 84(1) -; AIX32-NEXT: lwz 4, 80(1) -; AIX32-NEXT: lwz 6, 76(1) -; AIX32-NEXT: lwz 7, 72(1) +; AIX32-NEXT: lwz 5, 92(1) +; AIX32-NEXT: lwz 4, 88(1) +; AIX32-NEXT: lwz 6, 84(1) +; AIX32-NEXT: lwz 7, 80(1) ; AIX32-NEXT: cmplwi 3, 0 ; AIX32-NEXT: beq 0, L..BB0_49 ; AIX32-NEXT: # %bb.50: # %atomicrmw.end1 ; AIX32-NEXT: lwz 31, L..C9(2) # @s128 -; AIX32-NEXT: addi 30, 1, 72 -; AIX32-NEXT: addi 29, 1, 56 +; AIX32-NEXT: addi 30, 1, 80 +; AIX32-NEXT: addi 29, 1, 64 ; AIX32-NEXT: lwz 5, 12(31) ; AIX32-NEXT: lwz 4, 8(31) ; AIX32-NEXT: lwz 6, 4(31) @@ -950,26 +950,26 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: L..BB0_51: # %atomicrmw.start ; AIX32-NEXT: # ; AIX32-NEXT: xori 3, 5, 1 -; AIX32-NEXT: stw 7, 72(1) -; AIX32-NEXT: stw 7, 56(1) +; AIX32-NEXT: stw 7, 80(1) +; AIX32-NEXT: stw 7, 64(1) ; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: stw 3, 68(1) +; AIX32-NEXT: stw 3, 76(1) ; AIX32-NEXT: li 3, 16 ; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: stw 6, 76(1) -; AIX32-NEXT: stw 4, 80(1) -; AIX32-NEXT: stw 5, 84(1) -; AIX32-NEXT: stw 4, 64(1) -; AIX32-NEXT: stw 6, 60(1) +; AIX32-NEXT: stw 6, 84(1) +; AIX32-NEXT: stw 4, 88(1) +; AIX32-NEXT: stw 5, 92(1) +; AIX32-NEXT: stw 4, 72(1) +; AIX32-NEXT: stw 6, 68(1) ; AIX32-NEXT: mr 4, 31 ; AIX32-NEXT: mr 5, 30 ; AIX32-NEXT: mr 6, 29 ; AIX32-NEXT: bl .__atomic_compare_exchange[PR] ; AIX32-NEXT: nop -; 
AIX32-NEXT: lwz 5, 84(1) -; AIX32-NEXT: lwz 4, 80(1) -; AIX32-NEXT: lwz 6, 76(1) -; AIX32-NEXT: lwz 7, 72(1) +; AIX32-NEXT: lwz 5, 92(1) +; AIX32-NEXT: lwz 4, 88(1) +; AIX32-NEXT: lwz 6, 84(1) +; AIX32-NEXT: lwz 7, 80(1) ; AIX32-NEXT: cmplwi 3, 0 ; AIX32-NEXT: beq 0, L..BB0_51 ; AIX32-NEXT: # %bb.52: # %atomicrmw.end @@ -1156,24 +1156,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: li 6, 5 ; AIX32-NEXT: bl .__atomic_fetch_and_8[PR] ; AIX32-NEXT: nop -; AIX32-NEXT: lwz 31, 156(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 30, 152(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 29, 148(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 28, 144(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 27, 140(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 26, 136(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 25, 132(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 24, 128(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 23, 124(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 22, 120(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 21, 116(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 20, 112(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 19, 108(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 18, 104(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 17, 100(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 16, 96(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 15, 92(1) # 4-byte Folded Reload -; AIX32-NEXT: addi 1, 1, 160 +; AIX32-NEXT: lwz 31, 172(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 30, 168(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 29, 164(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 28, 160(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 27, 156(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 26, 152(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 25, 148(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 24, 144(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 23, 140(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 22, 136(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 21, 132(1) # 4-byte Folded 
Reload +; AIX32-NEXT: lwz 20, 128(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 19, 124(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 18, 120(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 17, 116(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 16, 112(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 15, 108(1) # 4-byte Folded Reload +; AIX32-NEXT: addi 1, 1, 176 ; AIX32-NEXT: lwz 0, 8(1) ; AIX32-NEXT: mtlr 0 ; AIX32-NEXT: blr diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 795646b22b945a..67dccb3849f93c 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -118,7 +118,7 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128" "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" "-f80:128:128-n8:16:32:64-S128"); - EXPECT_EQ(DL2, "e-m:e-i64:64-n32:64"); + EXPECT_EQ(DL2, "e-m:e-i64:64-i128:128-n32:64"); EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:" "64-S128-Fn32"); From 1d65696ccd6839eaf0ee3c3b08665d8f2990accd Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 27 Nov 2024 22:19:49 +0000 Subject: [PATCH 3/7] [PowerPC] Update data layout alignment of i128 to 16 --- clang/lib/Basic/Targets/OSTargets.h | 2 +- clang/lib/Basic/Targets/PPC.h | 12 ++++++------ llvm/lib/IR/AutoUpgrade.cpp | 2 +- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 3 +++ 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 75f53e96ce28f6..c0351f26e9bee6 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -473,7 +473,7 @@ class LLVM_LIBRARY_VISIBILITY PS3PPUTargetInfo : public OSTargetInfo { this->IntMaxType = TargetInfo::SignedLongLong; this->Int64Type = TargetInfo::SignedLongLong; this->SizeType = TargetInfo::UnsignedInt; - 
this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-n32:64"); + this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64"); } }; diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 4e8da8406116ea..40c076208ff2a9 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -404,11 +404,11 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo { PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : PPCTargetInfo(Triple, Opts) { if (Triple.isOSAIX()) - resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-n32"); + resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-i128:128-n32"); else if (Triple.getArch() == llvm::Triple::ppcle) - resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-n32"); + resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-i128:128-n32"); else - resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-n32"); + resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-i128:128-n32"); switch (getTriple().getOS()) { case llvm::Triple::Linux: @@ -463,12 +463,12 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo { if (Triple.isOSAIX()) { // TODO: Set appropriate ABI for AIX platform. 
- DataLayout = "E-m:a-Fi64-i64:64-n32:64"; + DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64"; LongDoubleWidth = 64; LongDoubleAlign = DoubleAlign = 32; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); } else if ((Triple.getArch() == llvm::Triple::ppc64le)) { - DataLayout = "e-m:e-Fn32-i64:64-n32:64"; + DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64"; ABI = "elfv2"; } else { DataLayout = "E-m:e"; @@ -479,7 +479,7 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo { ABI = "elfv1"; DataLayout += "-Fi64"; } - DataLayout += "-i64:64-n32:64"; + DataLayout += "-i64:64-i128:128-n32:64"; } if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) { diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e73538da282e99..be426de2fa80a8 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5559,7 +5559,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { return Res; } - if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m"))) { + if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC()) { // Mips64 with o32 ABI did not add "-i128:128". // Add "-i128:128" std::string I64 = "-i64:64"; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 11d7ea68312fbc..73b2020ff47334 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -183,6 +183,9 @@ static std::string getDataLayoutString(const Triple &T) { // documentation are wrong; these are correct (i.e. "what gcc does"). Ret += "-i64:64"; + // Alignment for 128 bit integers. + Ret += "-i128:128"; + // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
if (is64Bit) Ret += "-n32:64"; From 1d5ba80639965d15ecfa78102d36c127b0c964bb Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 27 Nov 2024 22:37:30 +0000 Subject: [PATCH 4/7] add tc to check alignment of int types --- llvm/test/CodeGen/PowerPC/data-align.ll | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/data-align.ll diff --git a/llvm/test/CodeGen/PowerPC/data-align.ll b/llvm/test/CodeGen/PowerPC/data-align.ll new file mode 100644 index 00000000000000..bfedec139369c0 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/data-align.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=powerpc-unknown-linux | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux | FileCheck %s + +; CHECK: .set .Li8, +; CHECK-NEXT: .size .Li8, 1 +@i8 = private constant i8 42 + +; CHECK: .set .Li16, +; CHECK-NEXT: .size .Li16, 2 +@i16 = private constant i16 42 + +; CHECK: .set .Li32, +; CHECK-NEXT: .size .Li32, 4 +@i32 = private constant i32 42 + +; CHECK: .set .Li64, +; CHECK-NEXT: .size .Li64, 8 +@i64 = private constant i64 42 + +; CHECK: .set .Li128, +; CHECK-NEXT: .size .Li128, 16 +@i128 = private constant i128 42 + From 2dae7daa378f94bce0306eedc988e7cb6614e0dd Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 28 Nov 2024 13:48:12 +0000 Subject: [PATCH 5/7] update alignment for 32bit only --- clang/lib/Basic/Targets/PPC.h | 6 +- clang/test/CodeGen/target-data.c | 4 +- llvm/lib/IR/AutoUpgrade.cpp | 2 +- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5 +- llvm/test/CodeGen/PowerPC/all-atomics.ll | 130 +++++++++--------- llvm/test/CodeGen/PowerPC/ctrloop-sh.ll | 58 ++++---- llvm/test/CodeGen/PowerPC/pr59074.ll | 29 ++-- ...lar-shift-by-byte-multiple-legalization.ll | 48 ++++--- .../PowerPC/wide-scalar-shift-legalization.ll | 33 +++-- 9 files changed, 153 insertions(+), 162 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h 
index 40c076208ff2a9..b58375f6f37935 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -404,11 +404,11 @@ class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo { PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : PPCTargetInfo(Triple, Opts) { if (Triple.isOSAIX()) - resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-i128:128-n32"); + resetDataLayout("E-m:a-p:32:32-Fi32-i64:64-n32"); else if (Triple.getArch() == llvm::Triple::ppcle) - resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-i128:128-n32"); + resetDataLayout("e-m:e-p:32:32-Fn32-i64:64-n32"); else - resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-i128:128-n32"); + resetDataLayout("E-m:e-p:32:32-Fn32-i64:64-n32"); switch (getTriple().getOS()) { case llvm::Triple::Linux: diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 3e11f6ae183b6b..2c758567faa1f6 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -120,11 +120,11 @@ // RUN: %clang_cc1 -triple powerpc-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC -// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-i128:128-n32" +// PPC: target datalayout = "E-m:e-p:32:32-Fn32-i64:64-n32" // RUN: %clang_cc1 -triple powerpcle-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPCLE -// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-i128:128-n32" +// PPCLE: target datalayout = "e-m:e-p:32:32-Fn32-i64:64-n32" // RUN: %clang_cc1 -triple powerpc64-freebsd -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64-FREEBSD diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index be426de2fa80a8..d54176a7878b5a 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5559,7 +5559,7 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { return Res; } - if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC()) { + if (T.isSPARC() || 
(T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64()) { // Mips64 with o32 ABI did not add "-i128:128". // Add "-i128:128" std::string I64 = "-i64:64"; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 73b2020ff47334..b1ad041bde790c 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -183,12 +183,9 @@ static std::string getDataLayoutString(const Triple &T) { // documentation are wrong; these are correct (i.e. "what gcc does"). Ret += "-i64:64"; - // Alignment for 128 bit integers. - Ret += "-i128:128"; - // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. if (is64Bit) - Ret += "-n32:64"; + Ret += "-i128:128-n32:64"; else Ret += "-n32"; diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll index 40b96dab94b9ff..531e559ea7309c 100644 --- a/llvm/test/CodeGen/PowerPC/all-atomics.ll +++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll @@ -509,31 +509,31 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-LABEL: test_op_ignore: ; AIX32: # %bb.0: # %entry ; AIX32-NEXT: mflr 0 -; AIX32-NEXT: stwu 1, -176(1) +; AIX32-NEXT: stwu 1, -160(1) ; AIX32-NEXT: lwz 3, L..C0(2) # @sc -; AIX32-NEXT: stw 0, 184(1) +; AIX32-NEXT: stw 0, 168(1) ; AIX32-NEXT: rlwinm 4, 3, 3, 27, 28 -; AIX32-NEXT: stw 15, 108(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 26, 152(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 28, 160(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 15, 92(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 26, 136(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 28, 144(1) # 4-byte Folded Spill ; AIX32-NEXT: li 15, 1 ; AIX32-NEXT: rlwinm 28, 3, 0, 0, 29 ; AIX32-NEXT: li 3, 255 ; AIX32-NEXT: xori 26, 4, 24 -; AIX32-NEXT: stw 16, 112(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 17, 116(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 18, 120(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 19, 124(1) # 4-byte Folded 
Spill -; AIX32-NEXT: stw 20, 128(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 21, 132(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 22, 136(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 23, 140(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 24, 144(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 25, 148(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 27, 156(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 29, 164(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 30, 168(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 31, 172(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 16, 96(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 17, 100(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 18, 104(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 19, 108(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 20, 112(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 21, 116(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 22, 120(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 23, 124(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 24, 128(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 25, 132(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 27, 140(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 29, 148(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 30, 152(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 31, 156(1) # 4-byte Folded Spill ; AIX32-NEXT: sync ; AIX32-NEXT: slw 29, 15, 26 ; AIX32-NEXT: slw 3, 3, 26 @@ -906,8 +906,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: bl .__atomic_fetch_xor_8[PR] ; AIX32-NEXT: nop ; AIX32-NEXT: lwz 31, L..C8(2) # @u128 -; AIX32-NEXT: addi 30, 1, 80 -; AIX32-NEXT: addi 29, 1, 64 +; AIX32-NEXT: addi 30, 1, 72 +; AIX32-NEXT: addi 29, 1, 56 ; AIX32-NEXT: lwz 5, 12(31) ; AIX32-NEXT: lwz 4, 8(31) ; AIX32-NEXT: lwz 6, 4(31) @@ -916,32 +916,32 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: L..BB0_49: # %atomicrmw.start2 ; AIX32-NEXT: # ; AIX32-NEXT: xori 3, 5, 1 -; AIX32-NEXT: stw 7, 80(1) -; AIX32-NEXT: stw 7, 64(1) +; AIX32-NEXT: stw 7, 72(1) +; AIX32-NEXT: stw 7, 56(1) 
; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: stw 3, 76(1) +; AIX32-NEXT: stw 3, 68(1) ; AIX32-NEXT: li 3, 16 ; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: stw 6, 84(1) -; AIX32-NEXT: stw 4, 88(1) -; AIX32-NEXT: stw 5, 92(1) -; AIX32-NEXT: stw 4, 72(1) -; AIX32-NEXT: stw 6, 68(1) +; AIX32-NEXT: stw 6, 76(1) +; AIX32-NEXT: stw 4, 80(1) +; AIX32-NEXT: stw 5, 84(1) +; AIX32-NEXT: stw 4, 64(1) +; AIX32-NEXT: stw 6, 60(1) ; AIX32-NEXT: mr 4, 31 ; AIX32-NEXT: mr 5, 30 ; AIX32-NEXT: mr 6, 29 ; AIX32-NEXT: bl .__atomic_compare_exchange[PR] ; AIX32-NEXT: nop -; AIX32-NEXT: lwz 5, 92(1) -; AIX32-NEXT: lwz 4, 88(1) -; AIX32-NEXT: lwz 6, 84(1) -; AIX32-NEXT: lwz 7, 80(1) +; AIX32-NEXT: lwz 5, 84(1) +; AIX32-NEXT: lwz 4, 80(1) +; AIX32-NEXT: lwz 6, 76(1) +; AIX32-NEXT: lwz 7, 72(1) ; AIX32-NEXT: cmplwi 3, 0 ; AIX32-NEXT: beq 0, L..BB0_49 ; AIX32-NEXT: # %bb.50: # %atomicrmw.end1 ; AIX32-NEXT: lwz 31, L..C9(2) # @s128 -; AIX32-NEXT: addi 30, 1, 80 -; AIX32-NEXT: addi 29, 1, 64 +; AIX32-NEXT: addi 30, 1, 72 +; AIX32-NEXT: addi 29, 1, 56 ; AIX32-NEXT: lwz 5, 12(31) ; AIX32-NEXT: lwz 4, 8(31) ; AIX32-NEXT: lwz 6, 4(31) @@ -950,26 +950,26 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: L..BB0_51: # %atomicrmw.start ; AIX32-NEXT: # ; AIX32-NEXT: xori 3, 5, 1 -; AIX32-NEXT: stw 7, 80(1) -; AIX32-NEXT: stw 7, 64(1) +; AIX32-NEXT: stw 7, 72(1) +; AIX32-NEXT: stw 7, 56(1) ; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: stw 3, 76(1) +; AIX32-NEXT: stw 3, 68(1) ; AIX32-NEXT: li 3, 16 ; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: stw 6, 84(1) -; AIX32-NEXT: stw 4, 88(1) -; AIX32-NEXT: stw 5, 92(1) -; AIX32-NEXT: stw 4, 72(1) -; AIX32-NEXT: stw 6, 68(1) +; AIX32-NEXT: stw 6, 76(1) +; AIX32-NEXT: stw 4, 80(1) +; AIX32-NEXT: stw 5, 84(1) +; AIX32-NEXT: stw 4, 64(1) +; AIX32-NEXT: stw 6, 60(1) ; AIX32-NEXT: mr 4, 31 ; AIX32-NEXT: mr 5, 30 ; AIX32-NEXT: mr 6, 29 ; AIX32-NEXT: bl .__atomic_compare_exchange[PR] ; AIX32-NEXT: nop -; AIX32-NEXT: lwz 5, 92(1) -; AIX32-NEXT: lwz 4, 88(1) -; AIX32-NEXT: lwz 
6, 84(1) -; AIX32-NEXT: lwz 7, 80(1) +; AIX32-NEXT: lwz 5, 84(1) +; AIX32-NEXT: lwz 4, 80(1) +; AIX32-NEXT: lwz 6, 76(1) +; AIX32-NEXT: lwz 7, 72(1) ; AIX32-NEXT: cmplwi 3, 0 ; AIX32-NEXT: beq 0, L..BB0_51 ; AIX32-NEXT: # %bb.52: # %atomicrmw.end @@ -1156,24 +1156,24 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 { ; AIX32-NEXT: li 6, 5 ; AIX32-NEXT: bl .__atomic_fetch_and_8[PR] ; AIX32-NEXT: nop -; AIX32-NEXT: lwz 31, 172(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 30, 168(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 29, 164(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 28, 160(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 27, 156(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 26, 152(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 25, 148(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 24, 144(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 23, 140(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 22, 136(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 21, 132(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 20, 128(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 19, 124(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 18, 120(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 17, 116(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 16, 112(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 15, 108(1) # 4-byte Folded Reload -; AIX32-NEXT: addi 1, 1, 176 +; AIX32-NEXT: lwz 31, 156(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 30, 152(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 29, 148(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 28, 144(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 27, 140(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 26, 136(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 25, 132(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 24, 128(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 23, 124(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 22, 120(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 21, 116(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 20, 112(1) # 4-byte Folded Reload +; AIX32-NEXT: 
lwz 19, 108(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 18, 104(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 17, 100(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 16, 96(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 15, 92(1) # 4-byte Folded Reload +; AIX32-NEXT: addi 1, 1, 160 ; AIX32-NEXT: lwz 0, 8(1) ; AIX32-NEXT: mtlr 0 ; AIX32-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll index 19f86f9d1af6ff..72de456cba395b 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll @@ -16,41 +16,39 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-NEXT: addi 7, 1, 16 ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 9, 4(4) ; CHECK-NEXT: lwz 8, 0(4) +; CHECK-NEXT: lwz 9, 4(4) ; CHECK-NEXT: lwz 10, 8(4) ; CHECK-NEXT: lwz 11, 12(4) ; CHECK-NEXT: lwz 12, 12(5) -; CHECK-NEXT: stw 9, 20(1) -; CHECK-NEXT: mr 9, 7 ; CHECK-NEXT: stw 6, 44(1) -; CHECK-NEXT: rlwimi 9, 12, 29, 28, 29 ; CHECK-NEXT: stw 6, 40(1) ; CHECK-NEXT: stw 6, 36(1) ; CHECK-NEXT: stw 6, 32(1) ; CHECK-NEXT: stw 11, 28(1) ; CHECK-NEXT: stw 10, 24(1) ; CHECK-NEXT: clrlwi 10, 12, 27 +; CHECK-NEXT: stw 9, 20(1) ; CHECK-NEXT: stw 8, 16(1) -; CHECK-NEXT: rlwinm 12, 12, 29, 28, 29 -; CHECK-NEXT: lwz 8, 8(9) -; CHECK-NEXT: subfic 0, 10, 32 -; CHECK-NEXT: lwz 11, 4(9) -; CHECK-NEXT: lwz 9, 12(9) -; CHECK-NEXT: srw 30, 8, 0 -; CHECK-NEXT: lwzx 12, 7, 12 -; CHECK-NEXT: slw 29, 11, 10 -; CHECK-NEXT: srw 11, 11, 0 -; CHECK-NEXT: srw 0, 9, 0 +; CHECK-NEXT: rlwinm 8, 12, 29, 28, 29 +; CHECK-NEXT: lwzux 9, 8, 7 +; CHECK-NEXT: subfic 12, 10, 32 +; CHECK-NEXT: lwz 11, 8(8) +; CHECK-NEXT: slw 9, 9, 10 +; CHECK-NEXT: lwz 0, 4(8) +; CHECK-NEXT: lwz 8, 12(8) +; CHECK-NEXT: srw 30, 11, 12 +; CHECK-NEXT: slw 29, 0, 10 +; CHECK-NEXT: srw 0, 0, 12 +; CHECK-NEXT: srw 12, 8, 12 +; CHECK-NEXT: slw 11, 11, 10 ; CHECK-NEXT: slw 8, 8, 10 -; CHECK-NEXT: slw 12, 12, 10 -; CHECK-NEXT: or 8, 8, 0 +; CHECK-NEXT: 
stw 8, 12(3) +; CHECK-NEXT: or 8, 11, 12 ; CHECK-NEXT: stw 8, 8(3) -; CHECK-NEXT: or 8, 12, 11 -; CHECK-NEXT: slw 9, 9, 10 +; CHECK-NEXT: or 8, 9, 0 ; CHECK-NEXT: stw 8, 0(3) ; CHECK-NEXT: or 8, 29, 30 -; CHECK-NEXT: stw 9, 12(3) ; CHECK-NEXT: stw 8, 4(3) ; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: # %for.end @@ -79,11 +77,11 @@ for.end: ; preds = %for.body define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-LABEL: foo2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -64(1) -; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; CHECK-NEXT: stwu 1, -48(1) +; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill ; CHECK-NEXT: li 6, 2048 ; CHECK-NEXT: mtctr 6 -; CHECK-NEXT: addi 6, 1, 32 +; CHECK-NEXT: addi 6, 1, 24 ; CHECK-NEXT: .LBB1_1: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: lwz 7, 0(4) @@ -91,18 +89,18 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-NEXT: lwz 11, 12(5) ; CHECK-NEXT: lwz 9, 8(4) ; CHECK-NEXT: lwz 10, 12(4) -; CHECK-NEXT: stw 8, 36(1) +; CHECK-NEXT: stw 8, 28(1) ; CHECK-NEXT: rlwinm 8, 11, 29, 28, 29 -; CHECK-NEXT: stw 7, 32(1) +; CHECK-NEXT: stw 7, 24(1) ; CHECK-NEXT: srawi 7, 7, 31 -; CHECK-NEXT: stw 10, 44(1) +; CHECK-NEXT: stw 10, 36(1) ; CHECK-NEXT: clrlwi 10, 11, 27 -; CHECK-NEXT: stw 9, 40(1) +; CHECK-NEXT: stw 9, 32(1) ; CHECK-NEXT: subfic 12, 10, 32 -; CHECK-NEXT: stw 7, 28(1) -; CHECK-NEXT: stw 7, 24(1) ; CHECK-NEXT: stw 7, 20(1) ; CHECK-NEXT: stw 7, 16(1) +; CHECK-NEXT: stw 7, 12(1) +; CHECK-NEXT: stw 7, 8(1) ; CHECK-NEXT: sub 7, 6, 8 ; CHECK-NEXT: lwz 8, 4(7) ; CHECK-NEXT: lwz 9, 0(7) @@ -124,8 +122,8 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-NEXT: stw 7, 4(3) ; CHECK-NEXT: bdnz .LBB1_1 ; CHECK-NEXT: # %bb.2: # %for.end -; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 64 +; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: blr entry: br label %for.body diff --git 
a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll index ba21fed9f5abd4..6264b9f22876cc 100644 --- a/llvm/test/CodeGen/PowerPC/pr59074.ll +++ b/llvm/test/CodeGen/PowerPC/pr59074.ll @@ -38,27 +38,26 @@ define void @pr59074(ptr %0) { ; LE32-NEXT: stw 7, 40(1) ; LE32-NEXT: stw 7, 36(1) ; LE32-NEXT: stw 8, 16(1) -; LE32-NEXT: rlwinm 7, 4, 29, 28, 29 +; LE32-NEXT: rlwinm 9, 4, 29, 28, 29 ; LE32-NEXT: stxvd2x 0, 0, 5 -; LE32-NEXT: lwzx 5, 6, 7 -; LE32-NEXT: rlwimi 6, 4, 29, 28, 29 ; LE32-NEXT: clrlwi 4, 4, 27 -; LE32-NEXT: lwz 7, 8(6) -; LE32-NEXT: lwz 8, 4(6) -; LE32-NEXT: lwz 6, 12(6) +; LE32-NEXT: lwzux 5, 9, 6 +; LE32-NEXT: lwz 6, 8(9) +; LE32-NEXT: lwz 7, 4(9) +; LE32-NEXT: lwz 8, 12(9) ; LE32-NEXT: xori 9, 4, 31 ; LE32-NEXT: subfic 11, 4, 32 ; LE32-NEXT: srw 5, 5, 4 -; LE32-NEXT: slwi 10, 7, 1 -; LE32-NEXT: srw 12, 8, 4 -; LE32-NEXT: slw 8, 8, 11 -; LE32-NEXT: srw 7, 7, 4 -; LE32-NEXT: slw 11, 6, 11 -; LE32-NEXT: srw 4, 6, 4 +; LE32-NEXT: slwi 10, 6, 1 +; LE32-NEXT: srw 6, 6, 4 ; LE32-NEXT: slw 9, 10, 9 -; LE32-NEXT: or 5, 8, 5 -; LE32-NEXT: or 6, 11, 7 -; LE32-NEXT: or 7, 12, 9 +; LE32-NEXT: srw 10, 7, 4 +; LE32-NEXT: slw 7, 7, 11 +; LE32-NEXT: slw 11, 8, 11 +; LE32-NEXT: srw 4, 8, 4 +; LE32-NEXT: or 5, 7, 5 +; LE32-NEXT: or 6, 11, 6 +; LE32-NEXT: or 7, 10, 9 ; LE32-NEXT: stw 4, 12(3) ; LE32-NEXT: stw 6, 8(3) ; LE32-NEXT: stw 5, 0(3) diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll index 6bd5cd23dcec8c..12976e838f3ca6 100644 --- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -405,26 +405,25 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; LE-32BIT-NEXT: stw 9, 24(1) ; LE-32BIT-NEXT: rlwinm 4, 4, 3, 27, 28 ; LE-32BIT-NEXT: stw 8, 20(1) -; LE-32BIT-NEXT: subfic 9, 4, 32 +; 
LE-32BIT-NEXT: subfic 8, 4, 32 ; LE-32BIT-NEXT: stw 7, 16(1) -; LE-32BIT-NEXT: lwzx 7, 3, 6 -; LE-32BIT-NEXT: rlwimi 3, 6, 0, 28, 29 -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 8, 4(3) -; LE-32BIT-NEXT: slw 7, 7, 4 -; LE-32BIT-NEXT: lwz 3, 12(3) -; LE-32BIT-NEXT: srw 10, 6, 9 -; LE-32BIT-NEXT: slw 11, 8, 4 -; LE-32BIT-NEXT: srw 8, 8, 9 -; LE-32BIT-NEXT: srw 9, 3, 9 -; LE-32BIT-NEXT: slw 6, 6, 4 +; LE-32BIT-NEXT: lwzux 3, 6, 3 +; LE-32BIT-NEXT: lwz 9, 4(6) ; LE-32BIT-NEXT: slw 3, 3, 4 -; LE-32BIT-NEXT: stw 3, 12(5) -; LE-32BIT-NEXT: or 3, 6, 9 -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: or 3, 7, 8 +; LE-32BIT-NEXT: lwz 7, 8(6) +; LE-32BIT-NEXT: lwz 6, 12(6) +; LE-32BIT-NEXT: slw 11, 9, 4 +; LE-32BIT-NEXT: srw 9, 9, 8 +; LE-32BIT-NEXT: srw 10, 7, 8 +; LE-32BIT-NEXT: srw 8, 6, 8 +; LE-32BIT-NEXT: slw 7, 7, 4 +; LE-32BIT-NEXT: slw 4, 6, 4 +; LE-32BIT-NEXT: or 3, 3, 9 +; LE-32BIT-NEXT: stw 4, 12(5) +; LE-32BIT-NEXT: or 4, 7, 8 ; LE-32BIT-NEXT: stw 3, 0(5) ; LE-32BIT-NEXT: or 3, 11, 10 +; LE-32BIT-NEXT: stw 4, 8(5) ; LE-32BIT-NEXT: stw 3, 4(5) ; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr @@ -484,23 +483,22 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; LE-32BIT-NEXT: lwz 4, 12(4) ; LE-32BIT-NEXT: stw 6, 44(1) ; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: rlwinm 4, 4, 2, 28, 29 ; LE-32BIT-NEXT: stw 6, 36(1) ; LE-32BIT-NEXT: stw 6, 32(1) -; LE-32BIT-NEXT: rlwinm 6, 4, 2, 28, 29 ; LE-32BIT-NEXT: stw 3, 28(1) ; LE-32BIT-NEXT: addi 3, 1, 16 ; LE-32BIT-NEXT: stw 9, 24(1) ; LE-32BIT-NEXT: stw 8, 20(1) ; LE-32BIT-NEXT: stw 7, 16(1) -; LE-32BIT-NEXT: lwzx 6, 3, 6 -; LE-32BIT-NEXT: rlwimi 3, 4, 2, 28, 29 -; LE-32BIT-NEXT: lwz 4, 4(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: lwz 3, 8(3) -; LE-32BIT-NEXT: stw 6, 0(5) -; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: lwzux 3, 4, 3 +; LE-32BIT-NEXT: lwz 6, 4(4) +; LE-32BIT-NEXT: lwz 7, 12(4) +; LE-32BIT-NEXT: lwz 4, 8(4) +; LE-32BIT-NEXT: stw 3, 0(5) +; LE-32BIT-NEXT: 
stw 4, 8(5) ; LE-32BIT-NEXT: stw 7, 12(5) -; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: stw 6, 4(5) ; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll index 5fe7fca5b6e41d..e1731ddfd92875 100644 --- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll @@ -304,28 +304,27 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; LE-32BIT-NEXT: stw 3, 28(1) ; LE-32BIT-NEXT: addi 3, 1, 16 ; LE-32BIT-NEXT: stw 9, 24(1) +; LE-32BIT-NEXT: clrlwi 4, 4, 27 ; LE-32BIT-NEXT: stw 8, 20(1) +; LE-32BIT-NEXT: subfic 8, 4, 32 ; LE-32BIT-NEXT: stw 7, 16(1) -; LE-32BIT-NEXT: lwzx 6, 3, 6 -; LE-32BIT-NEXT: rlwimi 3, 4, 29, 28, 29 -; LE-32BIT-NEXT: lwz 7, 8(3) -; LE-32BIT-NEXT: clrlwi 4, 4, 27 -; LE-32BIT-NEXT: lwz 8, 4(3) -; LE-32BIT-NEXT: subfic 9, 4, 32 -; LE-32BIT-NEXT: lwz 3, 12(3) -; LE-32BIT-NEXT: srw 10, 7, 9 -; LE-32BIT-NEXT: slw 11, 8, 4 -; LE-32BIT-NEXT: srw 8, 8, 9 -; LE-32BIT-NEXT: srw 9, 3, 9 -; LE-32BIT-NEXT: slw 7, 7, 4 +; LE-32BIT-NEXT: lwzux 3, 6, 3 +; LE-32BIT-NEXT: lwz 9, 4(6) ; LE-32BIT-NEXT: slw 3, 3, 4 -; LE-32BIT-NEXT: slw 6, 6, 4 -; LE-32BIT-NEXT: stw 3, 12(5) -; LE-32BIT-NEXT: or 3, 7, 9 -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: or 3, 6, 8 +; LE-32BIT-NEXT: lwz 7, 8(6) +; LE-32BIT-NEXT: lwz 6, 12(6) +; LE-32BIT-NEXT: slw 11, 9, 4 +; LE-32BIT-NEXT: srw 9, 9, 8 +; LE-32BIT-NEXT: srw 10, 7, 8 +; LE-32BIT-NEXT: srw 8, 6, 8 +; LE-32BIT-NEXT: slw 7, 7, 4 +; LE-32BIT-NEXT: slw 4, 6, 4 +; LE-32BIT-NEXT: or 3, 3, 9 +; LE-32BIT-NEXT: stw 4, 12(5) +; LE-32BIT-NEXT: or 4, 7, 8 ; LE-32BIT-NEXT: stw 3, 0(5) ; LE-32BIT-NEXT: or 3, 11, 10 +; LE-32BIT-NEXT: stw 4, 8(5) ; LE-32BIT-NEXT: stw 3, 4(5) ; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr From c05f7ddf74ca64bc3c0e702a22b3501bda7f5940 Mon 
Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 28 Nov 2024 15:16:33 +0000 Subject: [PATCH 6/7] add additional tests for DataLayoutUpgrade and move existing to proper location --- .../Bitcode/DataLayoutUpgradeTest.cpp | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 67dccb3849f93c..7a74a5882286f9 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -92,6 +92,16 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64", "mips64el"), "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"); + // Check that PowerPC64 targets add -i128:128. + EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32:64", "powerpc64le-linux"), + "e-m:e-i64:64-i128:128-n32:64"); + EXPECT_EQ(UpgradeDataLayoutString( + "E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"), + "E-m:e-Fn32-i64:64-i128:128-n32:64"); + EXPECT_EQ(UpgradeDataLayoutString( + "E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"), + "E-m:a-Fi64-i64:64-i128:128-n32:64"); + // Check that SPIR && SPIRV targets add -G1 if it's not present. 
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1"); EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir64"), "e-p:32:32-G1"); @@ -108,8 +118,6 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { "-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" "-n8:16:32:64-S128", "x86_64-unknown-linux-gnu"); - std::string DL2 = UpgradeDataLayoutString("e-m:e-i64:64-n32:64", - "powerpc64le-unknown-linux-gnu"); std::string DL3 = UpgradeDataLayoutString( "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32", "aarch64--"); @@ -118,7 +126,6 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128" "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" "-f80:128:128-n8:16:32:64-S128"); - EXPECT_EQ(DL2, "e-m:e-i64:64-i128:128-n32:64"); EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:" "64-S128-Fn32"); @@ -153,6 +160,14 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { EXPECT_EQ(UpgradeDataLayoutString("G2", "spir64"), "G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "spirv32"), "G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "spirv64"), "G2"); + + // Check that PowerPC32 targets don't add -i128:128. 
+ EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32", "powerpcle-linux"), + "e-m:e-i64:64-n32"); + EXPECT_EQ(UpgradeDataLayoutString("E-m:e-Fn32-i64:64-n32", "powerpc-linux"), + "E-m:e-Fn32-i64:64-n32"); + EXPECT_EQ(UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32", "powerpc-aix"), + "E-m:a-Fi64-i64:64-n32"); } TEST(DataLayoutUpgradeTest, EmptyDataLayout) { From e256b64faa232a93420ce8657cd9c2a4870b0f5c Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 28 Nov 2024 11:07:21 -0500 Subject: [PATCH 7/7] clang-format code --- llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 7a74a5882286f9..00bb963d39cedb 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -95,12 +95,12 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that PowerPC64 targets add -i128:128. EXPECT_EQ(UpgradeDataLayoutString("e-m:e-i64:64-n32:64", "powerpc64le-linux"), "e-m:e-i64:64-i128:128-n32:64"); - EXPECT_EQ(UpgradeDataLayoutString( - "E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"), - "E-m:e-Fn32-i64:64-i128:128-n32:64"); - EXPECT_EQ(UpgradeDataLayoutString( - "E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"), - "E-m:a-Fi64-i64:64-i128:128-n32:64"); + EXPECT_EQ( + UpgradeDataLayoutString("E-m:e-Fn32-i64:64-n32:64", "powerpc64-linux"), + "E-m:e-Fn32-i64:64-i128:128-n32:64"); + EXPECT_EQ( + UpgradeDataLayoutString("E-m:a-Fi64-i64:64-n32:64", "powerpc64-ibm-aix"), + "E-m:a-Fi64-i64:64-i128:128-n32:64"); // Check that SPIR && SPIRV targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "spir"), "e-p:32:32-G1");