
Commit a7cd0c6

topperc authored and tstellar committed
[RISCV] Add an unaligned-scalar-mem feature like we had in clang 17.
This is ORed with the fast-unaligned-access feature, which applies to scalar and vector together.
1 parent 2a4a0bf commit a7cd0c6

8 files changed (+31, -5 lines)
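
A minimal sketch of the restored behavior (hypothetical test file and function name, not part of this commit): because the two features are ORed at every query site changed below, either attribute alone should let the backend keep a misaligned scalar access whole.

; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access | FileCheck %s
; Hypothetical example: an i32 load with alignment below the natural 4 bytes
; is expected to select a single lw rather than four byte loads plus shifts.
define i32 @load_align1(ptr %p) {
; CHECK-LABEL: load_align1:
; CHECK: lw a0, 0(a0)
  %v = load i32, ptr %p, align 1
  ret i32 %v
}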

llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp (+3, -2)

@@ -317,8 +317,9 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
       .addReg(MBBI->getOperand(1).getReg())
       .add(MBBI->getOperand(2));
   if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
-    // FIXME: Zdinx RV32 can not work on unaligned memory.
-    assert(!STI->hasFastUnalignedAccess());
+    // FIXME: Zdinx RV32 can not work on unaligned scalar memory.
+    assert(!STI->hasFastUnalignedAccess() &&
+           !STI->enableUnalignedScalarMem());
 
     assert(MBBI->getOperand(2).getOffset() % 8 == 0);
     MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);

llvm/lib/Target/RISCV/RISCVFeatures.td (+5)

@@ -1025,6 +1025,11 @@ def FeatureFastUnalignedAccess
                        "true", "Has reasonably performant unaligned "
                        "loads and stores (both scalar and vector)">;
 
+def FeatureUnalignedScalarMem
+   : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
+                      "true", "Has reasonably performant unaligned scalar "
+                      "loads and stores">;
+
 def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
     "UsePostRAScheduler", "true", "Schedule again after register allocation">;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+6, -3)

@@ -1883,7 +1883,8 @@ bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   // replace. If we don't support unaligned scalar mem, prefer the constant
   // pool.
   // TODO: Can the caller pass down the alignment?
-  if (!Subtarget.hasFastUnalignedAccess())
+  if (!Subtarget.hasFastUnalignedAccess() &&
+      !Subtarget.enableUnalignedScalarMem())
     return true;
 
   // Prefer to keep the load if it would require many instructions.
@@ -19772,8 +19773,10 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
     unsigned *Fast) const {
   if (!VT.isVector()) {
     if (Fast)
-      *Fast = Subtarget.hasFastUnalignedAccess();
-    return Subtarget.hasFastUnalignedAccess();
+      *Fast = Subtarget.hasFastUnalignedAccess() ||
+              Subtarget.enableUnalignedScalarMem();
+    return Subtarget.hasFastUnalignedAccess() ||
+           Subtarget.enableUnalignedScalarMem();
   }
 
   // All vector implementations must support element alignment
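
The allowsMisalignedMemoryAccesses change above is what lets misaligned scalar accesses stay whole during legalization. A companion sketch under the same assumptions (hypothetical test, not in the commit); without either attribute the store below would be split into byte stores:

; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem | FileCheck %s
define void @store_align1(ptr %p, i32 %v) {
; CHECK-LABEL: store_align1:
; CHECK: sw a1, 0(a0)
  store i32 %v, ptr %p, align 1
  ret void
}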

llvm/test/CodeGen/RISCV/memcpy-inline.ll (+4)

@@ -7,6 +7,10 @@
 ; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access \
 ; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
 
 ; ----------------------------------------------------------------------
 ; Fully unaligned cases
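
In reduced form, what the new RUN lines exercise (hypothetical function, assuming the usual inline expansion of small memcpy calls): with misaligned scalar access allowed, an 8-byte copy can be lowered as one doubleword load/store pair instead of byte-by-byte operations.

; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem | FileCheck %s
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
define void @copy8(ptr %dst, ptr %src) {
; CHECK-LABEL: copy8:
; CHECK: ld
; CHECK: sd
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 8, i1 false)
  ret void
}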

llvm/test/CodeGen/RISCV/memcpy.ll (+4)

@@ -7,6 +7,10 @@
 ; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access \
 ; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 
 @src = external dso_local global %struct.x

llvm/test/CodeGen/RISCV/memset-inline.ll (+4)

@@ -7,6 +7,10 @@
 ; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+fast-unaligned-access \
 ; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind

llvm/test/CodeGen/RISCV/pr56110.ll (+1)

@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=riscv32 | FileCheck %s
 ; RUN: llc < %s -mtriple=riscv32 -mattr=+fast-unaligned-access | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem | FileCheck %s
 
 define void @foo_set(ptr nocapture noundef %a, i32 noundef %v) {
 ; CHECK-LABEL: foo_set:

llvm/test/CodeGen/RISCV/unaligned-load-store.ll (+4)

@@ -7,6 +7,10 @@
 ; RUN:   | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s
 ; RUN: llc -mtriple=riscv64 -mattr=+fast-unaligned-access -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=ALL,FAST,RV64I-FAST %s
+; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s
+; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=ALL,FAST,RV64I-FAST %s
 
 ; A collection of cases showing codegen for unaligned loads and stores