Skip to content

Commit

Permalink
[Aarch64] Materialize immediates with 64-bit ORR + EOR if shorter (#6…
Browse files Browse the repository at this point in the history
…8287)

A number of useful constants can be encoded with a 64-bit ORR followed
by a 64-bit EOR, including all remaining repeated byte patterns, some
useful repeated 16-bit patterns, and some irregular masks. This patch
prioritizes that encoding over three- or four-instruction encodings.
Encoding with MOV + MOVK or ORR + MOVK is still preferred for fast
literal generation and readability respectively.

The method devises three candidate values, and checks if both Candidate
and (Imm ^ Candidate) are valid logical immediates. If so, Imm is
materialized with:

```
ORR Xd, XZR, #(Imm ^ Candidate)
EOR Xd, Xd, #(Candidate)
```

The method has been exhaustively tested to ensure it can solve all
possible values (excluding 0, ~0, and plain logical immediates, which
are handled earlier).
  • Loading branch information
dougallj authored Oct 10, 2023
1 parent a2b8c49 commit f74e9f8
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 0 deletions.
103 changes: 103 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,105 @@ static bool tryAndOfLogicalImmediates(uint64_t UImm,
return false;
}

// Try to build Imm as the exclusive-or of two 64-bit logical immediates. On
// success an ORR-immediate followed by an EOR-immediate is appended to Insn
// and true is returned; on failure Insn is not modified and false is returned.
//
// This form covers all remaining repeated byte patterns and many repeated
// 16-bit values without resorting to four instructions. Some irregular
// bitmasks are covered as well (though most of those would otherwise need
// only three instructions).
static bool tryEorOfLogicalImmediates(uint64_t Imm,
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
  // The wider of the two logical immediates repeats with the same period as
  // Imm itself. Keep halving the width for as long as the low half of the
  // current element matches the half directly above it; stop at width 2.
  unsigned WideWidth = 64;
  while (WideWidth > 2) {
    const unsigned HalfWidth = WideWidth / 2;
    const uint64_t HalfMask = (1ULL << HalfWidth) - 1;
    if (((Imm ^ (Imm >> HalfWidth)) & HalfMask) != 0)
      break;
    WideWidth = HalfWidth;
  }

  const uint64_t WideMask = ~uint64_t(0) >> (64 - WideWidth);

  // Mark, for every circular run of ones, the set bit whose lower neighbour
  // (bit 63 being the lower neighbour of bit 0) is clear.
  const uint64_t RunStarts = Imm & ~rotl<uint64_t>(Imm, 1);

  // The number of runs inside one wide element determines the narrower
  // repetition width. The EOR may leave the power-of-two run count of a
  // logical immediate unchanged, or move it up or down by one; both widths
  // may also coincide.
  const int RunsPerWideElt = popcount(RunStarts & WideMask);

  // Indexed by run count; yields the right-shift that turns the wide width
  // into the narrow width, or -1 when the count cannot be produced.
  static const int8_t RunCountToShift[32] = {
      -1, -1, 0,  1,  2,  2,  -1, 3,  3,  3,  -1, -1, -1, -1, -1, 4,
      4,  4,  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5,
  };

  const int WideToNarrowShift = RunCountToShift[RunsPerWideElt];

  // Bail out when the run count is neither a power of two nor one away from
  // a power of two.
  if (WideToNarrowShift < 0)
    return false;

  const unsigned NarrowWidth = WideWidth >> WideToNarrowShift;

  // Entry i has one bit set every (1 << i) bits.
  static const uint64_t FieldLsbTable[] = {
      0xffffffffffffffff, 0x5555555555555555, 0x1111111111111111,
      0x0101010101010101, 0x0001000100010001, 0x0000000100000001,
      0x0000000000000001,
  };

  // Table-driven stand-in for 0xffffffffffffffff / ((1 << NarrowWidth) - 1):
  // split the 64-bit value into NarrowWidth-wide fields and set the lowest
  // bit of each field.
  const uint64_t FieldLsbs = FieldLsbTable[countr_zero(NarrowWidth)];

  // Guess the number of ones per narrow element from the runs present in Imm.
  // Up to three runs are examined, because the EOR may have altered the
  // length of the first two.
  int Rotation = countr_zero(RunStarts);
  for (int Attempt = 0; Attempt != 3; ++Attempt) {
    // Bring the current run down to bit 0 and measure it.
    const unsigned RunLength = countr_one(rotr<uint64_t>(Imm, Rotation));

    // Candidate pair: NarrowImm replicates a run of RunLength ones in every
    // narrow field (rotated back into place) and WideImm is whatever remains.
    // NarrowImm is built to be encodable, but that is not guaranteed once
    // RunLength >= NarrowWidth, so both candidates are validated.
    const uint64_t NarrowImm =
        rotl<uint64_t>((FieldLsbs << RunLength) - FieldLsbs, Rotation);
    const uint64_t WideImm = Imm ^ NarrowImm;

    uint64_t WideEncoding = 0;
    uint64_t NarrowEncoding = 0;
    if (!AArch64_AM::processLogicalImmediate(WideImm, 64, WideEncoding) ||
        !AArch64_AM::processLogicalImmediate(NarrowImm, 64, NarrowEncoding)) {
      // Advance to the next run; bit 0 of the rotated mask is the run just
      // tried, so it is cleared before searching.
      Rotation +=
          countr_zero(rotr<uint64_t>(RunStarts, Rotation) & ~uint64_t(1));
      continue;
    }

    Insn.push_back({AArch64::ORRXri, 0, NarrowEncoding});
    Insn.push_back({AArch64::EORXri, 1, WideEncoding});
    return true;
  }

  return false;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
Expand Down Expand Up @@ -503,6 +602,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (tryAndOfLogicalImmediates(Imm, Insn))
return;

// Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
if (tryEorOfLogicalImmediates(UImm, Insn))
return;

// FIXME: Add more two-instruction sequences.

// Three instruction sequences.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
}
break;
case AArch64::ANDXri:
case AArch64::EORXri:
if (I->Op1 == 0) {
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
.add(MI.getOperand(0))
Expand Down
117 changes: 117 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-movi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -432,3 +432,120 @@ define i64 @orr_64_orr_8() nounwind {
; CHECK-NEXT: ret
ret i64 -5764607889538110806
}

; Expects a MOV of 0x5555555555555555 (2-bit repeating element) followed by an
; EOR whose mask repeats every 16 bits.
define i64 @orr_2_eor_16() nounwind {
; CHECK-LABEL: orr_2_eor_16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #6148914691236517205
; CHECK-NEXT: eor x0, x0, #0x3000300030003000
; CHECK-NEXT: ret
ret i64 7301853788297848149
}

; Expects a MOV of 0x5555555555555555 (2-bit repeating element) followed by an
; EOR whose mask repeats every 32 bits.
define i64 @orr_2_eor_32() nounwind {
; CHECK-LABEL: orr_2_eor_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #6148914691236517205
; CHECK-NEXT: eor x0, x0, #0x1fffc0001fffc0
; CHECK-NEXT: ret
ret i64 6145912199858268821
}

; Expects a MOV of 0x5555555555555555 (2-bit repeating element) followed by an
; EOR whose mask is a single run of ones within the 64-bit value.
define i64 @orr_2_eor_64() nounwind {
; CHECK-LABEL: orr_2_eor_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #6148914691236517205
; CHECK-NEXT: eor x0, x0, #0x1fffffffffc00
; CHECK-NEXT: ret
ret i64 6148727041252043093
}

; Expects a MOV of 0x2222222222222222 (4-bit repeating element) followed by an
; EOR whose mask repeats every 8 bits.
define i64 @orr_4_eor_8() nounwind {
; CHECK-LABEL: orr_4_eor_8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #2459565876494606882
; CHECK-NEXT: eor x0, x0, #0x8f8f8f8f8f8f8f8f
; CHECK-NEXT: ret
ret i64 12514849900987264429
}

; Expects a MOV of 0x4444444444444444 (4-bit repeating element) followed by an
; EOR whose mask repeats every 16 bits.
define i64 @orr_4_eor_16() nounwind {
; CHECK-LABEL: orr_4_eor_16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #4919131752989213764
; CHECK-NEXT: eor x0, x0, #0xf00ff00ff00ff00f
; CHECK-NEXT: ret
ret i64 12991675787320734795
}

; Expects a MOV of 0x4444444444444444 (4-bit repeating element) followed by an
; EOR whose mask repeats every 32 bits.
define i64 @orr_4_eor_32() nounwind {
; CHECK-LABEL: orr_4_eor_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #4919131752989213764
; CHECK-NEXT: eor x0, x0, #0x1ff800001ff80000
; CHECK-NEXT: ret
ret i64 6610233413460575300
}

; Expects a MOV of 0x1111111111111111 (4-bit repeating element) followed by an
; EOR whose mask is a single run of ones within the 64-bit value.
define i64 @orr_4_eor_64() nounwind {
; CHECK-LABEL: orr_4_eor_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #1229782938247303441
; CHECK-NEXT: eor x0, x0, #0xfff80000000
; CHECK-NEXT: ret
ret i64 1229798183233720593
}

; Expects a MOV of 0x3030303030303030 (8-bit repeating element) followed by an
; EOR whose mask repeats every 16 bits.
define i64 @orr_8_eor_16() nounwind {
; CHECK-LABEL: orr_8_eor_16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #3472328296227680304
; CHECK-NEXT: eor x0, x0, #0x1f801f801f801f80
; CHECK-NEXT: ret
ret i64 3436298949444513712
}

; Expects a MOV of 0x1010101010101010 (8-bit repeating element) followed by an
; EOR whose mask repeats every 32 bits.
define i64 @orr_8_eor_32() nounwind {
; CHECK-LABEL: orr_8_eor_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #1157442765409226768
; CHECK-NEXT: eor x0, x0, #0xffff8001ffff8001
; CHECK-NEXT: ret
ret i64 17289195901212921873
}

; Expects a MOV of 0x3030303030303030 (8-bit repeating element) followed by an
; EOR whose mask is a single run of ones within the 64-bit value.
define i64 @orr_8_eor_64() nounwind {
; CHECK-LABEL: orr_8_eor_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #3472328296227680304
; CHECK-NEXT: eor x0, x0, #0x3ffffffff00000
; CHECK-NEXT: ret
ret i64 3463215129921859632
}

; Expects a MOV of 0x0fe00fe00fe00fe0 (16-bit repeating element) followed by an
; EOR whose mask repeats every 32 bits.
define i64 @orr_16_eor_32() nounwind {
; CHECK-LABEL: orr_16_eor_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #1143931760365539296
; CHECK-NEXT: eor x0, x0, #0xffff0001ffff0001
; CHECK-NEXT: ret
ret i64 17302565756451360737
}

; Expects a MOV of 0x7fe07fe07fe07fe0 (16-bit repeating element) followed by an
; EOR whose mask is a single run of ones within the 64-bit value.
define i64 @orr_16_eor_64() nounwind {
; CHECK-LABEL: orr_16_eor_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #9214505439794855904
; CHECK-NEXT: eor x0, x0, #0xfe000
; CHECK-NEXT: ret
ret i64 9214505439795847136
}

; Expects a MOV of 0x000000f0000000f0 (32-bit repeating element) followed by an
; EOR whose mask is a single run of ones that wraps from bit 63 round to bit 0.
define i64 @orr_32_eor_64() nounwind {
; CHECK-LABEL: orr_32_eor_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #1030792151280
; CHECK-NEXT: eor x0, x0, #0xffff8000003fffff
; CHECK-NEXT: ret
ret i64 18446604367017541391
}

0 comments on commit f74e9f8

Please sign in to comment.