Skip to content

Commit 07e8b3a

Browse files
Rollup merge of #126555 - beetrees:f16-inline-asm-arm, r=Amanieu
Add `f16` inline ASM support for 32-bit ARM. Adds `f16` inline ASM support for 32-bit ARM. SIMD vector types are taken from [here](https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiesreturnbasetype=[float]&f:@navigationhierarchieselementbitsize=[16]&f:@navigationhierarchiesarchitectures=[A32]). Relevant issue: #125398. Tracking issue: #116909. `@rustbot` label +F-f16_and_f128
2 parents f1b0d54 + 753fb07 commit 07e8b3a

File tree

3 files changed

+365
-183
lines changed

3 files changed

+365
-183
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

+39
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,19 @@ fn llvm_fixup_input<'ll, 'tcx>(
10371037
value
10381038
}
10391039
}
1040+
(
1041+
InlineAsmRegClass::Arm(
1042+
ArmInlineAsmRegClass::dreg
1043+
| ArmInlineAsmRegClass::dreg_low8
1044+
| ArmInlineAsmRegClass::dreg_low16
1045+
| ArmInlineAsmRegClass::qreg
1046+
| ArmInlineAsmRegClass::qreg_low4
1047+
| ArmInlineAsmRegClass::qreg_low8,
1048+
),
1049+
Abi::Vector { element, count: count @ (4 | 8) },
1050+
) if element.primitive() == Primitive::Float(Float::F16) => {
1051+
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
1052+
}
10401053
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
10411054
match s.primitive() {
10421055
// MIPS only supports register-length arithmetics.
@@ -1158,6 +1171,19 @@ fn llvm_fixup_output<'ll, 'tcx>(
11581171
value
11591172
}
11601173
}
1174+
(
1175+
InlineAsmRegClass::Arm(
1176+
ArmInlineAsmRegClass::dreg
1177+
| ArmInlineAsmRegClass::dreg_low8
1178+
| ArmInlineAsmRegClass::dreg_low16
1179+
| ArmInlineAsmRegClass::qreg
1180+
| ArmInlineAsmRegClass::qreg_low4
1181+
| ArmInlineAsmRegClass::qreg_low8,
1182+
),
1183+
Abi::Vector { element, count: count @ (4 | 8) },
1184+
) if element.primitive() == Primitive::Float(Float::F16) => {
1185+
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
1186+
}
11611187
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
11621188
match s.primitive() {
11631189
// MIPS only supports register-length arithmetics.
@@ -1270,6 +1296,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12701296
layout.llvm_type(cx)
12711297
}
12721298
}
1299+
(
1300+
InlineAsmRegClass::Arm(
1301+
ArmInlineAsmRegClass::dreg
1302+
| ArmInlineAsmRegClass::dreg_low8
1303+
| ArmInlineAsmRegClass::dreg_low16
1304+
| ArmInlineAsmRegClass::qreg
1305+
| ArmInlineAsmRegClass::qreg_low4
1306+
| ArmInlineAsmRegClass::qreg_low8,
1307+
),
1308+
Abi::Vector { element, count: count @ (4 | 8) },
1309+
) if element.primitive() == Primitive::Float(Float::F16) => {
1310+
cx.type_vector(cx.type_i16(), count)
1311+
}
12731312
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
12741313
match s.primitive() {
12751314
// MIPS only supports register-length arithmetics.

compiler/rustc_target/src/asm/arm.rs

+7-5
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
4747
_arch: InlineAsmArch,
4848
) -> &'static [(InlineAsmType, Option<Symbol>)] {
4949
match self {
50-
Self::reg => types! { _: I8, I16, I32, F32; },
51-
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
50+
Self::reg => types! { _: I8, I16, I32, F16, F32; },
51+
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
5252
Self::dreg_low16 | Self::dreg_low8 => types! {
53-
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
53+
vfp2: I64, F64;
54+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5455
},
5556
Self::dreg => types! {
56-
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
57+
d32: I64, F64;
58+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5759
},
5860
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
59-
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
61+
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
6062
},
6163
}
6264
}

0 commit comments

Comments
 (0)