Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Copy 1-/2-element arrays as scalars, not vectors #116479

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions compiler/rustc_codegen_llvm/src/type_of.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,6 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {

// Vectors, even for non-power-of-two sizes, have the same layout as
// arrays but don't count as aggregate types
// While LLVM theoretically supports non-power-of-two sizes, and they
// often work fine, sometimes x86-isel deals with them horribly
// (see #115212) so for now only use power-of-two ones.
if let FieldsShape::Array { count, .. } = self.layout.fields()
&& count.is_power_of_two()
&& let element = self.field(cx, 0)
Expand All @@ -396,8 +393,19 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
// up suppressing vectorization as it introduces shifts when it
// extracts all the individual values.

let ety = element.llvm_type(cx);
return Some(cx.type_vector(ety, *count));
if *count <= 2 {
// For short arrays, use LLVM's array type which it will unpack
// out in optimizations to a scalar or pair of scalars.
// (Having types like `<1 x u8>` is silly.)
let ety = element.llvm_type(cx);
return Some(cx.type_array(ety, *count));
} else if count.is_power_of_two() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already inside if count.is_power_of_two(), did you mean to remove it from the outer condition?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! I think I'd intended to remove it from the outer, but with the tests not actually failing I guess I never did.

@rustbot author

// While LLVM theoretically supports non-power-of-two sizes, and they
// often work fine, sometimes x86-isel deals with them horribly
// (see #115212) so for now only use power-of-two ones.
let ety = element.llvm_type(cx);
return Some(cx.type_vector(ety, *count));
}
}

// FIXME: The above only handled integer arrays; surely more things
Expand Down
18 changes: 18 additions & 0 deletions tests/assembly/x86_64-array-pair-load-store-merge.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// assembly-output: emit-asm
// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel
// only-x86_64
// ignore-sgx

// This is emitted in a way that results in two loads and two stores in LLVM-IR.
// Confirm that that doesn't mean 4 instructions in assembly.

// CHECK-LABEL: array_copy_2_elements:
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK-NOT: byte
// CHECK-NOT: mov
// CHECK: mov{{.+}}, word ptr
// CHECK-NEXT: mov word ptr
// CHECK-NEXT: ret
*p = *a;
}
1 change: 1 addition & 0 deletions tests/codegen/array-clone.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// compile-flags: -O
// min-llvm-version: 17 (earlier versions have trouble merging the loads)

#![crate_type = "lib"]

Expand Down
22 changes: 22 additions & 0 deletions tests/codegen/array-codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,25 @@ pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) {
// CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1
*p = *a;
}

// CHECK-LABEL: @array_copy_1_element
#[no_mangle]
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
// CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load [1 x i8], ptr %a, align 1
// CHECK: store [1 x i8] %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load [1 x i8], ptr %[[LOCAL]], align 1
// CHECK: store [1 x i8] %[[TEMP2]], ptr %p, align 1
*p = *a;
}

// CHECK-LABEL: @array_copy_2_elements
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load [2 x i8], ptr %a, align 1
// CHECK: store [2 x i8] %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load [2 x i8], ptr %[[LOCAL]], align 1
// CHECK: store [2 x i8] %[[TEMP2]], ptr %p, align 1
*p = *a;
}
35 changes: 35 additions & 0 deletions tests/codegen/array-optimized.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// compile-flags: -O

#![crate_type = "lib"]

// CHECK-LABEL: @array_copy_1_element
#[no_mangle]
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP:.+]] = load i8, ptr %a, align 1
// CHECK: store i8 %[[TEMP]], ptr %p, align 1
// CHECK: ret
*p = *a;
}

// CHECK-LABEL: @array_copy_2_elements
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1
// CHECK: %[[TEMP2:.+]] = load i8, ptr
// CHECK: store i8 %[[TEMP1]], ptr %p, align 1
// CHECK: store i8 %[[TEMP2]], ptr
// CHECK: ret
*p = *a;
}

// CHECK-LABEL: @array_copy_4_elements
#[no_mangle]
pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
// CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1
// CHECK: ret
*p = *a;
}
Loading