Skip to content

Commit

Permalink
Auto merge of #116510 - scottmcm:no-1-simd-v2, r=compiler-errors
Browse files: browse the repository at this point in the history
Copy 1-element arrays as scalars, not vectors

For `[T; 1]` it's silly to copy as `<1 x T>` when we can just copy as `T`.

Inspired by #101210 (comment), which pointed out that `Option<[u8; 1]>` was codegenning worse than `Option<u8>`.

(I'm not sure *why* LLVM doesn't optimize out `<1 x u8>`, but might as well just not emit it in the first place in this codepath.)

---

I think I bit off too much in #116479; let me try just the scalar case first.

r? `@ghost`
  • Loading branch information
bors committed Oct 12, 2023
2 parents 672fad9 + f5cdd3e commit df4379b
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 1 deletion.
7 changes: 6 additions & 1 deletion compiler/rustc_codegen_llvm/src/type_of.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,12 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
// extracts all the individual values.

let ety = element.llvm_type(cx);
return Some(cx.type_vector(ety, *count));
if *count == 1 {
// Emitting `<1 x T>` would be silly; just use the scalar.
return Some(ety);
} else {
return Some(cx.type_vector(ety, *count));
}
}

// FIXME: The above only handled integer arrays; surely more things
Expand Down
20 changes: 20 additions & 0 deletions tests/assembly/x86_64-array-pair-load-store-merge.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// assembly-output: emit-asm
// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel
// only-x86_64
// ignore-sgx
// ignore-macos (manipulates rsp too)

// Depending on various codegen choices, this might end up copying
// a `<2 x i8>`, an `i16`, or two `i8`s.
// Regardless of those choices, make sure the instructions use (2-byte) words.

// Copies the two-byte array behind `a` into `*p`.
// The FileCheck directives in the body require that nothing byte-sized and no
// other `mov` shows up before the first word-sized load, and that this load is
// followed immediately by a word-sized store and then `ret`.
// CHECK-LABEL: array_copy_2_elements:
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK-NOT: byte
// CHECK-NOT: mov
// CHECK: mov{{.+}}, word ptr
// CHECK-NEXT: mov word ptr
// CHECK-NEXT: ret
// Whole-array copy through the two references; this is the operation under test.
*p = *a;
}
22 changes: 22 additions & 0 deletions tests/codegen/array-codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,25 @@ pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) {
// CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1
*p = *a;
}

// Copies the one-element array behind `a` into `*p`.
// The expected IR routes the value through a `[1 x i8]` stack slot, and every
// load and store of it is typed as the scalar `i8` (not a one-lane vector).
// CHECK-LABEL: @array_copy_1_element
#[no_mangle]
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
// CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1
// CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1
// CHECK: store i8 %[[TEMP2]], ptr %p, align 1
// Whole-array copy through the two references; this is the operation under test.
*p = *a;
}

// Copies the two-element array behind `a` into `*p`.
// The expected IR routes the value through a `[2 x i8]` stack slot, and every
// load and store of it is typed as the vector `<2 x i8>`.
// CHECK-LABEL: @array_copy_2_elements
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1
// CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1
// CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1
// Whole-array copy through the two references; this is the operation under test.
*p = *a;
}
33 changes: 33 additions & 0 deletions tests/codegen/array-optimized.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// compile-flags: -O

#![crate_type = "lib"]

// Copies the one-element array behind `a` into `*p`, compiled with `-O`.
// The expected IR has no stack slot before the copy: one scalar `i8` load
// from `%a`, one `i8` store to `%p`, then the return.
// CHECK-LABEL: @array_copy_1_element
#[no_mangle]
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP:.+]] = load i8, ptr %a, align 1
// CHECK: store i8 %[[TEMP]], ptr %p, align 1
// CHECK: ret
// Whole-array copy through the two references; this is the operation under test.
*p = *a;
}

// Copies the two-element array behind `a` into `*p`, compiled with `-O`.
// The expected IR has no stack slot before the copy: one `<2 x i8>` load
// from `%a`, one `<2 x i8>` store to `%p`, then the return.
// CHECK-LABEL: @array_copy_2_elements
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP:.+]] = load <2 x i8>, ptr %a, align 1
// CHECK: store <2 x i8> %[[TEMP]], ptr %p, align 1
// CHECK: ret
// Whole-array copy through the two references; this is the operation under test.
*p = *a;
}

// Copies the four-element array behind `a` into `*p`, compiled with `-O`.
// The expected IR has no stack slot before the copy: one `<4 x i8>` load
// from `%a`, one `<4 x i8>` store to `%p`, then the return.
// CHECK-LABEL: @array_copy_4_elements
#[no_mangle]
pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
// CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1
// CHECK: ret
// Whole-array copy through the two references; this is the operation under test.
*p = *a;
}

0 comments on commit df4379b

Please sign in to comment.