From ae9cec58394d7a38aac17e2873d213d5fcd85f7a Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Fri, 6 Oct 2023 01:17:09 -0700 Subject: [PATCH 1/2] Copy 1-element arrays as scalars, not vectors For `[T; 1]` it's silly to copy as `<1 x T>` when we can just copy as `T`. --- compiler/rustc_codegen_llvm/src/type_of.rs | 7 +++- .../x86_64-array-pair-load-store-merge.rs | 19 +++++++++++ tests/codegen/array-codegen.rs | 22 +++++++++++++ tests/codegen/array-optimized.rs | 33 +++++++++++++++++++ 4 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tests/assembly/x86_64-array-pair-load-store-merge.rs create mode 100644 tests/codegen/array-optimized.rs diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs index dcc62d314fff4..fd4c9572af2fe 100644 --- a/compiler/rustc_codegen_llvm/src/type_of.rs +++ b/compiler/rustc_codegen_llvm/src/type_of.rs @@ -397,7 +397,12 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> { // extracts all the individual values. let ety = element.llvm_type(cx); - return Some(cx.type_vector(ety, *count)); + if *count == 1 { + // Emitting `<1 x T>` would be silly; just use the scalar. + return Some(ety); + } else { + return Some(cx.type_vector(ety, *count)); + } } // FIXME: The above only handled integer arrays; surely more things diff --git a/tests/assembly/x86_64-array-pair-load-store-merge.rs b/tests/assembly/x86_64-array-pair-load-store-merge.rs new file mode 100644 index 0000000000000..4a8e40f85300e --- /dev/null +++ b/tests/assembly/x86_64-array-pair-load-store-merge.rs @@ -0,0 +1,19 @@ +// assembly-output: emit-asm +// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel +// only-x86_64 +// ignore-sgx + +// Depending on various codegen choices, this might end up copying +// a `<2 x i8>`, an `i16`, or two `i8`s. +// Regardless of those choices, make sure the instructions use (2-byte) words. + +// CHECK-LABEL: array_copy_2_elements: +#[no_mangle] +pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) { + // CHECK-NOT: byte + // CHECK-NOT: mov + // CHECK: mov{{.+}}, word ptr + // CHECK-NEXT: mov word ptr + // CHECK-NEXT: ret + *p = *a; +} diff --git a/tests/codegen/array-codegen.rs b/tests/codegen/array-codegen.rs index ba0d444f97e39..bf5ae74679bb2 100644 --- a/tests/codegen/array-codegen.rs +++ b/tests/codegen/array-codegen.rs @@ -32,3 +32,25 @@ pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) { // CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1 *p = *a; } + +// CHECK-LABEL: @array_copy_1_element +#[no_mangle] +pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) { + // CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1 + // CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1 + // CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1 + // CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1 + // CHECK: store i8 %[[TEMP2]], ptr %p, align 1 + *p = *a; +} + +// CHECK-LABEL: @array_copy_2_elements +#[no_mangle] +pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) { + // CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1 + // CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1 + // CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1 + // CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1 + // CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1 + *p = *a; +} diff --git a/tests/codegen/array-optimized.rs b/tests/codegen/array-optimized.rs new file mode 100644 index 0000000000000..27448fdcfade2 --- /dev/null +++ b/tests/codegen/array-optimized.rs @@ -0,0 +1,33 @@ +// compile-flags: -O + +#![crate_type = "lib"] + +// CHECK-LABEL: @array_copy_1_element +#[no_mangle] +pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = load i8, ptr %a, align 1 + // CHECK: store i8 %[[TEMP]], ptr %p, align 1 + // CHECK: ret + *p = *a; +} + +// CHECK-LABEL: @array_copy_2_elements +#[no_mangle] +pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = load <2 x i8>, ptr %a, align 1 + // CHECK: store <2 x i8> %[[TEMP]], ptr %p, align 1 + // CHECK: ret + *p = *a; +} + +// CHECK-LABEL: @array_copy_4_elements +#[no_mangle] +pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1 + // CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1 + // CHECK: ret + *p = *a; +} From f5cdd3e130dce081a72735eeeb0eca283912f48b Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Thu, 12 Oct 2023 11:03:02 -0700 Subject: [PATCH 2/2] Exclude apple from assembly test --- tests/assembly/x86_64-array-pair-load-store-merge.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/assembly/x86_64-array-pair-load-store-merge.rs b/tests/assembly/x86_64-array-pair-load-store-merge.rs index 4a8e40f85300e..55e317e91bf0b 100644 --- a/tests/assembly/x86_64-array-pair-load-store-merge.rs +++ b/tests/assembly/x86_64-array-pair-load-store-merge.rs @@ -2,6 +2,7 @@ // compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel // only-x86_64 // ignore-sgx +// ignore-macos (manipulates rsp too) // Depending on various codegen choices, this might end up copying // a `<2 x i8>`, an `i16`, or two `i8`s.