Skip to content

Commit f4a2665

Browse files
committed
Auto merge of #54592 - GabrielMajeri:no-plt, r=<try>
Support for disabling PLT for better function call performance

This PR gives `rustc` the ability to skip the PLT when generating function calls into shared libraries. This can improve performance by reducing branch indirection.

AFAIK, the only advantage of using the PLT is to allow for ELF lazy binding. However, since Rust already [enables full relro for security](#43170), lazy binding was disabled anyway.

This is a little-known feature which is supported by [GCC](https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html) and [Clang](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-fplt) as `-fno-plt` (some Linux distros [enable it by default](https://git.archlinux.org/svntogit/packages.git/tree/trunk/makepkg.conf?h=packages/pacman#n40) for all builds).

Implementation inspired by [this patch](https://reviews.llvm.org/D39079#change-YvkpNDlMs_LT) which adds `-fno-plt` support to Clang.

## Performance

I didn't run a lot of benchmarks, but these are the results on my machine for a `clap` [benchmark](https://github.com/clap-rs/clap/blob/master/benches/05_ripgrep.rs):

```
 name              control ns/iter  no-plt ns/iter  diff ns/iter  diff %  speedup
 build_app_long    11,097           10,733          -364          -3.28%  x 1.03
 build_app_short   11,089           10,742          -347          -3.13%  x 1.03
 build_help_long   186,835          182,713         -4,122        -2.21%  x 1.02
 build_help_short  80,949           78,455          -2,494        -3.08%  x 1.03
 parse_clean       12,385           12,044          -341          -2.75%  x 1.03
 parse_complex     19,438           19,017          -421          -2.17%  x 1.02
 parse_lots        431,493          421,421         -10,072       -2.33%  x 1.02
```

A small performance improvement across the board, with no downsides. It's likely binaries which make a lot of function calls into dynamic libraries could see even more improvements. [This comment](https://patchwork.ozlabs.org/patch/468993/#1028255) suggests that, in some cases, `-fno-plt` could improve PIC/PIE code performance by 10%.
## Security benefits

**Bonus**: some of the speculative execution attacks rely on the PLT; by disabling it we reduce a big attack surface and reduce the need for [`retpoline`](https://reviews.llvm.org/D41723).

## Remaining PLT calls

The compiled binaries still have plenty of PLT calls, coming from C/C++ libraries. Building dependencies with `CFLAGS=-fno-plt CXXFLAGS=-fno-plt` removes them.
2 parents 6622172 + 76c4031 commit f4a2665

File tree

10 files changed

+76
-7
lines changed

10 files changed

+76
-7
lines changed

Diff for: src/librustc/session/config.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,10 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
13871387
"output a json file with profiler results"),
13881388
emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED],
13891389
"emits a section containing stack size metadata"),
1390+
plt: Option<bool> = (None, parse_opt_bool, [TRACKED],
1391+
"whether to use the PLT when calling into shared libraries;
1392+
only has effect for PIC code on systems with ELF binaries
1393+
(default: PLT is disabled if full relro is enabled)"),
13901394
}
13911395

13921396
pub fn default_lib_output() -> CrateType {

Diff for: src/librustc/session/mod.rs

+18-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ use syntax::feature_gate::AttributeType;
4141
use syntax_pos::{MultiSpan, Span};
4242
use util::profiling::SelfProfiler;
4343

44-
use rustc_target::spec::PanicStrategy;
45-
use rustc_target::spec::{Target, TargetTriple};
44+
use rustc_target::spec::{PanicStrategy, RelroLevel, Target, TargetTriple};
4645
use rustc_data_structures::flock;
4746
use jobserver::Client;
4847

@@ -981,6 +980,23 @@ impl Session {
981980
pub fn edition(&self) -> Edition {
982981
self.opts.edition
983982
}
983+
984+
/// True if we cannot skip the PLT for shared library calls.
985+
pub fn needs_plt(&self) -> bool {
986+
let dbg_opts = &self.opts.debugging_opts;
987+
988+
let relro_level = dbg_opts.relro_level
989+
.unwrap_or(self.target.target.options.relro_level);
990+
991+
// Only enable this optimization by default if full relro is also enabled.
992+
// In this case, lazy binding was already unavailable, so nothing is lost.
993+
// This also ensures `-Wl,-z,now` is supported by the linker.
994+
let full_relro = RelroLevel::Full == relro_level;
995+
996+
// If the user didn't explicitly force us to use the PLT,
997+
// then try to skip it where possible.
998+
dbg_opts.plt.unwrap_or(!full_relro)
999+
}
9841000
}
9851001

9861002
pub fn build_session(

Diff for: src/librustc_codegen_llvm/attributes.rs

+9
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,15 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
138138
target_cpu.as_c_str());
139139
}
140140

141+
/// Sets the `NonLazyBind` LLVM attribute on a given function,
142+
/// assuming the codegen options allow skipping the PLT.
143+
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
144+
// Don't generate calls through PLT if it's not necessary
145+
if !sess.needs_plt() {
146+
Attribute::NonLazyBind.apply_llfn(Function, llfn);
147+
}
148+
}
149+
141150
/// Composite function which sets LLVM attributes for function depending on its AST (#[attribute])
142151
/// attributes.
143152
pub fn from_fn_attrs(

Diff for: src/librustc_codegen_llvm/context.rs

+7
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,13 @@ pub unsafe fn create_module(
208208
llvm::LLVMRustSetModulePIELevel(llmod);
209209
}
210210

211+
// If skipping the PLT is enabled, we need to add some module metadata
212+
// to ensure intrinsic calls don't use it.
213+
if !sess.needs_plt() {
214+
let avoid_plt = "RtLibUseGOT\0".as_ptr() as *const _;
215+
llvm::LLVMRustAddModuleFlag(llmod, avoid_plt, 1);
216+
}
217+
211218
llmod
212219
}
213220

Diff for: src/librustc_codegen_llvm/declare.rs

+2
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ fn declare_raw_fn(
104104
attributes::unwind(llfn, false);
105105
}
106106

107+
attributes::non_lazy_bind(cx.sess(), llfn);
108+
107109
llfn
108110
}
109111

Diff for: src/librustc_codegen_llvm/llvm/ffi.rs

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ pub enum Attribute {
122122
SanitizeThread = 20,
123123
SanitizeAddress = 21,
124124
SanitizeMemory = 22,
125+
NonLazyBind = 23,
125126
}
126127

127128
/// LLVMIntPredicate

Diff for: src/rustllvm/RustWrapper.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ static Attribute::AttrKind fromRust(LLVMRustAttribute Kind) {
178178
return Attribute::SanitizeAddress;
179179
case SanitizeMemory:
180180
return Attribute::SanitizeMemory;
181+
case NonLazyBind:
182+
return Attribute::NonLazyBind;
181183
}
182184
report_fatal_error("bad AttributeKind");
183185
}

Diff for: src/rustllvm/rustllvm.h

+1
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ enum LLVMRustAttribute {
9797
SanitizeThread = 20,
9898
SanitizeAddress = 21,
9999
SanitizeMemory = 22,
100+
NonLazyBind = 23,
100101
};
101102

102103
typedef struct OpaqueRustString *RustStringRef;

Diff for: src/test/codegen/naked-functions.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#![crate_type = "lib"]
1616
#![feature(naked_functions)]
1717

18-
// CHECK: Function Attrs: naked uwtable
18+
// CHECK: Function Attrs: naked
1919
// CHECK-NEXT: define void @naked_empty()
2020
#[no_mangle]
2121
#[naked]
@@ -24,7 +24,7 @@ pub fn naked_empty() {
2424
// CHECK-NEXT: ret void
2525
}
2626

27-
// CHECK: Function Attrs: naked uwtable
27+
// CHECK: Function Attrs: naked
2828
#[no_mangle]
2929
#[naked]
3030
// CHECK-NEXT: define void @naked_with_args(i{{[0-9]+}})
@@ -35,7 +35,7 @@ pub fn naked_with_args(a: isize) {
3535
// CHECK: ret void
3636
}
3737

38-
// CHECK: Function Attrs: naked uwtable
38+
// CHECK: Function Attrs: naked
3939
// CHECK-NEXT: define i{{[0-9]+}} @naked_with_return()
4040
#[no_mangle]
4141
#[naked]
@@ -45,7 +45,7 @@ pub fn naked_with_return() -> isize {
4545
0
4646
}
4747

48-
// CHECK: Function Attrs: naked uwtable
48+
// CHECK: Function Attrs: naked
4949
// CHECK-NEXT: define i{{[0-9]+}} @naked_with_args_and_return(i{{[0-9]+}})
5050
#[no_mangle]
5151
#[naked]
@@ -57,7 +57,7 @@ pub fn naked_with_args_and_return(a: isize) -> isize {
5757
a
5858
}
5959

60-
// CHECK: Function Attrs: naked uwtable
60+
// CHECK: Function Attrs: naked
6161
// CHECK-NEXT: define void @naked_recursive()
6262
#[no_mangle]
6363
#[naked]

Diff for: src/test/codegen/no-plt.rs

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// compile-flags: -C relocation-model=pic -Z plt=no
12+
13+
#![crate_type = "lib"]
14+
15+
// We need a function which is normally called through the PLT.
16+
extern "C" {
17+
// CHECK: Function Attrs: nounwind nonlazybind
18+
fn getenv(name: *const u8) -> *mut u8;
19+
}
20+
21+
// Ensure the function gets referenced.
22+
pub unsafe fn call_through_plt() -> *mut u8 {
23+
getenv(b"\0".as_ptr())
24+
}
25+
26+
// Ensure intrinsics also skip the PLT
27+
// CHECK: !"RtLibUseGOT"

0 commit comments

Comments
 (0)