Skip to content

cg_llvm: Use a type-safe helper to cast &str and &[u8] to *const c_char #132260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions compiler/rustc_codegen_llvm/src/allocator.rs
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ use rustc_middle::bug;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::{DebugInfo, OomStrategy};

use crate::common::AsCCharPtr;
use crate::llvm::{self, Context, False, Module, True, Type};
use crate::{ModuleLlvm, attributes, debuginfo};

@@ -76,14 +77,14 @@ pub(crate) unsafe fn codegen(
unsafe {
// __rust_alloc_error_handler_should_panic
let name = OomStrategy::SYMBOL;
let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_ptr().cast(), name.len(), i8);
let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_c_char_ptr(), name.len(), i8);
llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
let val = tcx.sess.opts.unstable_opts.oom.should_panic();
let llval = llvm::LLVMConstInt(i8, val as u64, False);
llvm::LLVMSetInitializer(ll_g, llval);

let name = NO_ALLOC_SHIM_IS_UNSTABLE;
let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_ptr().cast(), name.len(), i8);
let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_c_char_ptr(), name.len(), i8);
llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
let llval = llvm::LLVMConstInt(i8, 0, False);
llvm::LLVMSetInitializer(ll_g, llval);
@@ -115,7 +116,7 @@ fn create_wrapper_function(
);
let llfn = llvm::LLVMRustGetOrInsertFunction(
llmod,
from_name.as_ptr().cast(),
from_name.as_c_char_ptr(),
from_name.len(),
ty,
);
@@ -137,7 +138,7 @@ fn create_wrapper_function(
}

let callee =
llvm::LLVMRustGetOrInsertFunction(llmod, to_name.as_ptr().cast(), to_name.len(), ty);
llvm::LLVMRustGetOrInsertFunction(llmod, to_name.as_c_char_ptr(), to_name.len(), ty);
if let Some(no_return) = no_return {
// -> ! DIFlagNoReturn
attributes::apply_to_llfn(callee, llvm::AttributePlace::Function, &[no_return]);
10 changes: 5 additions & 5 deletions compiler/rustc_codegen_llvm/src/asm.rs
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ use smallvec::SmallVec;
use tracing::debug;

use crate::builder::Builder;
use crate::common::Funclet;
use crate::common::{AsCCharPtr, Funclet};
use crate::context::CodegenCx;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
@@ -420,7 +420,7 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
unsafe {
llvm::LLVMAppendModuleInlineAsm(
self.llmod,
template_str.as_ptr().cast(),
template_str.as_c_char_ptr(),
template_str.len(),
);
}
@@ -458,14 +458,14 @@ pub(crate) fn inline_asm_call<'ll>(
let fty = bx.cx.type_func(&argtys, output);
unsafe {
// Ask LLVM to verify that the constraints are well-formed.
let constraints_ok = llvm::LLVMRustInlineAsmVerify(fty, cons.as_ptr().cast(), cons.len());
let constraints_ok = llvm::LLVMRustInlineAsmVerify(fty, cons.as_c_char_ptr(), cons.len());
debug!("constraint verification result: {:?}", constraints_ok);
if constraints_ok {
let v = llvm::LLVMRustInlineAsm(
fty,
asm.as_ptr().cast(),
asm.as_c_char_ptr(),
asm.len(),
cons.as_ptr().cast(),
cons.as_c_char_ptr(),
cons.len(),
volatile,
alignstack,
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_llvm/src/back/lto.rs
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@ use tracing::{debug, info};
use crate::back::write::{
self, CodegenDiagnosticsStage, DiagnosticHandlers, bitcode_section_name, save_temp_bitcode,
};
use crate::common::AsCCharPtr;
use crate::errors::{
DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, LtoProcMacro,
};
@@ -604,7 +605,7 @@ pub(crate) fn run_pass_manager(
unsafe {
if !llvm::LLVMRustHasModuleFlag(
module.module_llvm.llmod(),
"LTOPostLink".as_ptr().cast(),
"LTOPostLink".as_c_char_ptr(),
11,
) {
llvm::LLVMRustAddModuleFlagU32(
11 changes: 6 additions & 5 deletions compiler/rustc_codegen_llvm/src/back/write.rs
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ use crate::back::owned_target_machine::OwnedTargetMachine;
use crate::back::profiling::{
LlvmSelfProfiler, selfprofile_after_pass_callback, selfprofile_before_pass_callback,
};
use crate::common::AsCCharPtr;
use crate::errors::{
CopyBitcode, FromLlvmDiag, FromLlvmOptimizationDiag, LlvmError, UnknownCompression,
WithLlvmError, WriteBytecode,
@@ -596,9 +597,9 @@ pub(crate) unsafe fn llvm_optimize(
llvm_selfprofiler,
selfprofile_before_pass_callback,
selfprofile_after_pass_callback,
extra_passes.as_ptr().cast(),
extra_passes.as_c_char_ptr(),
extra_passes.len(),
llvm_plugins.as_ptr().cast(),
llvm_plugins.as_c_char_ptr(),
llvm_plugins.len(),
)
};
@@ -1042,7 +1043,7 @@ unsafe fn embed_bitcode(
llvm::LLVMSetInitializer(llglobal, llconst);

let section = bitcode_section_name(cgcx);
llvm::LLVMSetSection(llglobal, section.as_ptr().cast());
llvm::LLVMSetSection(llglobal, section.as_c_char_ptr());
Comment on lines 1045 to +1046
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LLVMSetSection accepts const char* without length, where section is &str.

Ahh, bitcode_section_name have fixme, so it can actually return cstr literals.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, and all of the calls to llvm::LLVMSetSection should really be calling the safe wrapper llvm::set_section (which takes &CStr), which would enforce type-safety.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
llvm::LLVMSetGlobalConstant(llglobal, llvm::True);

@@ -1066,9 +1067,9 @@ unsafe fn embed_bitcode(
// We need custom section flags, so emit module-level inline assembly.
let section_flags = if cgcx.is_pe_coff { "n" } else { "e" };
let asm = create_section_with_flags_asm(".llvmbc", section_flags, bitcode);
llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_ptr().cast(), asm.len());
llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_c_char_ptr(), asm.len());
let asm = create_section_with_flags_asm(".llvmcmd", section_flags, cmdline.as_bytes());
llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_ptr().cast(), asm.len());
llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_c_char_ptr(), asm.len());
}
}
}
18 changes: 18 additions & 0 deletions compiler/rustc_codegen_llvm/src/common.rs
Original file line number Diff line number Diff line change
@@ -392,3 +392,21 @@ pub(crate) fn get_dllimport<'tcx>(
tcx.native_library(id)
.and_then(|lib| lib.dll_imports.iter().find(|di| di.name.as_str() == name))
}

/// Extension trait for explicit casts to `*const c_char`.
pub(crate) trait AsCCharPtr {
/// Equivalent to `self.as_ptr().cast()`, but only casts to `*const c_char`.
fn as_c_char_ptr(&self) -> *const c_char;
}

impl AsCCharPtr for str {
fn as_c_char_ptr(&self) -> *const c_char {
self.as_ptr().cast()
}
}
Comment on lines +402 to +406
Copy link
Contributor

@klensy klensy Oct 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this wrong? &str can contain null bytes and casting it to c_char will lead to funny results in that case. It works for now because all used strings are well behaved and do not contain nulls.

Same with [u8].

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in #132319 it broken further: c literals (for which it's ok to take *c_char), you changed to &str, which don't have guarantees of cstr.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A pointer to c_char doesn't have to be nul-terminated. We have plenty of code that passes strings over FFI as pointer/length pairs.

As long as it gets reassembled with StringRef(Ptr, Len) on the C++ side, it's fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And in #132319, all of the relevant FFI functions have been adjusted to take pointer/length pairs instead of nul-terminated strings, so there's no longer any need for those strings to be c"" literals.

Copy link
Contributor

@klensy klensy Oct 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but fragile. Can't find if it ok for StringRef to have nulls inside, if it later pass them as c strings, things will blow.


impl AsCCharPtr for [u8] {
fn as_c_char_ptr(&self) -> *const c_char {
self.as_ptr().cast()
}
}
8 changes: 4 additions & 4 deletions compiler/rustc_codegen_llvm/src/consts.rs
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ use rustc_target::abi::{
};
use tracing::{debug, instrument, trace};

use crate::common::CodegenCx;
use crate::common::{AsCCharPtr, CodegenCx};
use crate::errors::{
InvalidMinimumAlignmentNotPowerOfTwo, InvalidMinimumAlignmentTooLarge, SymbolAlreadyDefined,
};
@@ -400,7 +400,7 @@ impl<'ll> CodegenCx<'ll, '_> {

let new_g = llvm::LLVMRustGetOrInsertGlobal(
self.llmod,
name.as_ptr().cast(),
name.as_c_char_ptr(),
name.len(),
val_llty,
);
@@ -451,7 +451,7 @@ impl<'ll> CodegenCx<'ll, '_> {
if let Some(section) = attrs.link_section {
let section = llvm::LLVMMDStringInContext2(
self.llcx,
section.as_str().as_ptr().cast(),
section.as_str().as_c_char_ptr(),
section.as_str().len(),
);
assert!(alloc.provenance().ptrs().is_empty());
@@ -462,7 +462,7 @@ impl<'ll> CodegenCx<'ll, '_> {
let bytes =
alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len());
let alloc =
llvm::LLVMMDStringInContext2(self.llcx, bytes.as_ptr().cast(), bytes.len());
llvm::LLVMMDStringInContext2(self.llcx, bytes.as_c_char_ptr(), bytes.len());
let data = [section, alloc];
let meta = llvm::LLVMMDNodeInContext2(self.llcx, data.as_ptr(), data.len());
let val = llvm::LLVMMetadataAsValue(self.llcx, meta);
11 changes: 6 additions & 5 deletions compiler/rustc_codegen_llvm/src/context.rs
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@ use smallvec::SmallVec;

use crate::back::write::to_llvm_code_model;
use crate::callee::get_fn;
use crate::common::AsCCharPtr;
use crate::debuginfo::metadata::apply_vcall_visibility_metadata;
use crate::llvm::{Metadata, MetadataType};
use crate::type_::Type;
@@ -231,7 +232,7 @@ pub(crate) unsafe fn create_module<'ll>(
// If we're normalizing integers with CFI, ensure LLVM generated functions do the same.
// See https://github.com/llvm/llvm-project/pull/104826
if sess.is_sanitizer_cfi_normalize_integers_enabled() {
let cfi_normalize_integers = c"cfi-normalize-integers".as_ptr().cast();
let cfi_normalize_integers = c"cfi-normalize-integers".as_ptr();
unsafe {
llvm::LLVMRustAddModuleFlagU32(
llmod,
@@ -268,7 +269,7 @@ pub(crate) unsafe fn create_module<'ll>(
let pfe =
PatchableFunctionEntry::from_config(sess.opts.unstable_opts.patchable_function_entry);
if pfe.prefix() > 0 {
let kcfi_offset = c"kcfi-offset".as_ptr().cast();
let kcfi_offset = c"kcfi-offset".as_ptr();
unsafe {
llvm::LLVMRustAddModuleFlagU32(
llmod,
@@ -429,7 +430,7 @@ pub(crate) unsafe fn create_module<'ll>(
let name_metadata = unsafe {
llvm::LLVMMDStringInContext2(
llcx,
rustc_producer.as_ptr().cast(),
rustc_producer.as_c_char_ptr(),
rustc_producer.as_bytes().len(),
)
};
@@ -453,7 +454,7 @@ pub(crate) unsafe fn create_module<'ll>(
llmod,
llvm::LLVMModFlagBehavior::Error,
c"target-abi".as_ptr(),
llvm_abiname.as_ptr().cast(),
llvm_abiname.as_c_char_ptr(),
llvm_abiname.len(),
);
}
@@ -474,7 +475,7 @@ pub(crate) unsafe fn create_module<'ll>(
// We already checked this during option parsing
_ => unreachable!(),
};
unsafe { llvm::LLVMRustAddModuleFlagU32(llmod, behavior, key.as_ptr().cast(), *value) }
unsafe { llvm::LLVMRustAddModuleFlagU32(llmod, behavior, key.as_c_char_ptr(), *value) }
}

llmod
8 changes: 4 additions & 4 deletions compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@ use rustc_target::abi::Size;
use tracing::{debug, instrument};

use crate::builder::Builder;
use crate::common::CodegenCx;
use crate::common::{AsCCharPtr, CodegenCx};
use crate::coverageinfo::map_data::FunctionCoverageCollector;
use crate::llvm;

@@ -236,7 +236,7 @@ fn create_pgo_func_name_var<'ll, 'tcx>(
unsafe {
llvm::LLVMRustCoverageCreatePGOFuncNameVar(
llfn,
mangled_fn_name.as_ptr().cast(),
mangled_fn_name.as_c_char_ptr(),
mangled_fn_name.len(),
)
}
@@ -248,7 +248,7 @@ pub(crate) fn write_filenames_section_to_buffer<'a>(
) {
let (pointers, lengths) = filenames
.into_iter()
.map(|s: &str| (s.as_ptr().cast(), s.len()))
.map(|s: &str| (s.as_c_char_ptr(), s.len()))
.unzip::<_, _, Vec<_>, Vec<_>>();

unsafe {
@@ -291,7 +291,7 @@ pub(crate) fn write_mapping_to_buffer(
}

pub(crate) fn hash_bytes(bytes: &[u8]) -> u64 {
unsafe { llvm::LLVMRustCoverageHashByteArray(bytes.as_ptr().cast(), bytes.len()) }
unsafe { llvm::LLVMRustCoverageHashByteArray(bytes.as_c_char_ptr(), bytes.len()) }
}

pub(crate) fn mapping_version() -> u32 {
Loading