Skip to content

Commit

Permalink
Add support for split dyld shared cache.
Browse files Browse the repository at this point in the history
Fixes gimli-rs#358.

This adds support for the dyld cache format that is used on macOS 12 and
iOS 15. The cache is split over multiple files, with a "root" cache
and one or more subcaches, for example:

```
/System/Library/dyld/dyld_shared_cache_x86_64
/System/Library/dyld/dyld_shared_cache_x86_64.1
/System/Library/dyld/dyld_shared_cache_x86_64.2
/System/Library/dyld/dyld_shared_cache_x86_64.3
```

Each file has a set of mappings. For each image in the cache, the
segments of that image can be distributed over multiple files: For
example, on macOS 12.0.1, the image for libsystem_malloc.dylib for the
arm64e architecture has its __TEXT segment in the root cache and the
__LINKEDIT segment in the .1 subcache - there's a single __LINKEDIT
segment which is shared between all images across both files. The
remaining libsystem_malloc.dylib segments are in the same file as the
__TEXT segment.

The DyldCache API now requires the data for all subcaches to be supplied
to the constructor.

The parse_at methods have been removed and replaced with a
parse_dyld_cache_image method.

With this patch, the following command outputs correct symbols for
libsystem_malloc.dylib:

```
cargo run --release --bin objdump -- /System/Library/dyld/dyld_shared_cache_arm64e /usr/lib/system/libsystem_malloc.dylib
```
  • Loading branch information
mstange committed Nov 25, 2021
1 parent 5b6b6a9 commit 100473f
Show file tree
Hide file tree
Showing 10 changed files with 342 additions and 108 deletions.
35 changes: 27 additions & 8 deletions crates/examples/src/bin/dyldcachedump.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use object::read::macho::DyldCache;
use object::macho;
use object::Endianness;
use std::{env, fs, process};

Expand Down Expand Up @@ -29,8 +29,11 @@ fn main() {
continue;
}
};
let cache = match DyldCache::<Endianness>::parse(&*file) {
Ok(cache) => cache,

// TODO: Convert this back to use DyldCache, and read subcaches first.

let header = match macho::DyldCacheHeader::<Endianness>::parse(&*file) {
Ok(header) => header,
Err(err) => {
println!(
"Failed to parse Dyld shared cache file '{}': {}",
Expand All @@ -39,12 +42,28 @@ fn main() {
continue;
}
};
let (_arch, endian) = header.parse_magic().unwrap();
let mappings = header.mappings(endian, &*file).unwrap();
let images = header.images(endian, &*file).unwrap();

// Print the list of image paths in this file.
for image in cache.images() {
if let Ok(path) = image.path() {
println!("{}", path);
}
println!("Mappings:");
for mapping in mappings {
let start_address = mapping.address.get(endian);
let end_address = start_address.wrapping_add(mapping.size.get(endian));
let file_offset = mapping.file_offset.get(endian);
println!(
"0x{:x}-0x{:x} at file offset 0x{:x}",
start_address, end_address, file_offset
);
}
println!();
println!("Images:");
for image in images {
let address = image.address.get(endian);
let path = image.path(endian, &*file).unwrap();
// The path should always be ASCII, so from_utf8 should always succeed.
let path = core::str::from_utf8(path).unwrap();
println!("0x{:x} {}", address, path);
}
}
}
42 changes: 41 additions & 1 deletion crates/examples/src/bin/objdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,55 @@ fn main() {
process::exit(1);
}
};
let extra_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err,);
process::exit(1);
}
};
let extra_files: Vec<_> = extra_files
.into_iter()
.map(
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
Ok(mmap) => mmap,
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err,);
process::exit(1);
}
},
)
.collect();
let extra_file_data: Vec<&[u8]> = extra_files.iter().map(|f| &**f).collect();

let stdout = io::stdout();
let stderr = io::stderr();
objdump::print(&mut stdout.lock(), &mut stderr.lock(), &*file, member_names).unwrap();
objdump::print(
&mut stdout.lock(),
&mut stderr.lock(),
&*file,
&extra_file_data,
member_names,
)
.unwrap();
}

/// Opens the numbered sibling files `<path>.1`, `<path>.2`, ... and returns
/// the handles in ascending numeric order.
///
/// On macOS 12 or later a dyld shared cache is accompanied by one or more
/// "subcache" files that use this naming scheme. They are opened eagerly,
/// before the main file has been identified as a dyld shared cache, because
/// once the format is known it is too late to read additional files.
///
/// Probing stops at the first index that cannot be opened, so only a
/// gap-free run starting at `.1` is returned. The number of files found is
/// reported on stdout.
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
    let mut subcaches = Vec::new();
    let mut index = 1;
    // Any open failure (usually "not found") marks the end of the sequence.
    while let Ok(handle) = fs::File::open(format!("{}.{}", path, index)) {
        subcaches.push(handle);
        index += 1;
    }
    println!("have {} extra files", subcaches.len());
    subcaches
}
3 changes: 2 additions & 1 deletion crates/examples/src/objdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub fn print<W: Write, E: Write>(
w: &mut W,
e: &mut E,
file: &[u8],
extra_files: &[&[u8]],
member_names: Vec<String>,
) -> Result<()> {
let mut member_names: Vec<_> = member_names.into_iter().map(|name| (name, false)).collect();
Expand Down Expand Up @@ -47,7 +48,7 @@ pub fn print<W: Write, E: Write>(
Err(err) => writeln!(e, "Failed to parse Fat 64 data: {}", err)?,
}
}
} else if let Ok(cache) = DyldCache::<Endianness>::parse(&*file) {
} else if let Ok(cache) = DyldCache::<Endianness>::parse(&*file, extra_files) {
writeln!(w, "Format: dyld cache {:?}-endian", cache.endianness())?;
writeln!(w, "Architecture: {:?}", cache.architecture())?;
for image in cache.images() {
Expand Down
2 changes: 1 addition & 1 deletion crates/examples/tests/testfiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn testfiles() {
println!("File {}", path);
let data = fs::read(&path).unwrap();
fail |= testfile(path, &data, "objdump", |mut out, mut err, data| {
objdump::print(&mut out, &mut err, data, vec![]).unwrap()
objdump::print(&mut out, &mut err, data, &[], vec![]).unwrap()
});
fail |= testfile(path, &data, "readobj", readobj::print);
println!();
Expand Down
40 changes: 35 additions & 5 deletions src/macho.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,15 +295,33 @@ pub struct DyldCacheHeader<E: Endian> {
/// e.g. "dyld_v0 i386"
pub magic: [u8; 16],
/// file offset to first dyld_cache_mapping_info
pub mapping_offset: U32<E>,
pub mapping_offset: U32<E>, // offset: 0x10
/// number of dyld_cache_mapping_info entries
pub mapping_count: U32<E>,
pub mapping_count: U32<E>, // offset: 0x14
/// file offset to first dyld_cache_image_info
pub images_offset: U32<E>,
pub images_offset: U32<E>, // offset: 0x18
/// number of dyld_cache_image_info entries
pub images_count: U32<E>,
pub images_count: U32<E>, // offset: 0x1c
/// base address of dyld when cache was built
pub dyld_base_address: U64<E>,
pub dyld_base_address: U64<E>, // offset: 0x20
///
reserved1: [u8; 48], // offset: 0x28
/// unique value for each shared cache file
pub uuid: [u8; 16], // offset: 0x58
///
reserved2: [u8; 288], // offset: 0x68
/// file offset to first dyld_subcache_info
pub subcaches_offset: U32<E>, // offset: 0x188
/// number of dyld_subcache_info entries
pub subcaches_count: U32<E>, // offset: 0x18c
///
reserved3: [u8; 48], // offset: 0x190
/// file offset to first dyld_cache_image_info
/// Use this instead of images_offset if mapping_offset is at least 0x1c4.
pub images_across_all_subcaches_offset: U32<E>, // offset: 0x1c0
/// number of dyld_cache_image_info entries
/// Use this instead of images_count if mapping_offset is at least 0x1c4.
pub images_across_all_subcaches_count: U32<E>, // offset: 0x1c4
}

/// Corresponds to struct dyld_cache_mapping_info from dyld_cache_format.h.
Expand Down Expand Up @@ -338,6 +356,17 @@ pub struct DyldCacheImageInfo<E: Endian> {
pub pad: U32<E>,
}

/// Corresponds to a struct whose source code has not been published as of Nov 2021.
/// Added in the dyld cache version which shipped with macOS 12 / iOS 15.
///
/// The layout is fixed by `#[repr(C)]`: a 16-byte UUID followed by one
/// 64-bit value, 24 bytes in total.
/// NOTE(review): presumably these are the `dyld_subcache_info` entries that
/// `DyldCacheHeader::subcaches_offset` / `subcaches_count` refer to — confirm
/// against the parsing code.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheInfo<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// A 64-bit value of unknown meaning, stored in the cache's endianness.
/// NOTE(review): no published definition exists for this field; treat any
/// interpretation of it as unverified.
pub unknown: U64<E>,
}

// Definitions from "/usr/include/mach-o/loader.h".

/*
Expand Down Expand Up @@ -3199,6 +3228,7 @@ unsafe_impl_endian_pod!(
DyldCacheHeader,
DyldCacheMappingInfo,
DyldCacheImageInfo,
DyldSubCacheInfo,
MachHeader32,
MachHeader64,
LoadCommand,
Expand Down
34 changes: 16 additions & 18 deletions src/read/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::read::{
SymbolMapName, SymbolScope, SymbolSection,
};
#[allow(unused_imports)]
use crate::Endianness;
use crate::{AddressSize, Endian, Endianness};

/// Evaluate an expression on the contents of a file format enum.
///
Expand Down Expand Up @@ -220,23 +220,21 @@ impl<'data, R: ReadRef<'data>> File<'data, R> {
Ok(File { inner })
}

/// Parse the raw file data at an arbitrary offset inside the input data.
///
/// Currently, this is only supported for Mach-O images.
/// This can be used for parsing Mach-O images inside the dyld shared cache,
/// where multiple images, located at different offsets, share the same address
/// space.
pub fn parse_at(data: R, offset: u64) -> Result<Self> {
let _inner = match FileKind::parse_at(data, offset)? {
#[cfg(feature = "macho")]
FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse_at(data, offset)?),
#[cfg(feature = "macho")]
FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse_at(data, offset)?),
#[allow(unreachable_patterns)]
/// Parse a Mach-O image from the dyld shared cache.
#[cfg(feature = "macho")]
pub fn parse_dyld_cache_image<'cache, E: Endian>(
image: &macho::DyldCacheImage<'data, 'cache, E, R>,
) -> Result<Self> {
let inner = match image.cache.architecture().address_size() {
Some(AddressSize::U64) => {
FileInternal::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?)
}
Some(AddressSize::U32) => {
FileInternal::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?)
}
_ => return Err(Error("Unsupported file format")),
};
#[allow(unreachable_code)]
Ok(File { inner: _inner })
Ok(File { inner })
}

/// Return the file format.
Expand Down Expand Up @@ -501,9 +499,9 @@ where
#[cfg(feature = "elf")]
Elf64(elf::ElfSegment64<'data, 'file, Endianness, R>),
#[cfg(feature = "macho")]
MachO32(macho::MachOSegment32<'data, 'file, Endianness, R>),
MachO32(macho::MachOSegment32<'data, Endianness, R>),
#[cfg(feature = "macho")]
MachO64(macho::MachOSegment64<'data, 'file, Endianness, R>),
MachO64(macho::MachOSegment64<'data, Endianness, R>),
#[cfg(feature = "pe")]
Pe32(pe::PeSegment32<'data, 'file, R>),
#[cfg(feature = "pe")]
Expand Down
Loading

0 comments on commit 100473f

Please sign in to comment.