Skip to content

Commit

Permalink
Correctly handle dyld caches on macOS 13 and above (#642)
Browse files Browse the repository at this point in the history
This allows successful parsing of dyld caches on
macOS 13 and above on Intel Macs.

The main dyld cache file on macOS contains an array of
subcache info structs, each of which specifies the UUID
(and some other information) of each subcache.
`DyldCache::parse` checks that the subcache UUIDs match
these expected UUIDs.

In macOS 13, the format of the subcache info struct
changed: it gained an additional field after the UUID
field. This means that as soon as you had more than
one subcache, our UUID check would fail, because the
second subcache UUID would be read from the wrong offset.

I didn't notice this on my Apple Silicon Mac, because
the arm64e dyld cache only has one subcache:
`dyld_shared_cache_arm64e.01`.
But on Intel Macs, there are currently four subcaches:
`dyld_shared_cache_x86_64.01`, `.02`, `.03`, and `.04`.

In practice this means that my software hasn't been able to
symbolicate macOS system libraries on Intel Macs since
the release of macOS 13.

This commit adds the new struct definition and makes
the UUID check work correctly.

This is a breaking change to the public API. I added
a `DyldSubCacheSlice` enum, but I'm not particularly
fond of it.

dyldcachedump was working correctly on macOS 13+ because it was trying
the "leading zero" suffix format as well as the "no leading zero" suffix
format. This commit changes it to read the suffix from the main cache
header.

objdump was not able to parse dyld shared cache files on macOS 13+ because
it was only using the "no leading zero" suffix format, and thus not finding
the subcaches.
  • Loading branch information
mstange authored Mar 11, 2024
1 parent 6677d67 commit 791adb0
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 50 deletions.
69 changes: 53 additions & 16 deletions crates/examples/src/bin/dyldcachedump.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use object::read::macho::DyldCache;
use object::macho::DyldCacheHeader;
use object::read::macho::{DyldCache, DyldSubCacheSlice};
use object::Endianness;
use std::{env, fs, process};

Expand All @@ -22,21 +23,34 @@ fn main() {
continue;
}
};
let subcache_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
println!("Failed to map file '{}': {}", file_path, err,);
continue;
}
};

let subcaches_info = match get_subcache_info(&file) {
Ok(subcaches_info) => subcaches_info,
Err(err) => {
println!(
"Failed to parse Dyld shared cache file '{}': {}",
file_path, err,
);
continue;
}
};
let subcache_files = subcaches_info
.map(|info| open_subcaches(&file_path, info))
.unwrap_or_default();
let subcache_files: Option<Vec<_>> = subcache_files
.into_iter()
.map(
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
Ok(mmap) => Some(mmap),
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err);
println!("Failed to map file '{}': {}", file_path, err);
None
}
},
Expand Down Expand Up @@ -69,28 +83,51 @@ fn main() {
}
}

/// Reads the subcache info structs described by the header of the main cache.
///
/// `main_cache_data` is the content of the main dyld shared cache file.
/// Errors from parsing the header, parsing its magic, or reading the
/// subcache array are propagated to the caller. The inner `Option` is
/// whatever `DyldCacheHeader::subcaches` returns for this header.
fn get_subcache_info(
    main_cache_data: &[u8],
) -> object::read::Result<Option<DyldSubCacheSlice<'_, Endianness>>> {
    let header = DyldCacheHeader::<Endianness>::parse(main_cache_data)?;
    // Only the endianness is needed; the architecture half of the pair is ignored.
    let endian = header.parse_magic()?.1;
    header.subcaches(endian, main_cache_data)
}

// If the file is a dyld shared cache, and we're on macOS 12 or later,
// then there will be one or more "subcache" files next to this file,
// with the names filename.1, filename.2, ..., filename.symbols
// or filename.01, filename.02 on macOS 13
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
// or filename.01, filename.02, ..., filename.symbols on macOS 13
fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice<Endianness>) -> Vec<fs::File> {
let subcache_suffixes: Vec<String> = match subcaches_info {
DyldSubCacheSlice::V1(subcaches) => {
// macOS 12: Subcaches have the file suffixes .1, .2, .3 etc.
(1..subcaches.len() + 1).map(|i| format!(".{i}")).collect()
}
DyldSubCacheSlice::V2(subcaches) => {
// macOS 13+: The subcache file suffix is written down in the header of the main cache.
subcaches
.iter()
.map(|s| {
// The suffix is a nul-terminated string in a fixed-size byte array.
let suffix = s.file_suffix;
let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len());
String::from_utf8_lossy(&suffix[..len]).to_string()
})
.collect()
}
_ => panic!(
"If this case is hit, it means that someone added a variant to the (non-exhaustive) \
DyldSubCacheSlice enum and forgot to update this example"
),
};
let mut files = Vec::new();
for i in 1.. {
let subcache_path = format!("{}.{}", path, i);
for suffix in subcache_suffixes {
let subcache_path = format!("{path}{suffix}");
match fs::File::open(subcache_path) {
Ok(subcache_file) => files.push(subcache_file),
Err(_) => break,
};
}
if files.is_empty() {
for i in 1.. {
let subcache_path = format!("{}.{:02}", path, i);
match fs::File::open(subcache_path) {
Ok(subcache_file) => files.push(subcache_file),
Err(_) => break,
};
}
}
let symbols_subcache_path = format!("{}.symbols", path);
if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) {
files.push(subcache_file);
Expand Down
54 changes: 45 additions & 9 deletions crates/examples/src/bin/objdump.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use object::{macho::DyldCacheHeader, read::macho::DyldSubCacheSlice, Endianness};
use object_examples::objdump;
use std::{env, fs, io, process};

Expand All @@ -18,14 +19,17 @@ fn main() {
process::exit(1);
}
};
let extra_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err,);
process::exit(1);
}
};
let subcaches_info = get_subcache_info_if_dyld_cache(&file).ok().flatten();
let extra_files = subcaches_info
.map(|info| open_subcaches(&file_path, info))
.unwrap_or_default();
let extra_files: Vec<_> = extra_files
.into_iter()
.map(
Expand All @@ -52,17 +56,48 @@ fn main() {
.unwrap();
}

/// Reads the subcache info structs from the header of the main cache,
/// provided `main_cache_data` is the data of a Dyld shared cache.
///
/// If the header or its magic cannot be parsed, or the subcache array
/// cannot be read, the corresponding error is returned.
fn get_subcache_info_if_dyld_cache(
    main_cache_data: &[u8],
) -> object::read::Result<Option<DyldSubCacheSlice<'_, Endianness>>> {
    let header = DyldCacheHeader::<Endianness>::parse(main_cache_data)?;
    // The architecture is not needed here, only the endianness.
    let (_, endian) = header.parse_magic()?;
    header.subcaches(endian, main_cache_data)
}

// If the file is a dyld shared cache, and we're on macOS 12 or later,
// then there will be one or more "subcache" files next to this file,
// with the names filename.1, filename.2 etc.
// Read those files now, if they exist, even if we don't know that
// we're dealing with a dyld shared cache. By the time we know what
// we're dealing with, it's too late to read more files.
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
// with the names filename.1, filename.2, ..., filename.symbols
// or filename.01, filename.02, ..., filename.symbols on macOS 13
fn open_subcaches(path: &str, subcaches_info: DyldSubCacheSlice<Endianness>) -> Vec<fs::File> {
let subcache_suffixes: Vec<String> = match subcaches_info {
DyldSubCacheSlice::V1(subcaches) => {
// macOS 12: Subcaches have the file suffixes .1, .2, .3 etc.
(1..subcaches.len() + 1).map(|i| format!(".{i}")).collect()
}
DyldSubCacheSlice::V2(subcaches) => {
// macOS 13+: The subcache file suffix is written down in the header of the main cache.
subcaches
.iter()
.map(|s| {
// The suffix is a nul-terminated string in a fixed-size byte array.
let suffix = s.file_suffix;
let len = suffix.iter().position(|&c| c == 0).unwrap_or(suffix.len());
String::from_utf8_lossy(&suffix[..len]).to_string()
})
.collect()
}
_ => panic!(
"If this case is hit, it means that someone added a variant to the (non-exhaustive) \
DyldSubCacheSlice enum and forgot to update this example"
),
};
let mut files = Vec::new();
for i in 1.. {
let subcache_path = format!("{}.{}", path, i);
match fs::File::open(&subcache_path) {
for suffix in subcache_suffixes {
let subcache_path = format!("{path}{suffix}");
match fs::File::open(subcache_path) {
Ok(subcache_file) => files.push(subcache_file),
Err(_) => break,
};
Expand All @@ -71,5 +106,6 @@ fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
if let Ok(subcache_file) = fs::File::open(symbols_subcache_path) {
files.push(subcache_file);
};
println!("Found {} subcache files", files.len());
files
}
2 changes: 1 addition & 1 deletion crates/examples/src/objdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn print<W: Write, E: Write>(
let path = match image.path() {
Ok(path) => path,
Err(err) => {
writeln!(e, "Failed to parse dydld image name: {}", err)?;
writeln!(e, "Failed to parse dyld image name: {}", err)?;
continue;
}
};
Expand Down
27 changes: 21 additions & 6 deletions src/macho.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,15 +379,29 @@ pub struct DyldCacheImageInfo<E: Endian> {
pub pad: U32<E>,
}

/// Corresponds to a struct whose source code has not been published as of Nov 2021.
/// Added in the dyld cache version which shipped with macOS 12 / iOS 15.
/// Added in dyld-940, which shipped with macOS 12 / iOS 15.
/// Originally called `dyld_subcache_entry`, renamed to `dyld_subcache_entry_v1`
/// in dyld-1042.1.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheInfo<E: Endian> {
pub struct DyldSubCacheEntryV1<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// The size of this subcache plus all previous subcaches.
pub cumulative_size: U64<E>,
/// The offset of this subcache from the main cache base address.
pub cache_vm_offset: U64<E>,
}

/// Describes one subcache in the main cache's subcache array (V2 layout).
///
/// Added in dyld-1042.1, which shipped with macOS 13 / iOS 16.
/// Called `dyld_subcache_entry` as of dyld-1042.1.
///
/// Compared to `DyldSubCacheEntryV1`, this layout has an additional
/// `file_suffix` field after the UUID and VM offset.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheEntryV2<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// The offset of this subcache from the main cache base address.
pub cache_vm_offset: U64<E>,
/// The file name suffix of the subCache file, e.g. ".25.data" or ".03.development".
///
/// Stored as a nul-terminated string inside this fixed-size byte array.
pub file_suffix: [u8; 32],
}

// Definitions from "/usr/include/mach-o/loader.h".
Expand Down Expand Up @@ -3253,7 +3267,8 @@ unsafe_impl_endian_pod!(
DyldCacheHeader,
DyldCacheMappingInfo,
DyldCacheImageInfo,
DyldSubCacheInfo,
DyldSubCacheEntryV1,
DyldSubCacheEntryV2,
MachHeader32,
MachHeader64,
LoadCommand,
Expand Down
75 changes: 57 additions & 18 deletions src/read/macho/dyld_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,25 @@ where
mappings: &'data [macho::DyldCacheMappingInfo<E>],
}

// This is the offset of the images_across_all_subcaches_count field.
const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4;
/// A slice of structs describing each subcache. The struct gained
/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16),
/// so this is an enum of the two possible slice types.
///
/// The enum is marked `#[non_exhaustive]`, so code outside this crate that
/// matches on it needs a wildcard arm.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum DyldSubCacheSlice<'data, E: Endian> {
/// V1 entries (`DyldSubCacheEntryV1`), used from dyld-940 (macOS 12 / iOS 15)
/// until the format changed in dyld-1042.1.
V1(&'data [macho::DyldSubCacheEntryV1<E>]),
/// V2 entries (`DyldSubCacheEntryV2`), used since dyld-1042.1 (macOS 13 / iOS 16).
V2(&'data [macho::DyldSubCacheEntryV2<E>]),
}

// Minimum header size for which the subcache-related header fields are read.
// This is the offset of the end of the images_across_all_subcaches_count field.
// It is compared against the header's mapping_offset value.
const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8;

// Minimum header size at which subcache entries are read using the V2 layout.
// This is the offset of the end of the cacheSubType field.
// This field comes right after the images_across_all_subcaches_count field,
// and we don't currently have it in our definition of the DyldCacheHeader type.
// It is compared against the header's mapping_offset value.
const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0;

impl<'data, E, R> DyldCache<'data, E, R>
where
Expand All @@ -51,9 +68,13 @@ where
let mappings = header.mappings(endian, data)?;

let symbols_subcache_uuid = header.symbols_subcache_uuid(endian);
let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]);

if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize {
let subcaches_info = header.subcaches(endian, data)?;
let subcaches_count = match subcaches_info {
Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(),
Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(),
None => 0,
};
if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize {
return Err(Error("Incorrect number of SubCaches"));
}

Expand All @@ -66,15 +87,22 @@ where
(None, subcache_data)
};

// Read the regular SubCaches (.1, .2, ...), if present.
// Read the regular SubCaches, if present.
let mut subcaches = Vec::new();
for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) {
let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
if sc_header.uuid != info.uuid {
return Err(Error("Unexpected SubCache UUID"));
if let Some(subcaches_info) = subcaches_info {
let (v1, v2) = match subcaches_info {
DyldSubCacheSlice::V1(s) => (s, &[][..]),
DyldSubCacheSlice::V2(s) => (&[][..], s),
};
let uuids = v1.iter().map(|e| &e.uuid).chain(v2.iter().map(|e| &e.uuid));
for (&data, uuid) in subcache_data.iter().zip(uuids) {
let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
if &sc_header.uuid != uuid {
return Err(Error("Unexpected SubCache UUID"));
}
let mappings = sc_header.mappings(endian, data)?;
subcaches.push(DyldSubCache { data, mappings });
}
let mappings = sc_header.mappings(endian, data)?;
subcaches.push(DyldSubCache { data, mappings });
}

// Read the .symbols SubCache, if present.
Expand Down Expand Up @@ -255,27 +283,38 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
}

/// Return the information about subcaches, if present.
///
/// Returns `None` for dyld caches produced before dyld-940 (macOS 12).
pub fn subcaches<'data, R: ReadRef<'data>>(
&self,
endian: E,
data: R,
) -> Result<Option<&'data [macho::DyldSubCacheInfo<E>]>> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
) -> Result<Option<DyldSubCacheSlice<'data, E>>> {
let header_size = self.mapping_offset.get(endian);
if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 {
let subcaches = data
.read_slice_at::<macho::DyldSubCacheEntryV2<E>>(
self.subcaches_offset.get(endian).into(),
self.subcaches_count.get(endian) as usize,
)
.read_error("Invalid dyld subcaches size or alignment")?;
Ok(Some(DyldSubCacheSlice::V2(subcaches)))
} else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 {
let subcaches = data
.read_slice_at::<macho::DyldSubCacheInfo<E>>(
.read_slice_at::<macho::DyldSubCacheEntryV1<E>>(
self.subcaches_offset.get(endian).into(),
self.subcaches_count.get(endian) as usize,
)
.read_error("Invalid dyld subcaches size or alignment")?;
Ok(Some(subcaches))
Ok(Some(DyldSubCacheSlice::V1(subcaches)))
} else {
Ok(None)
}
}

/// Return the UUID for the .symbols subcache, if present.
pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
let uuid = self.symbols_subcache_uuid;
if uuid != [0; 16] {
return Some(uuid);
Expand All @@ -290,7 +329,7 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
endian: E,
data: R,
) -> Result<&'data [macho::DyldCacheImageInfo<E>]> {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES {
if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
data.read_slice_at::<macho::DyldCacheImageInfo<E>>(
self.images_across_all_subcaches_offset.get(endian).into(),
self.images_across_all_subcaches_count.get(endian) as usize,
Expand Down

0 comments on commit 791adb0

Please sign in to comment.