From b706949736fe67e104a14ac1dcaac8b7eb1cc33f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Wed, 27 Aug 2025 15:36:17 -0400 Subject: [PATCH 01/85] debug: refactor stack frame capturing --- lib/std/debug.zig | 69 +-- lib/std/debug/Dwarf.zig | 1029 +++++++------------------------- lib/std/debug/Dwarf/Unwind.zig | 645 ++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 535 +++++++++-------- 4 files changed, 1155 insertions(+), 1123 deletions(-) create mode 100644 lib/std/debug/Dwarf/Unwind.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index bd849a32d320..d5f7791fc210 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -498,10 +498,17 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT } stack_trace.index = slice.len; } else { - // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required). - // A new path for loading SelfInfo needs to be created which will only attempt to parse in-memory sections, because - // stopping to load other debug info (ie. source line info) from disk here is not required for unwinding. - var it = StackIterator.init(first_address, @frameAddress()); + if (builtin.cpu.arch == .powerpc64) { + // https://github.com/ziglang/zig/issues/24970 + stack_trace.index = 0; + return; + } + var context: ThreadContext = undefined; + const has_context = getContext(&context); + + var it = (if (has_context) blk: { + break :blk StackIterator.initWithContext(first_address, getSelfDebugInfo() catch break :blk null, &context) catch null; + } else null) orelse StackIterator.init(first_address, null); defer it.deinit(); for (stack_trace.instruction_addresses, 0..) 
|*addr, i| { addr.* = it.next() orelse { @@ -764,7 +771,7 @@ pub fn writeStackTrace( } pub const UnwindError = if (have_ucontext) - @typeInfo(@typeInfo(@TypeOf(StackIterator.next_unwind)).@"fn".return_type.?).error_union.error_set + @typeInfo(@typeInfo(@TypeOf(SelfInfo.unwindFrame)).@"fn".return_type.?).error_union.error_set else void; @@ -865,11 +872,11 @@ pub const StackIterator = struct { @sizeOf(usize); pub fn next(it: *StackIterator) ?usize { - var address = it.next_internal() orelse return null; + var address = it.nextInternal() orelse return null; if (it.first_address) |first_address| { while (address != first_address) { - address = it.next_internal() orelse return null; + address = it.nextInternal() orelse return null; } it.first_address = null; } @@ -877,48 +884,13 @@ pub const StackIterator = struct { return address; } - fn next_unwind(it: *StackIterator) !usize { - const unwind_state = &it.unwind_state.?; - const module = try unwind_state.debug_info.getModuleForAddress(unwind_state.dwarf_context.pc); - switch (native_os) { - .macos, .ios, .watchos, .tvos, .visionos => { - // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding - // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
- if (module.unwind_info) |unwind_info| { - if (SelfInfo.unwindFrameMachO( - unwind_state.debug_info.allocator, - module.base_address, - &unwind_state.dwarf_context, - unwind_info, - module.eh_frame, - )) |return_address| { - return return_address; - } else |err| { - if (err != error.RequiresDWARFUnwind) return err; - } - } else return error.MissingUnwindInfo; - }, - else => {}, - } - - if (try module.getDwarfInfoForAddress(unwind_state.debug_info.allocator, unwind_state.dwarf_context.pc)) |di| { - return SelfInfo.unwindFrameDwarf( - unwind_state.debug_info.allocator, - di, - module.base_address, - &unwind_state.dwarf_context, - null, - ); - } else return error.MissingDebugInfo; - } - - fn next_internal(it: *StackIterator) ?usize { + fn nextInternal(it: *StackIterator) ?usize { if (have_ucontext) { if (it.unwind_state) |*unwind_state| { if (!unwind_state.failed) { if (unwind_state.dwarf_context.pc == 0) return null; defer it.fp = unwind_state.dwarf_context.getFp() catch 0; - if (it.next_unwind()) |return_address| { + if (unwind_state.debug_info.unwindFrame(&unwind_state.dwarf_context)) |return_address| { return return_address; } else |err| { unwind_state.last_error = err; @@ -948,7 +920,7 @@ pub const StackIterator = struct { // Sanity check: the stack grows down thus all the parent frames must be // be at addresses that are greater (or equal) than the previous one. // A zero frame pointer often signals this is the last frame, that case - // is gracefully handled by the next call to next_internal. + // is gracefully handled by the next call to nextInternal. 
if (new_fp != 0 and new_fp < it.fp) return null; const new_pc = @as(*usize, @ptrFromInt(math.add(usize, fp, pc_offset) catch return null)).*; @@ -1099,12 +1071,7 @@ fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, err: } pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) !void { - const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, writer, address, tty_config), - else => return err, - }; - - const symbol_info = module.getSymbolAtAddress(debug_info.allocator, address) catch |err| switch (err) { + const symbol_info = debug_info.getSymbolAtAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, writer, address, tty_config), else => return err, }; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 5f448075a827..e0d74172da66 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -16,7 +16,6 @@ const elf = std.elf; const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; -const EH = DW.EH; const FORM = DW.FORM; const Format = DW.Format; const RLE = DW.RLE; @@ -34,13 +33,12 @@ const Dwarf = @This(); pub const expression = @import("Dwarf/expression.zig"); pub const abi = @import("Dwarf/abi.zig"); pub const call_frame = @import("Dwarf/call_frame.zig"); +pub const Unwind = @import("Dwarf/Unwind.zig"); /// Useful to temporarily enable while working on this file. 
const debug_debug_mode = false; -endian: Endian, -sections: SectionArray = null_section_array, -is_macho: bool, +sections: SectionArray = @splat(null), /// Filled later by the initializer abbrev_table_list: ArrayList(Abbrev.Table) = .empty, @@ -49,14 +47,6 @@ compile_unit_list: ArrayList(CompileUnit) = .empty, /// Filled later by the initializer func_list: ArrayList(Func) = .empty, -/// Starts out non-`null` if the `.eh_frame_hdr` section is present. May become `null` later if we -/// find that `.eh_frame_hdr` is incomplete. -eh_frame_hdr: ?ExceptionFrameHeader = null, -/// These lookup tables are only used if `eh_frame_hdr` is null -cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .empty, -/// Sorted by start_pc -fde_list: ArrayList(FrameDescriptionEntry) = .empty, - /// Populated by `populateRanges`. ranges: ArrayList(Range) = .empty, @@ -87,9 +77,6 @@ pub const Section = struct { debug_rnglists, debug_addr, debug_names, - debug_frame, - eh_frame, - eh_frame_hdr, }; // For sections that are not memory mapped by the loader, this is an offset @@ -258,13 +245,14 @@ pub const Die = struct { fn getAttrAddr( self: *const Die, di: *const Dwarf, + endian: Endian, id: u64, - compile_unit: CompileUnit, + compile_unit: *const CompileUnit, ) error{ InvalidDebugInfo, MissingDebugInfo }!u64 { const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; return switch (form_value.*) { .addr => |value| value, - .addrx => |index| di.readDebugAddr(compile_unit, index), + .addrx => |index| di.readDebugAddr(endian, compile_unit, index), else => bad(), }; } @@ -294,9 +282,10 @@ pub const Die = struct { pub fn getAttrString( self: *const Die, di: *Dwarf, + endian: Endian, id: u64, opt_str: ?[]const u8, - compile_unit: CompileUnit, + compile_unit: *const CompileUnit, ) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 { const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; switch (form_value.*) { @@ -309,13 +298,13 @@ pub const 
Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], endian); return getStringGeneric(opt_str, offset); }, } @@ -326,440 +315,17 @@ pub const Die = struct { } }; -/// This represents the decoded .eh_frame_hdr header -pub const ExceptionFrameHeader = struct { - eh_frame_ptr: usize, - table_enc: u8, - fde_count: usize, - entries: []const u8, - - pub fn entrySize(table_enc: u8) !u8 { - return switch (table_enc & EH.PE.type_mask) { - EH.PE.udata2, - EH.PE.sdata2, - => 4, - EH.PE.udata4, - EH.PE.sdata4, - => 8, - EH.PE.udata8, - EH.PE.sdata8, - => 16, - // This is a binary search table, so all entries must be the same length - else => return bad(), - }; - } - - pub fn findEntry( - self: ExceptionFrameHeader, - eh_frame_len: usize, - eh_frame_hdr_ptr: usize, - pc: usize, - cie: *CommonInformationEntry, - fde: *FrameDescriptionEntry, - endian: Endian, - ) !void { - const entry_size = try entrySize(self.table_enc); - - var left: usize = 0; - var len: usize = self.fde_count; - var fbr: Reader = .fixed(self.entries); - - while (len > 1) { - const mid = left + len / 2; - - fbr.seek = mid * entry_size; - const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - if (pc < pc_begin) { - len /= 2; - } else { - left = 
mid; - if (pc == pc_begin) break; - len -= len / 2; - } - } - - if (len == 0) return missing(); - fbr.seek = left * entry_size; - - // Read past the pc_begin field of the entry - _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad()) orelse return bad(); - - if (fde_ptr < self.eh_frame_ptr) return bad(); - - const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0..eh_frame_len]; - - const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: Reader = .fixed(eh_frame); - eh_frame_fbr.seek = fde_offset; - - const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (fde_entry_header.type != .fde) return bad(); - - // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable - const cie_offset = fde_entry_header.type.fde; - eh_frame_fbr.seek = @intCast(cie_offset); - const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (cie_entry_header.type != .cie) return bad(); - - cie.* = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), - endian, - ); - - fde.* = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie.*, - @sizeOf(usize), - endian, - ); - - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return missing(); - } -}; - -pub const EntryHeader = struct { - /// Offset of the length field in the backing buffer - length_offset: usize, - format: Format, - type: union(enum) { - cie, - /// Value is 
the offset of the corresponding CIE - fde: u64, - terminator, - }, - /// The entry's contents, not including the ID field - entry_bytes: []const u8, - - /// The length of the entry including the ID field, but not the length field itself - pub fn entryLength(self: EntryHeader) usize { - return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4); - } - - /// Reads a header for either an FDE or a CIE, then advances the fbr to the - /// position after the trailing structure. - /// - /// `fbr` must be backed by either the .eh_frame or .debug_frame sections. - /// - /// TODO that's a bad API, don't do that. this function should neither require - /// a fixed reader nor depend on seeking. - pub fn read(fbr: *Reader, dwarf_section: Section.Id, endian: Endian) !EntryHeader { - assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); - - const length_offset = fbr.seek; - const unit_header = try readUnitHeader(fbr, endian); - const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); - if (unit_length == 0) return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = .terminator, - .entry_bytes = &.{}, - }; - const start_offset = fbr.seek; - const end_offset = start_offset + unit_length; - defer fbr.seek = end_offset; - - const id = try readAddress(fbr, unit_header.format, endian); - const entry_bytes = fbr.buffer[fbr.seek..end_offset]; - const cie_id: u64 = switch (dwarf_section) { - .eh_frame => CommonInformationEntry.eh_id, - .debug_frame => switch (unit_header.format) { - .@"32" => CommonInformationEntry.dwarf32_id, - .@"64" => CommonInformationEntry.dwarf64_id, - }, - else => unreachable, - }; - - return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { - .eh_frame => try std.math.sub(u64, start_offset, id), - .debug_frame => id, - else => unreachable, - } }, - .entry_bytes = entry_bytes, - }; - } -}; - -pub 
const CommonInformationEntry = struct { - // Used in .eh_frame - pub const eh_id = 0; - - // Used in .debug_frame (DWARF32) - pub const dwarf32_id = maxInt(u32); - - // Used in .debug_frame (DWARF64) - pub const dwarf64_id = maxInt(u64); - - // Offset of the length field of this entry in the eh_frame section. - // This is the key that FDEs use to reference CIEs. - length_offset: u64, - version: u8, - address_size: u8, - format: Format, - - // Only present in version 4 - segment_selector_size: ?u8, - - code_alignment_factor: u32, - data_alignment_factor: i32, - return_address_register: u8, - - aug_str: []const u8, - aug_data: []const u8, - lsda_pointer_enc: u8, - personality_enc: ?u8, - personality_routine_pointer: ?u64, - fde_pointer_enc: u8, - initial_instructions: []const u8, - - pub fn isSignalFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'S') return true; - return false; - } - - pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'B') return true; - return false; - } - - pub fn mteTaggedFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'G') return true; - return false; - } - - /// This function expects to read the CIE starting with the version field. - /// The returned struct references memory backed by cie_bytes. - /// - /// See the FrameDescriptionEntry.parse documentation for the description - /// of `pc_rel_offset` and `is_runtime`. - /// - /// `length_offset` specifies the offset of this CIE's length field in the - /// .eh_frame / .debug_frame section. 
- pub fn parse( - cie_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - format: Format, - dwarf_section: Section.Id, - length_offset: u64, - addr_size_bytes: u8, - endian: Endian, - ) !CommonInformationEntry { - if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - - var fbr: Reader = .fixed(cie_bytes); - - const version = try fbr.takeByte(); - switch (dwarf_section) { - .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, - .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, - else => return error.UnsupportedDwarfSection, - } - - var has_eh_data = false; - var has_aug_data = false; - - var aug_str_len: usize = 0; - const aug_str_start = fbr.seek; - var aug_byte = try fbr.takeByte(); - while (aug_byte != 0) : (aug_byte = try fbr.takeByte()) { - switch (aug_byte) { - 'z' => { - if (aug_str_len != 0) return bad(); - has_aug_data = true; - }, - 'e' => { - if (has_aug_data or aug_str_len != 0) return bad(); - if (try fbr.takeByte() != 'h') return bad(); - has_eh_data = true; - }, - else => if (has_eh_data) return bad(), - } - - aug_str_len += 1; - } - - if (has_eh_data) { - // legacy data created by older versions of gcc - unsupported here - for (0..addr_size_bytes) |_| _ = try fbr.takeByte(); - } - - const address_size = if (version == 4) try fbr.takeByte() else addr_size_bytes; - const segment_selector_size = if (version == 4) try fbr.takeByte() else null; - - const code_alignment_factor = try fbr.takeLeb128(u32); - const data_alignment_factor = try fbr.takeLeb128(i32); - const return_address_register = if (version == 1) try fbr.takeByte() else try fbr.takeLeb128(u8); - - var lsda_pointer_enc: u8 = EH.PE.omit; - var personality_enc: ?u8 = null; - var personality_routine_pointer: ?u64 = null; - var fde_pointer_enc: u8 = EH.PE.absptr; - - var aug_data: []const u8 = &[_]u8{}; - const aug_str = if (has_aug_data) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = 
fbr.seek; - aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; - - const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; - for (aug_str[1..]) |byte| { - switch (byte) { - 'L' => { - lsda_pointer_enc = try fbr.takeByte(); - }, - 'P' => { - personality_enc = try fbr.takeByte(); - personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian); - }, - 'R' => { - fde_pointer_enc = try fbr.takeByte(); - }, - 'S', 'B', 'G' => {}, - else => return bad(), - } - } - - // aug_data_len can include padding so the CIE ends on an address boundary - fbr.seek = aug_data_start + aug_data_len; - break :blk aug_str; - } else &[_]u8{}; - - const initial_instructions = cie_bytes[fbr.seek..]; - return .{ - .length_offset = length_offset, - .version = version, - .address_size = address_size, - .format = format, - .segment_selector_size = segment_selector_size, - .code_alignment_factor = code_alignment_factor, - .data_alignment_factor = data_alignment_factor, - .return_address_register = return_address_register, - .aug_str = aug_str, - .aug_data = aug_data, - .lsda_pointer_enc = lsda_pointer_enc, - .personality_enc = personality_enc, - .personality_routine_pointer = personality_routine_pointer, - .fde_pointer_enc = fde_pointer_enc, - .initial_instructions = initial_instructions, - }; - } -}; - -pub const FrameDescriptionEntry = struct { - // Offset into eh_frame where the CIE for this FDE is stored - cie_length_offset: u64, - - pc_begin: u64, - pc_range: u64, - lsda_pointer: ?u64, - aug_data: []const u8, - instructions: []const u8, - - /// This function expects to read the FDE starting at the PC Begin field. - /// The returned struct references memory backed by `fde_bytes`. - /// - /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values - /// used when decoding pointers. 
This should be set to zero if fde_bytes is - /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. - /// Otherwise, it should be the relative offset to translate addresses from - /// where the section is currently stored in memory, to where it *would* be - /// stored at runtime: section base addr - backing data base ptr. - /// - /// Similarly, `is_runtime` specifies this function is being called on a runtime - /// section, and so indirect pointers can be followed. - pub fn parse( - fde_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - cie: CommonInformationEntry, - addr_size_bytes: u8, - endian: Endian, - ) !FrameDescriptionEntry { - if (addr_size_bytes > 8) return error.InvalidAddrSize; - - var fbr: Reader = .fixed(fde_bytes); - - const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) orelse return bad(); - - const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = 0, - .follow_indirect = false, - }, endian) orelse return bad(); - - var aug_data: []const u8 = &[_]u8{}; - const lsda_pointer = if (cie.aug_str.len > 0) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; - - const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) - try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) - else - null; - - fbr.seek = aug_data_start + aug_data_len; - break :blk lsda_pointer; - } else null; - - const instructions = fde_bytes[fbr.seek..]; - return .{ - .cie_length_offset = cie.length_offset, - .pc_begin = pc_begin, - .pc_range = pc_range, - .lsda_pointer = lsda_pointer, - .aug_data = aug_data, 
- .instructions = instructions, - }; - } -}; - const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; -pub const null_section_array = [_]?Section{null} ** num_sections; pub const OpenError = ScanError; /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { - try d.scanAllFunctions(gpa); - try d.scanAllCompileUnits(gpa); +pub fn open(d: *Dwarf, gpa: Allocator, endian: Endian) OpenError!void { + try d.scanAllFunctions(gpa, endian); + try d.scanAllCompileUnits(gpa, endian); } const PcRange = struct { @@ -825,31 +391,30 @@ pub const ScanError = error{ StreamTooLong, } || Allocator.Error; -fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { - const endian = di.endian; - var fbr: Reader = .fixed(di.section(.debug_info).?); +fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError!void { + var fr: Reader = .fixed(di.section(.debug_info).?); var this_unit_offset: u64 = 0; - while (this_unit_offset < fbr.buffer.len) { - fbr.seek = @intCast(this_unit_offset); + while (this_unit_offset < fr.buffer.len) { + fr.seek = @intCast(this_unit_offset); - const unit_header = try readUnitHeader(&fbr, endian); + const unit_header = try readUnitHeader(&fr, endian); if (unit_header.unit_length == 0) return; const next_offset = unit_header.header_length + unit_header.unit_length; - const version = try fbr.takeInt(u16, endian); + const version = try fr.takeInt(u16, endian); if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { - const unit_type = try fbr.takeByte(); + const unit_type = try fr.takeByte(); if (unit_type != DW.UT.compile) return bad(); - address_size = try fbr.takeByte(); - 
debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); + address_size = try fr.takeByte(); + debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); } else { - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); - address_size = try fbr.takeByte(); + debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); + address_size = try fr.takeByte(); } if (address_size != @sizeOf(usize)) return bad(); @@ -890,12 +455,12 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { }; while (true) { - fbr.seek = std.mem.indexOfNonePos(u8, fbr.buffer, fbr.seek, &.{ + fr.seek = std.mem.indexOfNonePos(u8, fr.buffer, fr.seek, &.{ zig_padding_abbrev_code, 0, - }) orelse fbr.buffer.len; - if (fbr.seek >= next_unit_pos) break; + }) orelse fr.buffer.len; + if (fr.seek >= next_unit_pos) break; var die_obj = (try parseDie( - &fbr, + &fr, attrs_bufs[0], abbrev_table, unit_header.format, @@ -920,30 +485,30 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { // Prevent endless loops for (0..3) |_| { if (this_die_obj.getAttr(AT.name)) |_| { - break :x try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit); + break :x try this_die_obj.getAttrString(di, endian, AT.name, di.section(.debug_str), &compile_unit); } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| { - const after_die_offset = fbr.seek; - defer fbr.seek = after_die_offset; + const after_die_offset = fr.seek; + defer fr.seek = after_die_offset; // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin, this_unit_offset, next_offset); - fbr.seek = @intCast(ref_offset); + fr.seek = @intCast(ref_offset); this_die_obj = (try parseDie( - &fbr, + &fr, attrs_bufs[2], abbrev_table, // wrong abbrev table for different cu unit_header.format, endian, )) orelse return bad(); } else if (this_die_obj.getAttr(AT.specification)) |_| { - const after_die_offset = 
fbr.seek; - defer fbr.seek = after_die_offset; + const after_die_offset = fr.seek; + defer fr.seek = after_die_offset; // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.specification, this_unit_offset, next_offset); - fbr.seek = @intCast(ref_offset); + fr.seek = @intCast(ref_offset); this_die_obj = (try parseDie( - &fbr, + &fr, attrs_bufs[2], abbrev_table, // wrong abbrev table for different cu unit_header.format, @@ -957,7 +522,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { break :x null; }; - var range_added = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: { + var range_added = if (die_obj.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| blk: { if (die_obj.getAttr(AT.high_pc)) |high_pc_value| { const pc_end = switch (high_pc_value.*) { .addr => |value| value, @@ -983,7 +548,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { }; if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: { - var iter = DebugRangeIterator.init(ranges_value, di, &compile_unit) catch |err| { + var iter = DebugRangeIterator.init(ranges_value, di, endian, &compile_unit) catch |err| { if (err != error.MissingDebugInfo) return err; break :blk; }; @@ -1015,34 +580,33 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { - const endian = di.endian; - var fbr: Reader = .fixed(di.section(.debug_info).?); +fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError!void { + var fr: Reader = .fixed(di.section(.debug_info).?); var this_unit_offset: u64 = 0; var attrs_buf = std.array_list.Managed(Die.Attr).init(allocator); defer attrs_buf.deinit(); - while (this_unit_offset < fbr.buffer.len) { - fbr.seek = @intCast(this_unit_offset); + while (this_unit_offset < fr.buffer.len) { + fr.seek = @intCast(this_unit_offset); - const unit_header = try 
readUnitHeader(&fbr, endian); + const unit_header = try readUnitHeader(&fr, endian); if (unit_header.unit_length == 0) return; const next_offset = unit_header.header_length + unit_header.unit_length; - const version = try fbr.takeInt(u16, endian); + const version = try fr.takeInt(u16, endian); if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { - const unit_type = try fbr.takeByte(); + const unit_type = try fr.takeByte(); if (unit_type != UT.compile) return bad(); - address_size = try fbr.takeByte(); - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); + address_size = try fr.takeByte(); + debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); } else { - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); - address_size = try fbr.takeByte(); + debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); + address_size = try fr.takeByte(); } if (address_size != @sizeOf(usize)) return bad(); @@ -1055,7 +619,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { try attrs_buf.resize(max_attrs); var compile_unit_die = (try parseDie( - &fbr, + &fr, attrs_buf.items, abbrev_table, unit_header.format, @@ -1080,7 +644,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { }; compile_unit.pc_range = x: { - if (compile_unit_die.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| { + if (compile_unit_die.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| { if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| { const pc_end = switch (high_pc_value.*) { .addr => |value| value, @@ -1144,10 +708,11 @@ const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, di: *const Dwarf, + endian: Endian, compile_unit: *const CompileUnit, - fbr: Reader, + fr: Reader, - pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const 
CompileUnit) !@This() { + pub fn init(ranges_value: *const FormValue, di: *const Dwarf, endian: Endian, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo; @@ -1158,13 +723,13 @@ const DebugRangeIterator = struct { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], endian); break :off compile_unit.rnglists_base + offset; }, } @@ -1176,42 +741,43 @@ const DebugRangeIterator = struct { // specified by DW_AT.low_pc or to some other value encoded // in the list itself. // If no starting value is specified use zero. 
- const base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) { + const base_address = compile_unit.die.getAttrAddr(di, endian, AT.low_pc, compile_unit) catch |err| switch (err) { error.MissingDebugInfo => 0, else => return err, }; - var fbr: Reader = .fixed(debug_ranges); - fbr.seek = cast(usize, ranges_offset) orelse return bad(); + var fr: Reader = .fixed(debug_ranges); + fr.seek = cast(usize, ranges_offset) orelse return bad(); return .{ .base_address = base_address, .section_type = section_type, .di = di, + .endian = endian, .compile_unit = compile_unit, - .fbr = fbr, + .fr = fr, }; } // Returns the next range in the list, or null if the end was reached. pub fn next(self: *@This()) !?PcRange { - const endian = self.di.endian; + const endian = self.endian; switch (self.section_type) { .debug_rnglists => { - const kind = try self.fbr.takeByte(); + const kind = try self.fr.takeByte(); switch (kind) { RLE.end_of_list => return null, RLE.base_addressx => { - const index = try self.fbr.takeLeb128(usize); - self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index); + const index = try self.fr.takeLeb128(usize); + self.base_address = try self.di.readDebugAddr(endian, self.compile_unit, index); return try self.next(); }, RLE.startx_endx => { - const start_index = try self.fbr.takeLeb128(usize); - const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); + const start_index = try self.fr.takeLeb128(usize); + const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index); - const end_index = try self.fbr.takeLeb128(usize); - const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index); + const end_index = try self.fr.takeLeb128(usize); + const end_addr = try self.di.readDebugAddr(endian, self.compile_unit, end_index); return .{ .start = start_addr, @@ -1219,10 +785,10 @@ const DebugRangeIterator = struct { }; }, RLE.startx_length => { - const 
start_index = try self.fbr.takeLeb128(usize); - const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); + const start_index = try self.fr.takeLeb128(usize); + const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index); - const len = try self.fbr.takeLeb128(usize); + const len = try self.fr.takeLeb128(usize); const end_addr = start_addr + len; return .{ @@ -1231,8 +797,8 @@ const DebugRangeIterator = struct { }; }, RLE.offset_pair => { - const start_addr = try self.fbr.takeLeb128(usize); - const end_addr = try self.fbr.takeLeb128(usize); + const start_addr = try self.fr.takeLeb128(usize); + const end_addr = try self.fr.takeLeb128(usize); // This is the only kind that uses the base address return .{ @@ -1241,12 +807,12 @@ const DebugRangeIterator = struct { }; }, RLE.base_address => { - self.base_address = try self.fbr.takeInt(usize, endian); + self.base_address = try self.fr.takeInt(usize, endian); return try self.next(); }, RLE.start_end => { - const start_addr = try self.fbr.takeInt(usize, endian); - const end_addr = try self.fbr.takeInt(usize, endian); + const start_addr = try self.fr.takeInt(usize, endian); + const end_addr = try self.fr.takeInt(usize, endian); return .{ .start = start_addr, @@ -1254,8 +820,8 @@ const DebugRangeIterator = struct { }; }, RLE.start_length => { - const start_addr = try self.fbr.takeInt(usize, endian); - const len = try self.fbr.takeLeb128(usize); + const start_addr = try self.fr.takeInt(usize, endian); + const len = try self.fr.takeLeb128(usize); const end_addr = start_addr + len; return .{ @@ -1267,8 +833,8 @@ const DebugRangeIterator = struct { } }, .debug_ranges => { - const start_addr = try self.fbr.takeInt(usize, endian); - const end_addr = try self.fbr.takeInt(usize, endian); + const start_addr = try self.fr.takeInt(usize, endian); + const end_addr = try self.fr.takeInt(usize, endian); if (start_addr == 0 and end_addr == 0) return null; // This entry selects a new value for 
the base address @@ -1288,14 +854,14 @@ const DebugRangeIterator = struct { }; /// TODO: change this to binary searching the sorted compile unit list -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { +pub fn findCompileUnit(di: *const Dwarf, endian: Endian, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; } const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue; - var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue; + var iter = DebugRangeIterator.init(ranges_value, di, endian, compile_unit) catch continue; while (try iter.next()) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; } @@ -1320,8 +886,8 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: Reader = .fixed(di.section(.debug_abbrev).?); - fbr.seek = cast(usize, offset) orelse return bad(); + var fr: Reader = .fixed(di.section(.debug_abbrev).?); + fr.seek = cast(usize, offset) orelse return bad(); var abbrevs = std.array_list.Managed(Abbrev).init(allocator); defer { @@ -1335,20 +901,20 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table defer attrs.deinit(); while (true) { - const code = try fbr.takeLeb128(u64); + const code = try fr.takeLeb128(u64); if (code == 0) break; - const tag_id = try fbr.takeLeb128(u64); - const has_children = (try fbr.takeByte()) == DW.CHILDREN.yes; + const tag_id = try fr.takeLeb128(u64); + const has_children = (try fr.takeByte()) == DW.CHILDREN.yes; while (true) { - const attr_id = try fbr.takeLeb128(u64); - const form_id = try fbr.takeLeb128(u64); + const attr_id = try fr.takeLeb128(u64); + const form_id = try fr.takeLeb128(u64); if 
(attr_id == 0 and form_id == 0) break; try attrs.append(.{ .id = attr_id, .form_id = form_id, .payload = switch (form_id) { - FORM.implicit_const => try fbr.takeLeb128(i64), + FORM.implicit_const => try fr.takeLeb128(i64), else => undefined, }, }); @@ -1369,20 +935,20 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *Reader, + fr: *Reader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, endian: Endian, ) ScanError!?Die { - const abbrev_code = try fbr.takeLeb128(u64); + const abbrev_code = try fr.takeLeb128(u64); if (abbrev_code == 0) return null; const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); const attrs = attrs_buf[0..table_entry.attrs.len]; for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = .{ .id = attr.id, - .value = try parseFormValue(fbr, attr.form_id, format, endian, attr.payload), + .value = try parseFormValue(fr, attr.form_id, format, endian, attr.payload), }; return .{ .tag_id = table_entry.tag_id, @@ -1392,25 +958,24 @@ fn parseDie( } /// Ensures that addresses in the returned LineTable are monotonically increasing. 
-fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { - const endian = d.endian; - const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: *const CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, endian, AT.comp_dir, d.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: Reader = .fixed(d.section(.debug_line).?); - fbr.seek = @intCast(line_info_offset); + var fr: Reader = .fixed(d.section(.debug_line).?); + fr.seek = @intCast(line_info_offset); - const unit_header = try readUnitHeader(&fbr, endian); + const unit_header = try readUnitHeader(&fr, endian); if (unit_header.unit_length == 0) return missing(); const next_offset = unit_header.header_length + unit_header.unit_length; - const version = try fbr.takeInt(u16, endian); + const version = try fr.takeInt(u16, endian); if (version < 2) return bad(); const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ - try fbr.takeByte(), - try fbr.takeByte(), + try fr.takeByte(), + try fr.takeByte(), } else .{ switch (unit_header.format) { .@"32" => 4, @@ -1421,26 +986,26 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
_ = addr_size; _ = seg_size; - const prologue_length = try readAddress(&fbr, unit_header.format, endian); - const prog_start_offset = fbr.seek + prologue_length; + const prologue_length = try readAddress(&fr, unit_header.format, endian); + const prog_start_offset = fr.seek + prologue_length; - const minimum_instruction_length = try fbr.takeByte(); + const minimum_instruction_length = try fr.takeByte(); if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - const maximum_operations_per_instruction = try fbr.takeByte(); + const maximum_operations_per_instruction = try fr.takeByte(); _ = maximum_operations_per_instruction; } - const default_is_stmt = (try fbr.takeByte()) != 0; - const line_base = try fbr.takeByteSigned(); + const default_is_stmt = (try fr.takeByte()) != 0; + const line_base = try fr.takeByteSigned(); - const line_range = try fbr.takeByte(); + const line_range = try fr.takeByte(); if (line_range == 0) return bad(); - const opcode_base = try fbr.takeByte(); + const opcode_base = try fr.takeByte(); - const standard_opcode_lengths = try fbr.take(opcode_base - 1); + const standard_opcode_lengths = try fr.take(opcode_base - 1); var directories: ArrayList(FileEntry) = .empty; defer directories.deinit(gpa); @@ -1451,17 +1016,17 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { - const dir = try fbr.takeSentinel(0); + const dir = try fr.takeSentinel(0); if (dir.len == 0) break; try directories.append(gpa, .{ .path = dir }); } while (true) { - const file_name = try fbr.takeSentinel(0); + const file_name = try fr.takeSentinel(0); if (file_name.len == 0) break; - const dir_index = try fbr.takeLeb128(u32); - const mtime = try fbr.takeLeb128(u64); - const size = try fbr.takeLeb128(u64); + const dir_index = try fr.takeLeb128(u32); + const mtime = try fr.takeLeb128(u64); + const size = try fr.takeLeb128(u64); try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, @@ -1476,21 +1041,21 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! }; { var dir_ent_fmt_buf: [10]FileEntFmt = undefined; - const directory_entry_format_count = try fbr.takeByte(); + const directory_entry_format_count = try fr.takeByte(); if (directory_entry_format_count > dir_ent_fmt_buf.len) return bad(); for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ - .content_type_code = try fbr.takeLeb128(u8), - .form_code = try fbr.takeLeb128(u16), + .content_type_code = try fr.takeLeb128(u8), + .form_code = try fr.takeLeb128(u16), }; } - const directories_count = try fbr.takeLeb128(usize); + const directories_count = try fr.takeLeb128(usize); for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { e.* = .{ .path = &.{} }; for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue(&fbr, ent_fmt.form_code, unit_header.format, endian, null); + const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, null); switch (ent_fmt.content_type_code) { DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), @@ -1507,22 +1072,22 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, 
compile_unit: *CompileUnit) ! } var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.takeByte(); + const file_name_entry_format_count = try fr.takeByte(); if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ - .content_type_code = try fbr.takeLeb128(u16), - .form_code = try fbr.takeLeb128(u16), + .content_type_code = try fr.takeLeb128(u16), + .form_code = try fr.takeLeb128(u16), }; } - const file_names_count = try fbr.takeLeb128(usize); + const file_names_count = try fr.takeLeb128(usize); try file_entries.ensureUnusedCapacity(gpa, file_names_count); for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { e.* = .{ .path = &.{} }; for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue(&fbr, ent_fmt.form_code, unit_header.format, endian, null); + const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, null); switch (ent_fmt.content_type_code) { DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), @@ -1542,17 +1107,17 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
var line_table: CompileUnit.SrcLocCache.LineTable = .{}; errdefer line_table.deinit(gpa); - fbr.seek = @intCast(prog_start_offset); + fr.seek = @intCast(prog_start_offset); const next_unit_pos = line_info_offset + next_offset; - while (fbr.seek < next_unit_pos) { - const opcode = try fbr.takeByte(); + while (fr.seek < next_unit_pos) { + const opcode = try fr.takeByte(); if (opcode == DW.LNS.extended_op) { - const op_size = try fbr.takeLeb128(u64); + const op_size = try fr.takeLeb128(u64); if (op_size < 1) return bad(); - const sub_op = try fbr.takeByte(); + const sub_op = try fr.takeByte(); switch (sub_op) { DW.LNE.end_sequence => { // The row being added here is an "end" address, meaning @@ -1571,14 +1136,14 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! prog.reset(); }, DW.LNE.set_address => { - const addr = try fbr.takeInt(usize, endian); + const addr = try fr.takeInt(usize, endian); prog.address = addr; }, DW.LNE.define_file => { - const path = try fbr.takeSentinel(0); - const dir_index = try fbr.takeLeb128(u32); - const mtime = try fbr.takeLeb128(u64); - const size = try fbr.takeLeb128(u64); + const path = try fr.takeSentinel(0); + const dir_index = try fr.takeLeb128(u32); + const mtime = try fr.takeLeb128(u64); + const size = try fr.takeLeb128(u64); try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, @@ -1586,7 +1151,7 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! .size = size, }); }, - else => try fbr.discardAll64(op_size - 1), + else => try fr.discardAll64(op_size - 1), } } else if (opcode >= opcode_base) { // special opcodes @@ -1604,19 +1169,19 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
prog.basic_block = false; }, DW.LNS.advance_pc => { - const arg = try fbr.takeLeb128(usize); + const arg = try fr.takeLeb128(usize); prog.address += arg * minimum_instruction_length; }, DW.LNS.advance_line => { - const arg = try fbr.takeLeb128(i64); + const arg = try fr.takeLeb128(i64); prog.line += arg; }, DW.LNS.set_file => { - const arg = try fbr.takeLeb128(usize); + const arg = try fr.takeLeb128(usize); prog.file = arg; }, DW.LNS.set_column => { - const arg = try fbr.takeLeb128(u64); + const arg = try fr.takeLeb128(u64); prog.column = arg; }, DW.LNS.negate_stmt => { @@ -1630,13 +1195,13 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! prog.address += inc_addr; }, DW.LNS.fixed_advance_pc => { - const arg = try fbr.takeInt(u16, endian); + const arg = try fr.takeInt(u16, endian); prog.address += arg; }, DW.LNS.set_prologue_end => {}, else => { if (opcode - 1 >= standard_opcode_lengths.len) return bad(); - try fbr.discardAll(standard_opcode_lengths[opcode - 1]); + try fr.discardAll(standard_opcode_lengths[opcode - 1]); }, } } @@ -1661,18 +1226,19 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
}; } -pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void { +pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, endian: Endian, cu: *CompileUnit) ScanError!void { if (cu.src_loc_cache != null) return; - cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu); + cu.src_loc_cache = try d.runLineNumberProgram(gpa, endian, cu); } pub fn getLineNumberInfo( d: *Dwarf, gpa: Allocator, + endian: Endian, compile_unit: *CompileUnit, target_address: u64, ) !std.debug.SourceLocation { - try populateSrcLocCache(d, gpa, compile_unit); + try d.populateSrcLocCache(gpa, endian, compile_unit); const slc = &compile_unit.src_loc_cache.?; const entry = try slc.findSource(target_address); const file_index = entry.file - @intFromBool(slc.version < 5); @@ -1696,7 +1262,7 @@ fn getLineString(di: Dwarf, offset: u64) ![:0]const u8 { return getStringGeneric(di.section(.debug_line_str), offset); } -fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { +fn readDebugAddr(di: Dwarf, endian: Endian, compile_unit: *const CompileUnit, index: u64) !u64 { const debug_addr = di.section(.debug_addr) orelse return bad(); // addr_base points to the first item after the header, however we @@ -1705,7 +1271,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { // The header is 8 or 12 bytes depending on is_64. 
if (compile_unit.addr_base < 8) return bad(); - const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], endian); if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; @@ -1715,113 +1281,13 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], - 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian), - 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian), - 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], endian), + 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], endian), + 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], endian), else => bad(), }; } -/// If `.eh_frame_hdr` is present, then only the header needs to be parsed. Otherwise, `.eh_frame` -/// and `.debug_frame` are scanned and a sorted list of FDEs is built for binary searching during -/// unwinding. Even if `.eh_frame_hdr` is used, we may find during unwinding that it's incomplete, -/// in which case we build the sorted list of FDEs at that point. -/// -/// See also `scanCieFdeInfo`. 
-pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { - const endian = di.endian; - - if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: Reader = .fixed(eh_frame_hdr); - - const version = try fbr.takeByte(); - if (version != 1) break :blk; - - const eh_frame_ptr_enc = try fbr.takeByte(); - if (eh_frame_ptr_enc == EH.PE.omit) break :blk; - const fde_count_enc = try fbr.takeByte(); - if (fde_count_enc == EH.PE.omit) break :blk; - const table_enc = try fbr.takeByte(); - if (table_enc == EH.PE.omit) break :blk; - - const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const entry_size = try ExceptionFrameHeader.entrySize(table_enc); - const entries_len = fde_count * entry_size; - if (entries_len > eh_frame_hdr.len - fbr.seek) return bad(); - - di.eh_frame_hdr = .{ - .eh_frame_ptr = eh_frame_ptr, - .table_enc = table_enc, - .fde_count = fde_count, - .entries = eh_frame_hdr[fbr.seek..][0..entries_len], - }; - - // No need to scan .eh_frame, we have a binary search table already - return; - } - - try di.scanCieFdeInfo(allocator, base_address); -} - -/// Scan `.eh_frame` and `.debug_frame` and build a sorted list of FDEs for binary searching during -/// unwinding. 
-pub fn scanCieFdeInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { - const endian = di.endian; - const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; - for (frame_sections) |frame_section| { - if (di.section(frame_section)) |section_data| { - var fbr: Reader = .fixed(section_data); - while (fbr.seek < fbr.buffer.len) { - const entry_header = try EntryHeader.read(&fbr, frame_section, endian); - switch (entry_header.type) { - .cie => { - const cie = try CommonInformationEntry.parse( - entry_header.entry_bytes, - di.sectionVirtualOffset(frame_section, base_address).?, - true, - entry_header.format, - frame_section, - entry_header.length_offset, - @sizeOf(usize), - di.endian, - ); - try di.cie_map.put(allocator, entry_header.length_offset, cie); - }, - .fde => |cie_offset| { - const cie = di.cie_map.get(cie_offset) orelse return bad(); - const fde = try FrameDescriptionEntry.parse( - entry_header.entry_bytes, - di.sectionVirtualOffset(frame_section, base_address).?, - true, - cie, - @sizeOf(usize), - di.endian, - ); - try di.fde_list.append(allocator, fde); - }, - .terminator => break, - } - } - - std.mem.sortUnstable(FrameDescriptionEntry, di.fde_list.items, {}, struct { - fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { - _ = ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); - } - } -} - fn parseFormValue( r: *Reader, form_id: u64, @@ -1946,7 +1412,7 @@ const UnitHeader = struct { unit_length: u64, }; -fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader { +pub fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader { return switch (try r.takeInt(u32, endian)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -1986,7 +1452,7 @@ fn invalidDebugInfoDetected() void { if (debug_debug_mode) @panic("bad dwarf"); } -fn missing() error{MissingDebugInfo} { +pub fn missing() error{MissingDebugInfo} { if (debug_debug_mode) @panic("missing dwarf"); return 
error.MissingDebugInfo; } @@ -2000,94 +1466,39 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { return str[casted_offset..last :0]; } -const EhPointerContext = struct { - // The address of the pointer field itself - pc_rel_base: u64, - - // Whether or not to follow indirect pointers. This should only be - // used when decoding pointers at runtime using the current process's - // debug info - follow_indirect: bool, - - // These relative addressing modes are only used in specific cases, and - // might not be available / required in all parsing contexts - data_rel_base: ?u64 = null, - text_rel_base: ?u64 = null, - function_rel_base: ?u64 = null, -}; - -fn readEhPointer(fbr: *Reader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !?u64 { - if (enc == EH.PE.omit) return null; - - const value: union(enum) { - signed: i64, - unsigned: u64, - } = switch (enc & EH.PE.type_mask) { - EH.PE.absptr => .{ - .unsigned = switch (addr_size_bytes) { - 2 => try fbr.takeInt(u16, endian), - 4 => try fbr.takeInt(u32, endian), - 8 => try fbr.takeInt(u64, endian), - else => return error.InvalidAddrSize, - }, - }, - EH.PE.uleb128 => .{ .unsigned = try fbr.takeLeb128(u64) }, - EH.PE.udata2 => .{ .unsigned = try fbr.takeInt(u16, endian) }, - EH.PE.udata4 => .{ .unsigned = try fbr.takeInt(u32, endian) }, - EH.PE.udata8 => .{ .unsigned = try fbr.takeInt(u64, endian) }, - EH.PE.sleb128 => .{ .signed = try fbr.takeLeb128(i64) }, - EH.PE.sdata2 => .{ .signed = try fbr.takeInt(i16, endian) }, - EH.PE.sdata4 => .{ .signed = try fbr.takeInt(i32, endian) }, - EH.PE.sdata8 => .{ .signed = try fbr.takeInt(i64, endian) }, - else => return bad(), - }; - - const base = switch (enc & EH.PE.rel_mask) { - EH.PE.pcrel => ctx.pc_rel_base, - EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.funcrel => ctx.function_rel_base orelse return 
error.PointerBaseNotSpecified, - else => null, - }; +pub const ElfModule = struct { + unwind: Dwarf.Unwind, + dwarf: Dwarf, + mapped_memory: ?[]align(std.heap.page_size_min) const u8, + external_mapped_memory: ?[]align(std.heap.page_size_min) const u8, - const ptr: u64 = if (base) |b| switch (value) { - .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), - // absptr can actually contain signed values in some cases (aarch64 MachO) - .unsigned => |u| u +% b, - } else switch (value) { - .signed => |s| @as(u64, @intCast(s)), - .unsigned => |u| u, + pub const Lookup = struct { + base_address: usize, + name: []const u8, + build_id: ?[]const u8, + gnu_eh_frame: ?[]const u8, }; - if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { - if (@sizeOf(usize) != addr_size_bytes) { - // See the documentation for `follow_indirect` - return error.NonNativeIndirection; - } - - const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; - return switch (addr_size_bytes) { - 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, - else => return error.UnsupportedAddrSize, + pub fn init(lookup: *const Lookup) ElfModule { + var em: ElfModule = .{ + .unwind = .{ + .sections = @splat(null), + }, + .dwarf = .{}, + .mapped_memory = null, + .external_mapped_memory = null, }; - } else { - return ptr; - } -} - -fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { - if (pc_rel_offset < 0) { - return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); - } else { - return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); + if (lookup.gnu_eh_frame) |eh_frame_hdr| { + // This is a special case - pointer offsets inside .eh_frame_hdr + // are encoded relative to its base address, so we must use the + // version that is already memory mapped, and not the one that + // will be mapped separately from the ELF file. 
+ em.unwind.sections[@intFromEnum(Dwarf.Unwind.Section.Id.eh_frame_hdr)] = .{ + .data = eh_frame_hdr, + }; + } + return em; } -} - -pub const ElfModule = struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(std.heap.page_size_min) const u8, - external_mapped_memory: ?[]align(std.heap.page_size_min) const u8, pub fn deinit(self: *@This(), allocator: Allocator) void { self.dwarf.deinit(allocator); @@ -2095,16 +1506,16 @@ pub const ElfModule = struct { if (self.external_mapped_memory) |m| std.posix.munmap(m); } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, endian: Endian, base_address: usize, address: usize) !std.debug.Symbol { // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return self.dwarf.getSymbol(allocator, relocated_address); + const relocated_address = address - base_address; + return self.dwarf.getSymbol(allocator, endian, relocated_address); } - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { + pub fn getDwarfUnwindForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf.Unwind { _ = allocator; _ = address; - return &self.dwarf; + return &self.unwind; } pub const LoadError = error{ @@ -2132,6 +1543,7 @@ pub const ElfModule = struct { /// info is, then this this function will recurse to attempt to load the debug /// sections from an external file. 
pub fn load( + em: *ElfModule, gpa: Allocator, mapped_mem: []align(std.heap.page_size_min) const u8, build_id: ?[]const u8, @@ -2139,7 +1551,7 @@ pub const ElfModule = struct { parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, elf_filename: ?[]const u8, - ) LoadError!Dwarf.ElfModule { + ) LoadError!void { if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); @@ -2162,7 +1574,7 @@ pub const ElfModule = struct { @ptrCast(@alignCast(&mapped_mem[shoff])), )[0..hdr.e_shnum]; - var sections: Dwarf.SectionArray = Dwarf.null_section_array; + var sections: Dwarf.SectionArray = @splat(null); // Combine section list. This takes ownership over any owned sections from the parent scope. for (parent_sections, §ions) |*parent, *section_elem| { @@ -2276,7 +1688,7 @@ pub const ElfModule = struct { .sub_path = filename, }; - return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch break :blk; + return em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch break :blk; } const global_debug_directories = [_][]const u8{ @@ -2304,7 +1716,7 @@ pub const ElfModule = struct { }; defer gpa.free(path.sub_path); - return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + return em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; } } @@ -2320,7 +1732,7 @@ pub const ElfModule = struct { defer exe_dir.close(); // / - if (loadPath( + if (em.loadPath( gpa, .{ .root_dir = .{ .path = null, .handle = exe_dir }, @@ -2341,7 +1753,7 @@ pub const ElfModule = struct { }; defer gpa.free(path.sub_path); - if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + if (em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} } var cwd_buf: 
[std.fs.max_path_bytes]u8 = undefined; @@ -2354,37 +1766,27 @@ pub const ElfModule = struct { .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), }; defer gpa.free(path.sub_path); - if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + if (em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} } } return error.MissingDebugInfo; } - var di: Dwarf = .{ - .endian = endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&di, gpa); - - return .{ - .base_address = 0, - .dwarf = di, - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - }; + em.mapped_memory = parent_mapped_mem orelse mapped_mem; + em.external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null; + try em.dwarf.open(gpa, endian); } pub fn loadPath( + em: *ElfModule, gpa: Allocator, elf_file_path: Path, build_id: ?[]const u8, expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, - ) LoadError!Dwarf.ElfModule { + ) LoadError!void { const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { error.FileNotFound => return missing(), else => return err, @@ -2407,7 +1809,7 @@ pub const ElfModule = struct { }; errdefer std.posix.munmap(mapped_mem); - return load( + return em.load( gpa, mapped_mem, build_id, @@ -2419,22 +1821,21 @@ pub const ElfModule = struct { } }; -pub fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { - if (di.findCompileUnit(address)) |compile_unit| { - return .{ - .name = di.getSymbolName(address) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { - 
error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { +pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) !std.debug.Symbol { + const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return .{}, else => return err, - } + }; + return .{ + .name = di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, endian, std.dwarf.AT.name, di.section(.debug_str), compile_unit) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = di.getLineNumberInfo(allocator, endian, compile_unit, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; } pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { @@ -2443,7 +1844,7 @@ pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]cons return ptr[start..end]; } -fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { +pub fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { return switch (format) { .@"32" => try r.takeInt(u32, endian), .@"64" => try r.takeInt(u64, endian), diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig new file mode 100644 index 000000000000..1da318a04826 --- /dev/null +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -0,0 +1,645 @@ +sections: SectionArray = @splat(null), + +/// Starts out non-`null` if the `.eh_frame_hdr` section is present. May become `null` later if we +/// find that `.eh_frame_hdr` is incomplete. 
+eh_frame_hdr: ?ExceptionFrameHeader = null, +/// These lookup tables are only used if `eh_frame_hdr` is null +cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .empty, +/// Sorted by start_pc +fde_list: std.ArrayList(FrameDescriptionEntry) = .empty, + +pub const Section = struct { + data: []const u8, + + pub const Id = enum { + debug_frame, + eh_frame, + eh_frame_hdr, + }; +}; + +const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); +pub const SectionArray = [num_sections]?Section; + +pub fn section(unwind: Unwind, dwarf_section: Section.Id) ?[]const u8 { + return if (unwind.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; +} + +/// This represents the decoded .eh_frame_hdr header +pub const ExceptionFrameHeader = struct { + eh_frame_ptr: usize, + table_enc: u8, + fde_count: usize, + entries: []const u8, + + pub fn entrySize(table_enc: u8) !u8 { + return switch (table_enc & EH.PE.type_mask) { + EH.PE.udata2, + EH.PE.sdata2, + => 4, + EH.PE.udata4, + EH.PE.sdata4, + => 8, + EH.PE.udata8, + EH.PE.sdata8, + => 16, + // This is a binary search table, so all entries must be the same length + else => return bad(), + }; + } + + pub fn findEntry( + self: ExceptionFrameHeader, + eh_frame_len: usize, + eh_frame_hdr_ptr: usize, + pc: usize, + cie: *CommonInformationEntry, + fde: *FrameDescriptionEntry, + endian: Endian, + ) !void { + const entry_size = try entrySize(self.table_enc); + + var left: usize = 0; + var len: usize = self.fde_count; + var fbr: Reader = .fixed(self.entries); + + while (len > 1) { + const mid = left + len / 2; + + fbr.seek = mid * entry_size; + const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }, endian) orelse return bad(); + + if (pc < pc_begin) { + len /= 2; + } else { + left = mid; + if (pc == pc_begin) break; + len -= len / 2; + } + } + + if (len == 0) return 
missing(); + fbr.seek = left * entry_size; + + // Read past the pc_begin field of the entry + _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }, endian) orelse return bad(); + + const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }, endian) orelse return bad()) orelse return bad(); + + if (fde_ptr < self.eh_frame_ptr) return bad(); + + const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0..eh_frame_len]; + + const fde_offset = fde_ptr - self.eh_frame_ptr; + var eh_frame_fbr: Reader = .fixed(eh_frame); + eh_frame_fbr.seek = fde_offset; + + const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); + if (fde_entry_header.type != .fde) return bad(); + + // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable + const cie_offset = fde_entry_header.type.fde; + eh_frame_fbr.seek = @intCast(cie_offset); + const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); + if (cie_entry_header.type != .cie) return bad(); + + cie.* = try CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.format, + .eh_frame, + cie_entry_header.length_offset, + @sizeOf(usize), + endian, + ); + + fde.* = try FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie.*, + @sizeOf(usize), + endian, + ); + + if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return missing(); + } +}; + +pub const EntryHeader = struct { + /// Offset of the length field in the backing buffer + length_offset: usize, + format: Format, + type: union(enum) { + cie, + /// Value is the offset of the corresponding CIE + fde: u64, + terminator, + }, + /// The entry's 
contents, not including the ID field + entry_bytes: []const u8, + + /// The length of the entry including the ID field, but not the length field itself + pub fn entryLength(self: EntryHeader) usize { + return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4); + } + + /// Reads a header for either an FDE or a CIE, then advances the fbr to the + /// position after the trailing structure. + /// + /// `fbr` must be backed by either the .eh_frame or .debug_frame sections. + /// + /// TODO that's a bad API, don't do that. this function should neither require + /// a fixed reader nor depend on seeking. + pub fn read(fbr: *Reader, dwarf_section: Section.Id, endian: Endian) !EntryHeader { + assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); + + const length_offset = fbr.seek; + const unit_header = try Dwarf.readUnitHeader(fbr, endian); + const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); + if (unit_length == 0) return .{ + .length_offset = length_offset, + .format = unit_header.format, + .type = .terminator, + .entry_bytes = &.{}, + }; + const start_offset = fbr.seek; + const end_offset = start_offset + unit_length; + defer fbr.seek = end_offset; + + const id = try Dwarf.readAddress(fbr, unit_header.format, endian); + const entry_bytes = fbr.buffer[fbr.seek..end_offset]; + const cie_id: u64 = switch (dwarf_section) { + .eh_frame => CommonInformationEntry.eh_id, + .debug_frame => switch (unit_header.format) { + .@"32" => CommonInformationEntry.dwarf32_id, + .@"64" => CommonInformationEntry.dwarf64_id, + }, + else => unreachable, + }; + + return .{ + .length_offset = length_offset, + .format = unit_header.format, + .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { + .eh_frame => try std.math.sub(u64, start_offset, id), + .debug_frame => id, + else => unreachable, + } }, + .entry_bytes = entry_bytes, + }; + } +}; + +pub const CommonInformationEntry = struct { + // Used in .eh_frame + pub const 
eh_id = 0; + + // Used in .debug_frame (DWARF32) + pub const dwarf32_id = maxInt(u32); + + // Used in .debug_frame (DWARF64) + pub const dwarf64_id = maxInt(u64); + + // Offset of the length field of this entry in the eh_frame section. + // This is the key that FDEs use to reference CIEs. + length_offset: u64, + version: u8, + address_size: u8, + format: Format, + + // Only present in version 4 + segment_selector_size: ?u8, + + code_alignment_factor: u32, + data_alignment_factor: i32, + return_address_register: u8, + + aug_str: []const u8, + aug_data: []const u8, + lsda_pointer_enc: u8, + personality_enc: ?u8, + personality_routine_pointer: ?u64, + fde_pointer_enc: u8, + initial_instructions: []const u8, + + pub fn isSignalFrame(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'S') return true; + return false; + } + + pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'B') return true; + return false; + } + + pub fn mteTaggedFrame(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'G') return true; + return false; + } + + /// This function expects to read the CIE starting with the version field. + /// The returned struct references memory backed by cie_bytes. + /// + /// See the FrameDescriptionEntry.parse documentation for the description + /// of `pc_rel_offset` and `is_runtime`. + /// + /// `length_offset` specifies the offset of this CIE's length field in the + /// .eh_frame / .debug_frame section. 
+ pub fn parse( + cie_bytes: []const u8, + pc_rel_offset: i64, + is_runtime: bool, + format: Format, + dwarf_section: Section.Id, + length_offset: u64, + addr_size_bytes: u8, + endian: Endian, + ) !CommonInformationEntry { + if (addr_size_bytes > 8) return error.UnsupportedAddrSize; + + var fbr: Reader = .fixed(cie_bytes); + + const version = try fbr.takeByte(); + switch (dwarf_section) { + .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, + .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, + else => return error.UnsupportedDwarfSection, + } + + var has_eh_data = false; + var has_aug_data = false; + + var aug_str_len: usize = 0; + const aug_str_start = fbr.seek; + var aug_byte = try fbr.takeByte(); + while (aug_byte != 0) : (aug_byte = try fbr.takeByte()) { + switch (aug_byte) { + 'z' => { + if (aug_str_len != 0) return bad(); + has_aug_data = true; + }, + 'e' => { + if (has_aug_data or aug_str_len != 0) return bad(); + if (try fbr.takeByte() != 'h') return bad(); + has_eh_data = true; + }, + else => if (has_eh_data) return bad(), + } + + aug_str_len += 1; + } + + if (has_eh_data) { + // legacy data created by older versions of gcc - unsupported here + for (0..addr_size_bytes) |_| _ = try fbr.takeByte(); + } + + const address_size = if (version == 4) try fbr.takeByte() else addr_size_bytes; + const segment_selector_size = if (version == 4) try fbr.takeByte() else null; + + const code_alignment_factor = try fbr.takeLeb128(u32); + const data_alignment_factor = try fbr.takeLeb128(i32); + const return_address_register = if (version == 1) try fbr.takeByte() else try fbr.takeLeb128(u8); + + var lsda_pointer_enc: u8 = EH.PE.omit; + var personality_enc: ?u8 = null; + var personality_routine_pointer: ?u64 = null; + var fde_pointer_enc: u8 = EH.PE.absptr; + + var aug_data: []const u8 = &[_]u8{}; + const aug_str = if (has_aug_data) blk: { + const aug_data_len = try fbr.takeLeb128(usize); + const aug_data_start = 
fbr.seek; + aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; + + const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; + for (aug_str[1..]) |byte| { + switch (byte) { + 'L' => { + lsda_pointer_enc = try fbr.takeByte(); + }, + 'P' => { + personality_enc = try fbr.takeByte(); + personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ + .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.seek]), pc_rel_offset), + .follow_indirect = is_runtime, + }, endian); + }, + 'R' => { + fde_pointer_enc = try fbr.takeByte(); + }, + 'S', 'B', 'G' => {}, + else => return bad(), + } + } + + // aug_data_len can include padding so the CIE ends on an address boundary + fbr.seek = aug_data_start + aug_data_len; + break :blk aug_str; + } else &[_]u8{}; + + const initial_instructions = cie_bytes[fbr.seek..]; + return .{ + .length_offset = length_offset, + .version = version, + .address_size = address_size, + .format = format, + .segment_selector_size = segment_selector_size, + .code_alignment_factor = code_alignment_factor, + .data_alignment_factor = data_alignment_factor, + .return_address_register = return_address_register, + .aug_str = aug_str, + .aug_data = aug_data, + .lsda_pointer_enc = lsda_pointer_enc, + .personality_enc = personality_enc, + .personality_routine_pointer = personality_routine_pointer, + .fde_pointer_enc = fde_pointer_enc, + .initial_instructions = initial_instructions, + }; + } +}; + +pub const FrameDescriptionEntry = struct { + // Offset into eh_frame where the CIE for this FDE is stored + cie_length_offset: u64, + + pc_begin: u64, + pc_range: u64, + lsda_pointer: ?u64, + aug_data: []const u8, + instructions: []const u8, + + /// This function expects to read the FDE starting at the PC Begin field. + /// The returned struct references memory backed by `fde_bytes`. + /// + /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values + /// used when decoding pointers. 
This should be set to zero if fde_bytes is + /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. + /// Otherwise, it should be the relative offset to translate addresses from + /// where the section is currently stored in memory, to where it *would* be + /// stored at runtime: section base addr - backing data base ptr. + /// + /// Similarly, `is_runtime` specifies this function is being called on a runtime + /// section, and so indirect pointers can be followed. + pub fn parse( + fde_bytes: []const u8, + pc_rel_offset: i64, + is_runtime: bool, + cie: CommonInformationEntry, + addr_size_bytes: u8, + endian: Endian, + ) !FrameDescriptionEntry { + if (addr_size_bytes > 8) return error.InvalidAddrSize; + + var fbr: Reader = .fixed(fde_bytes); + + const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ + .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), + .follow_indirect = is_runtime, + }, endian) orelse return bad(); + + const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ + .pc_rel_base = 0, + .follow_indirect = false, + }, endian) orelse return bad(); + + var aug_data: []const u8 = &[_]u8{}; + const lsda_pointer = if (cie.aug_str.len > 0) blk: { + const aug_data_len = try fbr.takeLeb128(usize); + const aug_data_start = fbr.seek; + aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; + + const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) + try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ + .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), + .follow_indirect = is_runtime, + }, endian) + else + null; + + fbr.seek = aug_data_start + aug_data_len; + break :blk lsda_pointer; + } else null; + + const instructions = fde_bytes[fbr.seek..]; + return .{ + .cie_length_offset = cie.length_offset, + .pc_begin = pc_begin, + .pc_range = pc_range, + .lsda_pointer = lsda_pointer, + .aug_data = aug_data, 
+ .instructions = instructions, + }; + } +}; + +/// If `.eh_frame_hdr` is present, then only the header needs to be parsed. Otherwise, `.eh_frame` +/// and `.debug_frame` are scanned and a sorted list of FDEs is built for binary searching during +/// unwinding. Even if `.eh_frame_hdr` is used, we may find during unwinding that it's incomplete, +/// in which case we build the sorted list of FDEs at that point. +/// +/// See also `scanCieFdeInfo`. +pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { + const endian = di.endian; + + if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { + var fbr: Reader = .fixed(eh_frame_hdr); + + const version = try fbr.takeByte(); + if (version != 1) break :blk; + + const eh_frame_ptr_enc = try fbr.takeByte(); + if (eh_frame_ptr_enc == EH.PE.omit) break :blk; + const fde_count_enc = try fbr.takeByte(); + if (fde_count_enc == EH.PE.omit) break :blk; + const table_enc = try fbr.takeByte(); + if (table_enc == EH.PE.omit) break :blk; + + const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), + .follow_indirect = true, + }, endian) orelse return bad()) orelse return bad(); + + const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), + .follow_indirect = true, + }, endian) orelse return bad()) orelse return bad(); + + const entry_size = try ExceptionFrameHeader.entrySize(table_enc); + const entries_len = fde_count * entry_size; + if (entries_len > eh_frame_hdr.len - fbr.seek) return bad(); + + di.eh_frame_hdr = .{ + .eh_frame_ptr = eh_frame_ptr, + .table_enc = table_enc, + .fde_count = fde_count, + .entries = eh_frame_hdr[fbr.seek..][0..entries_len], + }; + + // No need to scan .eh_frame, we have a binary search table already + return; + } + + try di.scanCieFdeInfo(allocator, base_address); +} + +/// Scan `.eh_frame` and 
`.debug_frame` and build a sorted list of FDEs for binary searching during +/// unwinding. +pub fn scanCieFdeInfo(unwind: *Unwind, allocator: Allocator, endian: Endian, base_address: usize) !void { + const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; + for (frame_sections) |frame_section| { + if (unwind.section(frame_section)) |section_data| { + var fbr: Reader = .fixed(section_data); + while (fbr.seek < fbr.buffer.len) { + const entry_header = try EntryHeader.read(&fbr, frame_section, endian); + switch (entry_header.type) { + .cie => { + const cie = try CommonInformationEntry.parse( + entry_header.entry_bytes, + unwind.sectionVirtualOffset(frame_section, base_address).?, + true, + entry_header.format, + frame_section, + entry_header.length_offset, + @sizeOf(usize), + endian, + ); + try unwind.cie_map.put(allocator, entry_header.length_offset, cie); + }, + .fde => |cie_offset| { + const cie = unwind.cie_map.get(cie_offset) orelse return bad(); + const fde = try FrameDescriptionEntry.parse( + entry_header.entry_bytes, + unwind.sectionVirtualOffset(frame_section, base_address).?, + true, + cie, + @sizeOf(usize), + endian, + ); + try unwind.fde_list.append(allocator, fde); + }, + .terminator => break, + } + } + + std.mem.sortUnstable(FrameDescriptionEntry, unwind.fde_list.items, {}, struct { + fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { + _ = ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + } + } +} + +const EhPointerContext = struct { + // The address of the pointer field itself + pc_rel_base: u64, + + // Whether or not to follow indirect pointers. 
This should only be + // used when decoding pointers at runtime using the current process's + // debug info + follow_indirect: bool, + + // These relative addressing modes are only used in specific cases, and + // might not be available / required in all parsing contexts + data_rel_base: ?u64 = null, + text_rel_base: ?u64 = null, + function_rel_base: ?u64 = null, +}; + +fn readEhPointer(fbr: *Reader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !?u64 { + if (enc == EH.PE.omit) return null; + + const value: union(enum) { + signed: i64, + unsigned: u64, + } = switch (enc & EH.PE.type_mask) { + EH.PE.absptr => .{ + .unsigned = switch (addr_size_bytes) { + 2 => try fbr.takeInt(u16, endian), + 4 => try fbr.takeInt(u32, endian), + 8 => try fbr.takeInt(u64, endian), + else => return error.InvalidAddrSize, + }, + }, + EH.PE.uleb128 => .{ .unsigned = try fbr.takeLeb128(u64) }, + EH.PE.udata2 => .{ .unsigned = try fbr.takeInt(u16, endian) }, + EH.PE.udata4 => .{ .unsigned = try fbr.takeInt(u32, endian) }, + EH.PE.udata8 => .{ .unsigned = try fbr.takeInt(u64, endian) }, + EH.PE.sleb128 => .{ .signed = try fbr.takeLeb128(i64) }, + EH.PE.sdata2 => .{ .signed = try fbr.takeInt(i16, endian) }, + EH.PE.sdata4 => .{ .signed = try fbr.takeInt(i32, endian) }, + EH.PE.sdata8 => .{ .signed = try fbr.takeInt(i64, endian) }, + else => return bad(), + }; + + const base = switch (enc & EH.PE.rel_mask) { + EH.PE.pcrel => ctx.pc_rel_base, + EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, + else => null, + }; + + const ptr: u64 = if (base) |b| switch (value) { + .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), + // absptr can actually contain signed values in some cases (aarch64 MachO) + .unsigned => |u| u +% b, + } else switch (value) { + .signed 
=> |s| @as(u64, @intCast(s)), + .unsigned => |u| u, + }; + + if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { + if (@sizeOf(usize) != addr_size_bytes) { + // See the documentation for `follow_indirect` + return error.NonNativeIndirection; + } + + const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; + return switch (addr_size_bytes) { + 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, + else => return error.UnsupportedAddrSize, + }; + } else { + return ptr; + } +} + +fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { + if (pc_rel_offset < 0) { + return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); + } else { + return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); + } +} + +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const bad = Dwarf.bad; +const cast = std.math.cast; +const DW = std.dwarf; +const Dwarf = std.debug.Dwarf; +const EH = DW.EH; +const Endian = std.builtin.Endian; +const Format = DW.Format; +const maxInt = std.math.maxInt; +const missing = Dwarf.missing; +const Reader = std.Io.Reader; +const std = @import("std"); +const Unwind = @This(); diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index f77d14b913d2..4a29eb0fa78b 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -31,7 +31,7 @@ const SelfInfo = @This(); const root = @import("root"); allocator: Allocator, -address_map: std.AutoHashMap(usize, *Module), +address_map: std.AutoHashMapUnmanaged(usize, Module), modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, pub const OpenError = error{ @@ -40,29 +40,27 @@ pub const OpenError = error{ } || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).@"fn".return_type.?).error_union.error_set; pub fn open(allocator: Allocator) OpenError!SelfInfo { - nosuspend { - if (builtin.strip_debug_info) - return error.MissingDebugInfo; - switch (native_os) { - .linux, - .freebsd, 
- .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => return try SelfInfo.init(allocator), - else => return error.UnsupportedOperatingSystem, - } + if (builtin.strip_debug_info) + return error.MissingDebugInfo; + switch (native_os) { + .linux, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .macos, + .solaris, + .illumos, + .windows, + => return try SelfInfo.init(allocator), + else => return error.UnsupportedOperatingSystem, } } pub fn init(allocator: Allocator) !SelfInfo { var debug_info: SelfInfo = .{ .allocator = allocator, - .address_map = std.AutoHashMap(usize, *Module).init(allocator), + .address_map = .empty, .modules = if (native_os == .windows) .{} else {}, }; @@ -110,7 +108,7 @@ pub fn deinit(self: *SelfInfo) void { mdi.deinit(self.allocator); self.allocator.destroy(mdi); } - self.address_map.deinit(); + self.address_map.deinit(self.allocator); if (native_os == .windows) { for (self.modules.items) |module| { self.allocator.free(module.name); @@ -120,7 +118,7 @@ pub fn deinit(self: *SelfInfo) void { } } -pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { +fn lookupModuleForAddress(self: *SelfInfo, address: usize) !Module.Lookup { if (builtin.target.os.tag.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { @@ -134,21 +132,65 @@ pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { } } -// Returns the module name for a given address. -// This can be called when getModuleForAddress fails, so implementations should provide -// a path that doesn't rely on any side-effects of a prior successful module lookup. 
-pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { +fn loadModuleDebugInfo(self: *SelfInfo, lookup: *const Module.Lookup, module: *Module) !void { if (builtin.target.os.tag.isDarwin()) { - return self.lookupModuleNameDyld(address); + @compileError("TODO"); } else if (native_os == .windows) { - return self.lookupModuleNameWin32(address); + @compileError("TODO"); } else if (native_os == .haiku) { - return null; + @compileError("TODO"); } else if (builtin.target.cpu.arch.isWasm()) { - return null; + @compileError("TODO"); } else { - return self.lookupModuleNameDl(address); + if (module.mapped_memory == null) { + var sections: Dwarf.SectionArray = @splat(null); + try readElfDebugInfo(module, self.allocator, if (lookup.name.len > 0) lookup.name else null, lookup.build_id, §ions); + assert(module.mapped_memory != null); + } + } +} + +pub fn unwindFrame(self: *SelfInfo, context: *UnwindContext) !usize { + const lookup = try self.lookupModuleForAddress(context.pc); + const gop = try self.address_map.getOrPut(self.allocator, lookup.base_address); + if (!gop.found_existing) gop.value_ptr.* = .init(&lookup); + if (native_os.isDarwin()) { + // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding + // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
+ if (gop.value_ptr.unwind_info) |unwind_info| { + if (unwindFrameMachO( + self.allocator, + lookup.base_address, + context, + unwind_info, + gop.value_ptr.eh_frame, + )) |return_address| { + return return_address; + } else |err| { + if (err != error.RequiresDWARFUnwind) return err; + } + } else return error.MissingUnwindInfo; } + if (try gop.value_ptr.getDwarfUnwindForAddress(self.allocator, context.pc)) |unwind| { + return unwindFrameDwarf(self.allocator, unwind, lookup.base_address, context, null); + } else return error.MissingDebugInfo; +} + +pub fn getSymbolAtAddress(self: *SelfInfo, address: usize) !std.debug.Symbol { + const lookup = try self.lookupModuleForAddress(address); + const gop = try self.address_map.getOrPut(self.allocator, lookup.base_address); + if (!gop.found_existing) gop.value_ptr.* = .init(&lookup); + try self.loadModuleDebugInfo(&lookup, gop.value_ptr); + return gop.value_ptr.getSymbolAtAddress(self.allocator, native_endian, lookup.base_address, address); +} + +/// Returns the module name for a given address. +/// This can be called when getModuleForAddress fails, so implementations should provide +/// a path that doesn't rely on any side-effects of a prior successful module lookup. 
+pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { + return if (self.lookupModuleForAddress(address)) |lookup| lookup.name else |err| switch (err) { + error.MissingDebugInfo => null, + }; } fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { @@ -394,19 +436,24 @@ fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { return null; } -fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { +fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { var ctx: struct { // Input address: usize, // Output - base_address: usize = undefined, - name: []const u8 = undefined, - build_id: ?[]const u8 = null, - gnu_eh_frame: ?[]const u8 = null, - } = .{ .address = address }; + lookup: Module.Lookup, + } = .{ + .address = address, + .lookup = .{ + .base_address = undefined, + .name = undefined, + .build_id = null, + .gnu_eh_frame = null, + }, + }; const CtxTy = @TypeOf(ctx); - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + posix.dl_iterate_phdr(&ctx, error{Found}, struct { fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { _ = size; // The base address is too high @@ -423,8 +470,8 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { if (context.address >= seg_start and context.address < seg_end) { // Android libc uses NULL instead of an empty string to mark the // main program - context.name = mem.sliceTo(info.name, 0) orelse ""; - context.base_address = info.addr; + context.lookup.name = mem.sliceTo(info.name, 0) orelse ""; + context.lookup.base_address = info.addr; break; } } else return; @@ -440,10 +487,10 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); if (note_type != elf.NT_GNU_BUILD_ID) continue; if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.build_id = note_bytes[16..][0..desc_size]; + context.lookup.build_id = note_bytes[16..][0..desc_size]; }, 
elf.PT_GNU_EH_FRAME => { - context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + context.lookup.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; }, else => {}, } @@ -452,38 +499,36 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { // Stop the iteration return error.Found; } - }.callback)) { - return error.MissingDebugInfo; - } else |err| switch (err) { - error.Found => {}, - } + }.callback) catch |err| switch (err) { + error.Found => return ctx.lookup, + }; + if (true) return error.MissingDebugInfo; - if (self.address_map.get(ctx.base_address)) |obj_di| { + if (self.address_map.get(ctx.lookup.base_address)) |obj_di| { return obj_di; } - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (ctx.gnu_eh_frame) |eh_frame_hdr| { + var sections: Dwarf.SectionArray = @splat(null); + if (ctx.lookup.gnu_eh_frame) |eh_frame_hdr| { // This is a special case - pointer offsets inside .eh_frame_hdr // are encoded relative to its base address, so we must use the // version that is already memory mapped, and not the one that // will be mapped separately from the ELF file. 
- sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ + sections[@intFromEnum(Dwarf.Unwind.Section.Id.eh_frame_hdr)] = .{ .data = eh_frame_hdr, .owned = false, }; } - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); - obj_di.base_address = ctx.base_address; + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.lookup.name.len > 0) ctx.lookup.name else null, ctx.lookup.build_id, §ions); + obj_di.base_address = ctx.lookup.base_address; // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding - obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.lookup.base_address) catch {}; - try self.address_map.putNoClobber(ctx.base_address, obj_di); + try self.address_map.putNoClobber(self.allocator, ctx.lookup.base_address, obj_di); return obj_di; } @@ -625,49 +670,47 @@ pub const Module = switch (native_os) { } pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - nosuspend { - const result = try self.getOFileInfoForAddress(allocator, address); - if (result.symbol == null) return .{}; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .name = stab_symbol }; - - // Translate again the address, this time into an address inside the - // .o file - const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .name = "???", - }; + const result = try self.getOFileInfoForAddress(allocator, address); + if (result.symbol == null) return .{}; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if 
we fail to find the DWARF infos + const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); + if (result.o_file_info == null) return .{ .name = stab_symbol }; + + // Translate again the address, this time into an address inside the + // .o file + const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ + .name = "???", + }; - const addr_off = result.relocated_address - result.symbol.?.addr; - const o_file_di = &result.o_file_info.?.di; - if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return .{ - .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString( - o_file_di, - std.dwarf.AT.name, - o_file_di.section(.debug_str), - compile_unit.*, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .source_location = o_file_di.getLineNumberInfo( - allocator, - compile_unit, - relocated_address_o + addr_off, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return .{ .name = stab_symbol }; + const addr_off = result.relocated_address - result.symbol.?.addr; + const o_file_di = &result.o_file_info.?.di; + if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { + return .{ + .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString( + o_file_di, + std.dwarf.AT.name, + o_file_di.section(.debug_str), + compile_unit.*, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, - else => return err, - } + .source_location = o_file_di.getLineNumberInfo( + allocator, + compile_unit, + relocated_address_o + addr_off, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } 
else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return .{ .name = stab_symbol }; + }, + else => return err, } } @@ -676,35 +719,33 @@ pub const Module = switch (native_os) { symbol: ?*const MachoSymbol = null, o_file_info: ?*OFileInfo = null, } { - nosuspend { - // Translate the VA into an address into this object - const relocated_address = address - self.vmaddr_slide; + // Translate the VA into an address into this object + const relocated_address = address - self.vmaddr_slide; - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ - .relocated_address = relocated_address, - }; + // Find the .o file where this symbol is defined + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ + .relocated_address = relocated_address, + }; - // Check if its debug infos are already in the cache - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - const o_file_info = self.ofiles.getPtr(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return .{ - .relocated_address = relocated_address, - .symbol = symbol, - }, - else => return err, - }); + // Check if its debug infos are already in the cache + const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); + const o_file_info = self.ofiles.getPtr(o_file_path) orelse + (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return .{ + .relocated_address = relocated_address, + .symbol = symbol, + }, + else => return err, + }); - return .{ - .relocated_address = relocated_address, - .symbol = symbol, - .o_file_info = o_file_info, - }; - } + return .{ + .relocated_address = relocated_address, + .symbol = symbol, + .o_file_info = o_file_info, + }; } pub fn 
getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { @@ -974,83 +1015,68 @@ fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { }; } -fn readCoffDebugInfo(gpa: Allocator, coff_obj: *coff.Coff) !Module { - nosuspend { - var di: Module = .{ - .base_address = undefined, - .coff_image_base = coff_obj.getImageBase(), - .coff_section_headers = undefined, - .pdb = null, - .dwarf = null, - }; +fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { + var di: Module = .{ + .base_address = undefined, + .coff_image_base = coff_obj.getImageBase(), + .coff_section_headers = undefined, + }; - if (coff_obj.getSectionByName(".debug_info")) |_| { - // This coff file has embedded DWARF debug info - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - errdefer for (sections) |section| if (section) |s| if (s.owned) gpa.free(s.data); - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, gpa), - .virtual_address = section_header.virtual_address, - .owned = true, - }; - } else null; - } + if (coff_obj.getSectionByName(".debug_info")) |_| { + // This coff file has embedded DWARF debug info + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - var dwarf: Dwarf = .{ - .endian = native_endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&dwarf, gpa); - di.dwarf = dwarf; + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, allocator), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; } - const raw_path = try coff_obj.getPdbPath() orelse return di; - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(gpa); - defer gpa.free(self_dir); - break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) gpa.free(path); - - const pdb_file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound, error.IsDir => { - if (di.dwarf == null) return error.MissingDebugInfo; - return di; - }, - else => |e| return e, + var dwarf: Dwarf = .{ + .endian = native_endian, + .sections = sections, + .is_macho = false, }; - errdefer pdb_file.close(); - - const pdb_file_reader_buffer = try gpa.alloc(u8, 4096); - errdefer gpa.free(pdb_file_reader_buffer); - const pdb_file_reader = try gpa.create(File.Reader); - errdefer gpa.destroy(pdb_file_reader); + try Dwarf.open(&dwarf, allocator); + di.dwarf = dwarf; + } - pdb_file_reader.* = pdb_file.reader(pdb_file_reader_buffer); + const raw_path = try coff_obj.getPdbPath() orelse return di; + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(allocator); + defer allocator.free(self_dir); + break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) allocator.free(path); - di.pdb = try Pdb.init(gpa, pdb_file_reader); - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); + di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => { + if (di.dwarf == null) return error.MissingDebugInfo; + return di; + }, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try 
di.pdb.?.parseDbiStream(); - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; - // Only used by the pdb path - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); - errdefer gpa.free(di.coff_section_headers); + // Only used by the pdb path + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); + errdefer allocator.free(di.coff_section_headers); - return di; - } + return di; } /// Reads debug info from an ELF file, or the current binary if none in specified. @@ -1058,32 +1084,29 @@ fn readCoffDebugInfo(gpa: Allocator, coff_obj: *coff.Coff) !Module { /// then this this function will recurse to attempt to load the debug sections from /// an external file. pub fn readElfDebugInfo( + em: *Dwarf.ElfModule, allocator: Allocator, elf_filename: ?[]const u8, build_id: ?[]const u8, - expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, - parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, -) !Dwarf.ElfModule { - nosuspend { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; +) !void { + const elf_file = (if (elf_filename) |filename| blk: { + break :blk fs.cwd().openFile(filename, .{}); + } else fs.openSelfExe(.{})) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; - const mapped_mem = try mapWholeFile(elf_file); - return Dwarf.ElfModule.load( - allocator, - mapped_mem, - build_id, - expected_crc, - parent_sections, - parent_mapped_mem, - elf_filename, - ); - } + const mapped_mem = try mapWholeFile(elf_file); + return em.load( + allocator, + mapped_mem, + build_id, + null, + parent_sections, + null, + 
elf_filename, + ); } const MachoSymbol = struct { @@ -1106,22 +1129,20 @@ const MachoSymbol = struct { /// Takes ownership of file, even on error. /// TODO it's weird to take ownership even on error, rework this code. fn mapWholeFile(file: File) ![]align(std.heap.page_size_min) const u8 { - nosuspend { - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); - const mapped_mem = try posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - errdefer posix.munmap(mapped_mem); + defer file.close(); + + const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); + const mapped_mem = try posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); + errdefer posix.munmap(mapped_mem); - return mapped_mem; - } + return mapped_mem; } fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { @@ -1172,7 +1193,7 @@ test machoSearchSymbols { /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -pub fn unwindFrameMachO( +fn unwindFrameMachO( allocator: Allocator, base_address: usize, context: *UnwindContext, @@ -1562,9 +1583,9 @@ pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { /// /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. 
-pub fn unwindFrameDwarf( +fn unwindFrameDwarf( allocator: Allocator, - di: *Dwarf, + unwind: *Dwarf.Unwind, base_address: usize, context: *UnwindContext, explicit_fde_offset: ?usize, @@ -1572,37 +1593,34 @@ pub fn unwindFrameDwarf( if (!supports_unwinding) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; - const endian = di.endian; - // Find the FDE and CIE const cie, const fde = if (explicit_fde_offset) |fde_offset| blk: { - const dwarf_section: Dwarf.Section.Id = .eh_frame; - const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; + const frame_section = unwind.section(.eh_frame) orelse return error.MissingFDE; if (fde_offset >= frame_section.len) return error.MissingFDE; var fbr: std.Io.Reader = .fixed(frame_section); fbr.seek = fde_offset; - const fde_entry_header = try Dwarf.EntryHeader.read(&fbr, dwarf_section, endian); + const fde_entry_header = try Dwarf.Unwind.EntryHeader.read(&fbr, .eh_frame, native_endian); if (fde_entry_header.type != .fde) return error.MissingFDE; const cie_offset = fde_entry_header.type.fde; fbr.seek = @intCast(cie_offset); - const cie_entry_header = try Dwarf.EntryHeader.read(&fbr, dwarf_section, endian); + const cie_entry_header = try Dwarf.Unwind.EntryHeader.read(&fbr, .eh_frame, native_endian); if (cie_entry_header.type != .cie) return Dwarf.bad(); - const cie = try Dwarf.CommonInformationEntry.parse( + const cie = try Dwarf.Unwind.CommonInformationEntry.parse( cie_entry_header.entry_bytes, 0, true, cie_entry_header.format, - dwarf_section, + .eh_frame, cie_entry_header.length_offset, @sizeOf(usize), native_endian, ); - const fde = try Dwarf.FrameDescriptionEntry.parse( + const fde = try Dwarf.Unwind.FrameDescriptionEntry.parse( fde_entry_header.entry_bytes, 0, true, @@ -1616,33 +1634,33 @@ pub fn unwindFrameDwarf( // `.eh_frame_hdr` may be incomplete. 
We'll try it first, but if the lookup fails, we fall // back to loading `.eh_frame`/`.debug_frame` and using those from that point on. - if (di.eh_frame_hdr) |header| hdr: { - const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else { - try di.scanCieFdeInfo(allocator, base_address); - di.eh_frame_hdr = null; + if (unwind.eh_frame_hdr) |header| hdr: { + const eh_frame_len = if (unwind.section(.eh_frame)) |eh_frame| eh_frame.len else { + try unwind.scanCieFdeInfo(allocator, native_endian, base_address); + unwind.eh_frame_hdr = null; break :hdr; }; - var cie: Dwarf.CommonInformationEntry = undefined; - var fde: Dwarf.FrameDescriptionEntry = undefined; + var cie: Dwarf.Unwind.CommonInformationEntry = undefined; + var fde: Dwarf.Unwind.FrameDescriptionEntry = undefined; header.findEntry( eh_frame_len, - @intFromPtr(di.section(.eh_frame_hdr).?.ptr), + @intFromPtr(unwind.section(.eh_frame_hdr).?.ptr), context.pc, &cie, &fde, - endian, + native_endian, ) catch |err| switch (err) { error.MissingDebugInfo => { // `.eh_frame_hdr` appears to be incomplete, so go ahead and populate `cie_map` // and `fde_list`, and fall back to the binary search logic below. - try di.scanCieFdeInfo(allocator, base_address); + try unwind.scanCieFdeInfo(allocator, native_endian, base_address); // Since `.eh_frame_hdr` is incomplete, we're very likely to get more lookup // failures using it, and we've just built a complete, sorted list of FDEs // anyway, so just stop using `.eh_frame_hdr` altogether. 
- di.eh_frame_hdr = null; + unwind.eh_frame_hdr = null; break :hdr; }, @@ -1652,8 +1670,8 @@ pub fn unwindFrameDwarf( break :blk .{ cie, fde }; } - const index = std.sort.binarySearch(Dwarf.FrameDescriptionEntry, di.fde_list.items, context.pc, struct { - pub fn compareFn(pc: usize, item: Dwarf.FrameDescriptionEntry) std.math.Order { + const index = std.sort.binarySearch(Dwarf.Unwind.FrameDescriptionEntry, unwind.fde_list.items, context.pc, struct { + pub fn compareFn(pc: usize, item: Dwarf.Unwind.FrameDescriptionEntry) std.math.Order { if (pc < item.pc_begin) return .lt; const range_end = item.pc_begin + item.pc_range; @@ -1663,15 +1681,16 @@ pub fn unwindFrameDwarf( } }.compareFn); - const fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; - const cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; + const fde = if (index) |i| unwind.fde_list.items[i] else return error.MissingFDE; + const cie = unwind.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; break :blk .{ cie, fde }; }; + // Do not set `compile_unit` because the spec states that CFIs + // may not reference other debug sections anyway. 
var expression_context: Dwarf.expression.Context = .{ .format = cie.format, - .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, .thread_context = context.thread_context, .reg_context = context.reg_context, .cfa = context.cfa, @@ -1679,7 +1698,7 @@ pub fn unwindFrameDwarf( context.vm.reset(); context.reg_context.eh_frame = cie.version != 4; - context.reg_context.is_macho = di.is_macho; + context.reg_context.is_macho = native_os.isDarwin(); const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); context.cfa = switch (row.cfa.rule) { @@ -2007,8 +2026,8 @@ pub const VirtualMachine = struct { self: *VirtualMachine, allocator: std.mem.Allocator, pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, + cie: std.debug.Dwarf.Unwind.CommonInformationEntry, + fde: std.debug.Dwarf.Unwind.FrameDescriptionEntry, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Row { @@ -2036,8 +2055,8 @@ pub const VirtualMachine = struct { self: *VirtualMachine, allocator: std.mem.Allocator, pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, + cie: std.debug.Dwarf.Unwind.CommonInformationEntry, + fde: std.debug.Dwarf.Unwind.FrameDescriptionEntry, ) !Row { return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian); } @@ -2059,7 +2078,7 @@ pub const VirtualMachine = struct { pub fn step( self: *VirtualMachine, allocator: std.mem.Allocator, - cie: std.debug.Dwarf.CommonInformationEntry, + cie: std.debug.Dwarf.Unwind.CommonInformationEntry, is_initial: bool, instruction: Dwarf.call_frame.Instruction, ) !Row { From b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 1 Sep 2025 16:50:39 +0100 Subject: [PATCH 02/85] change one million things --- lib/std/coff.zig | 19 +- lib/std/debug.zig | 133 +- lib/std/debug/Dwarf.zig | 83 +- lib/std/debug/Dwarf/Unwind.zig | 1002 +++--- lib/std/debug/Dwarf/Unwind/VirtualMachine.zig 
| 298 ++ lib/std/debug/Dwarf/call_frame.zig | 36 +- lib/std/debug/SelfInfo.zig | 2885 +++++++---------- lib/std/dwarf/EH.zig | 51 +- lib/std/macho.zig | 118 +- src/link/Elf/eh_frame.zig | 59 +- 10 files changed, 2140 insertions(+), 2544 deletions(-) create mode 100644 lib/std/debug/Dwarf/Unwind/VirtualMachine.zig diff --git a/lib/std/coff.zig b/lib/std/coff.zig index c0929020ec87..cb4112339d90 100644 --- a/lib/std/coff.zig +++ b/lib/std/coff.zig @@ -1083,26 +1083,27 @@ pub const Coff = struct { age: u32 = undefined, // The lifetime of `data` must be longer than the lifetime of the returned Coff - pub fn init(data: []const u8, is_loaded: bool) !Coff { + pub fn init(data: []const u8, is_loaded: bool) error{ EndOfStream, MissingPEHeader }!Coff { const pe_pointer_offset = 0x3C; const pe_magic = "PE\x00\x00"; - var reader: std.Io.Reader = .fixed(data); - reader.seek = pe_pointer_offset; - const coff_header_offset = try reader.takeInt(u32, .little); - reader.seek = coff_header_offset; - const is_image = mem.eql(u8, pe_magic, try reader.takeArray(4)); + if (data.len < pe_pointer_offset + 4) return error.EndOfStream; + const header_offset = mem.readInt(u32, data[pe_pointer_offset..][0..4], .little); + if (data.len < header_offset + 4) return error.EndOfStream; + const is_image = mem.eql(u8, data[header_offset..][0..4], pe_magic); - var coff = @This(){ + const coff: Coff = .{ .data = data, .is_image = is_image, .is_loaded = is_loaded, - .coff_header_offset = coff_header_offset, + .coff_header_offset = o: { + if (is_image) break :o header_offset + 4; + break :o header_offset; + }, }; // Do some basic validation upfront if (is_image) { - coff.coff_header_offset = coff.coff_header_offset + 4; const coff_header = coff.getCoffHeader(); if (coff_header.size_of_optional_header == 0) return error.MissingPEHeader; } diff --git a/lib/std/debug.zig b/lib/std/debug.zig index d5f7791fc210..06fa327d2ef1 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -153,6 +153,7 @@ pub const 
SourceLocation = struct { }; pub const Symbol = struct { + // MLUGG TODO: remove the defaults and audit everywhere. also grep for '???' across std name: []const u8 = "???", compile_unit_name: []const u8 = "???", source_location: ?SourceLocation = null, @@ -232,15 +233,14 @@ pub fn print(comptime fmt: []const u8, args: anytype) void { } /// TODO multithreaded awareness -var self_debug_info: ?SelfInfo = null; - -pub fn getSelfDebugInfo() !*SelfInfo { - if (self_debug_info) |*info| { - return info; - } else { - self_debug_info = try SelfInfo.open(getDebugInfoAllocator()); - return &self_debug_info.?; - } +/// Marked `inline` to propagate a comptime-known error to callers. +pub inline fn getSelfDebugInfo() !*SelfInfo { + if (builtin.strip_debug_info) return error.MissingDebugInfo; + if (!SelfInfo.target_supported) return error.UnsupportedOperatingSystem; + const S = struct { + var self_info: SelfInfo = .init; + }; + return &S.self_info; } /// Tries to print a hexadecimal view of the bytes, unbuffered, and ignores any error returned. @@ -446,10 +446,7 @@ pub fn dumpStackTraceFromBase(context: *ThreadContext, stderr: *Writer) void { defer it.deinit(); // DWARF unwinding on aarch64-macos is not complete so we need to get pc address from mcontext - const pc_addr = if (builtin.target.os.tag.isDarwin() and native_arch == .aarch64) - context.mcontext.ss.pc - else - it.unwind_state.?.dwarf_context.pc; + const pc_addr = it.unwind_state.?.dwarf_context.pc; printSourceAtAddress(debug_info, stderr, pc_addr, tty_config) catch return; while (it.next()) |return_address| { @@ -460,7 +457,7 @@ pub fn dumpStackTraceFromBase(context: *ThreadContext, stderr: *Writer) void { // an overflow. We do not need to signal `StackIterator` as it will correctly detect this // condition on the subsequent iteration and return `null` thus terminating the loop. 
// same behaviour for x86-windows-msvc - const address = if (return_address == 0) return_address else return_address - 1; + const address = return_address -| 1; printSourceAtAddress(debug_info, stderr, address, tty_config) catch return; } else printLastUnwindError(&it, debug_info, stderr, tty_config); } @@ -758,7 +755,7 @@ pub fn writeStackTrace( frame_index = (frame_index + 1) % stack_trace.instruction_addresses.len; }) { const return_address = stack_trace.instruction_addresses[frame_index]; - try printSourceAtAddress(debug_info, writer, return_address - 1, tty_config); + try printSourceAtAddress(debug_info, writer, return_address -| 1, tty_config); } if (stack_trace.index > stack_trace.instruction_addresses.len) { @@ -808,16 +805,11 @@ pub const StackIterator = struct { } pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *posix.ucontext_t, fp: usize) !StackIterator { - // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that - // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. 
- if (builtin.target.os.tag.isDarwin() and native_arch == .aarch64) - return init(first_address, @truncate(context.mcontext.ss.fp)); - if (SelfInfo.supports_unwinding) { var iterator = init(first_address, fp); iterator.unwind_state = .{ .debug_info = debug_info, - .dwarf_context = try SelfInfo.UnwindContext.init(debug_info.allocator, context), + .dwarf_context = try SelfInfo.UnwindContext.init(getDebugInfoAllocator(), context), }; return iterator; } @@ -890,7 +882,7 @@ pub const StackIterator = struct { if (!unwind_state.failed) { if (unwind_state.dwarf_context.pc == 0) return null; defer it.fp = unwind_state.dwarf_context.getFp() catch 0; - if (unwind_state.debug_info.unwindFrame(&unwind_state.dwarf_context)) |return_address| { + if (unwind_state.debug_info.unwindFrame(getDebugInfoAllocator(), &unwind_state.dwarf_context)) |return_address| { return return_address; } else |err| { unwind_state.last_error = err; @@ -1039,19 +1031,6 @@ pub fn writeStackTraceWindows( } } -fn printUnknownSource(debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) !void { - const module_name = debug_info.getModuleNameForAddress(address); - return printLineInfo( - writer, - null, - address, - "???", - module_name orelse "???", - tty_config, - printLineFromFileAnyOs, - ); -} - fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, writer: *Writer, tty_config: tty.Config) void { if (!have_ucontext) return; if (it.getLastError()) |unwind_error| { @@ -1059,32 +1038,48 @@ fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, writer: *Writ } } -fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, err: UnwindError, tty_config: tty.Config) !void { - const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; +fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwind_err: UnwindError, tty_config: tty.Config) !void { + const module_name = 
debug_info.getModuleNameForAddress(getDebugInfoAllocator(), address) catch |err| switch (err) { + error.Unexpected, error.OutOfMemory => |e| return e, + error.MissingDebugInfo => "???", + }; try tty_config.setColor(writer, .dim); - if (err == error.MissingDebugInfo) { + if (unwind_err == error.MissingDebugInfo) { try writer.print("Unwind information for `{s}:0x{x}` was not available, trace may be incomplete\n\n", .{ module_name, address }); } else { - try writer.print("Unwind error at address `{s}:0x{x}` ({}), trace may be incomplete\n\n", .{ module_name, address, err }); + try writer.print("Unwind error at address `{s}:0x{x}` ({}), trace may be incomplete\n\n", .{ module_name, address, unwind_err }); } try tty_config.setColor(writer, .reset); } pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) !void { - const symbol_info = debug_info.getSymbolAtAddress(address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, writer, address, tty_config), - else => return err, + const gpa = getDebugInfoAllocator(); + if (debug_info.getSymbolAtAddress(gpa, address)) |symbol_info| { + defer if (symbol_info.source_location) |sl| gpa.free(sl.file_name); + return printLineInfo( + writer, + symbol_info.source_location, + address, + symbol_info.name, + symbol_info.compile_unit_name, + tty_config, + ); + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => {}, + else => |e| return e, + } + // Unknown source location, but perhaps we can at least get a module name + const compile_unit_name = debug_info.getModuleNameForAddress(getDebugInfoAllocator(), address) catch |err| switch (err) { + error.MissingDebugInfo => "???", + error.Unexpected, error.OutOfMemory => |e| return e, }; - defer if (symbol_info.source_location) |sl| debug_info.allocator.free(sl.file_name); - return printLineInfo( writer, - symbol_info.source_location, + null, address, 
- symbol_info.name, - symbol_info.compile_unit_name, + "???", + compile_unit_name, tty_config, - printLineFromFileAnyOs, ); } @@ -1095,7 +1090,6 @@ fn printLineInfo( symbol_name: []const u8, compile_unit_name: []const u8, tty_config: tty.Config, - comptime printLineFromFile: anytype, ) !void { nosuspend { try tty_config.setColor(writer, .bold); @@ -1136,7 +1130,7 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(writer: *Writer, source_location: SourceLocation) !void { +fn printLineFromFile(writer: *Writer, source_location: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. var f = try fs.cwd().openFile(source_location.file_name, .{}); @@ -1190,7 +1184,7 @@ fn printLineFromFileAnyOs(writer: *Writer, source_location: SourceLocation) !voi } } -test printLineFromFileAnyOs { +test printLineFromFile { var aw: Writer.Allocating = .init(std.testing.allocator); defer aw.deinit(); const output_stream = &aw.writer; @@ -1212,9 +1206,9 @@ test printLineFromFileAnyOs { defer allocator.free(path); try test_dir.dir.writeFile(.{ .sub_path = "one_line.zig", .data = "no new lines in this file, but one is printed anyway" }); - try expectError(error.EndOfFile, printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 2, .column = 0 })); + try expectError(error.EndOfFile, printLineFromFile(output_stream, .{ .file_name = path, .line = 2, .column = 0 })); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); try expectEqualStrings("no new lines in this file, but one is printed anyway\n", aw.written()); aw.clearRetainingCapacity(); } @@ -1230,11 +1224,11 @@ test printLineFromFileAnyOs { , }); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, 
.line = 1, .column = 0 }); try expectEqualStrings("1\n", aw.written()); aw.clearRetainingCapacity(); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 3, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = 3, .column = 0 }); try expectEqualStrings("3\n", aw.written()); aw.clearRetainingCapacity(); } @@ -1253,7 +1247,7 @@ test printLineFromFileAnyOs { try writer.splatByteAll('a', overlap); try writer.flush(); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 2, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = 2, .column = 0 }); try expectEqualStrings(("a" ** overlap) ++ "\n", aw.written()); aw.clearRetainingCapacity(); } @@ -1267,7 +1261,7 @@ test printLineFromFileAnyOs { const writer = &file_writer.interface; try writer.splatByteAll('a', std.heap.page_size_max); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); try expectEqualStrings(("a" ** std.heap.page_size_max) ++ "\n", aw.written()); aw.clearRetainingCapacity(); } @@ -1281,19 +1275,19 @@ test printLineFromFileAnyOs { const writer = &file_writer.interface; try writer.splatByteAll('a', 3 * std.heap.page_size_max); - try expectError(error.EndOfFile, printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 2, .column = 0 })); + try expectError(error.EndOfFile, printLineFromFile(output_stream, .{ .file_name = path, .line = 2, .column = 0 })); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); try expectEqualStrings(("a" ** (3 * std.heap.page_size_max)) ++ "\n", aw.written()); aw.clearRetainingCapacity(); try writer.writeAll("a\na"); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); + try 
printLineFromFile(output_stream, .{ .file_name = path, .line = 1, .column = 0 }); try expectEqualStrings(("a" ** (3 * std.heap.page_size_max)) ++ "a\n", aw.written()); aw.clearRetainingCapacity(); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = 2, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = 2, .column = 0 }); try expectEqualStrings("a\n", aw.written()); aw.clearRetainingCapacity(); } @@ -1309,26 +1303,23 @@ test printLineFromFileAnyOs { try writer.splatByteAll('\n', real_file_start); try writer.writeAll("abc\ndef"); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = real_file_start + 1, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = real_file_start + 1, .column = 0 }); try expectEqualStrings("abc\n", aw.written()); aw.clearRetainingCapacity(); - try printLineFromFileAnyOs(output_stream, .{ .file_name = path, .line = real_file_start + 2, .column = 0 }); + try printLineFromFile(output_stream, .{ .file_name = path, .line = real_file_start + 2, .column = 0 }); try expectEqualStrings("def\n", aw.written()); aw.clearRetainingCapacity(); } } /// TODO multithreaded awareness -var debug_info_allocator: ?mem.Allocator = null; -var debug_info_arena_allocator: std.heap.ArenaAllocator = undefined; +var debug_info_arena: ?std.heap.ArenaAllocator = null; fn getDebugInfoAllocator() mem.Allocator { - if (debug_info_allocator) |a| return a; - - debug_info_arena_allocator = std.heap.ArenaAllocator.init(std.heap.page_allocator); - const allocator = debug_info_arena_allocator.allocator(); - debug_info_allocator = allocator; - return allocator; + if (debug_info_arena == null) { + debug_info_arena = .init(std.heap.page_allocator); + } + return debug_info_arena.?.allocator(); } /// Whether or not the current target can print useful debug information when a segfault occurs. 
diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index e0d74172da66..8d1087b6caed 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -78,17 +78,6 @@ pub const Section = struct { debug_addr, debug_names, }; - - // For sections that are not memory mapped by the loader, this is an offset - // from `data.ptr` to where the section would have been mapped. Otherwise, - // `data` is directly backed by the section and the offset is zero. - pub fn virtualOffset(self: Section, base_address: usize) i64 { - return if (self.virtual_address) |va| - @as(i64, @intCast(base_address + va)) - - @as(i64, @intCast(@intFromPtr(self.data.ptr))) - else - 0; - } }; pub const Abbrev = struct { @@ -342,10 +331,6 @@ pub fn section(di: Dwarf, dwarf_section: Section.Id) ?[]const u8 { return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; } -pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: usize) ?i64 { - return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; -} - pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { if (opt_section) |s| if (s.owned) gpa.free(s.data); @@ -364,8 +349,6 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } di.compile_unit_list.deinit(gpa); di.func_list.deinit(gpa); - di.cie_map.deinit(gpa); - di.fde_list.deinit(gpa); di.ranges.deinit(gpa); di.* = undefined; } @@ -983,8 +966,8 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: }, 0, }; - _ = addr_size; - _ = seg_size; + if (seg_size != 0) return bad(); // unsupported + _ = addr_size; // TODO: ignoring this is incorrect, we should use it to decide address lengths const prologue_length = try readAddress(&fr, unit_header.format, endian); const prog_start_offset = fr.seek + prologue_length; @@ -1472,44 +1455,27 @@ pub const ElfModule = struct { mapped_memory: ?[]align(std.heap.page_size_min) const u8, external_mapped_memory: 
?[]align(std.heap.page_size_min) const u8, - pub const Lookup = struct { - base_address: usize, - name: []const u8, - build_id: ?[]const u8, - gnu_eh_frame: ?[]const u8, + pub const init: ElfModule = .{ + .unwind = .{ + .debug_frame = null, + .eh_frame = null, + }, + .dwarf = .{}, + .mapped_memory = null, + .external_mapped_memory = null, }; - pub fn init(lookup: *const Lookup) ElfModule { - var em: ElfModule = .{ - .unwind = .{ - .sections = @splat(null), - }, - .dwarf = .{}, - .mapped_memory = null, - .external_mapped_memory = null, - }; - if (lookup.gnu_eh_frame) |eh_frame_hdr| { - // This is a special case - pointer offsets inside .eh_frame_hdr - // are encoded relative to its base address, so we must use the - // version that is already memory mapped, and not the one that - // will be mapped separately from the ELF file. - em.unwind.sections[@intFromEnum(Dwarf.Unwind.Section.Id.eh_frame_hdr)] = .{ - .data = eh_frame_hdr, - }; - } - return em; - } - pub fn deinit(self: *@This(), allocator: Allocator) void { self.dwarf.deinit(allocator); std.posix.munmap(self.mapped_memory); if (self.external_mapped_memory) |m| std.posix.munmap(m); } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, endian: Endian, base_address: usize, address: usize) !std.debug.Symbol { - // Translate the VA into an address into this object - const relocated_address = address - base_address; - return self.dwarf.getSymbol(allocator, endian, relocated_address); + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, endian: Endian, load_offset: usize, address: usize) !std.debug.Symbol { + // Translate the runtime address into a virtual address into the module + // MLUGG TODO: this clearly tells us that the logic should live near SelfInfo... 
+ const vaddr = address - load_offset; + return self.dwarf.getSymbol(allocator, endian, vaddr); } pub fn getDwarfUnwindForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf.Unwind { @@ -1548,7 +1514,7 @@ pub const ElfModule = struct { mapped_mem: []align(std.heap.page_size_min) const u8, build_id: ?[]const u8, expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, + parent_sections: ?*Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, elf_filename: ?[]const u8, ) LoadError!void { @@ -1577,10 +1543,12 @@ pub const ElfModule = struct { var sections: Dwarf.SectionArray = @splat(null); // Combine section list. This takes ownership over any owned sections from the parent scope. - for (parent_sections, §ions) |*parent, *section_elem| { - if (parent.*) |*p| { - section_elem.* = p.*; - p.owned = false; + if (parent_sections) |ps| { + for (ps, §ions) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } } } errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); @@ -1647,7 +1615,6 @@ pub const ElfModule = struct { // Attempt to load debug info from an external file // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html if (missing_debug_info) { - // Only allow one level of debug info nesting if (parent_mapped_mem) |_| { return error.MissingDebugInfo; @@ -1775,6 +1742,7 @@ pub const ElfModule = struct { em.mapped_memory = parent_mapped_mem orelse mapped_mem; em.external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null; + em.dwarf.sections = sections; try em.dwarf.open(gpa, endian); } @@ -1844,7 +1812,8 @@ pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]cons return ptr[start..end]; } -pub fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { +fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { + // MLUGG TODO FIX BEFORE MERGE: this 
function is slightly bogus. addresses have a byte width which is independent of the `dwarf.Format`! return switch (format) { .@"32" => try r.takeInt(u32, endian), .@"64" => try r.takeInt(u64, endian), @@ -1852,6 +1821,8 @@ pub fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { } fn nativeFormat() std.dwarf.Format { + // MLUGG TODO FIX BEFORE MERGE: this is nonsensical. this is neither what `dwarf.Format` is for, nor does it make sense to check the NATIVE FUCKING FORMAT + // when parsing ARBITRARY DWARF. return switch (@sizeOf(usize)) { 4 => .@"32", 8 => .@"64", diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 1da318a04826..a51c417e7c97 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -1,632 +1,622 @@ -sections: SectionArray = @splat(null), +pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); -/// Starts out non-`null` if the `.eh_frame_hdr` section is present. May become `null` later if we -/// find that `.eh_frame_hdr` is incomplete. -eh_frame_hdr: ?ExceptionFrameHeader = null, -/// These lookup tables are only used if `eh_frame_hdr` is null -cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .empty, -/// Sorted by start_pc -fde_list: std.ArrayList(FrameDescriptionEntry) = .empty, - -pub const Section = struct { +/// The contents of the `.debug_frame` section as specified by DWARF. This might be a more reliable +/// stack unwind mechanism in some cases, or it may be present when `.eh_frame` is not, but fetching +/// the data requires loading the binary, so it is not a viable approach for fast stack trace +/// capturing within a process. +debug_frame: ?struct { data: []const u8, - - pub const Id = enum { - debug_frame, - eh_frame, - eh_frame_hdr, - }; + /// Offsets into `data` of FDEs, sorted by ascending `pc_begin`. 
+ sorted_fdes: []SortedFdeEntry, +}, + +/// Data associated with the `.eh_frame` and `.eh_frame_hdr` sections as defined by LSB Core. The +/// format of `.eh_frame` is an extension of that of DWARF's `.debug_frame` -- in fact it is almost +/// identical, though subtly different in a few places. +eh_frame: ?struct { + header: EhFrameHeader, + /// Though this is a slice, it may be longer than the `.eh_frame` section. When unwinding + /// through the runtime-loaded `.eh_frame_hdr` data, we are not told the size of the `.eh_frame` + /// section, so construct a slice referring to all of the rest of memory. The end of the section + /// must be detected through `EntryHeader.terminator`. + eh_frame_data: []const u8, + /// Offsets into `eh_frame_data` of FDEs, sorted by ascending `pc_begin`. + /// Populated only if `header` does not already contain a lookup table. + sorted_fdes: ?[]SortedFdeEntry, +}, + +const SortedFdeEntry = struct { + /// This FDE's value of `pc_begin`. + pc_begin: u64, + /// Offset into the section of the corresponding FDE, including the entry header. 
+ fde_offset: u64, }; -const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); -pub const SectionArray = [num_sections]?Section; - -pub fn section(unwind: Unwind, dwarf_section: Section.Id) ?[]const u8 { - return if (unwind.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; -} +const Section = enum { debug_frame, eh_frame }; /// This represents the decoded .eh_frame_hdr header -pub const ExceptionFrameHeader = struct { - eh_frame_ptr: usize, - table_enc: u8, - fde_count: usize, - entries: []const u8, - - pub fn entrySize(table_enc: u8) !u8 { - return switch (table_enc & EH.PE.type_mask) { - EH.PE.udata2, - EH.PE.sdata2, - => 4, - EH.PE.udata4, - EH.PE.sdata4, - => 8, - EH.PE.udata8, - EH.PE.sdata8, - => 16, - // This is a binary search table, so all entries must be the same length - else => return bad(), +pub const EhFrameHeader = struct { + vaddr: u64, + eh_frame_vaddr: u64, + search_table: ?struct { + /// The byte offset of the search table into the `.eh_frame_hdr` section. 
+ offset: u8, + encoding: EH.PE, + fde_count: usize, + entries: []const u8, + }, + + pub fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 { + return switch (table_enc.type) { + .absptr => 2 * addr_size_bytes, + .udata2, .sdata2 => 4, + .udata4, .sdata4 => 8, + .udata8, .sdata8 => 16, + .uleb128, .sleb128 => return bad(), // this is a binary search table; all entries must be the same size + _ => return bad(), }; } - pub fn findEntry( - self: ExceptionFrameHeader, - eh_frame_len: usize, - eh_frame_hdr_ptr: usize, - pc: usize, - cie: *CommonInformationEntry, - fde: *FrameDescriptionEntry, + pub fn parse( + eh_frame_hdr_vaddr: u64, + eh_frame_hdr_bytes: []const u8, + addr_size_bytes: u8, endian: Endian, - ) !void { - const entry_size = try entrySize(self.table_enc); + ) !EhFrameHeader { + var r: Reader = .fixed(eh_frame_hdr_bytes); - var left: usize = 0; - var len: usize = self.fde_count; - var fbr: Reader = .fixed(self.entries); + const version = try r.takeByte(); + if (version != 1) return bad(); - while (len > 1) { - const mid = left + len / 2; + const eh_frame_ptr_enc: EH.PE = @bitCast(try r.takeByte()); + const fde_count_enc: EH.PE = @bitCast(try r.takeByte()); + const table_enc: EH.PE = @bitCast(try r.takeByte()); - fbr.seek = mid * entry_size; - const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); + const eh_frame_ptr = try readEhPointer(&r, eh_frame_ptr_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + return .{ + .vaddr = eh_frame_hdr_vaddr, + .eh_frame_vaddr = eh_frame_ptr, + .search_table = table: { + if (fde_count_enc == EH.PE.omit) break :table null; + if (table_enc == EH.PE.omit) break :table null; + const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + 
const entry_size = try entrySize(table_enc, addr_size_bytes); + const bytes_offset = r.seek; + const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream; + const bytes = try r.take(bytes_len); + break :table .{ + .encoding = table_enc, + .fde_count = @intCast(fde_count), + .entries = bytes, + .offset = @intCast(bytes_offset), + }; + }, + }; + } + + /// Asserts that `eh_frame_hdr.search_table != null`. + fn findEntry( + eh_frame_hdr: *const EhFrameHeader, + pc: u64, + addr_size_bytes: u8, + endian: Endian, + ) !?u64 { + const table = &eh_frame_hdr.search_table.?; + const table_vaddr = eh_frame_hdr.vaddr + table.offset; + const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes); + var left: usize = 0; + var len: usize = table.fde_count; + while (len > 1) { + const mid = left + len / 2; + var entry_reader: Reader = .fixed(table.entries[mid * entry_size ..][0..entry_size]); + const pc_begin = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr.vaddr, + }, endian); if (pc < pc_begin) { len /= 2; } else { left = mid; - if (pc == pc_begin) break; len -= len / 2; } } - - if (len == 0) return missing(); - fbr.seek = left * entry_size; - - // Read past the pc_begin field of the entry - _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad()) orelse return bad(); - - if (fde_ptr < self.eh_frame_ptr) return bad(); - - const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0..eh_frame_len]; - - const fde_offset = fde_ptr - 
self.eh_frame_ptr; - var eh_frame_fbr: Reader = .fixed(eh_frame); - eh_frame_fbr.seek = fde_offset; - - const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (fde_entry_header.type != .fde) return bad(); - - // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable - const cie_offset = fde_entry_header.type.fde; - eh_frame_fbr.seek = @intCast(cie_offset); - const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (cie_entry_header.type != .cie) return bad(); - - cie.* = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), - endian, - ); - - fde.* = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie.*, - @sizeOf(usize), - endian, - ); - - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return missing(); + if (len == 0) return null; + var entry_reader: Reader = .fixed(table.entries[left * entry_size ..][0..entry_size]); + // Skip past `pc_begin`; we're now interested in the fde offset + _ = try readEhPointerAbs(&entry_reader, table.encoding.type, addr_size_bytes, endian); + const fde_ptr = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr.vaddr, + }, endian); + return std.math.sub(u64, fde_ptr, eh_frame_hdr.eh_frame_vaddr) catch bad(); // offset into .eh_frame } }; -pub const EntryHeader = struct { - /// Offset of the length field in the backing buffer - length_offset: usize, - format: Format, - type: union(enum) { - cie, - /// Value is the offset of the corresponding CIE - fde: u64, - terminator, +pub const EntryHeader = union(enum) { + cie: struct { + format: Format, + /// Remaining bytes in the CIE. These are parseable by `CommonInformationEntry.parse`. 
+ bytes_len: u64, + }, + fde: struct { + format: Format, + /// Offset into the section of the corresponding CIE, *including* its entry header. + cie_offset: u64, + /// Remaining bytes in the FDE. These are parseable by `FrameDescriptionEntry.parse`. + bytes_len: u64, }, - /// The entry's contents, not including the ID field - entry_bytes: []const u8, + /// The `.eh_frame` format includes terminators which indicate that the last CIE/FDE has been + /// reached. However, `.debug_frame` does not include such a terminator, so the caller must + /// keep track of how many section bytes remain when parsing all entries in `.debug_frame`. + terminator, - /// The length of the entry including the ID field, but not the length field itself - pub fn entryLength(self: EntryHeader) usize { - return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4); - } + pub fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader { + const unit_header = try Dwarf.readUnitHeader(r, endian); + if (unit_header.unit_length == 0) return .terminator; - /// Reads a header for either an FDE or a CIE, then advances the fbr to the - /// position after the trailing structure. - /// - /// `fbr` must be backed by either the .eh_frame or .debug_frame sections. - /// - /// TODO that's a bad API, don't do that. this function should neither require - /// a fixed reader nor depend on seeking. 
- pub fn read(fbr: *Reader, dwarf_section: Section.Id, endian: Endian) !EntryHeader { - assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); - - const length_offset = fbr.seek; - const unit_header = try Dwarf.readUnitHeader(fbr, endian); - const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); - if (unit_length == 0) return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = .terminator, - .entry_bytes = &.{}, - }; - const start_offset = fbr.seek; - const end_offset = start_offset + unit_length; - defer fbr.seek = end_offset; - - const id = try Dwarf.readAddress(fbr, unit_header.format, endian); - const entry_bytes = fbr.buffer[fbr.seek..end_offset]; - const cie_id: u64 = switch (dwarf_section) { - .eh_frame => CommonInformationEntry.eh_id, + // TODO MLUGG: seriously, just... check the formats of everything in BOTH LSB Core and DWARF. this is a fucking *mess*. maybe add spec references. + + // Next is a value which will disambiguate CIEs and FDEs. Annoyingly, LSB Core makes this + // value always 4-byte, whereas DWARF makes it depend on the `dwarf.Format`. 
+ const cie_ptr_or_id_size: u8 = switch (section) { + .eh_frame => 4, .debug_frame => switch (unit_header.format) { - .@"32" => CommonInformationEntry.dwarf32_id, - .@"64" => CommonInformationEntry.dwarf64_id, + .@"32" => 4, + .@"64" => 8, }, + }; + const cie_ptr_or_id = switch (cie_ptr_or_id_size) { + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), else => unreachable, }; + const remaining_bytes = unit_header.unit_length - cie_ptr_or_id_size; - return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { - .eh_frame => try std.math.sub(u64, start_offset, id), - .debug_frame => id, - else => unreachable, - } }, - .entry_bytes = entry_bytes, + // If this entry is a CIE, then `cie_ptr_or_id` will have this value, which is different + // between the DWARF `.debug_frame` section and the LSB Core `.eh_frame` section. + const cie_id: u64 = switch (section) { + .eh_frame => 0, + .debug_frame => switch (unit_header.format) { + .@"32" => maxInt(u32), + .@"64" => maxInt(u64), + }, }; + if (cie_ptr_or_id == cie_id) { + return .{ .cie = .{ + .format = unit_header.format, + .bytes_len = remaining_bytes, + } }; + } + + // This is an FDE -- `cie_ptr_or_id` points to the associated CIE. Unfortunately, the format + // of that pointer again differs between `.debug_frame` and `.eh_frame`. 
+ const cie_offset = switch (section) { + .eh_frame => try std.math.sub(u64, header_section_offset + unit_header.header_length, cie_ptr_or_id), + .debug_frame => cie_ptr_or_id, + }; + return .{ .fde = .{ + .format = unit_header.format, + .cie_offset = cie_offset, + .bytes_len = remaining_bytes, + } }; } }; pub const CommonInformationEntry = struct { - // Used in .eh_frame - pub const eh_id = 0; - - // Used in .debug_frame (DWARF32) - pub const dwarf32_id = maxInt(u32); - - // Used in .debug_frame (DWARF64) - pub const dwarf64_id = maxInt(u64); - - // Offset of the length field of this entry in the eh_frame section. - // This is the key that FDEs use to reference CIEs. - length_offset: u64, version: u8, - address_size: u8, - format: Format, - // Only present in version 4 - segment_selector_size: ?u8, + /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. + /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. + addr_size_bytes: u8, + + /// Always 0 for versions which do not specify this (currently all versions other than 4). 
+ segment_selector_size: u8, code_alignment_factor: u32, data_alignment_factor: i32, return_address_register: u8, - aug_str: []const u8, - aug_data: []const u8, - lsda_pointer_enc: u8, - personality_enc: ?u8, - personality_routine_pointer: ?u64, - fde_pointer_enc: u8, - initial_instructions: []const u8, + fde_pointer_enc: EH.PE, + is_signal_frame: bool, - pub fn isSignalFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'S') return true; - return false; - } + augmentation_kind: AugmentationKind, - pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'B') return true; - return false; - } + initial_instructions: []const u8, - pub fn mteTaggedFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'G') return true; - return false; - } + pub const AugmentationKind = enum { none, gcc_eh, lsb_z }; /// This function expects to read the CIE starting with the version field. - /// The returned struct references memory backed by cie_bytes. - /// - /// See the FrameDescriptionEntry.parse documentation for the description - /// of `pc_rel_offset` and `is_runtime`. + /// The returned struct references memory backed by `cie_bytes`. /// /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. pub fn parse( cie_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - format: Format, - dwarf_section: Section.Id, - length_offset: u64, - addr_size_bytes: u8, - endian: Endian, + section: Section, + default_addr_size_bytes: u8, ) !CommonInformationEntry { - if (addr_size_bytes > 8) return error.UnsupportedAddrSize; + // We only read the data through this reader. 
+ var r: Reader = .fixed(cie_bytes); - var fbr: Reader = .fixed(cie_bytes); - - const version = try fbr.takeByte(); - switch (dwarf_section) { + const version = try r.takeByte(); + switch (section) { .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, - else => return error.UnsupportedDwarfSection, } - var has_eh_data = false; - var has_aug_data = false; - - var aug_str_len: usize = 0; - const aug_str_start = fbr.seek; - var aug_byte = try fbr.takeByte(); - while (aug_byte != 0) : (aug_byte = try fbr.takeByte()) { - switch (aug_byte) { - 'z' => { - if (aug_str_len != 0) return bad(); - has_aug_data = true; - }, - 'e' => { - if (has_aug_data or aug_str_len != 0) return bad(); - if (try fbr.takeByte() != 'h') return bad(); - has_eh_data = true; - }, - else => if (has_eh_data) return bad(), - } - - aug_str_len += 1; - } + const aug_str = try r.takeSentinel(0); + const aug_kind: AugmentationKind = aug: { + if (aug_str.len == 0) break :aug .none; + if (aug_str[0] == 'z') break :aug .lsb_z; + if (std.mem.eql(u8, aug_str, "eh")) break :aug .gcc_eh; + // We can't finish parsing the CIE if we don't know what its augmentation means. 
+ return bad(); + }; - if (has_eh_data) { - // legacy data created by older versions of gcc - unsupported here - for (0..addr_size_bytes) |_| _ = try fbr.takeByte(); + switch (aug_kind) { + .none => {}, // no extra data + .lsb_z => {}, // no extra data yet, but there is a bit later + .gcc_eh => try r.discardAll(default_addr_size_bytes), // unsupported data } - const address_size = if (version == 4) try fbr.takeByte() else addr_size_bytes; - const segment_selector_size = if (version == 4) try fbr.takeByte() else null; - - const code_alignment_factor = try fbr.takeLeb128(u32); - const data_alignment_factor = try fbr.takeLeb128(i32); - const return_address_register = if (version == 1) try fbr.takeByte() else try fbr.takeLeb128(u8); - - var lsda_pointer_enc: u8 = EH.PE.omit; - var personality_enc: ?u8 = null; - var personality_routine_pointer: ?u64 = null; - var fde_pointer_enc: u8 = EH.PE.absptr; - - var aug_data: []const u8 = &[_]u8{}; - const aug_str = if (has_aug_data) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; - - const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; - for (aug_str[1..]) |byte| { - switch (byte) { - 'L' => { - lsda_pointer_enc = try fbr.takeByte(); - }, - 'P' => { - personality_enc = try fbr.takeByte(); - personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian); - }, - 'R' => { - fde_pointer_enc = try fbr.takeByte(); - }, - 'S', 'B', 'G' => {}, - else => return bad(), - } - } - - // aug_data_len can include padding so the CIE ends on an address boundary - fbr.seek = aug_data_start + aug_data_len; - break :blk aug_str; - } else &[_]u8{}; + const addr_size_bytes = if (version == 4) try r.takeByte() else default_addr_size_bytes; + const segment_selector_size: u8 = if (version 
== 4) try r.takeByte() else 0; + const code_alignment_factor = try r.takeLeb128(u32); + const data_alignment_factor = try r.takeLeb128(i32); + const return_address_register = if (version == 1) try r.takeByte() else try r.takeLeb128(u8); + + // This is where LSB's augmentation might add some data. + const fde_pointer_enc: EH.PE, const is_signal_frame: bool = aug: { + const default_fde_pointer_enc: EH.PE = .{ .type = .absptr, .rel = .abs }; + if (aug_kind != .lsb_z) break :aug .{ default_fde_pointer_enc, false }; + const aug_data_len = try r.takeLeb128(u32); + var aug_data: Reader = .fixed(try r.take(aug_data_len)); + var fde_pointer_enc: EH.PE = default_fde_pointer_enc; + var is_signal_frame = false; + for (aug_str[1..]) |byte| switch (byte) { + 'L' => _ = try aug_data.takeByte(), // we ignore the LSDA pointer + 'P' => { + const enc: EH.PE = @bitCast(try aug_data.takeByte()); + const endian: Endian = .little; // irrelevant because we're discarding the value anyway + _ = try readEhPointerAbs(&r, enc.type, addr_size_bytes, endian); // we ignore the personality routine; endianness is irrelevant since we're discarding + }, + 'R' => fde_pointer_enc = @bitCast(try aug_data.takeByte()), + 'S' => is_signal_frame = true, + 'B', 'G' => {}, + else => return bad(), + }; + break :aug .{ fde_pointer_enc, is_signal_frame }; + }; - const initial_instructions = cie_bytes[fbr.seek..]; return .{ - .length_offset = length_offset, .version = version, - .address_size = address_size, - .format = format, + .addr_size_bytes = addr_size_bytes, .segment_selector_size = segment_selector_size, .code_alignment_factor = code_alignment_factor, .data_alignment_factor = data_alignment_factor, .return_address_register = return_address_register, - .aug_str = aug_str, - .aug_data = aug_data, - .lsda_pointer_enc = lsda_pointer_enc, - .personality_enc = personality_enc, - .personality_routine_pointer = personality_routine_pointer, .fde_pointer_enc = fde_pointer_enc, - .initial_instructions = 
initial_instructions, + .is_signal_frame = is_signal_frame, + .augmentation_kind = aug_kind, + .initial_instructions = r.buffered(), }; } }; pub const FrameDescriptionEntry = struct { - // Offset into eh_frame where the CIE for this FDE is stored - cie_length_offset: u64, - pc_begin: u64, pc_range: u64, - lsda_pointer: ?u64, - aug_data: []const u8, instructions: []const u8, /// This function expects to read the FDE starting at the PC Begin field. /// The returned struct references memory backed by `fde_bytes`. - /// - /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values - /// used when decoding pointers. This should be set to zero if fde_bytes is - /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. - /// Otherwise, it should be the relative offset to translate addresses from - /// where the section is currently stored in memory, to where it *would* be - /// stored at runtime: section base addr - backing data base ptr. - /// - /// Similarly, `is_runtime` specifies this function is being called on a runtime - /// section, and so indirect pointers can be followed. pub fn parse( + /// The virtual address of the FDE we're parsing, *excluding* its entry header (i.e. the + /// address is after the header). If `fde_bytes` is backed by the memory of a loaded + /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. 
+ fde_vaddr: u64, fde_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, cie: CommonInformationEntry, - addr_size_bytes: u8, endian: Endian, ) !FrameDescriptionEntry { - if (addr_size_bytes > 8) return error.InvalidAddrSize; - - var fbr: Reader = .fixed(fde_bytes); - - const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) orelse return bad(); - - const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = 0, - .follow_indirect = false, - }, endian) orelse return bad(); - - var aug_data: []const u8 = &[_]u8{}; - const lsda_pointer = if (cie.aug_str.len > 0) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; - - const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) - try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) - else - null; - - fbr.seek = aug_data_start + aug_data_len; - break :blk lsda_pointer; - } else null; - - const instructions = fde_bytes[fbr.seek..]; + if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize; + + var r: Reader = .fixed(fde_bytes); + + const pc_begin = try readEhPointer(&r, cie.fde_pointer_enc, cie.addr_size_bytes, .{ + .pc_rel_base = fde_vaddr, + }, endian); + + // I swear I'm not kidding when I say that PC Range is encoded with `cie.fde_pointer_enc`, but ignoring `rel`. 
+ const pc_range = switch (try readEhPointerAbs(&r, cie.fde_pointer_enc.type, cie.addr_size_bytes, endian)) { + .unsigned => |x| x, + .signed => |x| cast(u64, x) orelse return bad(), + }; + + switch (cie.augmentation_kind) { + .none, .gcc_eh => {}, + .lsb_z => { + // There is augmentation data, but it's irrelevant to us -- it + // only contains the LSDA pointer, which we don't care about. + const aug_data_len = try r.takeLeb128(u64); + _ = try r.discardAll(aug_data_len); + }, + } + return .{ - .cie_length_offset = cie.length_offset, .pc_begin = pc_begin, .pc_range = pc_range, - .lsda_pointer = lsda_pointer, - .aug_data = aug_data, - .instructions = instructions, + .instructions = r.buffered(), }; } }; -/// If `.eh_frame_hdr` is present, then only the header needs to be parsed. Otherwise, `.eh_frame` -/// and `.debug_frame` are scanned and a sorted list of FDEs is built for binary searching during -/// unwinding. Even if `.eh_frame_hdr` is used, we may find during unwinding that it's incomplete, -/// in which case we build the sorted list of FDEs at that point. -/// -/// See also `scanCieFdeInfo`. 
-pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void {
-    const endian = di.endian;
-
-    if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: {
-        var fbr: Reader = .fixed(eh_frame_hdr);
-
-        const version = try fbr.takeByte();
-        if (version != 1) break :blk;
-
-        const eh_frame_ptr_enc = try fbr.takeByte();
-        if (eh_frame_ptr_enc == EH.PE.omit) break :blk;
-        const fde_count_enc = try fbr.takeByte();
-        if (fde_count_enc == EH.PE.omit) break :blk;
-        const table_enc = try fbr.takeByte();
-        if (table_enc == EH.PE.omit) break :blk;
-
-        const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{
-            .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]),
-            .follow_indirect = true,
-        }, endian) orelse return bad()) orelse return bad();
-
-        const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{
-            .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]),
-            .follow_indirect = true,
-        }, endian) orelse return bad()) orelse return bad();
-
-        const entry_size = try ExceptionFrameHeader.entrySize(table_enc);
-        const entries_len = fde_count * entry_size;
-        if (entries_len > eh_frame_hdr.len - fbr.seek) return bad();
-
-        di.eh_frame_hdr = .{
-            .eh_frame_ptr = eh_frame_ptr,
-            .table_enc = table_enc,
-            .fde_count = fde_count,
-            .entries = eh_frame_hdr[fbr.seek..][0..entries_len],
-        };
+pub fn scanDebugFrame(
+    unwind: *Unwind,
+    gpa: Allocator,
+    section_vaddr: u64,
+    section_bytes: []const u8,
+    addr_size_bytes: u8,
+    endian: Endian,
+) !void {
+    assert(unwind.debug_frame == null);
+
+    var fbr: Reader = .fixed(section_bytes);
+    var fde_list: std.ArrayList(SortedFdeEntry) = .empty;
+    defer fde_list.deinit(gpa);
+    while (fbr.seek < fbr.buffer.len) {
+        const entry_offset = fbr.seek;
+        switch (try EntryHeader.read(&fbr, fbr.seek, .debug_frame, endian)) {
+            // Ignore CIEs; we only need them to parse the FDEs!
+            .cie => |info| {
+                try fbr.discardAll(info.bytes_len);
+                continue;
+            },
+            .fde => |info| {
+                const cie: CommonInformationEntry = cie: {
+                    var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]);
+                    const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .debug_frame, endian)) {
+                        .cie => |cie_info| cie_info,
+                        .fde, .terminator => return bad(), // This is meant to be a CIE
+                    };
+                    break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .debug_frame, addr_size_bytes);
+                };
+                const fde: FrameDescriptionEntry = try .parse(
+                    section_vaddr + fbr.seek,
+                    try fbr.take(info.bytes_len),
+                    cie,
+                    endian,
+                );
+                try fde_list.append(gpa, .{
+                    .pc_begin = fde.pc_begin,
+                    .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header
+                });
+            },
+            .terminator => return bad(), // DWARF `.debug_frame` isn't meant to have terminators
+        }
+    }
+    const fde_slice = try fde_list.toOwnedSlice(gpa);
+    errdefer comptime unreachable;
+    std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct {
+        fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool {
+            ctx;
+            return a.pc_begin < b.pc_begin;
+        }
+    }.lessThan);
+    unwind.debug_frame = .{ .data = section_bytes, .sorted_fdes = fde_slice };
+}
+
+pub fn scanEhFrame(
+    unwind: *Unwind,
+    gpa: Allocator,
+    header: EhFrameHeader,
+    section_bytes_ptr: [*]const u8,
+    /// This is separate from `section_bytes_ptr` because it is unknown when `.eh_frame` is accessed
+    /// through the pointer in the `.eh_frame_hdr` section. If this is non-`null`, we avoid reading
+    /// past this number of bytes, but if `null`, we must assume that the `.eh_frame` data has a
+    /// valid terminator.
+    section_bytes_len: ?usize,
+    addr_size_bytes: u8,
+    endian: Endian,
+) !void {
+    assert(unwind.eh_frame == null);
+
+    const section_bytes: []const u8 = bytes: {
+        // If the length is unknown, let the slice span from `section_bytes_ptr` to the end of memory.
+ const len = section_bytes_len orelse (std.math.maxInt(usize) - @intFromPtr(section_bytes_ptr)); + break :bytes section_bytes_ptr[0..len]; + }; - // No need to scan .eh_frame, we have a binary search table already + if (header.search_table != null) { + // No need to populate `sorted_fdes`, the header contains a search table. + unwind.eh_frame = .{ + .header = header, + .eh_frame_data = section_bytes, + .sorted_fdes = null, + }; return; } - try di.scanCieFdeInfo(allocator, base_address); + // We aren't told the length of this section. Luckily, we don't need it, because there will be + // an `EntryHeader.terminator` after the last CIE/FDE. Just make a `Reader` which will give us + // alllll of the bytes! + var fbr: Reader = .fixed(section_bytes); + + var fde_list: std.ArrayList(SortedFdeEntry) = .empty; + defer fde_list.deinit(gpa); + + while (true) { + const entry_offset = fbr.seek; + switch (try EntryHeader.read(&fbr, fbr.seek, .eh_frame, endian)) { + // Ignore CIEs; we only need them to parse the FDEs! 
+ .cie => |info| { + try fbr.discardAll(info.bytes_len); + continue; + }, + .fde => |info| { + const cie: CommonInformationEntry = cie: { + var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .eh_frame, endian)) { + .cie => |cie_info| cie_info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .eh_frame, addr_size_bytes); + }; + const fde: FrameDescriptionEntry = try .parse( + header.eh_frame_vaddr + fbr.seek, + try fbr.take(info.bytes_len), + cie, + endian, + ); + try fde_list.append(gpa, .{ + .pc_begin = fde.pc_begin, + .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header + }); + }, + // Unlike `.debug_frame`, the `.eh_frame` section does have a terminator CIE -- this is + // necessary because `header` doesn't include the length of the `.eh_frame` section + .terminator => break, + } + } + const fde_slice = try fde_list.toOwnedSlice(gpa); + errdefer comptime unreachable; + std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { + fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { + ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + unwind.eh_frame = .{ + .header = header, + .eh_frame_data = section_bytes, + .sorted_fdes = fde_slice, + }; } -/// Scan `.eh_frame` and `.debug_frame` and build a sorted list of FDEs for binary searching during -/// unwinding. 
-pub fn scanCieFdeInfo(unwind: *Unwind, allocator: Allocator, endian: Endian, base_address: usize) !void { - const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; - for (frame_sections) |frame_section| { - if (unwind.section(frame_section)) |section_data| { - var fbr: Reader = .fixed(section_data); - while (fbr.seek < fbr.buffer.len) { - const entry_header = try EntryHeader.read(&fbr, frame_section, endian); - switch (entry_header.type) { - .cie => { - const cie = try CommonInformationEntry.parse( - entry_header.entry_bytes, - unwind.sectionVirtualOffset(frame_section, base_address).?, - true, - entry_header.format, - frame_section, - entry_header.length_offset, - @sizeOf(usize), - endian, - ); - try unwind.cie_map.put(allocator, entry_header.length_offset, cie); - }, - .fde => |cie_offset| { - const cie = unwind.cie_map.get(cie_offset) orelse return bad(); - const fde = try FrameDescriptionEntry.parse( - entry_header.entry_bytes, - unwind.sectionVirtualOffset(frame_section, base_address).?, - true, - cie, - @sizeOf(usize), - endian, - ); - try unwind.fde_list.append(allocator, fde); - }, - .terminator => break, - } - } - - std.mem.sortUnstable(FrameDescriptionEntry, unwind.fde_list.items, {}, struct { - fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { - _ = ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); +/// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must +/// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. +pub fn findFdeOffset(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: Endian) !?u64 { + // We'll break from this block only if we have a manually-constructed search table. 
+ const sorted_fdes: []const SortedFdeEntry = fdes: { + if (unwind.debug_frame) |df| break :fdes df.sorted_fdes; + if (unwind.eh_frame) |eh_frame| { + if (eh_frame.sorted_fdes) |fdes| break :fdes fdes; + // Use the search table from the `.eh_frame_hdr` section rather than one of our own + return eh_frame.header.findEntry(pc, addr_size_bytes, endian); } - } + // We have no available unwind info + return null; + }; + const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { + fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { + return target_pc >= entry.pc_begin; // i.e. does 'entry_pc..' include 'target_pc' + } + }.canIncludePc); + // `first_bad_idx` is the index of the first FDE whose `pc_begin` is too high to include `pc`. + // So if any FDE matches, it'll be the one at `first_bad_idx - 1` (maybe false positive). + if (first_bad_idx == 0) return null; + return sorted_fdes[first_bad_idx - 1].fde_offset; +} + +pub fn loadFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { + const section_bytes: []const u8, const section_vaddr: u64, const section: Section = s: { + if (unwind.debug_frame) |df| break :s .{ df.data, if (true) @panic("MLUGG TODO"), .debug_frame }; + if (unwind.eh_frame) |ef| break :s .{ ef.eh_frame_data, ef.header.eh_frame_vaddr, .eh_frame }; + unreachable; // how did you get `fde_offset`?! 
+ }; + + var fde_reader: Reader = .fixed(section_bytes[fde_offset..]); + const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section, endian)) { + .fde => |info| info, + .cie, .terminator => return bad(), // This is meant to be an FDE + }; + + const cie_offset = fde_info.cie_offset; + var cie_reader: Reader = .fixed(section_bytes[cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section, endian)) { + .cie => |info| info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + + const cie: CommonInformationEntry = try .parse( + try cie_reader.take(cie_info.bytes_len), + section, + addr_size_bytes, + ); + const fde: FrameDescriptionEntry = try .parse( + section_vaddr + fde_offset + fde_reader.seek, + try fde_reader.take(fde_info.bytes_len), + cie, + endian, + ); + + return .{ cie_info.format, cie, fde }; } const EhPointerContext = struct { // The address of the pointer field itself pc_rel_base: u64, - // Whether or not to follow indirect pointers. This should only be - // used when decoding pointers at runtime using the current process's - // debug info - follow_indirect: bool, - // These relative addressing modes are only used in specific cases, and // might not be available / required in all parsing contexts data_rel_base: ?u64 = null, text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; - -fn readEhPointer(fbr: *Reader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !?u64 { - if (enc == EH.PE.omit) return null; - - const value: union(enum) { - signed: i64, - unsigned: u64, - } = switch (enc & EH.PE.type_mask) { - EH.PE.absptr => .{ +/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. 
+fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: Endian) !union(enum) { + signed: i64, + unsigned: u64, +} { + return switch (enc_ty) { + .absptr => .{ .unsigned = switch (addr_size_bytes) { - 2 => try fbr.takeInt(u16, endian), - 4 => try fbr.takeInt(u32, endian), - 8 => try fbr.takeInt(u64, endian), - else => return error.InvalidAddrSize, + 2 => try r.takeInt(u16, endian), + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, }, }, - EH.PE.uleb128 => .{ .unsigned = try fbr.takeLeb128(u64) }, - EH.PE.udata2 => .{ .unsigned = try fbr.takeInt(u16, endian) }, - EH.PE.udata4 => .{ .unsigned = try fbr.takeInt(u32, endian) }, - EH.PE.udata8 => .{ .unsigned = try fbr.takeInt(u64, endian) }, - EH.PE.sleb128 => .{ .signed = try fbr.takeLeb128(i64) }, - EH.PE.sdata2 => .{ .signed = try fbr.takeInt(i16, endian) }, - EH.PE.sdata4 => .{ .signed = try fbr.takeInt(i32, endian) }, - EH.PE.sdata8 => .{ .signed = try fbr.takeInt(i64, endian) }, + .uleb128 => .{ .unsigned = try r.takeLeb128(u64) }, + .udata2 => .{ .unsigned = try r.takeInt(u16, endian) }, + .udata4 => .{ .unsigned = try r.takeInt(u32, endian) }, + .udata8 => .{ .unsigned = try r.takeInt(u64, endian) }, + .sleb128 => .{ .signed = try r.takeLeb128(i64) }, + .sdata2 => .{ .signed = try r.takeInt(i16, endian) }, + .sdata4 => .{ .signed = try r.takeInt(i32, endian) }, + .sdata8 => .{ .signed = try r.takeInt(i64, endian) }, else => return bad(), }; - - const base = switch (enc & EH.PE.rel_mask) { - EH.PE.pcrel => ctx.pc_rel_base, - EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, - else => null, +} +/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. 
+fn readEhPointer(fbr: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { + const offset = try readEhPointerAbs(fbr, enc.type, addr_size_bytes, endian); + const base = switch (enc.rel) { + .abs, .aligned => 0, + .pcrel => ctx.pc_rel_base, + .textrel => ctx.text_rel_base orelse return bad(), + .datarel => ctx.data_rel_base orelse return bad(), + .funcrel => ctx.function_rel_base orelse return bad(), + .indirect => return bad(), // GCC extension; not supported + _ => return bad(), }; - - const ptr: u64 = if (base) |b| switch (value) { - .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), + return switch (offset) { + .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(base)))), // absptr can actually contain signed values in some cases (aarch64 MachO) - .unsigned => |u| u +% b, - } else switch (value) { - .signed => |s| @as(u64, @intCast(s)), - .unsigned => |u| u, + .unsigned => |u| u +% base, }; - - if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { - if (@sizeOf(usize) != addr_size_bytes) { - // See the documentation for `follow_indirect` - return error.NonNativeIndirection; - } - - const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; - return switch (addr_size_bytes) { - 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, - else => return error.UnsupportedAddrSize, - }; - } else { - return ptr; - } } -fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { - if (pc_rel_offset < 0) { - return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); - } else { - return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); - } +/// Like `Reader.fixed`, but when the length of the data is unknown and we just want to allow +/// reading indefinitely. 
+fn maxSlice(ptr: [*]const u8) []const u8 { + const len = std.math.maxInt(usize) - @intFromPtr(ptr); + return ptr[0..len]; } const Allocator = std.mem.Allocator; diff --git a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig new file mode 100644 index 000000000000..66100f5edaee --- /dev/null +++ b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig @@ -0,0 +1,298 @@ +//! Virtual machine that evaluates DWARF call frame instructions + +/// See section 6.4.1 of the DWARF5 specification for details on each +pub const RegisterRule = union(enum) { + /// The spec says that the default rule for each column is the undefined rule. + /// However, it also allows ABI / compiler authors to specify alternate defaults, so + /// there is a distinction made here. + default: void, + undefined: void, + same_value: void, + /// offset(N) + offset: i64, + /// val_offset(N) + val_offset: i64, + /// register(R) + register: u8, + /// expression(E) + expression: []const u8, + /// val_expression(E) + val_expression: []const u8, + /// Augmenter-defined rule + architectural: void, +}; + +/// Each row contains unwinding rules for a set of registers. +pub const Row = struct { + /// Offset from `FrameDescriptionEntry.pc_begin` + offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived from. + cfa: Column = .{}, + /// The register fields in these columns define the register the rule applies to. + columns: ColumnRange = .{}, + /// Indicates that the next write to any column in this row needs to copy + /// the backing column storage first, as it may be referenced by previous rows. + copy_on_write: bool = false, +}; + +pub const Column = struct { + register: ?u8 = null, + rule: RegisterRule = .{ .default = {} }, +}; + +const ColumnRange = struct { + /// Index into `columns` of the first column in this row. 
+ start: usize = undefined, + len: u8 = 0, +}; + +columns: std.ArrayList(Column) = .empty, +stack: std.ArrayList(ColumnRange) = .empty, +current_row: Row = .{}, + +/// The result of executing the CIE's initial_instructions +cie_row: ?Row = null, + +pub fn deinit(self: *VirtualMachine, gpa: Allocator) void { + self.stack.deinit(gpa); + self.columns.deinit(gpa); + self.* = undefined; +} + +pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; +} + +/// Return a slice backed by the row's non-CFA columns +pub fn rowColumns(self: VirtualMachine, row: Row) []Column { + if (row.columns.len == 0) return &.{}; + return self.columns.items[row.columns.start..][0..row.columns.len]; +} + +/// Either retrieves or adds a column for `register` (non-CFA) in the current row. +fn getOrAddColumn(self: *VirtualMachine, gpa: Allocator, register: u8) !*Column { + for (self.rowColumns(self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; + } + self.current_row.columns.len += 1; + + const column = try self.columns.addOne(gpa); + column.* = .{ + .register = register, + }; + + return column; +} + +/// Runs the CIE instructions, then the FDE instructions. Execution halts +/// once the row that corresponds to `pc` is known, and the row is returned. 
+pub fn runTo( + self: *VirtualMachine, + gpa: Allocator, + pc: u64, + cie: Dwarf.Unwind.CommonInformationEntry, + fde: Dwarf.Unwind.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !Row { + assert(self.cie_row == null); + assert(pc >= fde.pc_begin); + assert(pc < fde.pc_begin + fde.pc_range); + + var prev_row: Row = self.current_row; + + const instruction_slices: [2][]const u8 = .{ + cie.initial_instructions, + fde.instructions, + }; + for (instruction_slices, [2]bool{ true, false }) |slice, is_cie_stream| { + var stream: std.Io.Reader = .fixed(slice); + while (stream.seek < slice.len) { + const instruction: Dwarf.call_frame.Instruction = try .read(&stream, addr_size_bytes, endian); + prev_row = try self.step(gpa, cie, is_cie_stream, instruction); + if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + } + } + + return self.current_row; +} + +fn resolveCopyOnWrite(self: *VirtualMachine, gpa: Allocator) !void { + if (!self.current_row.copy_on_write) return; + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + try self.columns.ensureUnusedCapacity(gpa, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + self.current_row.columns.start = new_start; + } +} + +/// Executes a single instruction. +/// If this instruction is from the CIE, `is_initial` should be set. +/// Returns the value of `current_row` before executing this instruction. 
+pub fn step( + self: *VirtualMachine, + gpa: Allocator, + cie: Dwarf.Unwind.CommonInformationEntry, + is_initial: bool, + instruction: Dwarf.call_frame.Instruction, +) !Row { + // CIE instructions must be run before FDE instructions + assert(!is_initial or self.cie_row == null); + if (!is_initial and self.cie_row == null) { + self.cie_row = self.current_row; + self.current_row.copy_on_write = true; + } + + const prev_row = self.current_row; + switch (instruction) { + .set_loc => |i| { + if (i.address <= self.current_row.offset) return error.InvalidOperation; + if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + self.current_row.offset = i.address; + }, + inline .advance_loc, + .advance_loc1, + .advance_loc2, + .advance_loc4, + => |i| { + self.current_row.offset += i.delta * cie.code_alignment_factor; + self.current_row.copy_on_write = true; + }, + inline .offset, + .offset_extended, + .offset_extended_sf, + => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; + }, + inline .restore, + .restore_extended, + => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.cie_row) |cie_row| { + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = for (self.rowColumns(cie_row)) |cie_column| { + if (cie_column.register == i.register) break cie_column.rule; + } else .{ .default = {} }; + } else return error.InvalidOperation; + }, + .nop => {}, + .undefined => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .undefined = {} }; + }, + .same_value => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .same_value = {} }; + }, + .register => |i| { + try self.resolveCopyOnWrite(gpa); + const column = 
try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .register = i.target_register }; + }, + .remember_state => { + try self.stack.append(gpa, self.current_row.columns); + self.current_row.copy_on_write = true; + }, + .restore_state => { + const restored_columns = self.stack.pop() orelse return error.InvalidOperation; + self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); + try self.columns.ensureUnusedCapacity(gpa, restored_columns.len); + + self.current_row.columns.start = self.columns.items.len; + self.current_row.columns.len = restored_columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); + }, + .def_cfa => |i| { + try self.resolveCopyOnWrite(gpa); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = @intCast(i.offset) }, + }; + }, + .def_cfa_sf => |i| { + try self.resolveCopyOnWrite(gpa); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, + }; + }, + .def_cfa_register => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.register = i.register; + }, + .def_cfa_offset => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = @intCast(i.offset), + }; + }, + .def_cfa_offset_sf => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .def_cfa_expression => |i| { + try self.resolveCopyOnWrite(gpa); + self.current_row.cfa.register = undefined; + 
self.current_row.cfa.rule = .{ + .expression = i.block, + }; + }, + .expression => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .expression = i.block, + }; + }, + .val_offset => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, + }; + }, + .val_offset_sf => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .val_expression => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .val_expression = i.block, + }; + }, + } + + return prev_row; +} + +const std = @import("../../../std.zig"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; + +const VirtualMachine = @This(); diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig index f78ed4378b76..8f1758f4eb92 100644 --- a/lib/std/debug/Dwarf/call_frame.zig +++ b/lib/std/debug/Dwarf/call_frame.zig @@ -1,12 +1,5 @@ -const builtin = @import("builtin"); const std = @import("../../std.zig"); -const mem = std.mem; -const debug = std.debug; -const leb = std.leb; -const DW = std.dwarf; -const abi = std.debug.Dwarf.abi; -const assert = std.debug.assert; -const native_endian = builtin.cpu.arch.endian(); +const Reader = std.Io.Reader; /// TODO merge with std.dwarf.CFA const Opcode = enum(u8) { @@ -51,9 +44,13 @@ const Opcode = enum(u8) { pub const hi_user = 0x3f; }; -fn readBlock(reader: *std.Io.Reader) ![]const u8 { +/// The returned slice points into `reader.buffer`. 
+fn readBlock(reader: *Reader) ![]const u8 { const block_len = try reader.takeLeb128(usize); - return reader.take(block_len); + return reader.take(block_len) catch |err| switch (err) { + error.EndOfStream => return error.InvalidOperand, + error.ReadFailed => |e| return e, + }; } pub const Instruction = union(Opcode) { @@ -140,8 +137,9 @@ pub const Instruction = union(Opcode) { block: []const u8, }, + /// `reader` must be a `Reader.fixed` so that regions of its buffer are never invalidated. pub fn read( - reader: *std.Io.Reader, + reader: *Reader, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Instruction { @@ -173,16 +171,14 @@ pub const Instruction = union(Opcode) { .restore, => unreachable, .nop => .{ .nop = {} }, - .set_loc => .{ - .set_loc = .{ - .address = switch (addr_size_bytes) { - 2 => try reader.takeInt(u16, endian), - 4 => try reader.takeInt(u32, endian), - 8 => try reader.takeInt(u64, endian), - else => return error.InvalidAddrSize, - }, + .set_loc => .{ .set_loc = .{ + .address = switch (addr_size_bytes) { + 2 => try reader.takeInt(u16, endian), + 4 => try reader.takeInt(u32, endian), + 8 => try reader.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, }, - }, + } }, .advance_loc1 => .{ .advance_loc1 = .{ .delta = try reader.takeByte() }, }, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 4a29eb0fa78b..daa5cf12d2e8 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -13,7 +13,6 @@ const windows = std.os.windows; const macho = std.macho; const fs = std.fs; const coff = std.coff; -const pdb = std.pdb; const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; @@ -22,86 +21,37 @@ const Pdb = std.debug.Pdb; const File = std.fs.File; const math = std.math; const testing = std.testing; -const StackIterator = std.debug.StackIterator; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; const SelfInfo = @This(); -const root = 
@import("root"); - -allocator: Allocator, -address_map: std.AutoHashMapUnmanaged(usize, Module), -modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, - -pub const OpenError = error{ - MissingDebugInfo, - UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).@"fn".return_type.?).error_union.error_set; - -pub fn open(allocator: Allocator) OpenError!SelfInfo { - if (builtin.strip_debug_info) - return error.MissingDebugInfo; - switch (native_os) { - .linux, - .freebsd, - .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => return try SelfInfo.init(allocator), - else => return error.UnsupportedOperatingSystem, - } -} - -pub fn init(allocator: Allocator) !SelfInfo { - var debug_info: SelfInfo = .{ - .allocator = allocator, - .address_map = .empty, - .modules = if (native_os == .windows) .{} else {}, - }; - - if (native_os == .windows) { - errdefer debug_info.modules.deinit(allocator); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - switch (windows.GetLastError()) { - else => |err| return windows.unexpectedError(err), - } - } - defer windows.CloseHandle(handle); - - var module_entry: windows.MODULEENTRY32 = undefined; - module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &module_entry) == 0) { - return error.MissingDebugInfo; - } - - var module_valid = true; - while (module_valid) { - const module_info = try debug_info.modules.addOne(allocator); - const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; - errdefer allocator.free(name); - - module_info.* = .{ - .base_address = @intFromPtr(module_entry.modBaseAddr), - .size = module_entry.modBaseSize, - .name = name, - .handle = module_entry.hModule, - }; - - module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; - } - } 
+/// MLUGG TODO: what if this field had a less stupid name... +address_map: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), + +module_cache: if (native_os == .windows) std.ArrayListUnmanaged(windows.MODULEENTRY32) else void, + +pub const target_supported: bool = switch (native_os) { + .linux, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .macos, + .solaris, + .illumos, + .windows, + => true, + else => false, +}; - return debug_info; -} +pub const init: SelfInfo = .{ + .address_map = .empty, + .module_cache = if (native_os == .windows) .empty, +}; pub fn deinit(self: *SelfInfo) void { + // MLUGG TODO: that's amusing, this function is straight-up unused. i... wonder if it even should be used anywhere? perhaps not... so perhaps it should not even exist...???? var it = self.address_map.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; @@ -118,49 +68,91 @@ pub fn deinit(self: *SelfInfo) void { } } -fn lookupModuleForAddress(self: *SelfInfo, address: usize) !Module.Lookup { +fn lookupModuleForAddress(self: *SelfInfo, gpa: Allocator, address: usize) !Module { if (builtin.target.os.tag.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { - return self.lookupModuleWin32(address); + return self.lookupModuleWin32(gpa, address); } else if (native_os == .haiku) { - return self.lookupModuleHaiku(address); + @panic("TODO implement lookup module for Haiku"); } else if (builtin.target.cpu.arch.isWasm()) { - return self.lookupModuleWasm(address); + @panic("TODO implement lookup module for Wasm"); } else { return self.lookupModuleDl(address); } } -fn loadModuleDebugInfo(self: *SelfInfo, lookup: *const Module.Lookup, module: *Module) !void { +fn loadModuleDebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { + // MLUGG TODO: this should totally just go into the `Module` impl or something, right? 
lol + if (builtin.target.os.tag.isDarwin()) { + try loadMachODebugInfo(gpa, module, di); + } else if (native_os == .windows) { + // MLUGG TODO: deal with 'already loaded' properly + try readCoffDebugInfo(gpa, module, di); + } else if (native_os == .haiku) { + unreachable; + } else if (builtin.target.cpu.arch.isWasm()) { + unreachable; + } else { + if (di.mapped_memory != null) return; // already loaded + const filename: ?[]const u8 = if (module.name.len > 0) module.name else null; + const mapped_mem = mapFileOrSelfExe(filename) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.FileTooBig => return error.InvalidDebugInfo, + else => |e| return e, + }; + errdefer posix.munmap(mapped_mem); + try di.load(gpa, mapped_mem, module.build_id, null, null, null, filename); + assert(di.mapped_memory != null); + } +} + +fn loadModuleUnwindInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { if (builtin.target.os.tag.isDarwin()) { - @compileError("TODO"); + // MLUGG TODO HACKHACK + try loadMachODebugInfo(gpa, module, di); } else if (native_os == .windows) { - @compileError("TODO"); + comptime unreachable; // not supported } else if (native_os == .haiku) { - @compileError("TODO"); + comptime unreachable; // not supported } else if (builtin.target.cpu.arch.isWasm()) { - @compileError("TODO"); + comptime unreachable; // not supported } else { - if (module.mapped_memory == null) { - var sections: Dwarf.SectionArray = @splat(null); - try readElfDebugInfo(module, self.allocator, if (lookup.name.len > 0) lookup.name else null, lookup.build_id, §ions); - assert(module.mapped_memory != null); + eh_frame: { + if (di.unwind.eh_frame != null) break :eh_frame; // already loaded + const eh_frame_hdr_bytes = module.gnu_eh_frame orelse break :eh_frame; + const eh_frame_hdr: Dwarf.Unwind.EhFrameHeader = try .parse( + @intFromPtr(eh_frame_hdr_bytes.ptr) - module.load_offset, + eh_frame_hdr_bytes, + @sizeOf(usize), + native_endian, + ); 
+ const eh_frame_addr = module.load_offset + @as(usize, @intCast(eh_frame_hdr.eh_frame_vaddr)); + try di.unwind.scanEhFrame( + gpa, + eh_frame_hdr, + @ptrFromInt(eh_frame_addr), + null, + @sizeOf(usize), + native_endian, + ); } } } -pub fn unwindFrame(self: *SelfInfo, context: *UnwindContext) !usize { - const lookup = try self.lookupModuleForAddress(context.pc); - const gop = try self.address_map.getOrPut(self.allocator, lookup.base_address); - if (!gop.found_existing) gop.value_ptr.* = .init(&lookup); +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { + comptime assert(target_supported); + const module = try self.lookupModuleForAddress(gpa, context.pc); + const gop = try self.address_map.getOrPut(gpa, module.load_offset); + if (!gop.found_existing) gop.value_ptr.* = .init; + try loadModuleUnwindInfo(gpa, &module, gop.value_ptr); if (native_os.isDarwin()) { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
if (gop.value_ptr.unwind_info) |unwind_info| { if (unwindFrameMachO( - self.allocator, - lookup.base_address, + module.text_base, + module.load_offset, context, unwind_info, gop.value_ptr.eh_frame, @@ -169,292 +161,42 @@ pub fn unwindFrame(self: *SelfInfo, context: *UnwindContext) !usize { } else |err| { if (err != error.RequiresDWARFUnwind) return err; } - } else return error.MissingUnwindInfo; + } + return error.MissingUnwindInfo; + } + if (try gop.value_ptr.getDwarfUnwindForAddress(gpa, context.pc)) |unwind| { + return unwindFrameDwarf(unwind, module.load_offset, context, null); } - if (try gop.value_ptr.getDwarfUnwindForAddress(self.allocator, context.pc)) |unwind| { - return unwindFrameDwarf(self.allocator, unwind, lookup.base_address, context, null); - } else return error.MissingDebugInfo; + return error.MissingDebugInfo; } -pub fn getSymbolAtAddress(self: *SelfInfo, address: usize) !std.debug.Symbol { - const lookup = try self.lookupModuleForAddress(address); - const gop = try self.address_map.getOrPut(self.allocator, lookup.base_address); - if (!gop.found_existing) gop.value_ptr.* = .init(&lookup); - try self.loadModuleDebugInfo(&lookup, gop.value_ptr); - return gop.value_ptr.getSymbolAtAddress(self.allocator, native_endian, lookup.base_address, address); +pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { + comptime assert(target_supported); + const module = try self.lookupModuleForAddress(gpa, address); + const gop = try self.address_map.getOrPut(gpa, module.key()); + if (!gop.found_existing) gop.value_ptr.* = .init; + try loadModuleDebugInfo(gpa, &module, gop.value_ptr); + return module.getSymbolAtAddress(gpa, gop.value_ptr, address); } /// Returns the module name for a given address. /// This can be called when getModuleForAddress fails, so implementations should provide /// a path that doesn't rely on any side-effects of a prior successful module lookup. 
-pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { - return if (self.lookupModuleForAddress(address)) |lookup| lookup.name else |err| switch (err) { - error.MissingDebugInfo => null, - }; -} - -fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const seg_start = segment_cmd.vmaddr + vmaddr_slide; - const seg_end = seg_start + segment_cmd.vmsize; - if (address >= seg_start and address < seg_end) { - if (self.address_map.get(base_address)) |obj_di| { - return obj_di; - } - - for (cmd.getSections()) |sect| { - const sect_addr: usize = @intCast(sect.addr); - const sect_size: usize = @intCast(sect.size); - if (mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = @as([*]const u8, @ptrFromInt(sect_addr + vmaddr_slide))[0..sect_size]; - } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { - eh_frame = @as([*]const u8, @ptrFromInt(sect_addr + vmaddr_slide))[0..sect_size]; - } - } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - - const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); - const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { - 
error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - obj_di.* = try readMachODebugInfo(self.allocator, macho_file); - obj_di.base_address = base_address; - obj_di.vmaddr_slide = vmaddr_slide; - obj_di.unwind_info = unwind_info; - obj_di.eh_frame = eh_frame; - - try self.address_map.putNoClobber(base_address, obj_di); - - return obj_di; - } - }, - else => {}, - }; - } - - return error.MissingDebugInfo; -} - -fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { - _ = self; - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const original_address = address - vmaddr_slide; - const seg_start = segment_cmd.vmaddr; - const seg_end = seg_start + segment_cmd.vmsize; - if (original_address >= seg_start and original_address < seg_end) { - return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); - } - }, - else => {}, - }; - } - - return null; -} - -fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { - for (self.modules.items) |*module| { - if (address >= module.base_address and address < module.base_address + module.size) { - if (self.address_map.get(module.base_address)) |obj_di| { - return obj_di; - } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - - const mapped_module = @as([*]const u8, 
@ptrFromInt(module.base_address))[0..module.size]; - var coff_obj = try coff.Coff.init(mapped_module, true); - - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - // openFileAbsoluteW requires the prefix to be present - @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); - - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - @ptrCast(&name_buffer[4]), - windows.PATH_MAX_WIDE, - ); - - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - errdefer coff_file.close(); - - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - §ion_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
- windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - - var coff_len: usize = 0; - var base_ptr: usize = 0; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(&base_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); - - const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; - coff_obj = try coff.Coff.init(section_view, false); - - module.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); - - obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); - obj_di.base_address = module.base_address; - - try self.address_map.putNoClobber(module.base_address, obj_di); - return obj_di; - } - } - - return error.MissingDebugInfo; -} - -fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { - for (self.modules.items) |module| { - if (address >= module.base_address and address < module.base_address + module.size) { - return module.name; - } - } - return null; -} - -fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { - _ = self; - - var ctx: struct { - // Input - address: usize, - // Output - name: []const u8 = "", - } = .{ .address = address }; - const CtxTy = @TypeOf(ctx); - - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { - _ = size; - if (context.address < info.addr) return; - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - const seg_start = info.addr +% phdr.p_vaddr; - 
const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.name = mem.sliceTo(info.name, 0) orelse ""; - break; - } - } else return; - - return error.Found; - } - }.callback)) { - return null; - } else |err| switch (err) { - error.Found => return fs.path.basename(ctx.name), - } - - return null; +pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { + comptime assert(target_supported); + const module = try self.lookupModuleForAddress(gpa, address); + return module.name; } -fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { - var ctx: struct { - // Input +fn lookupModuleDl(self: *SelfInfo, address: usize) !Module { + _ = self; // MLUGG + const DlIterContext = struct { + /// input address: usize, - // Output - lookup: Module.Lookup, - } = .{ - .address = address, - .lookup = .{ - .base_address = undefined, - .name = undefined, - .build_id = null, - .gnu_eh_frame = null, - }, - }; - const CtxTy = @TypeOf(ctx); + /// output + module: Module, - posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *@This()) !void { _ = size; // The base address is too high if (context.address < info.addr) @@ -468,10 +210,13 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { const seg_start = info.addr +% phdr.p_vaddr; const seg_end = seg_start + phdr.p_memsz; if (context.address >= seg_start and context.address < seg_end) { - // Android libc uses NULL instead of an empty string to mark the - // main program - context.lookup.name = mem.sliceTo(info.name, 0) orelse ""; - context.lookup.base_address = info.addr; + context.module = .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = mem.sliceTo(info.name, 0) orelse 
"", + .build_id = null, + .gnu_eh_frame = null, + }; break; } } else return; @@ -480,17 +225,20 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { switch (phdr.p_type) { elf.PT_NOTE => { // Look for .note.gnu.build-id - const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; - const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); - if (name_size != 4) continue; - const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); - const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.lookup.build_id = note_bytes[16..][0..desc_size]; + if (!mem.eql(u8, name, "GNU\x00")) continue; + const desc = r.take(desc_size) catch continue; + context.module.build_id = desc; }, elf.PT_GNU_EH_FRAME => { - context.lookup.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; }, else => {}, } @@ -499,425 +247,558 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { // Stop the iteration return error.Found; } - }.callback) catch |err| switch (err) { - error.Found => return ctx.lookup, }; - if (true) return error.MissingDebugInfo; - - if (self.address_map.get(ctx.lookup.base_address)) |obj_di| { - return obj_di; - } + var ctx: DlIterContext = .{ + .address = address, + .module = undefined, + }; + 
posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { + error.Found => return ctx.module, + }; + return error.MissingDebugInfo; +} - var sections: Dwarf.SectionArray = @splat(null); - if (ctx.lookup.gnu_eh_frame) |eh_frame_hdr| { - // This is a special case - pointer offsets inside .eh_frame_hdr - // are encoded relative to its base address, so we must use the - // version that is already memory mapped, and not the one that - // will be mapped separately from the ELF file. - sections[@intFromEnum(Dwarf.Unwind.Section.Id.eh_frame_hdr)] = .{ - .data = eh_frame_hdr, - .owned = false, +fn lookupModuleDyld(self: *SelfInfo, address: usize) !Module { + _ = self; // MLUGG + const image_count = std.c._dyld_image_count(); + for (0..image_count) |image_idx| { + const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; + const text_base = @intFromPtr(header); + if (address < text_base) continue; + const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); + + // Find the __TEXT segment + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const text_segment_cmd, const text_sections = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break .{ segment_cmd, load_cmd.getSections() }; + } else continue; + + const seg_start = load_offset + text_segment_cmd.vmaddr; + assert(seg_start == text_base); + const seg_end = seg_start + text_segment_cmd.vmsize; + if (address < seg_start or address >= seg_end) continue; + + // We've found the matching __TEXT segment. This is the image we need, but we must look + // for unwind info in it before returning. 
+ + var result: Module = .{ + .text_base = text_base, + .load_offset = load_offset, + .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), + .unwind_info = null, + .eh_frame = null, }; + for (text_sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); + result.unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); + result.eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + return result; } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.lookup.name.len > 0) ctx.lookup.name else null, ctx.lookup.build_id, §ions); - obj_di.base_address = ctx.lookup.base_address; - - // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding - obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.lookup.base_address) catch {}; - - try self.address_map.putNoClobber(self.allocator, ctx.lookup.base_address, obj_di); - - return obj_di; + return error.MissingDebugInfo; } -fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { - _ = self; - _ = address; - @panic("TODO implement lookup module for Haiku"); -} +fn lookupModuleWin32(self: *SelfInfo, gpa: Allocator, address: usize) !Module { + if (self.lookupModuleWin32Cache(address)) |m| return m; -fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { - _ = self; - _ = address; - @panic("TODO implement lookup module for Wasm"); -} + { + // Check a new module hasn't been loaded + self.module_cache.clearRetainingCapacity(); -pub const Module = switch (native_os) { - .macos, .ios, .watchos, .tvos, .visionos => struct { - base_address: usize, - vmaddr_slide: usize, - mapped_memory: []align(std.heap.page_size_min) const u8, - 
symbols: []const MachoSymbol, - strings: [:0]const u8, - ofiles: OFileTable, - - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8 = null, - eh_frame: ?[]const u8 = null, - - const OFileTable = std.StringHashMap(OFileInfo); - const OFileInfo = struct { - di: Dwarf, - addr_table: std.StringHashMap(u64), - }; + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + return windows.unexpectedError(windows.GetLastError()); + } + defer windows.CloseHandle(handle); - pub fn deinit(self: *@This(), allocator: Allocator) void { - var it = self.ofiles.iterator(); - while (it.next()) |entry| { - const ofile = entry.value_ptr; - ofile.di.deinit(allocator); - ofile.addr_table.deinit(); + var entry: windows.MODULEENTRY32 = undefined; + entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &entry) != 0) { + try self.module_cache.append(gpa, entry); + while (windows.kernel32.Module32Next(handle, &entry) != 0) { + try self.module_cache.append(gpa, entry); } - self.ofiles.deinit(); - allocator.free(self.symbols); - posix.munmap(self.mapped_memory); } + } - fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo { - const o_file = try fs.cwd().openFile(o_file_path, .{}); - const mapped_mem = try mapWholeFile(o_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtabcmd: ?macho.symtab_command = null; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => segcmd = cmd, - .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, - else => {}, + if 
(self.lookupModuleWin32Cache(address)) |m| return m; + return error.MissingDebugInfo; +} +fn lookupModuleWin32Cache(self: *SelfInfo, address: usize) ?Module { + for (self.module_cache.items) |*entry| { + const base_address = @intFromPtr(entry.modBaseAddr); + if (address >= base_address and address < base_address + entry.modBaseSize) { + return .{ + .base_address = base_address, + .size = entry.modBaseSize, + .name = std.mem.sliceTo(&entry.szModule, 0), + .handle = entry.hModule, }; + } + } + return null; +} - if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; - - // Parse symbols - const strtab = @as( - [*]const u8, - @ptrCast(&mapped_mem[symtabcmd.?.stroff]), - )[0 .. symtabcmd.?.strsize - 1 :0]; - const symtab = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), - )[0..symtabcmd.?.nsyms]; - - // TODO handle tentative (common) symbols - var addr_table = std.StringHashMap(u64).init(allocator); - try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - if (sym.undf() or sym.tentative() or sym.abs()) continue; - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - // TODO is it possible to have a symbol collision? - addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); - } +fn readCoffDebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { + const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); + const mapped = mapped_ptr[0..module.size]; + var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. 
+ if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + name_buffer[4..], + windows.PATH_MAX_WIDE, + ); + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; + errdefer coff_file.close(); + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + §ion_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + var coff_len: usize = 0; + var section_view_ptr: [*]const u8 = undefined; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(§ion_view_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); + const section_view = section_view_ptr[0..coff_len]; + coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; + di.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + di.coff_image_base = coff_obj.getImageBase(); - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; + if (coff_obj.getSectionByName(".debug_info")) |_| { + di.dwarf = .{}; - for (segcmd.?.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, gpa), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; + } - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; + try di.dwarf.?.open(gpa, native_endian); + } - const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] = .{ - .data = section_bytes, - .virtual_address = @intCast(sect.addr), - .owned = false, - }; + if (try coff_obj.getPdbPath()) |raw_path| pdb: { + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(gpa); + defer gpa.free(self_dir); + break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); } + }; + defer if (path.ptr != raw_path.ptr) gpa.free(path); - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var di: Dwarf = .{ - .endian = .little, - .sections = sections, - .is_macho = true, - }; + di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => break :pdb, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); - try Dwarf.open(&di, allocator); - const info = OFileInfo{ - .di = di, - .addr_table = addr_table, - }; + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; - // Add the debug info to the cache - const result = try self.ofiles.getOrPut(o_file_path); - assert(!result.found_existing); - result.value_ptr.* = info; + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); + } +} - return result.value_ptr; +const Module = switch (native_os) { + else => "MLUGG TODO", // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... 
+ .macos, .ios, .watchos, .tvos, .visionos => struct { + /// The runtime address where __TEXT is loaded. + text_base: usize, + load_offset: usize, + name: []const u8, + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + fn key(m: *const Module) usize { + return m.text_base; } + fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + const vaddr = address - module.load_offset; + const symbol = MachoSymbol.find(di.symbols, vaddr) orelse return .{}; // MLUGG TODO null? - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - const result = try self.getOFileInfoForAddress(allocator, address); - if (result.symbol == null) return .{}; + // offset of `address` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .name = stab_symbol }; - - // Translate again the address, this time into an address inside the - // .o file - const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .name = "???", + const stab_symbol = mem.sliceTo(di.strings[symbol.strx..], 0); + const o_file_path = mem.sliceTo(di.strings[symbol.ofile..], 0); + + const o_file: *DebugInfo.OFile = of: { + const gop = try di.ofiles.getOrPut(gpa, o_file_path); + if (!gop.found_existing) { + gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { + defer _ = di.ofiles.pop().?; + switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return .{ .name = stab_symbol }, + else => |e| return e, + } + }; + } + break :of gop.value_ptr; }; - const addr_off = result.relocated_address - result.symbol.?.addr; - const o_file_di = &result.o_file_info.?.di; - if 
(o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return .{ - .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString( - o_file_di, - std.dwarf.AT.name, - o_file_di.section(.debug_str), - compile_unit.*, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .source_location = o_file_di.getLineNumberInfo( - allocator, - compile_unit, - relocated_address_o + addr_off, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return .{ .name = stab_symbol }; - }, - else => return err, - } - } + const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return .{ .name = stab_symbol }; - pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { - relocated_address: usize, - symbol: ?*const MachoSymbol = null, - o_file_info: ?*OFileInfo = null, - } { - // Translate the VA into an address into this object - const relocated_address = address - self.vmaddr_slide; - - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ - .relocated_address = relocated_address, + const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .{ .name = stab_symbol }, + else => |e| return e, }; - // Check if its debug infos are already in the cache - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - const o_file_info = self.ofiles.getPtr(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return .{ - .relocated_address = relocated_address, - .symbol = symbol, 
- }, - else => return err, - }); - return .{ - .relocated_address = relocated_address, - .symbol = symbol, - .o_file_info = o_file_info, + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, + .compile_unit_name = compile_unit.die.getAttrString( + &o_file.dwarf, + native_endian, + std.dwarf.AT.name, + o_file.dwarf.section(.debug_str), + compile_unit, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = o_file.dwarf.getLineNumberInfo( + gpa, + native_endian, + compile_unit, + symbol_ofile_vaddr + address_symbol_offset, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, }; } + const DebugInfo = struct { + // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadMachODebugInfo should be the one discovering them! + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + // MLUGG TODO: this could use an adapter to just index straight into `strings`! + ofiles: std.StringArrayHashMapUnmanaged(OFile), + + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + + // MLUGG TODO HACKHACK: this is awful + const init: DebugInfo = undefined; + + const OFile = struct { + dwarf: Dwarf, + // MLUGG TODO: this could use an adapter to just index straight into the strtab! 
+ addr_table: std.StringArrayHashMapUnmanaged(u64), + }; - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; - } - }, - .uefi, .windows => struct { - base_address: usize, - pdb: ?Pdb, - dwarf: ?Dwarf, - coff_image_base: u64, - - /// Only used if pdb is non-null - coff_section_headers: []coff.SectionHeader, - - pub fn deinit(self: *@This(), gpa: Allocator) void { - if (self.dwarf) |*dwarf| { - dwarf.deinit(gpa); - } - - if (self.pdb) |*p| { - gpa.free(p.file_reader.interface.buffer); - gpa.destroy(p.file_reader); - p.deinit(); - gpa.free(self.coff_section_headers); + fn deinit(di: *DebugInfo, gpa: Allocator) void { + for (di.ofiles.values()) |*ofile| { + ofile.dwarf.deinit(gpa); + ofile.addr_table.deinit(gpa); + } + di.ofiles.deinit(); + gpa.free(di.symbols); + posix.munmap(di.mapped_memory); } - self.* = undefined; - } + fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapFileOrSelfExe(o_file_path); + errdefer posix.munmap(mapped_mem); - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.section > self.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. 
- coff_section = &self.coff_section_headers[sect_contrib.section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.offset; - const vaddr_end = vaddr_start + sect_contrib.size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.module_index; - } - } else { - // we have no information to add to the address - return null; - }; + if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - const module = (try self.pdb.?.getModule(mod_index)) orelse - return error.InvalidDebugInfo; - const obj_basename = fs.path.basename(module.obj_file_name); - - const symbol_name = self.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ) orelse "???"; - const opt_line_info = try self.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ); + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => seg_cmd = cmd, + .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; - return .{ - .name = symbol_name, - .compile_unit_name = obj_basename, - .source_location = opt_line_info, - }; - } + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] 
!= 0) return error.InvalidDebugInfo; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; + const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + // MLUGG TODO: does initCapacity actually make sense? + var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; + defer addr_table.deinit(gpa); + try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = addr_table.getOrPutAssumeCapacity(sym_name); + if (gop.found_existing) return error.InvalidDebugInfo; + gop.value_ptr.* = sym.n_value; + } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - if (self.pdb != null) { - if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; - } + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; - if (self.dwarf) |*dwarf| { - const dwarf_address = relocated_address + self.coff_image_base; - return dwarf.getSymbol(allocator, dwarf_address); - } + const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index] = .{ + .data = section_bytes, + .virtual_address = @intCast(sect.addr), + .owned = false, + }; + } - return .{}; - } + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - _ = allocator; - _ = address; + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + try dwarf.open(gpa, native_endian); - return switch (self.debug_data) { - .dwarf => |*dwarf| dwarf, - else => null, - }; - } + return .{ + .dwarf = dwarf, + .addr_table = addr_table.move(), + }; + } + }; }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, .wasi, .emscripten => struct { - pub fn deinit(self: *@This(), allocator: Allocator) void { - _ = self; - _ = allocator; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - _ = self; - _ = allocator; - _ = address; - return .{}; + const DebugInfo = struct { + const init: DebugInfo = .{}; + fn getSymbolAtAddress(di: *DebugInfo, gpa: Allocator, base_address: usize, address: usize) !std.debug.Symbol { + _ = di; + _ = gpa; + _ = base_address; + _ = address; + unreachable; + } + }; + }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { + load_offset: usize, + name: 
[]const u8, + build_id: ?[]const u8, + gnu_eh_frame: ?[]const u8, + fn key(m: Module) usize { + return m.load_offset; // MLUGG TODO: is this technically valid? idk } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - _ = self; - _ = allocator; - _ = address; - return null; + const DebugInfo = Dwarf.ElfModule; + fn getSymbolAtAddress(mod: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + return di.getSymbolAtAddress(gpa, native_endian, mod.load_offset, address); } }, - else => Dwarf, -}; + .uefi, .windows => struct { + base_address: usize, + size: usize, + name: []const u8, + handle: windows.HMODULE, + fn key(m: Module) usize { + return m.base_address; + } + const DebugInfo = struct { + coff_image_base: u64, + mapped_file: ?struct { + file: File, + section_handle: windows.HANDLE, + section_view: []const u8, + fn deinit(mapped: @This()) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mapped.section_handle); + mapped.file.close(); + } + }, -/// How is this different than `Module` when the host is Windows? -/// Why are both stored in the `SelfInfo` struct? -/// Boy, it sure would be nice if someone added documentation comments for this -/// struct explaining it. 
-pub const WindowsModule = struct { - base_address: usize, - size: u32, - name: []const u8, - handle: windows.HMODULE, - - // Set when the image file needed to be mapped from disk - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - - pub fn deinit(self: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrCast(@constCast(self.section_view.ptr))) == .SUCCESS); - windows.CloseHandle(self.section_handle); - self.file.close(); + dwarf: ?Dwarf, + + pdb: ?Pdb, + /// Populated iff `pdb != null`; otherwise `&.{}`. + coff_section_headers: []coff.SectionHeader, + + const init: DebugInfo = .{ + .coff_image_base = undefined, + .mapped_file = null, + .dwarf = null, + .pdb = null, + .coff_section_headers = &.{}, + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.dwarf) |*dwarf| dwarf.deinit(gpa); + if (di.pdb) |*pdb| pdb.deinit(); + gpa.free(di.coff_section_headers); + if (di.mapped_file) |mapped| mapped.deinit(); + } + + fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.section > di.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. 
+ coff_section = &di.coff_section_headers[sect_contrib.section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.offset; + const vaddr_end = vaddr_start + sect_contrib.size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.module_index; + } + } else { + // we have no information to add to the address + return null; + }; + + const module = (try di.pdb.?.getModule(mod_index)) orelse + return error.InvalidDebugInfo; + const obj_basename = fs.path.basename(module.obj_file_name); + + const symbol_name = di.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ) orelse "???"; + const opt_line_info = try di.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ); + + return .{ + .name = symbol_name, + .compile_unit_name = obj_basename, + .source_location = opt_line_info, + }; + } + }; + + fn getSymbolAtAddress(mod: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + // Translate the runtime address into a virtual address into the module + const vaddr = address - mod.base_address; + + if (di.pdb != null) { + if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; + } + + if (di.dwarf) |*dwarf| { + const dwarf_address = vaddr + di.coff_image_base; + return dwarf.getSymbol(gpa, native_endian, dwarf_address); + } + + return error.MissingDebugInfo; } - } = null, + }, }; -/// This takes ownership of macho_file: users of this function should not close -/// it themselves, even on error. -/// TODO it's weird to take ownership even on error, rework this code. 
-fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { - const mapped_mem = try mapWholeFile(macho_file); +fn loadMachODebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { + const mapped_mem = mapFileOrSelfExe(module.name) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.FileTooBig => return error.InvalidDebugInfo, + else => |e| return e, + }; + errdefer posix.munmap(mapped_mem); const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidDebugInfo; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + const symtab: macho.symtab_command = symtab: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + return error.MissingDebugInfo; }; - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return error.MissingDebugInfo; - - const syms = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), - )[0..symtab.nsyms]; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); + // MLUGG TODO: does it really make sense to initCapacity here? how many of syms are omitted? 
+ var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); var ofile: u32 = undefined; var last_sym: MachoSymbol = undefined; - var symbol_index: usize = 0; var state: enum { init, oso_open, @@ -929,64 +810,53 @@ fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { } = .init; for (syms) |*sym| { - if (!sym.stab()) continue; + if (sym.n_type.bits.is_stab == 0) continue; // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type) { - macho.N_OSO => { - switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - } + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, }, - macho.N_BNSYM => { - switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - } + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, }, - macho.N_FUN => { - switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @as(u32, @intCast(sym.n_value)); - }, - else => return error.InvalidDebugInfo, - } + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @intCast(sym.n_value); + }, + else => return error.InvalidDebugInfo, }, - macho.N_ENSYM => { - switch (state) { - .fun_size => { - state = .ensym; - symbols_buf[symbol_index] = last_sym; - symbol_index += 1; - }, - else => return error.InvalidDebugInfo, - } + .ensym => switch (state) { + .fun_size => { + state = .ensym; + 
symbols.appendAssumeCapacity(last_sym); + }, + else => return error.InvalidDebugInfo, }, - macho.N_SO => { - switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - } + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, }, else => {}, } @@ -998,560 +868,187 @@ fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { else => return error.InvalidDebugInfo, } - const symbols = try allocator.realloc(symbols_buf, symbol_index); + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); // Even though lld emits symbols in ascending order, this debug code // should work for programs linked in any valid way. // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); + mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - return .{ - .base_address = undefined, - .vmaddr_slide = undefined, + di.* = .{ + .unwind_info = module.unwind_info, + .eh_frame = module.eh_frame, .mapped_memory = mapped_mem, - .ofiles = Module.OFileTable.init(allocator), - .symbols = symbols, + .symbols = symbols_slice, .strings = strings, + .ofiles = .empty, }; } -fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { - var di: Module = .{ - .base_address = undefined, - .coff_image_base = coff_obj.getImageBase(), - .coff_section_headers = undefined, - }; - - if (coff_obj.getSectionByName(".debug_info")) |_| { - // This coff file has embedded DWARF debug info - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, allocator), - .virtual_address = section_header.virtual_address, - .owned = true, - }; - } else null; - } - - var dwarf: Dwarf = .{ - .endian = native_endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&dwarf, allocator); - di.dwarf = dwarf; - } - - const raw_path = try coff_obj.getPdbPath() orelse return di; - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(allocator); - defer allocator.free(self_dir); - break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) allocator.free(path); - - di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => { - if (di.dwarf == null) return error.MissingDebugInfo; - return di; - }, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - // Only used by the pdb path - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); - errdefer allocator.free(di.coff_section_headers); - - return di; -} - -/// Reads debug info from an ELF file, or the current binary if none in specified. -/// If the required sections aren't present but a reference to external debug info is, -/// then this this function will recurse to attempt to load the debug sections from -/// an external file. 
-pub fn readElfDebugInfo( - em: *Dwarf.ElfModule, - allocator: Allocator, - elf_filename: ?[]const u8, - build_id: ?[]const u8, - parent_sections: *Dwarf.SectionArray, -) !void { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - - const mapped_mem = try mapWholeFile(elf_file); - return em.load( - allocator, - mapped_mem, - build_id, - null, - parent_sections, - null, - elf_filename, - ); -} - const MachoSymbol = struct { strx: u32, addr: u64, size: u32, ofile: u32, - - /// Returns the address from the macho file - fn address(self: MachoSymbol) u64 { - return self.addr; - } - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { _ = context; return lhs.addr < rhs.addr; } -}; - -/// Takes ownership of file, even on error. -/// TODO it's weird to take ownership even on error, rework this code. 
-fn mapWholeFile(file: File) ![]align(std.heap.page_size_min) const u8 { - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); - const mapped_mem = try posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - errdefer posix.munmap(mapped_mem); - - return mapped_mem; -} - -fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - var min: usize = 0; - var max: usize = symbols.len - 1; - while (min < max) { - const mid = min + (max - min) / 2; - const curr = &symbols[mid]; - const next = &symbols[mid + 1]; - if (address >= next.address()) { - min = mid + 1; - } else if (address < curr.address()) { - max = mid; - } else { - return curr; - } - } - - const max_sym = &symbols[symbols.len - 1]; - if (address >= max_sym.address()) - return max_sym; - - return null; -} - -test machoSearchSymbols { - const symbols = [_]MachoSymbol{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); - try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); - - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); - - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); - try 
testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); -} - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -fn unwindFrameMachO( - allocator: Allocator, - base_address: usize, - context: *UnwindContext, - unwind_info: []const u8, - eh_frame: ?[]const u8, -) !usize { - const header = std.mem.bytesAsValue( - macho.unwind_info_section_header, - unwind_info[0..@sizeOf(macho.unwind_info_section_header)], - ); - const indices = std.mem.bytesAsSlice( - macho.unwind_info_section_header_index_entry, - unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], - ); - if (indices.len == 0) return error.MissingUnwindInfo; - - const mapped_pc = context.pc - base_address; - const second_level_index = blk: { + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol var left: usize = 0; - var len: usize = indices.len; - + var len: usize = symbols.len; while (len > 1) { const mid = left + len / 2; - const offset = indices[mid].functionOffset; - if (mapped_pc < offset) { + if (address < symbols[mid].addr) { len /= 2; } else { left = mid; - if (mapped_pc == offset) break; len -= len / 2; } } + return &symbols[left]; + } - // Last index is a sentinel containing the highest address as its functionOffset - if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; - break :blk &indices[left]; - }; - - const common_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[header.commonEncodingsArraySectionOffset..][0 .. 
header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - - const start_offset = second_level_index.secondLevelPagesSectionOffset; - const kind = std.mem.bytesAsValue( - macho.UNWIND_SECOND_LEVEL, - unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], - ); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_regular_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.unwind_info_regular_second_level_entry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = entries[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } + test find { + const symbols: []const MachoSymbol = &.{ + .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; - break :blk .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_compressed_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], - ); + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); 
+ try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); - const entries = std.mem.bytesAsSlice( - macho.UnwindInfoCompressedEntry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = second_level_index.functionOffset + entries[mid].funcOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = MachoSymbol; +} - const entry = entries[left]; - const function_offset = second_level_index.functionOffset + entry.funcOffset; - if (entry.encodingIndex < header.commonEncodingsArrayCount) { - if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } else { - const local_index = try math.sub( - u8, - entry.encodingIndex, - math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, - ); - const local_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - } - }, - else => return error.InvalidUnwindInfo, - }; +pub const UnwindContext = struct { + gpa: Allocator, + cfa: ?usize, + pc: usize, + thread_context: *std.debug.ThreadContext, + reg_context: Dwarf.abi.RegisterContext, + vm: Dwarf.Unwind.VirtualMachine, + stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context = Dwarf.abi.RegisterContext{ - .eh_frame = false, - .is_macho = true, - }; + pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) !UnwindContext { + comptime assert(supports_unwinding); - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => blk: { - const regs: [5]u3 = .{ - encoding.value.x86_64.frame.reg0, - encoding.value.x86_64.frame.reg1, - encoding.value.x86_64.frame.reg2, - encoding.value.x86_64.frame.reg3, - encoding.value.x86_64.frame.reg4, - }; + const pc = stripInstructionPtrAuthCode( + (try regValueNative(thread_context, ip_reg_num, null)).*, + ); - const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); - var max_reg: usize = 0; - inline for (regs, 0..) 
|reg, i| { - if (reg > 0) max_reg = i; - } + const context_copy = try gpa.create(std.debug.ThreadContext); + std.debug.copyContext(thread_context, context_copy); - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); + return .{ + .gpa = gpa, + .cfa = null, + .pc = pc, + .thread_context = context_copy, + .reg_context = undefined, + .vm = .{}, + .stack_machine = .{}, + }; + } - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + pub fn deinit(self: *UnwindContext) void { + self.vm.deinit(self.gpa); + self.stack_machine.deinit(self.gpa); + self.gpa.destroy(self.thread_context); + self.* = undefined; + } - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + pub fn getFp(self: *const UnwindContext) !usize { + return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + } - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame_offset + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + /// Resolves the register rule and places the result into `out` (see regBytes) + pub fn resolveRegisterRule( + context: *UnwindContext, + col: Dwarf.Unwind.VirtualMachine.Column, + expression_context: std.debug.Dwarf.expression.Context, + out: []u8, + ) !void { + switch (col.rule) { + .default => { + const register = col.register orelse return error.InvalidRegister; + // The default type is usually undefined, but can be overriden by ABI authors. 
+ // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`. + if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + // Callee-saved registers are initialized as if they had the .same_value rule + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return; } - - break :blk new_ip; + @memset(out, undefined); }, - .STACK_IMMD, - .STACK_IND, - => blk: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) - @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) - else stack_size: { - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. - const sub_offset_addr = - base_address + - entry.function_offset + - encoding.value.x86_64.frameless.stack.indirect.sub_offset; - - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added.
- const reg_count = encoding.value.x86_64.frameless.stack_reg_count; - const ip_ptr = if (reg_count > 0) reg_blk: { - var digits: [6]u3 = undefined; - var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; - var registers: [reg_numbers.len]u3 = undefined; - var used_indices = [_]bool{false} ** reg_numbers.len; - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - - registers[i] = reg_numbers[unused_index]; - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :reg_blk reg_addr; - } else sp + stack_size - @sizeOf(usize); - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :blk new_ip; + .undefined => { + @memset(out, undefined); }, - .DWARF => { - return unwindFrameMachODwarf(allocator, base_address, context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + .same_value => { + // TODO: This copy could be eliminated if callers always copy the state then call this function to 
update it + const register = col.register orelse return error.InvalidRegister; + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => blk: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - break :blk new_ip; + .offset => |offset| { + if (context.cfa) |cfa| { + const addr = try applyOffset(cfa, offset); + const ptr: *const usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + } else return error.InvalidCFA; }, - .DWARF => { - return unwindFrameMachODwarf(allocator, base_address, context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + .val_offset => |offset| { + if (context.cfa) |cfa| { + mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); + } else return error.InvalidCFA; }, - .FRAME => blk: { - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const ip_ptr = fp + @sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).@"struct".fields, 0..) 
|field, i| { - if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { - (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - @memcpy( - try regBytes(context.thread_context, 64 + 8 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - @memcpy( - try regBytes(context.thread_context, 64 + 9 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :blk new_ip; + .register => |register| { + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); }, - }, - else => return error.UnimplementedArch, - }; - - context.pc = stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} - -pub const UnwindContext = struct { - allocator: Allocator, - cfa: ?usize, - pc: usize, - thread_context: *std.debug.ThreadContext, - reg_context: Dwarf.abi.RegisterContext, - vm: VirtualMachine, - stack_machine: 
Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - - pub fn init( - allocator: Allocator, - thread_context: *std.debug.ThreadContext, - ) !UnwindContext { - comptime assert(supports_unwinding); - - const pc = stripInstructionPtrAuthCode( - (try regValueNative(thread_context, ip_reg_num, null)).*, - ); - - const context_copy = try allocator.create(std.debug.ThreadContext); - std.debug.copyContext(thread_context, context_copy); - - return .{ - .allocator = allocator, - .cfa = null, - .pc = pc, - .thread_context = context_copy, - .reg_context = undefined, - .vm = .{}, - .stack_machine = .{}, - }; - } - - pub fn deinit(self: *UnwindContext) void { - self.vm.deinit(self.allocator); - self.stack_machine.deinit(self.allocator); - self.allocator.destroy(self.thread_context); - self.* = undefined; - } - - pub fn getFp(self: *const UnwindContext) !usize { - return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + .expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.gpa, expression_context, context.cfa.?); + const addr = if (value) |v| blk: { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + + const ptr: *usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + }, + .val_expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.gpa, expression_context, context.cfa.?); + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); + } else return error.NoExpressionValue; + }, + .architectural => return error.UnimplementedRegisterRule, + } } }; @@ -1584,113 +1081,30 @@ pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { /// `explicit_fde_offset` is for cases where the 
FDE offset is known, such as when __unwind_info /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. fn unwindFrameDwarf( - allocator: Allocator, - unwind: *Dwarf.Unwind, - base_address: usize, + unwind: *const Dwarf.Unwind, + load_offset: usize, context: *UnwindContext, explicit_fde_offset: ?usize, ) !usize { if (!supports_unwinding) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; - // Find the FDE and CIE - const cie, const fde = if (explicit_fde_offset) |fde_offset| blk: { - const frame_section = unwind.section(.eh_frame) orelse return error.MissingFDE; - if (fde_offset >= frame_section.len) return error.MissingFDE; - - var fbr: std.Io.Reader = .fixed(frame_section); - fbr.seek = fde_offset; + const pc_vaddr = context.pc - load_offset; - const fde_entry_header = try Dwarf.Unwind.EntryHeader.read(&fbr, .eh_frame, native_endian); - if (fde_entry_header.type != .fde) return error.MissingFDE; + const fde_offset = explicit_fde_offset orelse try unwind.findFdeOffset( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const format, const cie, const fde = try unwind.loadFde(fde_offset, @sizeOf(usize), native_endian); - const cie_offset = fde_entry_header.type.fde; - fbr.seek = @intCast(cie_offset); - - const cie_entry_header = try Dwarf.Unwind.EntryHeader.read(&fbr, .eh_frame, native_endian); - if (cie_entry_header.type != .cie) return Dwarf.bad(); - - const cie = try Dwarf.Unwind.CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), - native_endian, - ); - const fde = try Dwarf.Unwind.FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie, - @sizeOf(usize), - native_endian, - ); - - break :blk .{ cie, fde }; - } else blk: { - // `.eh_frame_hdr` may be incomplete. 
We'll try it first, but if the lookup fails, we fall - // back to loading `.eh_frame`/`.debug_frame` and using those from that point on. - - if (unwind.eh_frame_hdr) |header| hdr: { - const eh_frame_len = if (unwind.section(.eh_frame)) |eh_frame| eh_frame.len else { - try unwind.scanCieFdeInfo(allocator, native_endian, base_address); - unwind.eh_frame_hdr = null; - break :hdr; - }; - - var cie: Dwarf.Unwind.CommonInformationEntry = undefined; - var fde: Dwarf.Unwind.FrameDescriptionEntry = undefined; - - header.findEntry( - eh_frame_len, - @intFromPtr(unwind.section(.eh_frame_hdr).?.ptr), - context.pc, - &cie, - &fde, - native_endian, - ) catch |err| switch (err) { - error.MissingDebugInfo => { - // `.eh_frame_hdr` appears to be incomplete, so go ahead and populate `cie_map` - // and `fde_list`, and fall back to the binary search logic below. - try unwind.scanCieFdeInfo(allocator, native_endian, base_address); - - // Since `.eh_frame_hdr` is incomplete, we're very likely to get more lookup - // failures using it, and we've just built a complete, sorted list of FDEs - // anyway, so just stop using `.eh_frame_hdr` altogether. - unwind.eh_frame_hdr = null; - - break :hdr; - }, - else => return err, - }; - - break :blk .{ cie, fde }; - } - - const index = std.sort.binarySearch(Dwarf.Unwind.FrameDescriptionEntry, unwind.fde_list.items, context.pc, struct { - pub fn compareFn(pc: usize, item: Dwarf.Unwind.FrameDescriptionEntry) std.math.Order { - if (pc < item.pc_begin) return .lt; - - const range_end = item.pc_begin + item.pc_range; - if (pc < range_end) return .eq; - - return .gt; - } - }.compareFn); - - const fde = if (index) |i| unwind.fde_list.items[i] else return error.MissingFDE; - const cie = unwind.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; - - break :blk .{ cie, fde }; - }; + // Check if this FDE *actually* includes the address. 
+ if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; // Do not set `compile_unit` because the spec states that CFIs // may not reference other debug sections anyway. var expression_context: Dwarf.expression.Context = .{ - .format = cie.format, + .format = format, .thread_context = context.thread_context, .reg_context = context.reg_context, .cfa = context.cfa, @@ -1700,7 +1114,7 @@ fn unwindFrameDwarf( context.reg_context.eh_frame = cie.version != 4; context.reg_context.is_macho = native_os.isDarwin(); - const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); + const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; @@ -1711,7 +1125,7 @@ fn unwindFrameDwarf( context.stack_machine.reset(); const value = try context.stack_machine.run( expr, - context.allocator, + context.gpa, expression_context, context.cfa, ); @@ -1728,9 +1142,9 @@ fn unwindFrameDwarf( // Buffering the modifications is done because copying the thread context is not portable, // some implementations (ie. darwin) use internal pointers to the mcontext. 
- var arena = std.heap.ArenaAllocator.init(context.allocator); + var arena: std.heap.ArenaAllocator = .init(context.gpa); defer arena.deinit(); - const update_allocator = arena.allocator(); + const update_arena = arena.allocator(); const RegisterUpdate = struct { // Backed by thread_context @@ -1749,17 +1163,16 @@ fn unwindFrameDwarf( } const dest = try regBytes(context.thread_context, register, context.reg_context); - const src = try update_allocator.alloc(u8, dest.len); + const src = try update_arena.alloc(u8, dest.len); + try context.resolveRegisterRule(column, expression_context, src); - const prev = update_tail; - update_tail = try update_allocator.create(RegisterUpdate); - update_tail.?.* = .{ + const new_update = try update_arena.create(RegisterUpdate); + new_update.* = .{ .dest = dest, .src = src, - .prev = prev, + .prev = update_tail, }; - - try column.resolveValue(context, expression_context, src); + update_tail = new_update; } } @@ -1792,7 +1205,7 @@ fn unwindFrameDwarf( // The exception to this rule is signal frames, where we return execution would be returned to the instruction // that triggered the handler. const return_address = context.pc; - if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; + if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1; return return_address; } @@ -1843,415 +1256,345 @@ pub fn supportsUnwinding(target: *const std.Target) bool { }; } -fn unwindFrameMachODwarf( - allocator: Allocator, - base_address: usize, - context: *UnwindContext, - eh_frame: []const u8, - fde_offset: usize, -) !usize { - var di: Dwarf = .{ - .endian = native_endian, - .is_macho = true, - }; - defer di.deinit(context.allocator); +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. 
+fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} - di.sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; +/// Uses `mmap` to map the file at `opt_path` (or, if `null`, the self executable image) into memory. +fn mapFileOrSelfExe(opt_path: ?[]const u8) ![]align(std.heap.page_size_min) const u8 { + const file = if (opt_path) |path| + try fs.cwd().openFile(path, .{}) + else + try fs.openSelfExe(.{}); + defer file.close(); - return unwindFrameDwarf(allocator, &di, base_address, context, fde_offset); + const file_len = math.cast(usize, try file.getEndPos()) orelse return error.FileTooBig; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); } -/// This is a virtual machine that runs DWARF call frame instructions. -pub const VirtualMachine = struct { - /// See section 6.4.1 of the DWARF5 specification for details on each - const RegisterRule = union(enum) { - // The spec says that the default rule for each column is the undefined rule. - // However, it also allows ABI / compiler authors to specify alternate defaults, so - // there is a distinction made here. - default: void, - undefined: void, - same_value: void, - // offset(N) - offset: i64, - // val_offset(N) - val_offset: i64, - // register(R) - register: u8, - // expression(E) - expression: []const u8, - // val_expression(E) - val_expression: []const u8, - // Augmenter-defined rule - architectural: void, - }; +/// Unwind a frame using MachO compact unwind info (from __unwind_info). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
+fn unwindFrameMachO( + text_base: usize, + load_offset: usize, + context: *UnwindContext, + unwind_info: []const u8, + eh_frame: ?[]const u8, +) !usize { + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; + const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); - /// Each row contains unwinding rules for a set of registers. - pub const Row = struct { - /// Offset from `FrameDescriptionEntry.pc_begin` - offset: u64 = 0, - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, - /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. - copy_on_write: bool = false, - }; + const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; + const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); + if (indices.len == 0) return error.MissingUnwindInfo; - pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, - - /// Resolves the register rule and places the result into `out` (see regBytes) - pub fn resolveValue( - self: Column, - context: *SelfInfo.UnwindContext, - expression_context: std.debug.Dwarf.expression.Context, - out: []u8, - ) !void { - switch (self.rule) { - .default => { - const register = self.register orelse return error.InvalidRegister; - try getRegDefaultValue(register, context, out); - }, - .undefined => { - @memset(out, undefined); - }, - .same_value => { - // TODO: 
This copy could be eliminated if callers always copy the state then call this function to update it - const register = self.register orelse return error.InvalidRegister; - const src = try regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - }, - .offset => |offset| { - if (context.cfa) |cfa| { - const addr = try applyOffset(cfa, offset); - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - } else return error.InvalidCFA; - }, - .val_offset => |offset| { - if (context.cfa) |cfa| { - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - } else return error.InvalidCFA; - }, - .register => |register| { - const src = try regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - const addr = if (value) |v| blk: { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); - } else return error.NoExpressionValue; - }, - .architectural => return error.UnimplementedRegisterRule, + // MLUGG TODO HACKHACK -- Unwind needs a slight refactor to make 
this work well + const opt_dwarf_unwind: ?Dwarf.Unwind = if (eh_frame) |eh_frame_data| .{ + .debug_frame = null, + .eh_frame = .{ + .header = .{ + .vaddr = undefined, + .eh_frame_vaddr = @intFromPtr(eh_frame_data.ptr) - load_offset, + .search_table = null, + }, + .eh_frame_data = eh_frame_data, + .sorted_fdes = null, + }, + } else null; + + // offset of the PC into the `__TEXT` segment + const pc_text_offset = context.pc - text_base; + + const start_offset: u32, const first_level_offset: u32 = index: { + var left: usize = 0; + var len: usize = indices.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < indices[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; } } + break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; }; + // An offset of 0 is a sentinel indicating a range does not have unwind info. + if (start_offset == 0) return error.MissingUnwindInfo; - const ColumnRange = struct { - /// Index into `columns` of the first column in this row. 
- start: usize = undefined, - len: u8 = 0, - }; + const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; + const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], + ); - columns: std.ArrayListUnmanaged(Column) = .empty, - stack: std.ArrayListUnmanaged(ColumnRange) = .empty, - current_row: Row = .{}, + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; + const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); - /// The result of executing the CIE's initial_instructions - cie_row: ?Row = null, + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; - pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.stack.deinit(allocator); - self.columns.deinit(allocator); - self.* = undefined; - } + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if 
(pc_text_offset < entries[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :entry .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; - pub fn reset(self: *VirtualMachine) void { - self.stack.clearRetainingCapacity(); - self.columns.clearRetainingCapacity(); - self.current_row = .{}; - self.cie_row = null; - } + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + const entry = entries[left]; - /// Return a slice backed by the row's non-CFA columns - pub fn rowColumns(self: VirtualMachine, row: Row) []Column { - if (row.columns.len == 0) return &.{}; - return self.columns.items[row.columns.start..][0..row.columns.len]; - } + const function_offset = first_level_offset + entry.funcOffset; + if (entry.encodingIndex < common_encodings.len) { + break :entry .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } - /// Either retrieves or adds a column for `register` (non-CFA) in the current row. 
- fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { - if (c.register == register) return c; - } + const local_index = entry.encodingIndex - common_encodings.len; + const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidUnwindInfo; + const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :entry .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + }, + else => return error.InvalidUnwindInfo, + }; - if (self.current_row.columns.len == 0) { - self.current_row.columns.start = self.columns.items.len; - } - self.current_row.columns.len += 1; + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; - const column = try self.columns.addOne(allocator); - column.* = .{ - .register = register, - }; + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => ip: { + const frame = encoding.value.x86_64.frame; - return column; - } + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); - /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known, and the row is returned. 
- pub fn runTo( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.Unwind.CommonInformationEntry, - fde: std.debug.Dwarf.Unwind.FrameDescriptionEntry, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Row { - assert(self.cie_row == null); - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; - - var prev_row: Row = self.current_row; - - var cie_stream: std.Io.Reader = .fixed(cie.initial_instructions); - var fde_stream: std.Io.Reader = .fixed(fde.instructions); - const streams = [_]*std.Io.Reader{ &cie_stream, &fde_stream }; - - for (&streams, 0..) |stream, i| { - while (stream.seek < stream.buffer.len) { - const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); - prev_row = try self.step(allocator, cie, i == 0, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; - } - } + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - return self.current_row; - } + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - pub fn runToNative( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.Unwind.CommonInformationEntry, - fde: std.debug.Dwarf.Unwind.FrameDescriptionEntry, - ) !Row { - return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian); - } + const regs: [5]u3 = .{ + frame.reg0, + frame.reg1, + frame.reg2, + frame.reg3, + frame.reg4, + }; + for (regs, 0..) 
|reg, i| { + if (reg == 0) continue; + const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } - fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { - if (!self.current_row.copy_on_write) return; + break :ip new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => ip: { + const frameless = encoding.value.x86_64.frameless; - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; - } - } + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size: usize = stack_size: { + if (encoding.mode.x86_64 == .STACK_IMMD) { + break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); + } + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + text_base + + entry.function_offset + + frameless.stack.indirect.sub_offset; + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); + }; - /// Executes a single instruction. - /// If this instruction is from the CIE, `is_initial` should be set. - /// Returns the value of `current_row` before executing this instruction. 
- pub fn step( - self: *VirtualMachine, - allocator: std.mem.Allocator, - cie: std.debug.Dwarf.Unwind.CommonInformationEntry, - is_initial: bool, - instruction: Dwarf.call_frame.Instruction, - ) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .undefined = {} }; - }, - .same_value => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try 
self.getOrAddColumn(allocator, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(allocator, self.current_row.columns); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored_columns = self.stack.pop() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); - - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored_columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - .def_cfa_register => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - 
self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; - }, - .expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .expression = i.block, + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = frameless.stack_reg_count; + const ip_ptr = ip_ptr: { + var digits: [6]u3 = undefined; + var accumulator: usize = frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + var registers: [6]u3 = undefined; + var used_indices: [6]bool = @splat(false); + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + registers[i] = @intCast(unused_index + 1); + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :ip_ptr reg_addr; }; + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; }, - .val_offset => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; + .DWARF => { + const dwarf_unwind = &(opt_dwarf_unwind orelse return error.MissingEhFrame); + return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.x86_64.dwarf)); }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; + }, + .aarch64, .aarch64_be => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => ip: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + (try regValueNative(context.thread_context, spRegNum(reg_context), 
reg_context)).* = new_sp; + break :ip new_ip; }, - .val_expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_expression = i.block, - }; + .DWARF => { + const dwarf_unwind = &(opt_dwarf_unwind orelse return error.MissingEhFrame); + return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.arm64.dwarf)); }, - } + .FRAME => ip: { + const frame = encoding.value.arm64.frame; - return prev_row; - } -}; + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const ip_ptr = fp + @sizeOf(usize); -/// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. The DWARF spec defines these as having -/// the .undefined rule by default, but allows ABI authors to override that. -fn getRegDefaultValue(reg_number: u8, context: *UnwindContext, out: []u8) !void { - switch (builtin.cpu.arch) { - .aarch64, .aarch64_be => { - // Callee-saved registers are initialized as if they had the .same_value rule - if (reg_number >= 19 and reg_number <= 28) { - const src = try regBytes(context.thread_context, reg_number, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return; - } - }, - else => {}, - } + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } - @memset(out, undefined); -} + inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) 
|field, i| { + if (@field(frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + } + } -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. -fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; + }, + }, + else => comptime unreachable, // unimplemented + }; + + context.pc = stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; } diff --git a/lib/std/dwarf/EH.zig b/lib/std/dwarf/EH.zig index 3ee7e0be0f07..8396f36f6aab 100644 --- a/lib/std/dwarf/EH.zig +++ b/lib/std/dwarf/EH.zig @@ -1,27 +1,32 @@ -pub const PE = struct { - pub const absptr = 0x00; +pub const PE = packed struct(u8) { + type: Type, + rel: Rel, - pub const size_mask = 0x7; - pub const sign_mask = 0x8; - pub const type_mask = size_mask | sign_mask; + /// This is a special encoding which does not correspond to named `type`/`rel` values. 
+ pub const omit: PE = @bitCast(@as(u8, 0xFF)); - pub const uleb128 = 0x01; - pub const udata2 = 0x02; - pub const udata4 = 0x03; - pub const udata8 = 0x04; - pub const sleb128 = 0x09; - pub const sdata2 = 0x0A; - pub const sdata4 = 0x0B; - pub const sdata8 = 0x0C; + pub const Type = enum(u4) { + absptr = 0x0, + uleb128 = 0x1, + udata2 = 0x2, + udata4 = 0x3, + udata8 = 0x4, + sleb128 = 0x9, + sdata2 = 0xA, + sdata4 = 0xB, + sdata8 = 0xC, + _, + }; - pub const rel_mask = 0x70; - pub const pcrel = 0x10; - pub const textrel = 0x20; - pub const datarel = 0x30; - pub const funcrel = 0x40; - pub const aligned = 0x50; - - pub const indirect = 0x80; - - pub const omit = 0xff; + pub const Rel = enum(u4) { + abs = 0x0, + pcrel = 0x1, + textrel = 0x2, + datarel = 0x3, + funcrel = 0x4, + aligned = 0x5, + /// Undocumented GCC extension + indirect = 0x8, + _, + }; }; diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 455f5f8fb120..0ce1f4c7ff3f 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -839,62 +839,112 @@ pub const nlist = extern struct { pub const nlist_64 = extern struct { n_strx: u32, - n_type: u8, + n_type: packed union { + bits: packed struct(u8) { + ext: bool, + type: enum(u3) { + undf = 0, + abs = 1, + sect = 7, + pbud = 6, + indr = 5, + _, + }, + pext: bool, + /// Any non-zero value indicates this is an stab, so the `stab` field should be used. 
+ is_stab: u3, + }, + stab: enum(u8) { + gsym = N_GSYM, + fname = N_FNAME, + fun = N_FUN, + stsym = N_STSYM, + lcsym = N_LCSYM, + bnsym = N_BNSYM, + ast = N_AST, + opt = N_OPT, + rsym = N_RSYM, + sline = N_SLINE, + ensym = N_ENSYM, + ssym = N_SSYM, + so = N_SO, + oso = N_OSO, + lsym = N_LSYM, + bincl = N_BINCL, + sol = N_SOL, + params = N_PARAMS, + version = N_VERSION, + olevel = N_OLEVEL, + psym = N_PSYM, + eincl = N_EINCL, + entry = N_ENTRY, + lbrac = N_LBRAC, + excl = N_EXCL, + rbrac = N_RBRAC, + bcomm = N_BCOMM, + ecomm = N_ECOMM, + ecoml = N_ECOML, + leng = N_LENG, + _, + }, + }, n_sect: u8, - n_desc: u16, + n_desc: packed struct(u16) { + _pad0: u3 = 0, + arm_thumb_def: bool, + _pad1: u1 = 0, + /// The meaning of this bit is contextual. + /// See `N_DESC_DISCARDED` and `N_NO_DEAD_STRIP`. + discarded_or_no_dead_strip: bool, + weak_ref: bool, + /// The meaning of this bit is contextual. + /// See `N_WEAK_DEF` and `N_REF_TO_WEAK`. + weak_def_or_ref_to_weak: bool, + symbol_resolver: bool, + alt_entry: bool, + _pad2: u6 = 0, + }, n_value: u64, + // MLUGG TODO DELETE pub fn stab(sym: nlist_64) bool { - return N_STAB & sym.n_type != 0; - } - - pub fn pext(sym: nlist_64) bool { - return N_PEXT & sym.n_type != 0; - } - - pub fn ext(sym: nlist_64) bool { - return N_EXT & sym.n_type != 0; + return sym.n_type.bits.is_stab != 0; } - + // MLUGG TODO DELETE pub fn sect(sym: nlist_64) bool { - const type_ = N_TYPE & sym.n_type; - return type_ == N_SECT; + return sym.n_type.type == .sect; } - + // MLUGG TODO DELETE pub fn undf(sym: nlist_64) bool { - const type_ = N_TYPE & sym.n_type; - return type_ == N_UNDF; + return sym.n_type.type == .undf; } - + // MLUGG TODO DELETE pub fn indr(sym: nlist_64) bool { - const type_ = N_TYPE & sym.n_type; - return type_ == N_INDR; + return sym.n_type.type == .indr; } - + // MLUGG TODO DELETE pub fn abs(sym: nlist_64) bool { - const type_ = N_TYPE & sym.n_type; - return type_ == N_ABS; + return sym.n_type.type == .abs; } - + // MLUGG TODO 
DELETE pub fn weakDef(sym: nlist_64) bool { - return sym.n_desc & N_WEAK_DEF != 0; + return sym.n_desc.weak_def_or_ref_to_weak; } - + // MLUGG TODO DELETE pub fn weakRef(sym: nlist_64) bool { - return sym.n_desc & N_WEAK_REF != 0; + return sym.n_desc.weak_ref; } - + // MLUGG TODO DELETE pub fn discarded(sym: nlist_64) bool { - return sym.n_desc & N_DESC_DISCARDED != 0; + return sym.n_desc.discarded_or_no_dead_strip; } - + // MLUGG TODO DELETE pub fn noDeadStrip(sym: nlist_64) bool { - return sym.n_desc & N_NO_DEAD_STRIP != 0; + return sym.n_desc.discarded_or_no_dead_strip; } pub fn tentative(sym: nlist_64) bool { - if (!sym.undf()) return false; - return sym.n_value != 0; + return sym.n_type.type == .undf and sym.n_value != 0; } }; @@ -2046,7 +2096,7 @@ pub const unwind_info_compressed_second_level_page_header = extern struct { // encodings array }; -pub const UnwindInfoCompressedEntry = packed struct { +pub const UnwindInfoCompressedEntry = packed struct(u32) { funcOffset: u24, encodingIndex: u8, }; diff --git a/src/link/Elf/eh_frame.zig b/src/link/Elf/eh_frame.zig index 03ab9b5ae0e1..76756d66d880 100644 --- a/src/link/Elf/eh_frame.zig +++ b/src/link/Elf/eh_frame.zig @@ -455,72 +455,23 @@ pub fn writeEhFrameRelocs(elf_file: *Elf, relocs: *std.array_list.Managed(elf.El } pub fn writeEhFrameHdr(elf_file: *Elf, writer: anytype) !void { - const comp = elf_file.base.comp; - const gpa = comp.gpa; - try writer.writeByte(1); // version - try writer.writeByte(DW_EH_PE.pcrel | DW_EH_PE.sdata4); - try writer.writeByte(DW_EH_PE.udata4); - try writer.writeByte(DW_EH_PE.datarel | DW_EH_PE.sdata4); + try writer.writeByte(DW_EH_PE.pcrel | DW_EH_PE.sdata4); // eh_frame_ptr_enc + // Building the lookup table would be expensive work on every `flush` -- omit it. 
+ try writer.writeByte(DW_EH_PE.omit); // fde_count_enc + try writer.writeByte(DW_EH_PE.omit); // table_enc const shdrs = elf_file.sections.items(.shdr); const eh_frame_shdr = shdrs[elf_file.section_indexes.eh_frame.?]; const eh_frame_hdr_shdr = shdrs[elf_file.section_indexes.eh_frame_hdr.?]; - const num_fdes = @as(u32, @intCast(@divExact(eh_frame_hdr_shdr.sh_size - eh_frame_hdr_header_size, 8))); - const existing_size = existing_size: { - const zo = elf_file.zigObjectPtr() orelse break :existing_size 0; - const sym = zo.symbol(zo.eh_frame_index orelse break :existing_size 0); - break :existing_size sym.atom(elf_file).?.size; - }; try writer.writeInt( u32, @as(u32, @bitCast(@as( i32, - @truncate(@as(i64, @intCast(eh_frame_shdr.sh_addr + existing_size)) - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr)) - 4), + @truncate(@as(i64, @intCast(eh_frame_shdr.sh_addr)) - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr)) - 4), ))), .little, ); - try writer.writeInt(u32, num_fdes, .little); - - const Entry = extern struct { - init_addr: u32, - fde_addr: u32, - - pub fn lessThan(ctx: void, lhs: @This(), rhs: @This()) bool { - _ = ctx; - return lhs.init_addr < rhs.init_addr; - } - }; - - var entries = std.array_list.Managed(Entry).init(gpa); - defer entries.deinit(); - try entries.ensureTotalCapacityPrecise(num_fdes); - - for (elf_file.objects.items) |index| { - const object = elf_file.file(index).?.object; - for (object.fdes.items) |fde| { - if (!fde.alive) continue; - - const relocs = fde.relocs(object); - assert(relocs.len > 0); // Should this be an error? Things are completely broken anyhow if this trips... 
- const rel = relocs[0]; - const ref = object.resolveSymbol(rel.r_sym(), elf_file); - const sym = elf_file.symbol(ref).?; - const P = @as(i64, @intCast(fde.address(elf_file))); - const S = @as(i64, @intCast(sym.address(.{}, elf_file))); - const A = rel.r_addend; - entries.appendAssumeCapacity(.{ - .init_addr = @bitCast(@as(i32, @truncate(S + A - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr))))), - .fde_addr = @as( - u32, - @bitCast(@as(i32, @truncate(P - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr))))), - ), - }); - } - } - - std.mem.sort(Entry, entries.items, {}, Entry.lessThan); - try writer.writeSliceEndian(Entry, entries.items, .little); } const eh_frame_hdr_header_size: usize = 12; From ed6ed62c42dfad18facde164785a62faa305cb6c Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 12:12:54 +0100 Subject: [PATCH 03/85] more stuff --- lib/std/debug/Dwarf.zig | 15 +- lib/std/debug/Dwarf/Unwind.zig | 389 ++++++++--------- lib/std/debug/SelfInfo.zig | 760 ++++++++++++++++----------------- 3 files changed, 555 insertions(+), 609 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 8d1087b6caed..fd678605483b 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1449,6 +1449,7 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { return str[casted_offset..last :0]; } +// MLUGG TODO: i am dubious of this whole thing being here atp. 
look closely and see if it depends on being the self process pub const ElfModule = struct { unwind: Dwarf.Unwind, dwarf: Dwarf, @@ -1456,10 +1457,7 @@ pub const ElfModule = struct { external_mapped_memory: ?[]align(std.heap.page_size_min) const u8, pub const init: ElfModule = .{ - .unwind = .{ - .debug_frame = null, - .eh_frame = null, - }, + .unwind = .init, .dwarf = .{}, .mapped_memory = null, .external_mapped_memory = null, @@ -1508,6 +1506,8 @@ pub const ElfModule = struct { /// If the required sections aren't present but a reference to external debug /// info is, then this this function will recurse to attempt to load the debug /// sections from an external file. + /// + /// MLUGG TODO: this should *return* a thing pub fn load( em: *ElfModule, gpa: Allocator, @@ -1518,6 +1518,8 @@ pub const ElfModule = struct { parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, elf_filename: ?[]const u8, ) LoadError!void { + assert(em.mapped_memory == null); + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); @@ -1709,8 +1711,9 @@ pub const ElfModule = struct { separate_debug_crc, §ions, mapped_mem, - )) |debug_info| { - return debug_info; + )) |v| { + v; + return; } else |_| {} // /.debug/ diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index a51c417e7c97..c5f115802663 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -1,28 +1,35 @@ +//! MLUGG TODO DOCUMENT THIS + pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); -/// The contents of the `.debug_frame` section as specified by DWARF. This might be a more reliable -/// stack unwind mechanism in some cases, or it may be present when `.eh_frame` is not, but fetching -/// the data requires loading the binary, so it is not a viable approach for fast stack trace -/// capturing within a process. 
-debug_frame: ?struct { - data: []const u8, - /// Offsets into `data` of FDEs, sorted by ascending `pc_begin`. - sorted_fdes: []SortedFdeEntry, +frame_section: ?struct { + id: Section, + /// The virtual address of the start of the section. "Virtual address" refers to the address in + /// the binary (e.g. `sh_addr` in an ELF file); the equivalent runtime address may be relocated + /// in position-independent binaries. + vaddr: u64, + /// The full contents of the section. May have imprecise bounds depending on `section`. + /// + /// For `.debug_frame`, the slice length is exactly equal to the section length. This is needed + /// to know the number of CIEs and FDEs. + /// + /// For `.eh_frame`, the slice length may exceed the section length, i.e. the slice may refer to + /// more bytes than are in the second. This restriction exists because `.eh_frame_hdr` only + /// includes the address of the loaded `.eh_frame` data, not its length. It is not a problem + /// because unlike `.debug_frame`, the end of the CIE/FDE list is signaled through a sentinel + /// value. If this slice does have bounds, they will still be checked, preventing crashes when + /// reading potentially-invalid `.eh_frame` data from files. + bytes: []const u8, }, -/// Data associated with the `.eh_frame` and `.eh_frame_hdr` sections as defined by LSB Core. The -/// format of `.eh_frame` is an extension of that of DWARF's `.debug_frame` -- in fact it is almost -/// identical, though subtly different in a few places. -eh_frame: ?struct { - header: EhFrameHeader, - /// Though this is a slice, it may be longer than the `.eh_frame` section. When unwinding - /// through the runtime-loaded `.eh_frame_hdr` data, we are not told the size of the `.eh_frame` - /// section, so construct a slice referring to all of the rest of memory. The end of the section - /// must be detected through `EntryHeader.terminator`. 
- eh_frame_data: []const u8, - /// Offsets into `eh_frame_data` of FDEs, sorted by ascending `pc_begin`. - /// Populated only if `header` does not already contain a lookup table. - sorted_fdes: ?[]SortedFdeEntry, +lookup: ?union(enum) { + eh_frame_hdr: struct { + /// Virtual address of the `.eh_frame_hdr` section. + vaddr: u64, + table: EhFrameHeader.SearchTable, + }, + /// Offsets into `frame_section` of FDEs, sorted by ascending `pc_begin`. + sorted_fdes: []SortedFdeEntry, }, const SortedFdeEntry = struct { @@ -34,17 +41,61 @@ const SortedFdeEntry = struct { const Section = enum { debug_frame, eh_frame }; +// MLUGG TODO deinit? +pub const init: Unwind = .{ + .frame_section = null, + .lookup = null, +}; + /// This represents the decoded .eh_frame_hdr header pub const EhFrameHeader = struct { - vaddr: u64, eh_frame_vaddr: u64, - search_table: ?struct { + search_table: ?SearchTable, + + pub const SearchTable = struct { /// The byte offset of the search table into the `.eh_frame_hdr` section. offset: u8, encoding: EH.PE, fde_count: usize, entries: []const u8, - }, + + /// Returns the vaddr of the FDE for `pc`, or `null` if no matching FDE was found. 
+ fn findEntry( + table: *const SearchTable, + eh_frame_hdr_vaddr: u64, + pc: u64, + addr_size_bytes: u8, + endian: Endian, + ) !?u64 { + const table_vaddr = eh_frame_hdr_vaddr + table.offset; + const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes); + var left: usize = 0; + var len: usize = table.fde_count; + while (len > 1) { + const mid = left + len / 2; + var entry_reader: Reader = .fixed(table.entries[mid * entry_size ..][0..entry_size]); + const pc_begin = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr_vaddr, + }, endian); + if (pc < pc_begin) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + if (len == 0) return null; + var entry_reader: Reader = .fixed(table.entries[left * entry_size ..][0..entry_size]); + // Skip past `pc_begin`; we're now interested in the fde offset + _ = try readEhPointerAbs(&entry_reader, table.encoding.type, addr_size_bytes, endian); + const fde_ptr = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr_vaddr, + }, endian); + return fde_ptr; + } + }; pub fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 { return switch (table_enc.type) { @@ -76,65 +127,29 @@ pub const EhFrameHeader = struct { .pc_rel_base = eh_frame_hdr_vaddr + r.seek, }, endian); + const table: ?SearchTable = table: { + if (fde_count_enc == EH.PE.omit) break :table null; + if (table_enc == EH.PE.omit) break :table null; + const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + const entry_size = try entrySize(table_enc, addr_size_bytes); + const bytes_offset = r.seek; + const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream; + const bytes = try r.take(bytes_len); + break :table .{ + .encoding = table_enc, + 
.fde_count = @intCast(fde_count), + .entries = bytes, + .offset = @intCast(bytes_offset), + }; + }; + return .{ - .vaddr = eh_frame_hdr_vaddr, .eh_frame_vaddr = eh_frame_ptr, - .search_table = table: { - if (fde_count_enc == EH.PE.omit) break :table null; - if (table_enc == EH.PE.omit) break :table null; - const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{ - .pc_rel_base = eh_frame_hdr_vaddr + r.seek, - }, endian); - const entry_size = try entrySize(table_enc, addr_size_bytes); - const bytes_offset = r.seek; - const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream; - const bytes = try r.take(bytes_len); - break :table .{ - .encoding = table_enc, - .fde_count = @intCast(fde_count), - .entries = bytes, - .offset = @intCast(bytes_offset), - }; - }, + .search_table = table, }; } - - /// Asserts that `eh_frame_hdr.search_table != null`. - fn findEntry( - eh_frame_hdr: *const EhFrameHeader, - pc: u64, - addr_size_bytes: u8, - endian: Endian, - ) !?u64 { - const table = &eh_frame_hdr.search_table.?; - const table_vaddr = eh_frame_hdr.vaddr + table.offset; - const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes); - var left: usize = 0; - var len: usize = table.fde_count; - while (len > 1) { - const mid = left + len / 2; - var entry_reader: Reader = .fixed(table.entries[mid * entry_size ..][0..entry_size]); - const pc_begin = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ - .pc_rel_base = table_vaddr + left * entry_size, - .data_rel_base = eh_frame_hdr.vaddr, - }, endian); - if (pc < pc_begin) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - if (len == 0) return null; - var entry_reader: Reader = .fixed(table.entries[left * entry_size ..][0..entry_size]); - // Skip past `pc_begin`; we're now interested in the fde offset - _ = try readEhPointerAbs(&entry_reader, table.encoding.type, addr_size_bytes, endian); - const fde_ptr = try 
readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ - .pc_rel_base = table_vaddr + left * entry_size, - .data_rel_base = eh_frame_hdr.vaddr, - }, endian); - return std.math.sub(u64, fde_ptr, eh_frame_hdr.eh_frame_vaddr) catch bad(); // offset into .eh_frame - } }; pub const EntryHeader = union(enum) { @@ -356,133 +371,84 @@ pub const FrameDescriptionEntry = struct { } }; -pub fn scanDebugFrame( - unwind: *Unwind, - gpa: Allocator, - section_vaddr: u64, - section_bytes: []const u8, - addr_size_bytes: u8, - endian: Endian, -) void { - assert(unwind.debug_frame == null); - - var fbr: Reader = .fixed(section_bytes); - var fde_list: std.ArrayList(SortedFdeEntry) = .empty; - defer fde_list.deinit(gpa); - while (fbr.seek < fbr.buffer.len) { - const entry_offset = fbr.seek; - switch (try EntryHeader.read(&fbr, fbr.seek, .debug_frame, endian)) { - // Ignore CIEs; we only need them to parse the FDEs! - .cie => |info| { - try fbr.discardAll(info.bytes_len); - continue; - }, - .fde => |info| { - const cie: CommonInformationEntry = cie: { - var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); - const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .debug_frame, endian)) { - .cie => |cie_info| cie_info, - .fde, .terminator => return bad(), // This is meant to be a CIE - }; - break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .debug_frame, addr_size_bytes); - }; - const fde: FrameDescriptionEntry = try .parse( - section_vaddr + fbr.seek, - try fbr.take(info.bytes_len), - cie, - endian, - ); - try fde_list.append(.{ - .pc_begin = fde.pc_begin, - .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header - }); - }, - .terminator => return bad(), // DWARF `.debug_frame` isn't meant to have terminators - } - } - const fde_slice = try fde_list.toOwnedSlice(gpa); - errdefer comptime unreachable; - std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { - fn lessThan(ctx: void, a: 
SortedFdeEntry, b: SortedFdeEntry) bool { - ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); - unwind.debug_frame = .{ .data = section_bytes, .sorted_fdes = fde_slice }; +/// Load unwind information from the contents of an `.eh_frame` or `.debug_frame` section. +/// +/// If the `.eh_frame_hdr` section is available, consider instead using `loadFromEhFrameHdr`. This +/// allows the implementation to use a search table embedded in that section if it is available. +pub fn loadFromSection(unwind: *Unwind, section: Section, section_vaddr: u64, section_bytes: []const u8) void { + assert(unwind.frame_section == null); + assert(unwind.lookup == null); + unwind.frame_section = .{ + .id = section, + .bytes = section_bytes, + .vaddr = section_vaddr, + }; } -pub fn scanEhFrame( +/// Load unwind information from a header loaded from an `.eh_frame_hdr` section, and a pointer to +/// the contents of the `.eh_frame` section. +/// +/// This differs from `loadFromSection` because `.eh_frame_hdr` may embed a binary search table, and +/// if it does, this function will use that for address lookups instead of constructing our own +/// search table. +pub fn loadFromEhFrameHdr( unwind: *Unwind, - gpa: Allocator, header: EhFrameHeader, + section_vaddr: u64, section_bytes_ptr: [*]const u8, - /// This is separate from `section_bytes_ptr` because it is unknown when `.eh_frame` is accessed - /// through the pointer in the `.eh_frame_hdr` section. If this is non-`null`, we avoid reading - /// past this number of bytes, but if `null`, we must assume that the `.eh_frame` data has a - /// valid terminator. - section_bytes_len: ?usize, - addr_size_bytes: u8, - endian: Endian, ) !void { - assert(unwind.eh_frame == null); - - const section_bytes: []const u8 = bytes: { - // If the length is unknown, let the slice span from `section_bytes_ptr` to the end of memory. 
- const len = section_bytes_len orelse (std.math.maxInt(usize) - @intFromPtr(section_bytes_ptr)); - break :bytes section_bytes_ptr[0..len]; + assert(unwind.frame_section == null); + assert(unwind.lookup == null); + unwind.frame_section = .{ + .id = .eh_frame, + .bytes = maxSlice(section_bytes_ptr), + .vaddr = header.eh_frame_vaddr, }; - - if (header.search_table != null) { - // No need to populate `sorted_fdes`, the header contains a search table. - unwind.eh_frame = .{ - .header = header, - .eh_frame_data = section_bytes, - .sorted_fdes = null, - }; - return; + if (header.search_table) |table| { + unwind.lookup = .{ .eh_frame_hdr = .{ + .vaddr = section_vaddr, + .table = table, + } }; } +} - // We aren't told the length of this section. Luckily, we don't need it, because there will be - // an `EntryHeader.terminator` after the last CIE/FDE. Just make a `Reader` which will give us - // alllll of the bytes! - var fbr: Reader = .fixed(section_bytes); +pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void { + const section = unwind.frame_section.?; + if (unwind.lookup != null) return; + var r: Reader = .fixed(section.bytes); var fde_list: std.ArrayList(SortedFdeEntry) = .empty; defer fde_list.deinit(gpa); - while (true) { - const entry_offset = fbr.seek; - switch (try EntryHeader.read(&fbr, fbr.seek, .eh_frame, endian)) { - // Ignore CIEs; we only need them to parse the FDEs! 
- .cie => |info| { - try fbr.discardAll(info.bytes_len); + const saw_terminator = while (r.seek < r.buffer.len) { + const entry_offset = r.seek; + switch (try EntryHeader.read(&r, entry_offset, section.id, endian)) { + .cie => |cie_info| { + // Ignore CIEs for now; we'll parse them when we read a corresponding FDE + try r.discardAll(cie_info.bytes_len); continue; }, - .fde => |info| { - const cie: CommonInformationEntry = cie: { - var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); - const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .eh_frame, endian)) { - .cie => |cie_info| cie_info, - .fde, .terminator => return bad(), // This is meant to be a CIE - }; - break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .eh_frame, addr_size_bytes); + .fde => |fde_info| { + var cie_r: Reader = .fixed(section.bytes[fde_info.cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_r, fde_info.cie_offset, section.id, endian)) { + .cie => |cie_info| cie_info, + .fde, .terminator => return bad(), // this is meant to be a CIE }; - const fde: FrameDescriptionEntry = try .parse( - header.eh_frame_vaddr + fbr.seek, - try fbr.take(info.bytes_len), - cie, - endian, - ); + const cie: CommonInformationEntry = try .parse(try cie_r.take(cie_info.bytes_len), section.id, addr_size_bytes); + const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(fde_info.bytes_len), cie, endian); try fde_list.append(gpa, .{ .pc_begin = fde.pc_begin, - .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header + .fde_offset = entry_offset, }); }, - // Unlike `.debug_frame`, the `.eh_frame` section does have a terminator CIE -- this is - // necessary because `header` doesn't include the length of the `.eh_frame` section - .terminator => break, + .terminator => break true, } + } else false; + switch (section.id) { + .eh_frame => if (!saw_terminator) return bad(), // `.eh_frame` indicates the 
end of the CIE/FDE list with a sentinel entry + .debug_frame => if (saw_terminator) return bad(), // `.debug_frame` uses the section bounds and does not specify a sentinel entry } + const fde_slice = try fde_list.toOwnedSlice(gpa); errdefer comptime unreachable; std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { @@ -491,26 +457,29 @@ pub fn scanEhFrame( return a.pc_begin < b.pc_begin; } }.lessThan); - unwind.eh_frame = .{ - .header = header, - .eh_frame_data = section_bytes, - .sorted_fdes = fde_slice, - }; + unwind.lookup = .{ .sorted_fdes = fde_slice }; } +/// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no +/// matching FDE was found. The returned offset can be passed to `getFde` to load the data +/// associated with the FDE. +/// +/// Before calling this function, `prepareLookup` must return successfully. +/// /// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must /// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. -pub fn findFdeOffset(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: Endian) !?u64 { - // We'll break from this block only if we have a manually-constructed search table. - const sorted_fdes: []const SortedFdeEntry = fdes: { - if (unwind.debug_frame) |df| break :fdes df.sorted_fdes; - if (unwind.eh_frame) |eh_frame| { - if (eh_frame.sorted_fdes) |fdes| break :fdes fdes; - // Use the search table from the `.eh_frame_hdr` section rather than one of our own - return eh_frame.header.findEntry(pc, addr_size_bytes, endian); - } - // We have no available unwind info - return null; +pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: Endian) !?u64 { + const sorted_fdes: []const SortedFdeEntry = switch (unwind.lookup.?) 
{ + .eh_frame_hdr => |eh_frame_hdr| { + const fde_vaddr = try eh_frame_hdr.table.findEntry( + eh_frame_hdr.vaddr, + pc, + addr_size_bytes, + endian, + ) orelse return null; + return std.math.sub(u64, fde_vaddr, unwind.frame_section.?.vaddr) catch bad(); // convert vaddr to offset + }, + .sorted_fdes => |sorted_fdes| sorted_fdes, }; const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { @@ -523,33 +492,29 @@ pub fn findFdeOffset(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian return sorted_fdes[first_bad_idx - 1].fde_offset; } -pub fn loadFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { - const section_bytes: []const u8, const section_vaddr: u64, const section: Section = s: { - if (unwind.debug_frame) |df| break :s .{ df.data, if (true) @panic("MLUGG TODO"), .debug_frame }; - if (unwind.eh_frame) |ef| break :s .{ ef.eh_frame_data, ef.header.eh_frame_vaddr, .eh_frame }; - unreachable; // how did you get `fde_offset`?! 
- }; +pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { + const section = unwind.frame_section.?; - var fde_reader: Reader = .fixed(section_bytes[fde_offset..]); - const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section, endian)) { + var fde_reader: Reader = .fixed(section.bytes[fde_offset..]); + const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section.id, endian)) { .fde => |info| info, .cie, .terminator => return bad(), // This is meant to be an FDE }; const cie_offset = fde_info.cie_offset; - var cie_reader: Reader = .fixed(section_bytes[cie_offset..]); - const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section, endian)) { + var cie_reader: Reader = .fixed(section.bytes[cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section.id, endian)) { .cie => |info| info, .fde, .terminator => return bad(), // This is meant to be a CIE }; const cie: CommonInformationEntry = try .parse( try cie_reader.take(cie_info.bytes_len), - section, + section.id, addr_size_bytes, ); const fde: FrameDescriptionEntry = try .parse( - section_vaddr + fde_offset + fde_reader.seek, + section.vaddr + fde_offset + fde_reader.seek, try fde_reader.take(fde_info.bytes_len), cie, endian, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index daa5cf12d2e8..85b784b447ac 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -26,10 +26,13 @@ const regValueNative = Dwarf.abi.regValueNative; const SelfInfo = @This(); -/// MLUGG TODO: what if this field had a less stupid name... 
-address_map: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), - -module_cache: if (native_os == .windows) std.ArrayListUnmanaged(windows.MODULEENTRY32) else void, +modules: std.AutoHashMapUnmanaged(usize, struct { + di: Module.DebugInfo, + loaded_debug: bool, + loaded_unwind: bool, + const init: @This() = .{ .di = .init, .loaded_debug = false, .loaded_unwind = false }; +}), +lookup_cache: Module.LookupCache, pub const target_supported: bool = switch (native_os) { .linux, @@ -46,19 +49,19 @@ pub const target_supported: bool = switch (native_os) { }; pub const init: SelfInfo = .{ - .address_map = .empty, - .module_cache = if (native_os == .windows) .empty, + .modules = .empty, + .lookup_cache = if (Module.LookupCache != void) .init, }; pub fn deinit(self: *SelfInfo) void { // MLUGG TODO: that's amusing, this function is straight-up unused. i... wonder if it even should be used anywhere? perhaps not... so perhaps it should not even exist...???? - var it = self.address_map.iterator(); + var it = self.modules.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; mdi.deinit(self.allocator); self.allocator.destroy(mdi); } - self.address_map.deinit(self.allocator); + self.modules.deinit(self.allocator); if (native_os == .windows) { for (self.modules.items) |module| { self.allocator.free(module.name); @@ -68,94 +71,26 @@ pub fn deinit(self: *SelfInfo) void { } } -fn lookupModuleForAddress(self: *SelfInfo, gpa: Allocator, address: usize) !Module { - if (builtin.target.os.tag.isDarwin()) { - return self.lookupModuleDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleWin32(gpa, address); - } else if (native_os == .haiku) { - @panic("TODO implement lookup module for Haiku"); - } else if (builtin.target.cpu.arch.isWasm()) { - @panic("TODO implement lookup module for Wasm"); - } else { - return self.lookupModuleDl(address); - } -} - -fn loadModuleDebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { - // 
MLUGG TODO: this should totally just go into the `Module` impl or something, right? lol - if (builtin.target.os.tag.isDarwin()) { - try loadMachODebugInfo(gpa, module, di); - } else if (native_os == .windows) { - // MLUGG TODO: deal with 'already loaded' properly - try readCoffDebugInfo(gpa, module, di); - } else if (native_os == .haiku) { - unreachable; - } else if (builtin.target.cpu.arch.isWasm()) { - unreachable; - } else { - if (di.mapped_memory != null) return; // already loaded - const filename: ?[]const u8 = if (module.name.len > 0) module.name else null; - const mapped_mem = mapFileOrSelfExe(filename) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.FileTooBig => return error.InvalidDebugInfo, - else => |e| return e, - }; - errdefer posix.munmap(mapped_mem); - try di.load(gpa, mapped_mem, module.build_id, null, null, null, filename); - assert(di.mapped_memory != null); - } -} - -fn loadModuleUnwindInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { - if (builtin.target.os.tag.isDarwin()) { - // MLUGG TODO HACKHACK - try loadMachODebugInfo(gpa, module, di); - } else if (native_os == .windows) { - comptime unreachable; // not supported - } else if (native_os == .haiku) { - comptime unreachable; // not supported - } else if (builtin.target.cpu.arch.isWasm()) { - comptime unreachable; // not supported - } else { - eh_frame: { - if (di.unwind.eh_frame != null) break :eh_frame; // already loaded - const eh_frame_hdr_bytes = module.gnu_eh_frame orelse break :eh_frame; - const eh_frame_hdr: Dwarf.Unwind.EhFrameHeader = try .parse( - @intFromPtr(eh_frame_hdr_bytes.ptr) - module.load_offset, - eh_frame_hdr_bytes, - @sizeOf(usize), - native_endian, - ); - const eh_frame_addr = module.load_offset + @as(usize, @intCast(eh_frame_hdr.eh_frame_vaddr)); - try di.unwind.scanEhFrame( - gpa, - eh_frame_hdr, - @ptrFromInt(eh_frame_addr), - null, - @sizeOf(usize), - native_endian, - ); - } - } -} - pub fn 
unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { comptime assert(target_supported); - const module = try self.lookupModuleForAddress(gpa, context.pc); - const gop = try self.address_map.getOrPut(gpa, module.load_offset); + const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); // MLUGG TODO: don't take gpa + const gop = try self.modules.getOrPut(gpa, module.load_offset); if (!gop.found_existing) gop.value_ptr.* = .init; - try loadModuleUnwindInfo(gpa, &module, gop.value_ptr); + if (!gop.value_ptr.loaded_unwind) { + try module.loadUnwindInfo(gpa, &gop.value_ptr.di); + gop.value_ptr.loaded_unwind = true; + } + // MLUGG TODO: the stuff below is impl! if (native_os.isDarwin()) { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. - if (gop.value_ptr.unwind_info) |unwind_info| { + if (gop.value_ptr.di.unwind_info) |unwind_info| { if (unwindFrameMachO( module.text_base, module.load_offset, context, unwind_info, - gop.value_ptr.eh_frame, + gop.value_ptr.di.eh_frame, )) |return_address| { return return_address; } else |err| { @@ -164,7 +99,7 @@ pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !us } return error.MissingUnwindInfo; } - if (try gop.value_ptr.getDwarfUnwindForAddress(gpa, context.pc)) |unwind| { + if (try gop.value_ptr.di.getDwarfUnwindForAddress(gpa, context.pc)) |unwind| { return unwindFrameDwarf(unwind, module.load_offset, context, null); } return error.MissingDebugInfo; @@ -172,11 +107,15 @@ pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !us pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { comptime assert(target_supported); - const module = try self.lookupModuleForAddress(gpa, address); - const gop = try self.address_map.getOrPut(gpa, module.key()); + const module: 
Module = try .lookup(&self.lookup_cache, gpa, address); // MLUGG TODO: don't take gpa + const gop = try self.modules.getOrPut(gpa, module.key()); if (!gop.found_existing) gop.value_ptr.* = .init; - try loadModuleDebugInfo(gpa, &module, gop.value_ptr); - return module.getSymbolAtAddress(gpa, gop.value_ptr, address); + if (!gop.value_ptr.loaded_debug) { + // MLUGG TODO: this overloads the name 'debug info' with including vs excluding unwind info + // figure out a better name for one or the other (i think the inner one is maybe 'symbol info' or something idk) + try module.loadDebugInfo(gpa, &gop.value_ptr.di); + } + return module.getSymbolAtAddress(gpa, &gop.value_ptr.di, address); } /// Returns the module name for a given address. @@ -184,278 +123,12 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. /// a path that doesn't rely on any side-effects of a prior successful module lookup. pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { comptime assert(target_supported); - const module = try self.lookupModuleForAddress(gpa, address); + const module: Module = try .lookup(&self.lookup_cache, gpa, address); // MLUGG TODO: don't take gpa return module.name; } -fn lookupModuleDl(self: *SelfInfo, address: usize) !Module { - _ = self; // MLUGG - const DlIterContext = struct { - /// input - address: usize, - /// output - module: Module, - - fn callback(info: *posix.dl_phdr_info, size: usize, context: *@This()) !void { - _ = size; - // The base address is too high - if (context.address < info.addr) - return; - - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and 
context.address < seg_end) { - context.module = .{ - .load_offset = info.addr, - // Android libc uses NULL instead of "" to mark the main program - .name = mem.sliceTo(info.name, 0) orelse "", - .build_id = null, - .gnu_eh_frame = null, - }; - break; - } - } else return; - - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); - const name_size = r.takeInt(u32, native_endian) catch continue; - const desc_size = r.takeInt(u32, native_endian) catch continue; - const note_type = r.takeInt(u32, native_endian) catch continue; - const name = r.take(name_size) catch continue; - if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, name, "GNU\x00")) continue; - const desc = r.take(desc_size) catch continue; - context.module.build_id = desc; - }, - elf.PT_GNU_EH_FRAME => { - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; - }, - else => {}, - } - } - - // Stop the iteration - return error.Found; - } - }; - var ctx: DlIterContext = .{ - .address = address, - .module = undefined, - }; - posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { - error.Found => return ctx.module, - }; - return error.MissingDebugInfo; -} - -fn lookupModuleDyld(self: *SelfInfo, address: usize) !Module { - _ = self; // MLUGG - const image_count = std.c._dyld_image_count(); - for (0..image_count) |image_idx| { - const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; - const text_base = @intFromPtr(header); - if (address < text_base) continue; - const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); - - // Find the __TEXT segment - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, 
@ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const text_segment_cmd, const text_sections = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break .{ segment_cmd, load_cmd.getSections() }; - } else continue; - - const seg_start = load_offset + text_segment_cmd.vmaddr; - assert(seg_start == text_base); - const seg_end = seg_start + text_segment_cmd.vmsize; - if (address < seg_start or address >= seg_end) continue; - - // We've found the matching __TEXT segment. This is the image we need, but we must look - // for unwind info in it before returning. - - var result: Module = .{ - .text_base = text_base, - .load_offset = load_offset, - .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), - .unwind_info = null, - .eh_frame = null, - }; - for (text_sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); - result.unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); - result.eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - return result; - } - return error.MissingDebugInfo; -} - -fn lookupModuleWin32(self: *SelfInfo, gpa: Allocator, address: usize) !Module { - if (self.lookupModuleWin32Cache(address)) |m| return m; - - { - // Check a new module hasn't been loaded - self.module_cache.clearRetainingCapacity(); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - return windows.unexpectedError(windows.GetLastError()); - } - defer windows.CloseHandle(handle); - - var entry: windows.MODULEENTRY32 = undefined; - 
entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &entry) != 0) { - try self.module_cache.append(gpa, entry); - while (windows.kernel32.Module32Next(handle, &entry) != 0) { - try self.module_cache.append(gpa, entry); - } - } - } - - if (self.lookupModuleWin32Cache(address)) |m| return m; - return error.MissingDebugInfo; -} -fn lookupModuleWin32Cache(self: *SelfInfo, address: usize) ?Module { - for (self.module_cache.items) |*entry| { - const base_address = @intFromPtr(entry.modBaseAddr); - if (address >= base_address and address < base_address + entry.modBaseSize) { - return .{ - .base_address = base_address, - .size = entry.modBaseSize, - .name = std.mem.sliceTo(&entry.szModule, 0), - .handle = entry.hModule, - }; - } - } - return null; -} - -fn readCoffDebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { - const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); - const mapped = mapped_ptr[0..module.size]; - var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - name_buffer[4..], - windows.PATH_MAX_WIDE, - ); - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. 
len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, - }; - errdefer coff_file.close(); - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - §ion_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. - windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - var coff_len: usize = 0; - var section_view_ptr: [*]const u8 = undefined; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(§ion_view_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); - const section_view = section_view_ptr[0..coff_len]; - coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; - di.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - di.coff_image_base = coff_obj.getImageBase(); - - if (coff_obj.getSectionByName(".debug_info")) |_| { - di.dwarf = .{}; - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, gpa), - .virtual_address = section_header.virtual_address, - .owned = true, - }; - } else null; - } - - try di.dwarf.?.open(gpa, native_endian); - } - - if (try coff_obj.getPdbPath()) |raw_path| pdb: { - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(gpa); - defer gpa.free(self_dir); - break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) gpa.free(path); - - di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => break :pdb, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); - } -} - const Module = switch (native_os) { - else => "MLUGG TODO", // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... + else => {}, // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... .macos, .ios, .watchos, .tvos, .visionos => struct { /// The runtime address where __TEXT is loaded. 
text_base: usize, @@ -466,6 +139,63 @@ const Module = switch (native_os) { fn key(m: *const Module) usize { return m.text_base; } + fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { + _ = cache; + _ = gpa; + const image_count = std.c._dyld_image_count(); + for (0..image_count) |image_idx| { + const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; + const text_base = @intFromPtr(header); + if (address < text_base) continue; + const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); + + // Find the __TEXT segment + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const text_segment_cmd, const text_sections = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break .{ segment_cmd, load_cmd.getSections() }; + } else continue; + + const seg_start = load_offset + text_segment_cmd.vmaddr; + assert(seg_start == text_base); + const seg_end = seg_start + text_segment_cmd.vmsize; + if (address < seg_start or address >= seg_end) continue; + + // We've found the matching __TEXT segment. This is the image we need, but we must look + // for unwind info in it before returning. 
+ + var result: Module = .{ + .text_base = text_base, + .load_offset = load_offset, + .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), + .unwind_info = null, + .eh_frame = null, + }; + for (text_sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); + result.unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); + result.eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + return result; + } + return error.MissingDebugInfo; + } + fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + return loadMachODebugInfo(gpa, module, di); + } + fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + // MLUGG TODO HACKHACK + try loadMachODebugInfo(gpa, module, di); + } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { const vaddr = address - module.load_offset; const symbol = MachoSymbol.find(di.symbols, vaddr) orelse return .{}; // MLUGG TODO null? @@ -524,8 +254,8 @@ const Module = switch (native_os) { }, }; } + const LookupCache = void; const DebugInfo = struct { - // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadMachODebugInfo should be the one discovering them! mapped_memory: []align(std.heap.page_size_min) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, @@ -533,6 +263,7 @@ const Module = switch (native_os) { ofiles: std.StringArrayHashMapUnmanaged(OFile), // Backed by the in-memory sections mapped by the loader + // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadMachODebugInfo should be the one discovering them! 
unwind_info: ?[]const u8, eh_frame: ?[]const u8, @@ -642,28 +373,137 @@ const Module = switch (native_os) { }; }, .wasi, .emscripten => struct { + const LookupCache = void; const DebugInfo = struct { const init: DebugInfo = .{}; - fn getSymbolAtAddress(di: *DebugInfo, gpa: Allocator, base_address: usize, address: usize) !std.debug.Symbol { - _ = di; - _ = gpa; - _ = base_address; - _ = address; - unreachable; - } }; + fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { + _ = cache; + _ = gpa; + _ = address; + @panic("TODO implement lookup module for Wasm"); + } + fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + _ = module; + _ = gpa; + _ = di; + _ = address; + unreachable; + } + fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { + _ = module; + _ = gpa; + _ = di; + unreachable; + } + fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { + _ = module; + _ = gpa; + _ = di; + unreachable; + } }, .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { load_offset: usize, name: []const u8, build_id: ?[]const u8, gnu_eh_frame: ?[]const u8, + const LookupCache = void; + const DebugInfo = Dwarf.ElfModule; fn key(m: Module) usize { return m.load_offset; // MLUGG TODO: is this technically valid? 
idk } - const DebugInfo = Dwarf.ElfModule; - fn getSymbolAtAddress(mod: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - return di.getSymbolAtAddress(gpa, native_endian, mod.load_offset, address); + fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { + _ = cache; + _ = gpa; + if (native_os == .haiku) @panic("TODO implement lookup module for Haiku"); + const DlIterContext = struct { + /// input + address: usize, + /// output + module: Module, + + fn callback(info: *posix.dl_phdr_info, size: usize, context: *@This()) !void { + _ = size; + // The base address is too high + if (context.address < info.addr) + return; + + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + context.module = .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = mem.sliceTo(info.name, 0) orelse "", + .build_id = null, + .gnu_eh_frame = null, + }; + break; + } + } else return; + + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, name, "GNU\x00")) continue; + const desc = r.take(desc_size) catch continue; + context.module.build_id = desc; + }, + 
elf.PT_GNU_EH_FRAME => { + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; + }, + else => {}, + } + } + + // Stop the iteration + return error.Found; + } + }; + var ctx: DlIterContext = .{ + .address = address, + .module = undefined, + }; + posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { + error.Found => return ctx.module, + }; + return error.MissingDebugInfo; + } + fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + const filename: ?[]const u8 = if (module.name.len > 0) module.name else null; + const mapped_mem = mapFileOrSelfExe(filename) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.FileTooBig => return error.InvalidDebugInfo, + else => |e| return e, + }; + errdefer posix.munmap(mapped_mem); + try di.load(gpa, mapped_mem, module.build_id, null, null, null, filename); + assert(di.mapped_memory != null); + } + fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; + const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); + try di.unwind.loadFromEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + try di.unwind.prepareLookup(gpa, @sizeOf(usize), native_endian); + } + fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + return di.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); } }, .uefi, .windows => struct { @@ -674,6 +514,152 @@ const Module = switch (native_os) { fn key(m: Module) usize { return m.base_address; } + fn lookup(cache: *LookupCache, gpa: 
Allocator, address: usize) !Module { + if (lookupInCache(cache, address)) |m| return m; + { + // Check a new module hasn't been loaded + cache.modules.clearRetainingCapacity(); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + return windows.unexpectedError(windows.GetLastError()); + } + defer windows.CloseHandle(handle); + + var entry: windows.MODULEENTRY32 = undefined; + entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &entry) != 0) { + try cache.modules.append(gpa, entry); + while (windows.kernel32.Module32Next(handle, &entry) != 0) { + try cache.modules.append(gpa, entry); + } + } + } + if (lookupInCache(cache, address)) |m| return m; + return error.MissingDebugInfo; + } + fn lookupInCache(cache: *const LookupCache, address: usize) ?Module { + for (cache.modules.items) |*entry| { + const base_address = @intFromPtr(entry.modBaseAddr); + if (address >= base_address and address < base_address + entry.modBaseSize) { + return .{ + .base_address = base_address, + .size = entry.modBaseSize, + .name = std.mem.sliceTo(&entry.szModule, 0), + .handle = entry.hModule, + }; + } + } + return null; + } + fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { + const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); + const mapped = mapped_ptr[0..module.size]; + var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. 
+ if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + name_buffer[4..], + windows.PATH_MAX_WIDE, + ); + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; + errdefer coff_file.close(); + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + §ion_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + var coff_len: usize = 0; + var section_view_ptr: [*]const u8 = undefined; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(§ion_view_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); + const section_view = section_view_ptr[0..coff_len]; + coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; + di.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + di.coff_image_base = coff_obj.getImageBase(); + + if (coff_obj.getSectionByName(".debug_info")) |_| { + di.dwarf = .{}; + + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, gpa), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; + } + + try di.dwarf.?.open(gpa, native_endian); + } + + if (try coff_obj.getPdbPath()) |raw_path| pdb: { + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(gpa); + defer gpa.free(self_dir); + break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) gpa.free(path); + + di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => break :pdb, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); + + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; + + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); + } + } + const LookupCache = struct { + modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), + const init: LookupCache = .{ .modules = .empty }; + }; const DebugInfo = struct { coff_image_base: u64, mapped_file: ?struct { @@ -747,9 +733,9 @@ const Module = switch (native_os) { } }; - fn getSymbolAtAddress(mod: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { // Translate the runtime address into a virtual address into the module - const vaddr = address - mod.base_address; + const vaddr = address - module.base_address; if (di.pdb != null) { if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; @@ -1091,12 +1077,12 @@ fn unwindFrameDwarf( const pc_vaddr = context.pc - load_offset; - const fde_offset = explicit_fde_offset orelse try unwind.findFdeOffset( + const fde_offset = explicit_fde_offset orelse try 
unwind.lookupPc( pc_vaddr, @sizeOf(usize), native_endian, ) orelse return error.MissingDebugInfo; - const format, const cie, const fde = try unwind.loadFde(fde_offset, @sizeOf(usize), native_endian); + const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); // Check if this FDE *actually* includes the address. if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; @@ -1294,7 +1280,7 @@ fn unwindFrameMachO( load_offset: usize, context: *UnwindContext, unwind_info: []const u8, - eh_frame: ?[]const u8, + opt_eh_frame: ?[]const u8, ) !usize { if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); @@ -1304,20 +1290,6 @@ fn unwindFrameMachO( const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); if (indices.len == 0) return error.MissingUnwindInfo; - // MLUGG TODO HACKHACK -- Unwind needs a slight refactor to make this work well - const opt_dwarf_unwind: ?Dwarf.Unwind = if (eh_frame) |eh_frame_data| .{ - .debug_frame = null, - .eh_frame = .{ - .header = .{ - .vaddr = undefined, - .eh_frame_vaddr = @intFromPtr(eh_frame_data.ptr) - load_offset, - .search_table = null, - }, - .eh_frame_data = eh_frame_data, - .sorted_fdes = null, - }, - } else null; - // offset of the PC into the `__TEXT` segment const pc_text_offset = context.pc - text_base; @@ -1533,8 +1505,11 @@ fn unwindFrameMachO( break :ip new_ip; }, .DWARF => { - const dwarf_unwind = &(opt_dwarf_unwind orelse return error.MissingEhFrame); - return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.x86_64.dwarf)); + const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + var dwarf_unwind: Dwarf.Unwind = 
.init; + dwarf_unwind.loadFromSection(.eh_frame, eh_frame_vaddr, eh_frame); + return unwindFrameDwarf(&dwarf_unwind, load_offset, context, @intCast(encoding.value.x86_64.dwarf)); }, }, .aarch64, .aarch64_be => switch (encoding.mode.arm64) { @@ -1547,7 +1522,10 @@ fn unwindFrameMachO( break :ip new_ip; }, .DWARF => { - const dwarf_unwind = &(opt_dwarf_unwind orelse return error.MissingEhFrame); + const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + var dwarf_unwind: Dwarf.Unwind = .init; + dwarf_unwind.loadFromSection(.eh_frame, eh_frame_vaddr, eh_frame); return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.arm64.dwarf)); }, .FRAME => ip: { From fb88dab4c9c7d89ec7b5842dafdefe93ed939b3c Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 12:22:59 +0100 Subject: [PATCH 04/85] more still --- lib/std/debug/Dwarf.zig | 58 +++++++++++++++----------------------- lib/std/debug/SelfInfo.zig | 16 +++++------ 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index fd678605483b..395b3951da85 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1451,18 +1451,10 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { // MLUGG TODO: i am dubious of this whole thing being here atp. 
look closely and see if it depends on being the self process pub const ElfModule = struct { - unwind: Dwarf.Unwind, dwarf: Dwarf, - mapped_memory: ?[]align(std.heap.page_size_min) const u8, + mapped_memory: []align(std.heap.page_size_min) const u8, external_mapped_memory: ?[]align(std.heap.page_size_min) const u8, - pub const init: ElfModule = .{ - .unwind = .init, - .dwarf = .{}, - .mapped_memory = null, - .external_mapped_memory = null, - }; - pub fn deinit(self: *@This(), allocator: Allocator) void { self.dwarf.deinit(allocator); std.posix.munmap(self.mapped_memory); @@ -1476,12 +1468,6 @@ pub const ElfModule = struct { return self.dwarf.getSymbol(allocator, endian, vaddr); } - pub fn getDwarfUnwindForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf.Unwind { - _ = allocator; - _ = address; - return &self.unwind; - } - pub const LoadError = error{ InvalidDebugInfo, MissingDebugInfo, @@ -1506,10 +1492,7 @@ pub const ElfModule = struct { /// If the required sections aren't present but a reference to external debug /// info is, then this this function will recurse to attempt to load the debug /// sections from an external file. 
- /// - /// MLUGG TODO: this should *return* a thing pub fn load( - em: *ElfModule, gpa: Allocator, mapped_mem: []align(std.heap.page_size_min) const u8, build_id: ?[]const u8, @@ -1517,9 +1500,7 @@ pub const ElfModule = struct { parent_sections: ?*Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, elf_filename: ?[]const u8, - ) LoadError!void { - assert(em.mapped_memory == null); - + ) LoadError!ElfModule { if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); @@ -1657,7 +1638,7 @@ pub const ElfModule = struct { .sub_path = filename, }; - return em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch break :blk; + return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch break :blk; } const global_debug_directories = [_][]const u8{ @@ -1685,7 +1666,7 @@ pub const ElfModule = struct { }; defer gpa.free(path.sub_path); - return em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; } } @@ -1701,7 +1682,7 @@ pub const ElfModule = struct { defer exe_dir.close(); // / - if (em.loadPath( + if (loadPath( gpa, .{ .root_dir = .{ .path = null, .handle = exe_dir }, @@ -1711,9 +1692,8 @@ pub const ElfModule = struct { separate_debug_crc, §ions, mapped_mem, - )) |v| { - v; - return; + )) |em| { + return em; } else |_| {} // /.debug/ @@ -1723,7 +1703,9 @@ pub const ElfModule = struct { }; defer gpa.free(path.sub_path); - if (em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { + return em; + } else |_| {} } var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; @@ -1736,28 +1718,32 @@ pub const ElfModule = struct { .sub_path = try std.fs.path.join(gpa, &.{ 
global_directory, cwd_path, separate_filename }), }; defer gpa.free(path.sub_path); - if (em.loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { + return em; + } else |_| {} } } return error.MissingDebugInfo; } - em.mapped_memory = parent_mapped_mem orelse mapped_mem; - em.external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null; - em.dwarf.sections = sections; - try em.dwarf.open(gpa, endian); + var dwarf: Dwarf = .{ .sections = sections }; + try dwarf.open(gpa, endian); + return .{ + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, + .dwarf = dwarf, + }; } pub fn loadPath( - em: *ElfModule, gpa: Allocator, elf_file_path: Path, build_id: ?[]const u8, expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, - ) LoadError!void { + ) LoadError!ElfModule { const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { error.FileNotFound => return missing(), else => return err, @@ -1780,7 +1766,7 @@ pub const ElfModule = struct { }; errdefer std.posix.munmap(mapped_mem); - return em.load( + return load( gpa, mapped_mem, build_id, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 85b784b447ac..b875e74b4c45 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -99,10 +99,7 @@ pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !us } return error.MissingUnwindInfo; } - if (try gop.value_ptr.di.getDwarfUnwindForAddress(gpa, context.pc)) |unwind| { - return unwindFrameDwarf(unwind, module.load_offset, context, null); - } - return error.MissingDebugInfo; + return unwindFrameDwarf(&gop.value_ptr.di.unwind, module.load_offset, context, null); } pub fn 
getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { @@ -409,7 +406,11 @@ const Module = switch (native_os) { build_id: ?[]const u8, gnu_eh_frame: ?[]const u8, const LookupCache = void; - const DebugInfo = Dwarf.ElfModule; + const DebugInfo = struct { + const init: DebugInfo = undefined; // MLUGG TODO: this makes me sad + em: Dwarf.ElfModule, // MLUGG TODO: bad field name (and, frankly, type) + unwind: Dwarf.Unwind, + }; fn key(m: Module) usize { return m.load_offset; // MLUGG TODO: is this technically valid? idk } @@ -492,8 +493,7 @@ const Module = switch (native_os) { else => |e| return e, }; errdefer posix.munmap(mapped_mem); - try di.load(gpa, mapped_mem, module.build_id, null, null, null, filename); - assert(di.mapped_memory != null); + di.em = try .load(gpa, mapped_mem, module.build_id, null, null, null, filename); } fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file @@ -503,7 +503,7 @@ const Module = switch (native_os) { try di.unwind.prepareLookup(gpa, @sizeOf(usize), native_endian); } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - return di.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); + return di.em.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); } }, .uefi, .windows => struct { From 89d862180f1d032b36e8d4371a037ae018bc43c5 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 12:31:40 +0100 Subject: [PATCH 05/85] yet more --- lib/std/Build/Step/CheckObject.zig | 12 ++++---- lib/std/macho.zig | 37 ------------------------- src/link/MachO/Object.zig | 44 +++++++++++++++--------------- 3 files changed, 28 insertions(+), 65 deletions(-) diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index 2b5994cc342a..e65120641f38 100644 --- 
a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -1097,7 +1097,7 @@ const MachODumper = struct { for (ctx.symtab.items) |sym| { const sym_name = ctx.getString(sym.n_strx); - if (sym.stab()) { + if (sym.n_type.bits.is_stab != 0) { const tt = switch (sym.n_type) { macho.N_SO => "SO", macho.N_OSO => "OSO", @@ -1114,7 +1114,7 @@ const MachODumper = struct { try writer.print(" ({s},{s})", .{ sect.segName(), sect.sectName() }); } try writer.print(" {s} (stab) {s}\n", .{ tt, sym_name }); - } else if (sym.sect()) { + } else if (sym.n_type.type == .sect) { const sect = ctx.sections.items[sym.n_sect - 1]; try writer.print("{x} ({s},{s})", .{ sym.n_value, @@ -1122,8 +1122,8 @@ const MachODumper = struct { sect.sectName(), }); if (sym.n_desc & macho.REFERENCED_DYNAMICALLY != 0) try writer.writeAll(" [referenced dynamically]"); - if (sym.weakDef()) try writer.writeAll(" weak"); - if (sym.weakRef()) try writer.writeAll(" weakref"); + if (sym.n_desc.weak_def_or_ref_to_weak) try writer.writeAll(" weak"); + if (sym.n_desc.weak_ref) try writer.writeAll(" weakref"); if (sym.ext()) { if (sym.pext()) try writer.writeAll(" private"); try writer.writeAll(" external"); @@ -1134,7 +1134,7 @@ const MachODumper = struct { try writer.print(" 0x{x:0>16} (common) (alignment 2^{d})", .{ sym.n_value, alignment }); if (sym.ext()) try writer.writeAll(" external"); try writer.print(" {s}\n", .{sym_name}); - } else if (sym.undf()) { + } else if (sym.n_type.type == .undf) { const ordinal = @divFloor(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); const import_name = blk: { if (ordinal <= 0) { @@ -1153,7 +1153,7 @@ const MachODumper = struct { break :blk basename[0..ext]; }; try writer.writeAll("(undefined)"); - if (sym.weakRef()) try writer.writeAll(" weakref"); + if (sym.n_desc.weak_ref) try writer.writeAll(" weakref"); if (sym.ext()) try writer.writeAll(" external"); try writer.print(" {s} (from {s})\n", .{ sym_name, diff --git a/lib/std/macho.zig 
b/lib/std/macho.zig index 0ce1f4c7ff3f..4ebb0cabd855 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -906,43 +906,6 @@ pub const nlist_64 = extern struct { }, n_value: u64, - // MLUGG TODO DELETE - pub fn stab(sym: nlist_64) bool { - return sym.n_type.bits.is_stab != 0; - } - // MLUGG TODO DELETE - pub fn sect(sym: nlist_64) bool { - return sym.n_type.type == .sect; - } - // MLUGG TODO DELETE - pub fn undf(sym: nlist_64) bool { - return sym.n_type.type == .undf; - } - // MLUGG TODO DELETE - pub fn indr(sym: nlist_64) bool { - return sym.n_type.type == .indr; - } - // MLUGG TODO DELETE - pub fn abs(sym: nlist_64) bool { - return sym.n_type.type == .abs; - } - // MLUGG TODO DELETE - pub fn weakDef(sym: nlist_64) bool { - return sym.n_desc.weak_def_or_ref_to_weak; - } - // MLUGG TODO DELETE - pub fn weakRef(sym: nlist_64) bool { - return sym.n_desc.weak_ref; - } - // MLUGG TODO DELETE - pub fn discarded(sym: nlist_64) bool { - return sym.n_desc.discarded_or_no_dead_strip; - } - // MLUGG TODO DELETE - pub fn noDeadStrip(sym: nlist_64) bool { - return sym.n_desc.discarded_or_no_dead_strip; - } - pub fn tentative(sym: nlist_64) bool { return sym.n_type.type == .undf and sym.n_value != 0; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 6752c751b238..e2d9564799be 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -185,7 +185,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { if (name[0] == 'l' or name[0] == 'L') return 4; return 3; } - return if (nl.weakDef()) 2 else 1; + return if (nl.n_desc.weak_def_or_ref_to_weak) 2 else 1; } fn lessThan(ctx: *const Object, lhs: @This(), rhs: @This()) bool { @@ -202,7 +202,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { var nlists = try std.array_list.Managed(NlistIdx).initCapacity(gpa, self.symtab.items(.nlist).len); defer nlists.deinit(); for (self.symtab.items(.nlist), 0..) 
|nlist, i| { - if (nlist.stab() or !nlist.sect()) continue; + if (nlist.n_type.bits.is_stab != 0 or nlist.n_type.type != .sect) continue; nlists.appendAssumeCapacity(.{ .nlist = nlist, .idx = i }); } mem.sort(NlistIdx, nlists.items, self, NlistIdx.lessThan); @@ -805,7 +805,7 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); for (self.symtab.items(.nlist), self.symtab.items(.atom)) |nlist, *atom| { - if (!nlist.stab() and nlist.sect()) { + if (nlist.n_type.bits.is_stab == 0 and nlist.n_type.type == .sect) { const sect = self.sections.items(.header)[nlist.n_sect - 1]; const subs = self.sections.items(.subsections)[nlist.n_sect - 1].items; if (nlist.n_value == sect.addr) { @@ -852,30 +852,30 @@ fn initSymbols(self: *Object, allocator: Allocator, macho_file: *MachO) !void { symbol.extra = self.addSymbolExtraAssumeCapacity(.{}); if (self.getAtom(atom_index)) |atom| { - assert(!nlist.abs()); + assert(nlist.n_type.type != .abs); symbol.value -= atom.getInputAddress(macho_file); symbol.atom_ref = .{ .index = atom_index, .file = self.index }; } - symbol.flags.weak = nlist.weakDef(); - symbol.flags.abs = nlist.abs(); + symbol.flags.weak = nlist.n_desc.weak_def_or_ref_to_weak; + symbol.flags.abs = nlist.n_type.type == .abs; symbol.flags.tentative = nlist.tentative(); - symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.n_desc.discarded_or_no_dead_strip; symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; symbol.flags.interposable = false; // TODO - // symbol.flags.interposable = nlist.ext() and (nlist.sect() or nlist.abs()) and macho_file.base.isDynLib() and macho_file.options.namespace == .flat and !nlist.pext(); + // symbol.flags.interposable = nlist.ext() and (nlist.n_type.type == .sect or nlist.n_type.type == .abs) and macho_file.base.isDynLib() and macho_file.options.namespace == .flat and 
!nlist.pext(); - if (nlist.sect() and + if (nlist.n_type.type == .sect and self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) { symbol.flags.tlv = true; } if (nlist.ext()) { - if (nlist.undf()) { - symbol.flags.weak_ref = nlist.weakRef(); - } else if (nlist.pext() or (nlist.weakDef() and nlist.weakRef()) or self.hidden) { + if (nlist.n_type.type == .undf) { + symbol.flags.weak_ref = nlist.n_desc.weak_ref; + } else if (nlist.pext() or (nlist.n_desc.weak_def_or_ref_to_weak and nlist.n_desc.weak_ref) or self.hidden) { symbol.visibility = .hidden; } else { symbol.visibility = .global; @@ -902,10 +902,10 @@ fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_f }; const start: u32 = for (self.symtab.items(.nlist), 0..) |nlist, i| { - if (nlist.stab()) break @intCast(i); + if (nlist.n_type.bits.is_stab != 0) break @intCast(i); } else @intCast(self.symtab.items(.nlist).len); const end: u32 = for (self.symtab.items(.nlist)[start..], start..) |nlist, i| { - if (!nlist.stab()) break @intCast(i); + if (nlist.n_type.bits.is_stab == 0) break @intCast(i); } else @intCast(self.symtab.items(.nlist).len); if (start == end) return; @@ -919,7 +919,7 @@ fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_f var addr_lookup = std.StringHashMap(u64).init(allocator); defer addr_lookup.deinit(); for (syms) |sym| { - if (sym.sect() and (sym.ext() or sym.pext())) { + if (sym.n_type.type == .sect and (sym.ext() or sym.pext())) { try addr_lookup.putNoClobber(self.getNStrx(sym.n_strx), sym.n_value); } } @@ -1241,8 +1241,8 @@ fn parseUnwindRecords(self: *Object, allocator: Allocator, cpu_arch: std.Target. 
const slice = self.symtab.slice(); for (slice.items(.nlist), slice.items(.atom), slice.items(.size)) |nlist, atom, size| { - if (nlist.stab()) continue; - if (!nlist.sect()) continue; + if (nlist.n_type.bits.is_stab != 0) continue; + if (nlist.n_type.type != .sect) continue; const sect = self.sections.items(.header)[nlist.n_sect - 1]; if (sect.isCode() and sect.size > 0) { try superposition.ensureUnusedCapacity(1); @@ -1459,7 +1459,7 @@ pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { for (self.symtab.items(.nlist), self.symtab.items(.atom), self.globals.items, 0..) |nlist, atom_index, *global, i| { if (!nlist.ext()) continue; - if (nlist.sect()) { + if (nlist.n_type.type == .sect) { const atom = self.getAtom(atom_index).?; if (!atom.isAlive()) continue; } @@ -1473,7 +1473,7 @@ pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { } global.* = gop.index; - if (nlist.undf() and !nlist.tentative()) continue; + if (nlist.n_type.type == .undf and !nlist.tentative()) continue; if (gop.ref.getFile(macho_file) == null) { gop.ref.* = .{ .index = @intCast(i), .file = self.index }; continue; @@ -1481,7 +1481,7 @@ pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { if (self.asFile().getSymbolRank(.{ .archive = !self.alive, - .weak = nlist.weakDef(), + .weak = nlist.n_desc.weak_def_or_ref_to_weak, .tentative = nlist.tentative(), }) < gop.ref.getSymbol(macho_file).?.getSymbolRank(macho_file)) { gop.ref.* = .{ .index = @intCast(i), .file = self.index }; @@ -1500,7 +1500,7 @@ pub fn markLive(self: *Object, macho_file: *MachO) void { const ref = self.getSymbolRef(@intCast(i), macho_file); const file = ref.getFile(macho_file) orelse continue; const sym = ref.getSymbol(macho_file).?; - const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative); + const should_keep = nlist.n_type.type == .undf or (nlist.tentative() and !sym.flags.tentative); if (should_keep and file == .object and !file.object.alive) { file.object.alive = 
true; file.object.markLive(macho_file); @@ -1685,7 +1685,7 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void { pub fn updateArSymtab(self: Object, ar_symtab: *Archive.ArSymtab, macho_file: *MachO) error{OutOfMemory}!void { const gpa = macho_file.base.comp.gpa; for (self.symtab.items(.nlist)) |nlist| { - if (!nlist.ext() or (nlist.undf() and !nlist.tentative())) continue; + if (!nlist.ext() or (nlist.n_type.type == .undf and !nlist.tentative())) continue; const off = try ar_symtab.strtab.insert(gpa, self.getNStrx(nlist.n_strx)); try ar_symtab.entries.append(gpa, .{ .off = off, .file = self.index }); } From 3f6a90766c7ababdaaca629cc5165c5ff5b5dcd7 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 13:07:42 +0100 Subject: [PATCH 06/85] sky pirates! which are even better! --- lib/std/debug/Dwarf/Unwind.zig | 89 +++++++++++++++------------------- lib/std/debug/SelfInfo.zig | 64 ++++++++++++------------ 2 files changed, 74 insertions(+), 79 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index c5f115802663..09cad0db2308 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -2,7 +2,7 @@ pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); -frame_section: ?struct { +frame_section: struct { id: Section, /// The virtual address of the start of the section. "Virtual address" refers to the address in /// the binary (e.g. `sh_addr` in an ELF file); the equivalent runtime address may be relocated @@ -42,10 +42,41 @@ const SortedFdeEntry = struct { const Section = enum { debug_frame, eh_frame }; // MLUGG TODO deinit? -pub const init: Unwind = .{ - .frame_section = null, - .lookup = null, -}; + +/// Initialize with unwind information from the contents of a `.debug_frame` or `.eh_frame` section. +/// +/// If the `.eh_frame_hdr` section is available, consider instead using `initEhFrameHdr`. 
This +/// allows the implementation to use a search table embedded in that section if it is available. +pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const u8) Unwind { + return .{ + .frame_section = .{ + .id = section, + .bytes = section_bytes, + .vaddr = section_vaddr, + }, + .lookup = null, + }; +} + +/// Initialize with unwind information from a header loaded from an `.eh_frame_hdr` section, and a +/// pointer to the contents of the `.eh_frame` section. +/// +/// This differs from `initSection` because `.eh_frame_hdr` may embed a binary search table, and +/// if it does, this function will use that for address lookups instead of constructing our own +/// search table. +pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_ptr: [*]const u8) Unwind { + return .{ + .frame_section = .{ + .id = .eh_frame, + .bytes = maxSlice(section_bytes_ptr), + .vaddr = header.eh_frame_vaddr, + }, + .lookup = if (header.search_table) |table| .{ .eh_frame_hdr = .{ + .vaddr = section_vaddr, + .table = table, + } } else null, + }; +} /// This represents the decoded .eh_frame_hdr header pub const EhFrameHeader = struct { @@ -371,51 +402,11 @@ pub const FrameDescriptionEntry = struct { } }; -/// Load unwind information from the contents of an `.eh_frame` or `.debug_frame` section. -/// -/// If the `.eh_frame_hdr` section is available, consider instead using `loadFromEhFrameHdr`. This -/// allows the implementation to use a search table embedded in that section if it is available. -pub fn loadFromSection(unwind: *Unwind, section: Section, section_vaddr: u64, section_bytes: []const u8) void { - assert(unwind.frame_section == null); - assert(unwind.lookup == null); - unwind.frame_section = .{ - .id = section, - .bytes = section_bytes, - .vaddr = section_vaddr, - }; -} - -/// Load unwind information from a header loaded from an `.eh_frame_hdr` section, and a pointer to -/// the contents of the `.eh_frame` section. 
-/// -/// This differs from `loadFromSection` because `.eh_frame_hdr` may embed a binary search table, and -/// if it does, this function will use that for address lookups instead of constructing our own -/// search table. -pub fn loadFromEhFrameHdr( - unwind: *Unwind, - header: EhFrameHeader, - section_vaddr: u64, - section_bytes_ptr: [*]const u8, -) !void { - assert(unwind.frame_section == null); - assert(unwind.lookup == null); - unwind.frame_section = .{ - .id = .eh_frame, - .bytes = maxSlice(section_bytes_ptr), - .vaddr = header.eh_frame_vaddr, - }; - if (header.search_table) |table| { - unwind.lookup = .{ .eh_frame_hdr = .{ - .vaddr = section_vaddr, - .table = table, - } }; - } -} - pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void { - const section = unwind.frame_section.?; if (unwind.lookup != null) return; + const section = unwind.frame_section; + var r: Reader = .fixed(section.bytes); var fde_list: std.ArrayList(SortedFdeEntry) = .empty; defer fde_list.deinit(gpa); @@ -477,7 +468,7 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End addr_size_bytes, endian, ) orelse return null; - return std.math.sub(u64, fde_vaddr, unwind.frame_section.?.vaddr) catch bad(); // convert vaddr to offset + return std.math.sub(u64, fde_vaddr, unwind.frame_section.vaddr) catch bad(); // convert vaddr to offset }, .sorted_fdes => |sorted_fdes| sorted_fdes, }; @@ -493,7 +484,7 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End } pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { - const section = unwind.frame_section.?; + const section = unwind.frame_section; var fde_reader: Reader = .fixed(section.bytes[fde_offset..]); const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section.id, endian)) { diff --git a/lib/std/debug/SelfInfo.zig 
b/lib/std/debug/SelfInfo.zig index b875e74b4c45..64ccb34e250e 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -73,44 +73,26 @@ pub fn deinit(self: *SelfInfo) void { pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { comptime assert(target_supported); - const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); // MLUGG TODO: don't take gpa + const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.load_offset); if (!gop.found_existing) gop.value_ptr.* = .init; if (!gop.value_ptr.loaded_unwind) { try module.loadUnwindInfo(gpa, &gop.value_ptr.di); gop.value_ptr.loaded_unwind = true; } - // MLUGG TODO: the stuff below is impl! - if (native_os.isDarwin()) { - // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding - // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
- if (gop.value_ptr.di.unwind_info) |unwind_info| { - if (unwindFrameMachO( - module.text_base, - module.load_offset, - context, - unwind_info, - gop.value_ptr.di.eh_frame, - )) |return_address| { - return return_address; - } else |err| { - if (err != error.RequiresDWARFUnwind) return err; - } - } - return error.MissingUnwindInfo; - } - return unwindFrameDwarf(&gop.value_ptr.di.unwind, module.load_offset, context, null); + return module.unwindFrame(gpa, &gop.value_ptr.di, context); } pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { comptime assert(target_supported); - const module: Module = try .lookup(&self.lookup_cache, gpa, address); // MLUGG TODO: don't take gpa + const module: Module = try .lookup(&self.lookup_cache, gpa, address); const gop = try self.modules.getOrPut(gpa, module.key()); if (!gop.found_existing) gop.value_ptr.* = .init; if (!gop.value_ptr.loaded_debug) { // MLUGG TODO: this overloads the name 'debug info' with including vs excluding unwind info // figure out a better name for one or the other (i think the inner one is maybe 'symbol info' or something idk) try module.loadDebugInfo(gpa, &gop.value_ptr.di); + gop.value_ptr.loaded_debug = true; } return module.getSymbolAtAddress(gpa, &gop.value_ptr.di, address); } @@ -120,7 +102,7 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. /// a path that doesn't rely on any side-effects of a prior successful module lookup. 
pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { comptime assert(target_supported); - const module: Module = try .lookup(&self.lookup_cache, gpa, address); // MLUGG TODO: don't take gpa + const module: Module = try .lookup(&self.lookup_cache, gpa, address); return module.name; } @@ -251,6 +233,18 @@ const Module = switch (native_os) { }, }; } + fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + _ = gpa; + const unwind_info = di.unwind_info orelse return error.MissingUnwindInfo; + // MLUGG TODO: inline + return unwindFrameMachO( + module.text_base, + module.load_offset, + context, + unwind_info, + di.eh_frame, + ); + } const LookupCache = void; const DebugInfo = struct { mapped_memory: []align(std.heap.page_size_min) const u8, @@ -499,12 +493,16 @@ const Module = switch (native_os) { const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); - try di.unwind.loadFromEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); try di.unwind.prepareLookup(gpa, @sizeOf(usize), native_endian); } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { return di.em.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); } + fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + _ = gpa; + return unwindFrameDwarf(&di.unwind, module.load_offset, context, null); + } }, .uefi, .windows => struct { base_address: usize, @@ -1507,9 
+1505,12 @@ fn unwindFrameMachO( .DWARF => { const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; - var dwarf_unwind: Dwarf.Unwind = .init; - dwarf_unwind.loadFromSection(.eh_frame, eh_frame_vaddr, eh_frame); - return unwindFrameDwarf(&dwarf_unwind, load_offset, context, @intCast(encoding.value.x86_64.dwarf)); + return unwindFrameDwarf( + &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), + load_offset, + context, + @intCast(encoding.value.x86_64.dwarf), + ); }, }, .aarch64, .aarch64_be => switch (encoding.mode.arm64) { @@ -1524,9 +1525,12 @@ fn unwindFrameMachO( .DWARF => { const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; - var dwarf_unwind: Dwarf.Unwind = .init; - dwarf_unwind.loadFromSection(.eh_frame, eh_frame_vaddr, eh_frame); - return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.arm64.dwarf)); + return unwindFrameDwarf( + &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), + load_offset, + context, + @intCast(encoding.value.arm64.dwarf), + ); }, .FRAME => ip: { const frame = encoding.value.arm64.frame; From 55ae6747e2ad85f9f92919cab174efadc40ea002 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 13:13:16 +0100 Subject: [PATCH 07/85] names --- lib/std/debug/SelfInfo.zig | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 64ccb34e250e..cad5798ad55a 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -28,9 +28,10 @@ const SelfInfo = @This(); modules: std.AutoHashMapUnmanaged(usize, struct { di: Module.DebugInfo, - loaded_debug: bool, + // MLUGG TODO: okay actually these should definitely go on the impl so it can share state. e.g. 
loading unwind info might require lodaing debug info in some cases + loaded_locations: bool, loaded_unwind: bool, - const init: @This() = .{ .di = .init, .loaded_debug = false, .loaded_unwind = false }; + const init: @This() = .{ .di = .init, .loaded_locations = false, .loaded_unwind = false }; }), lookup_cache: Module.LookupCache, @@ -88,11 +89,9 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. const module: Module = try .lookup(&self.lookup_cache, gpa, address); const gop = try self.modules.getOrPut(gpa, module.key()); if (!gop.found_existing) gop.value_ptr.* = .init; - if (!gop.value_ptr.loaded_debug) { - // MLUGG TODO: this overloads the name 'debug info' with including vs excluding unwind info - // figure out a better name for one or the other (i think the inner one is maybe 'symbol info' or something idk) - try module.loadDebugInfo(gpa, &gop.value_ptr.di); - gop.value_ptr.loaded_debug = true; + if (!gop.value_ptr.loaded_locations) { + try module.loadLocationInfo(gpa, &gop.value_ptr.di); + gop.value_ptr.loaded_locations = true; } return module.getSymbolAtAddress(gpa, &gop.value_ptr.di, address); } @@ -168,8 +167,8 @@ const Module = switch (native_os) { } return error.MissingDebugInfo; } - fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - return loadMachODebugInfo(gpa, module, di); + fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + try loadMachODebugInfo(gpa, module, di); // MLUGG TODO inline } fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { // MLUGG TODO HACKHACK @@ -381,7 +380,7 @@ const Module = switch (native_os) { _ = address; unreachable; } - fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { + fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { _ = module; _ = gpa; _ = di; @@ -479,7 +478,7 @@ const Module = switch (native_os) { }; return 
error.MissingDebugInfo; } - fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { const filename: ?[]const u8 = if (module.name.len > 0) module.name else null; const mapped_mem = mapFileOrSelfExe(filename) catch |err| switch (err) { error.FileNotFound => return error.MissingDebugInfo, @@ -550,7 +549,7 @@ const Module = switch (native_os) { } return null; } - fn loadDebugInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { + fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); const mapped = mapped_ptr[0..module.size]; var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; From 25e02bed4cebc7c90f763fae5d57e3d99a08bdfc Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 13:44:18 +0100 Subject: [PATCH 08/85] less hacky :D --- lib/std/debug/Dwarf/abi.zig | 2 +- lib/std/debug/SelfInfo.zig | 52 +++++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index c5e509c7b0f9..79f7e87217b9 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -347,5 +347,5 @@ pub fn regValueNative( ) !*align(1) usize { const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); if (@sizeOf(usize) != reg_bytes.len) return error.IncompatibleRegisterSize; - return mem.bytesAsValue(usize, reg_bytes[0..@sizeOf(usize)]); + return @ptrCast(reg_bytes); } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index cad5798ad55a..2cb9179827fb 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -171,26 +171,29 @@ const Module = switch (native_os) { try loadMachODebugInfo(gpa, module, di); // MLUGG TODO inline } fn loadUnwindInfo(module: *const Module, gpa: 
Allocator, di: *Module.DebugInfo) !void { - // MLUGG TODO HACKHACK - try loadMachODebugInfo(gpa, module, di); + _ = gpa; + di.unwind = .{ + .unwind_info = module.unwind_info, + .eh_frame = module.eh_frame, + }; } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(di.symbols, vaddr) orelse return .{}; // MLUGG TODO null? + const symbol = MachoSymbol.find(di.full.symbols, vaddr) orelse return .{}; // MLUGG TODO null? // offset of `address` from start of `symbol` const address_symbol_offset = vaddr - symbol.addr; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(di.strings[symbol.strx..], 0); - const o_file_path = mem.sliceTo(di.strings[symbol.ofile..], 0); + const stab_symbol = mem.sliceTo(di.full.strings[symbol.strx..], 0); + const o_file_path = mem.sliceTo(di.full.strings[symbol.ofile..], 0); const o_file: *DebugInfo.OFile = of: { - const gop = try di.ofiles.getOrPut(gpa, o_file_path); + const gop = try di.full.ofiles.getOrPut(gpa, o_file_path); if (!gop.found_existing) { gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { - defer _ = di.ofiles.pop().?; + defer _ = di.full.ofiles.pop().?; switch (err) { error.FileNotFound, error.MissingDebugInfo, @@ -234,28 +237,33 @@ const Module = switch (native_os) { } fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { _ = gpa; - const unwind_info = di.unwind_info orelse return error.MissingUnwindInfo; + const unwind_info = di.unwind.unwind_info orelse return error.MissingUnwindInfo; // MLUGG TODO: inline return unwindFrameMachO( module.text_base, module.load_offset, context, unwind_info, - di.eh_frame, + di.unwind.eh_frame, ); } const LookupCache = void; const DebugInfo = struct { - mapped_memory: 
[]align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: [:0]const u8, - // MLUGG TODO: this could use an adapter to just index straight into `strings`! - ofiles: std.StringArrayHashMapUnmanaged(OFile), + unwind: struct { + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + }, + // MLUGG TODO: awful field name + full: struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + // MLUGG TODO: this could use an adapter to just index straight into `strings`! + ofiles: std.StringArrayHashMapUnmanaged(OFile), + }, // Backed by the in-memory sections mapped by the loader // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadMachODebugInfo should be the one discovering them! - unwind_info: ?[]const u8, - eh_frame: ?[]const u8, // MLUGG TODO HACKHACK: this is awful const init: DebugInfo = undefined; @@ -267,13 +275,13 @@ const Module = switch (native_os) { }; fn deinit(di: *DebugInfo, gpa: Allocator) void { - for (di.ofiles.values()) |*ofile| { + for (di.full.ofiles.values()) |*ofile| { ofile.dwarf.deinit(gpa); ofile.addr_table.deinit(gpa); } - di.ofiles.deinit(); - gpa.free(di.symbols); - posix.munmap(di.mapped_memory); + di.full.ofiles.deinit(); + gpa.free(di.full.symbols); + posix.munmap(di.full.mapped_memory); } fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { @@ -859,9 +867,7 @@ fn loadMachODebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugIn // This sort is so that we can binary search later. 
mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - di.* = .{ - .unwind_info = module.unwind_info, - .eh_frame = module.eh_frame, + di.full = .{ .mapped_memory = mapped_mem, .symbols = symbols_slice, .strings = strings, From 55a7affea41a4a1f4e117d7ee55c1c0e8b869203 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 14:44:15 +0100 Subject: [PATCH 09/85] me when i did a thing --- lib/std/debug/Dwarf.zig | 6 +- lib/std/debug/SelfInfo.zig | 377 ++++++++++++++++++------------------- 2 files changed, 187 insertions(+), 196 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 395b3951da85..bb4bee2fbfd0 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1780,13 +1780,13 @@ pub const ElfModule = struct { pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) !std.debug.Symbol { const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return .{}, + error.MissingDebugInfo, error.InvalidDebugInfo => return .{ .name = null, .compile_unit_name = null, .source_location = null }, else => return err, }; return .{ - .name = di.getSymbolName(address) orelse "???", + .name = di.getSymbolName(address), .compile_unit_name = compile_unit.die.getAttrString(di, endian, std.dwarf.AT.name, di.section(.debug_str), compile_unit) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", + error.MissingDebugInfo, error.InvalidDebugInfo => null, }, .source_location = di.getLineNumberInfo(allocator, endian, compile_unit, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 2cb9179827fb..a12152a210f5 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1,6 +1,8 @@ //! Cross-platform abstraction for this binary's own debug information, with a //! 
goal of minimal code bloat and compilation speed penalty. +// MLUGG TODO: audit use of errors in this file. ideally, introduce some concrete error sets + const builtin = @import("builtin"); const native_os = builtin.os.tag; const native_endian = native_arch.endian(); @@ -26,13 +28,7 @@ const regValueNative = Dwarf.abi.regValueNative; const SelfInfo = @This(); -modules: std.AutoHashMapUnmanaged(usize, struct { - di: Module.DebugInfo, - // MLUGG TODO: okay actually these should definitely go on the impl so it can share state. e.g. loading unwind info might require lodaing debug info in some cases - loaded_locations: bool, - loaded_unwind: bool, - const init: @This() = .{ .di = .init, .loaded_locations = false, .loaded_unwind = false }; -}), +modules: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), lookup_cache: Module.LookupCache, pub const target_supported: bool = switch (native_os) { @@ -77,11 +73,7 @@ pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !us const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.load_offset); if (!gop.found_existing) gop.value_ptr.* = .init; - if (!gop.value_ptr.loaded_unwind) { - try module.loadUnwindInfo(gpa, &gop.value_ptr.di); - gop.value_ptr.loaded_unwind = true; - } - return module.unwindFrame(gpa, &gop.value_ptr.di, context); + return module.unwindFrame(gpa, gop.value_ptr, context); } pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { @@ -89,11 +81,7 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. 
const module: Module = try .lookup(&self.lookup_cache, gpa, address); const gop = try self.modules.getOrPut(gpa, module.key()); if (!gop.found_existing) gop.value_ptr.* = .init; - if (!gop.value_ptr.loaded_locations) { - try module.loadLocationInfo(gpa, &gop.value_ptr.di); - gop.value_ptr.loaded_locations = true; - } - return module.getSymbolAtAddress(gpa, &gop.value_ptr.di, address); + return module.getSymbolAtAddress(gpa, gop.value_ptr, address); } /// Returns the module name for a given address. @@ -168,9 +156,125 @@ const Module = switch (native_os) { return error.MissingDebugInfo; } fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - try loadMachODebugInfo(gpa, module, di); // MLUGG TODO inline + const mapped_mem = mapFileOrSelfExe(module.name) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.FileTooBig => return error.InvalidDebugInfo, + else => |e| return e, + }; + errdefer posix.munmap(mapped_mem); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + const symtab: macho.symtab_command = symtab: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + return error.MissingDebugInfo; + }; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; + const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; + + // MLUGG TODO: does it really make sense to initCapacity here? how many of syms are omitted? 
+ var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (sym.n_type.bits.is_stab == 0) continue; + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @intCast(sym.n_value); + }, + else => return error.InvalidDebugInfo, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + symbols.appendAssumeCapacity(last_sym); + }, + else => return error.InvalidDebugInfo, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + }, + else => {}, + } + } + + switch (state) { + .init => return error.MissingDebugInfo, + .oso_close => {}, + else => return error.InvalidDebugInfo, + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. 
+ mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); + + di.full = .{ + .mapped_memory = mapped_mem, + .symbols = symbols_slice, + .strings = strings, + .ofiles = .empty, + }; } fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { + if (di.unwind != null) return; _ = gpa; di.unwind = .{ .unwind_info = module.unwind_info, @@ -178,27 +282,39 @@ const Module = switch (native_os) { }; } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (di.full == null) try module.loadLocationInfo(gpa, di); const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(di.full.symbols, vaddr) orelse return .{}; // MLUGG TODO null? + const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{ + .name = null, + .compile_unit_name = null, + .source_location = null, + }; // offset of `address` from start of `symbol` const address_symbol_offset = vaddr - symbol.addr; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(di.full.strings[symbol.strx..], 0); - const o_file_path = mem.sliceTo(di.full.strings[symbol.ofile..], 0); + const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0); + const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0); + + // If any information is missing, we can at least return this from now on. 
+ const sym_only_result: std.debug.Symbol = .{ + .name = stab_symbol, + .compile_unit_name = null, + .source_location = null, + }; const o_file: *DebugInfo.OFile = of: { - const gop = try di.full.ofiles.getOrPut(gpa, o_file_path); + const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path); if (!gop.found_existing) { gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { - defer _ = di.full.ofiles.pop().?; + defer _ = di.full.?.ofiles.pop().?; switch (err) { error.FileNotFound, error.MissingDebugInfo, error.InvalidDebugInfo, - => return .{ .name = stab_symbol }, + => return sym_only_result, else => |e| return e, } }; @@ -206,10 +322,10 @@ const Module = switch (native_os) { break :of gop.value_ptr; }; - const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return .{ .name = stab_symbol }; + const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result; const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return .{ .name = stab_symbol }, + error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, else => |e| return e, }; @@ -222,7 +338,7 @@ const Module = switch (native_os) { o_file.dwarf.section(.debug_str), compile_unit, ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", + error.MissingDebugInfo, error.InvalidDebugInfo => null, }, .source_location = o_file.dwarf.getLineNumberInfo( gpa, @@ -236,25 +352,27 @@ const Module = switch (native_os) { }; } fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - _ = gpa; - const unwind_info = di.unwind.unwind_info orelse return error.MissingUnwindInfo; - // MLUGG TODO: inline + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); + const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo; + // MLUGG TODO: inline? 
return unwindFrameMachO( module.text_base, module.load_offset, context, unwind_info, - di.unwind.eh_frame, + di.unwind.?.eh_frame, ); } const LookupCache = void; const DebugInfo = struct { - unwind: struct { + unwind: ?struct { + // Backed by the in-memory sections mapped by the loader + // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadLocationInfo should be the one discovering them! unwind_info: ?[]const u8, eh_frame: ?[]const u8, }, // MLUGG TODO: awful field name - full: struct { + full: ?struct { mapped_memory: []align(std.heap.page_size_min) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, @@ -262,11 +380,10 @@ const Module = switch (native_os) { ofiles: std.StringArrayHashMapUnmanaged(OFile), }, - // Backed by the in-memory sections mapped by the loader - // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadMachODebugInfo should be the one discovering them! - - // MLUGG TODO HACKHACK: this is awful - const init: DebugInfo = undefined; + const init: DebugInfo = .{ + .unwind = null, + .full = null, + }; const OFile = struct { dwarf: Dwarf, @@ -388,18 +505,6 @@ const Module = switch (native_os) { _ = address; unreachable; } - fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { - _ = module; - _ = gpa; - _ = di; - unreachable; - } - fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *DebugInfo) !void { - _ = module; - _ = gpa; - _ = di; - unreachable; - } }, .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { load_offset: usize, @@ -408,9 +513,12 @@ const Module = switch (native_os) { gnu_eh_frame: ?[]const u8, const LookupCache = void; const DebugInfo = struct { - const init: DebugInfo = undefined; // MLUGG TODO: this makes me sad - em: Dwarf.ElfModule, // MLUGG TODO: bad field name (and, frankly, type) - unwind: Dwarf.Unwind, + em: ?Dwarf.ElfModule, // MLUGG TODO: 
bad field name (and, frankly, type) + unwind: ?Dwarf.Unwind, + const init: DebugInfo = .{ + .em = null, + .unwind = null, + }; }; fn key(m: Module) usize { return m.load_offset; // MLUGG TODO: is this technically valid? idk @@ -496,19 +604,20 @@ const Module = switch (native_os) { errdefer posix.munmap(mapped_mem); di.em = try .load(gpa, mapped_mem, module.build_id, null, null, null, filename); } + fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (di.em == null) try module.loadLocationInfo(gpa, di); + return di.em.?.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); + } fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); - try di.unwind.prepareLookup(gpa, @sizeOf(usize), native_endian); - } - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - return di.em.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); + try di.unwind.?.prepareLookup(gpa, @sizeOf(usize), native_endian); } fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - _ = gpa; - return unwindFrameDwarf(&di.unwind, module.load_offset, context, null); + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); + return unwindFrameDwarf(&di.unwind.?, module.load_offset, context, null); } }, .uefi, .windows => struct { @@ -660,12 +769,16 @@ const Module = switch (native_os) { di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); } + + 
di.loaded = true; } const LookupCache = struct { modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), const init: LookupCache = .{ .modules = .empty }; }; const DebugInfo = struct { + loaded: bool, + coff_image_base: u64, mapped_file: ?struct { file: File, @@ -686,6 +799,7 @@ const Module = switch (native_os) { coff_section_headers: []coff.SectionHeader, const init: DebugInfo = .{ + .loaded = false, .coff_image_base = undefined, .mapped_file = null, .dwarf = null, @@ -717,28 +831,24 @@ const Module = switch (native_os) { return null; }; - const module = (try di.pdb.?.getModule(mod_index)) orelse - return error.InvalidDebugInfo; - const obj_basename = fs.path.basename(module.obj_file_name); - - const symbol_name = di.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ) orelse "???"; - const opt_line_info = try di.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ); + const module = try di.pdb.?.getModule(mod_index) orelse return error.InvalidDebugInfo; return .{ - .name = symbol_name, - .compile_unit_name = obj_basename, - .source_location = opt_line_info, + .name = di.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ), + .compile_unit_name = fs.path.basename(module.obj_file_name), + .source_location = try di.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ), }; } }; fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (!di.loaded) try module.loadLocationInfo(gpa, di); // Translate the runtime address into a virtual address into the module const vaddr = address - module.base_address; @@ -756,125 +866,6 @@ const Module = switch (native_os) { }, }; -fn loadMachODebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { - const mapped_mem = mapFileOrSelfExe(module.name) catch |err| switch (err) { - error.FileNotFound => return 
error.MissingDebugInfo, - error.FileTooBig => return error.InvalidDebugInfo, - else => |e| return e, - }; - errdefer posix.munmap(mapped_mem); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command = symtab: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - return error.MissingDebugInfo; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - - // MLUGG TODO: does it really make sense to initCapacity here? how many of syms are omitted? - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) continue; - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @intCast(sym.n_value); - }, - else => return 
error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - symbols.appendAssumeCapacity(last_sym); - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => return error.MissingDebugInfo, - .oso_close => {}, - else => return error.InvalidDebugInfo, - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - di.full = .{ - .mapped_memory = mapped_mem, - .symbols = symbols_slice, - .strings = strings, - .ofiles = .empty, - }; -} - const MachoSymbol = struct { strx: u32, addr: u64, From 84b65860cfa4b7e61cb98347778331d67137d7e8 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 15:54:36 +0100 Subject: [PATCH 10/85] the world if ElfModule didn't suck: --- lib/std/debug.zig | 44 ++++------ lib/std/debug/Dwarf.zig | 154 +++++++++++++++------------------ lib/std/debug/Dwarf/Unwind.zig | 11 ++- lib/std/debug/Info.zig | 2 +- lib/std/debug/SelfInfo.zig | 52 ++++++----- 5 files changed, 126 insertions(+), 137 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 06fa327d2ef1..505677dcb6bd 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -153,10 +153,9 @@ pub const SourceLocation = struct { }; pub const Symbol = struct { - // MLUGG TODO: remove the defaults and audit everywhere. also grep for '???' 
across std - name: []const u8 = "???", - compile_unit_name: []const u8 = "???", - source_location: ?SourceLocation = null, + name: ?[]const u8, + compile_unit_name: ?[]const u8, + source_location: ?SourceLocation, }; /// Deprecated because it returns the optimization mode of the standard @@ -1040,10 +1039,11 @@ fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, writer: *Writ fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwind_err: UnwindError, tty_config: tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(getDebugInfoAllocator(), address) catch |err| switch (err) { - error.Unexpected, error.OutOfMemory => |e| return e, error.MissingDebugInfo => "???", + error.Unexpected, error.OutOfMemory => |e| return e, }; try tty_config.setColor(writer, .dim); + // MLUGG TODO this makes no sense given that MissingUnwindInfo exists? if (unwind_err == error.MissingDebugInfo) { try writer.print("Unwind information for `{s}:0x{x}` was not available, trace may be incomplete\n\n", .{ module_name, address }); } else { @@ -1054,35 +1054,27 @@ fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwi pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) !void { const gpa = getDebugInfoAllocator(); - if (debug_info.getSymbolAtAddress(gpa, address)) |symbol_info| { - defer if (symbol_info.source_location) |sl| gpa.free(sl.file_name); - return printLineInfo( - writer, - symbol_info.source_location, - address, - symbol_info.name, - symbol_info.compile_unit_name, - tty_config, - ); - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => {}, + const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => .{ + .name = null, + .compile_unit_name = null, + .source_location = null, + }, else => |e| return e, - } - // Unknown source location, 
but perhaps we can at least get a module name - const compile_unit_name = debug_info.getModuleNameForAddress(getDebugInfoAllocator(), address) catch |err| switch (err) { - error.MissingDebugInfo => "???", - error.Unexpected, error.OutOfMemory => |e| return e, }; + defer if (symbol.source_location) |sl| gpa.free(sl.file_name); return printLineInfo( writer, - null, + symbol.source_location, address, - "???", - compile_unit_name, + symbol.name orelse "???", + symbol.compile_unit_name orelse debug_info.getModuleNameForAddress(gpa, address) catch |err| switch (err) { + error.MissingDebugInfo => "???", + error.Unexpected, error.OutOfMemory => |e| return e, + }, tty_config, ); } - fn printLineInfo( writer: *Writer, source_location: ?SourceLocation, diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index bb4bee2fbfd0..5c8ac1b14a0e 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1487,20 +1487,42 @@ pub const ElfModule = struct { MemoryMappingNotSupported, } || Allocator.Error || std.fs.File.OpenError || OpenError; - /// Reads debug info from an already mapped ELF file. + /// Reads debug info from an ELF file given its path. /// /// If the required sections aren't present but a reference to external debug /// info is, then this this function will recurse to attempt to load the debug /// sections from an external file. 
pub fn load( gpa: Allocator, - mapped_mem: []align(std.heap.page_size_min) const u8, + elf_file_path: Path, build_id: ?[]const u8, expected_crc: ?u32, parent_sections: ?*Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, - elf_filename: ?[]const u8, ) LoadError!ElfModule { + const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { + const elf_file = try elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}); + defer elf_file.close(); + + const file_len = cast( + usize, + elf_file.getEndPos() catch return bad(), + ) orelse return error.Overflow; + + break :mapped std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ) catch |err| switch (err) { + error.MappingAlreadyExists => unreachable, + else => |e| return e, + }; + }; + errdefer std.posix.munmap(mapped_mem); + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); @@ -1606,39 +1628,36 @@ pub const ElfModule = struct { // $XDG_CACHE_HOME/debuginfod_client//debuginfo // This only opportunisticly tries to load from the debuginfod cache, but doesn't try to populate it. 
// One can manually run `debuginfod-find debuginfo PATH` to download the symbols - if (build_id) |id| blk: { - var debuginfod_dir: std.fs.Dir = switch (builtin.os.tag) { - .wasi, .windows => break :blk, - else => dir: { - if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| { - break :dir std.fs.openDirAbsolute(path, .{}) catch break :blk; - } - if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { - if (cache_path.len > 0) { - const path = std.fs.path.join(gpa, &[_][]const u8{ cache_path, "debuginfod_client" }) catch break :blk; - defer gpa.free(path); - break :dir std.fs.openDirAbsolute(path, .{}) catch break :blk; - } - } - if (std.posix.getenv("HOME")) |home_path| { - const path = std.fs.path.join(gpa, &[_][]const u8{ home_path, ".cache", "debuginfod_client" }) catch break :blk; - defer gpa.free(path); - break :dir std.fs.openDirAbsolute(path, .{}) catch break :blk; + debuginfod: { + const id = build_id orelse break :debuginfod; + switch (builtin.os.tag) { + .wasi, .windows => break :debuginfod, + else => {}, + } + const id_dir_path: []u8 = p: { + if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| { + break :p try std.fmt.allocPrint(gpa, "{s}/{x}", .{ path, id }); + } + if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { + if (cache_path.len > 0) { + break :p try std.fmt.allocPrint(gpa, "{s}/debuginfod_client/{x}", .{ cache_path, id }); } - break :blk; - }, + } + if (std.posix.getenv("HOME")) |home_path| { + break :p try std.fmt.allocPrint(gpa, "{s}/.cache/debuginfod_client/{x}", .{ home_path, id }); + } + break :debuginfod; }; - defer debuginfod_dir.close(); - - const filename = std.fmt.allocPrint(gpa, "{x}/debuginfo", .{id}) catch break :blk; - defer gpa.free(filename); + defer gpa.free(id_dir_path); + if (!std.fs.path.isAbsolute(id_dir_path)) break :debuginfod; - const path: Path = .{ - .root_dir = .{ .path = null, .handle = debuginfod_dir }, - .sub_path = filename, - }; + var id_dir = std.fs.openDirAbsolute(id_dir_path, .{}) catch break :debuginfod; 
+ defer id_dir.close(); - return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch break :blk; + return load(gpa, .{ + .root_dir = .{ .path = id_dir_path, .handle = id_dir }, + .sub_path = "debuginfo", + }, null, separate_debug_crc, §ions, mapped_mem) catch break :debuginfod; } const global_debug_directories = [_][]const u8{ @@ -1659,33 +1678,37 @@ pub const ElfModule = struct { for (global_debug_directories) |global_directory| { const path: Path = .{ - .root_dir = std.Build.Cache.Directory.cwd(), + .root_dir = .cwd(), .sub_path = try std.fs.path.join(gpa, &.{ global_directory, ".build-id", &id_prefix_buf, filename, }), }; defer gpa.free(path.sub_path); - return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + return load(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; } } // use the path from .gnu_debuglink, in the same search order as gdb - if (separate_debug_filename) |separate_filename| blk: { - if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) + separate: { + const separate_filename = separate_debug_filename orelse break :separate; + if (mem.eql(u8, std.fs.path.basename(elf_file_path.sub_path), separate_filename)) return error.MissingDebugInfo; exe_dir: { - var exe_dir_buf: [std.fs.max_path_bytes]u8 = undefined; - const exe_dir_path = std.fs.selfExeDirPath(&exe_dir_buf) catch break :exe_dir; + const exe_dir_path = try std.fs.path.resolve(gpa, &.{ + elf_file_path.root_dir.path orelse ".", + std.fs.path.dirname(elf_file_path.sub_path) orelse ".", + }); + defer gpa.free(exe_dir_path); var exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch break :exe_dir; defer exe_dir.close(); // / - if (loadPath( + if (load( gpa, .{ - .root_dir = .{ .path = null, .handle = exe_dir }, + .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, .sub_path = separate_filename, }, null, @@ -1698,27 +1721,27 @@ pub const ElfModule = struct { // /.debug/ const path: Path = 
.{ - .root_dir = .{ .path = null, .handle = exe_dir }, + .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), }; defer gpa.free(path.sub_path); - if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { + if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { return em; } else |_| {} } var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; - const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk; + const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :separate; // // for (global_debug_directories) |global_directory| { const path: Path = .{ - .root_dir = std.Build.Cache.Directory.cwd(), + .root_dir = .cwd(), .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), }; defer gpa.free(path.sub_path); - if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { + if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { return em; } else |_| {} } @@ -1735,47 +1758,6 @@ pub const ElfModule = struct { .dwarf = dwarf, }; } - - pub fn loadPath( - gpa: Allocator, - elf_file_path: Path, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, - parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, - ) LoadError!ElfModule { - const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { - error.FileNotFound => return missing(), - else => return err, - }; - defer elf_file.close(); - - const end_pos = elf_file.getEndPos() catch return bad(); - const file_len = cast(usize, end_pos) orelse return error.Overflow; - - const mapped_mem = std.posix.mmap( - null, - file_len, - std.posix.PROT.READ, - .{ .TYPE = .SHARED }, - elf_file.handle, - 0, - ) catch |err| switch (err) { - error.MappingAlreadyExists => unreachable, - else => |e| return e, - }; - errdefer std.posix.munmap(mapped_mem); - - return 
load( - gpa, - mapped_mem, - build_id, - expected_crc, - parent_sections, - parent_mapped_mem, - elf_file_path.sub_path, - ); - } }; pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) !std.debug.Symbol { diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 09cad0db2308..323bf3987461 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -41,8 +41,6 @@ const SortedFdeEntry = struct { const Section = enum { debug_frame, eh_frame }; -// MLUGG TODO deinit? - /// Initialize with unwind information from the contents of a `.debug_frame` or `.eh_frame` section. /// /// If the `.eh_frame_hdr` section is available, consider instead using `initEhFrameHdr`. This @@ -78,6 +76,13 @@ pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_p }; } +pub fn deinit(unwind: *Unwind, gpa: Allocator) void { + if (unwind.lookup) |lookup| switch (lookup) { + .eh_frame_hdr => {}, + .sorted_fdes => |fdes| gpa.free(fdes), + }; +} + /// This represents the decoded .eh_frame_hdr header pub const EhFrameHeader = struct { eh_frame_vaddr: u64, @@ -205,8 +210,6 @@ pub const EntryHeader = union(enum) { const unit_header = try Dwarf.readUnitHeader(r, endian); if (unit_header.unit_length == 0) return .terminator; - // TODO MLUGG: seriously, just... check the formats of everything in BOTH LSB Core and DWARF. this is a fucking *mess*. maybe add spec references. - // Next is a value which will disambiguate CIEs and FDEs. Annoyingly, LSB Core makes this // value always 4-byte, whereas DWARF makes it depend on the `dwarf.Format`. 
const cie_ptr_or_id_size: u8 = switch (section) { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index c809547f7376..e38645e1f93f 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -25,7 +25,7 @@ pub const LoadError = Dwarf.ElfModule.LoadError; pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, §ions, null); + var elf_module = try Dwarf.ElfModule.load(gpa, path, null, null, §ions, null); try elf_module.dwarf.populateRanges(gpa); var info: Info = .{ .address_map = .{}, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index a12152a210f5..e99a83ce7522 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -156,11 +156,7 @@ const Module = switch (native_os) { return error.MissingDebugInfo; } fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - const mapped_mem = mapFileOrSelfExe(module.name) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.FileTooBig => return error.InvalidDebugInfo, - else => |e| return e, - }; + const mapped_mem = try mapDebugInfoFile(module.name); errdefer posix.munmap(mapped_mem); const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); @@ -311,7 +307,6 @@ const Module = switch (native_os) { gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { defer _ = di.full.?.ofiles.pop().?; switch (err) { - error.FileNotFound, error.MissingDebugInfo, error.InvalidDebugInfo, => return sym_only_result, @@ -402,7 +397,7 @@ const Module = switch (native_os) { } fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapFileOrSelfExe(o_file_path); + const mapped_mem = try mapDebugInfoFile(o_file_path); errdefer posix.munmap(mapped_mem); if (mapped_mem.len < @sizeOf(macho.mach_header_64)) 
return error.InvalidDebugInfo; @@ -595,14 +590,27 @@ const Module = switch (native_os) { return error.MissingDebugInfo; } fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - const filename: ?[]const u8 = if (module.name.len > 0) module.name else null; - const mapped_mem = mapFileOrSelfExe(filename) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.FileTooBig => return error.InvalidDebugInfo, - else => |e| return e, - }; - errdefer posix.munmap(mapped_mem); - di.em = try .load(gpa, mapped_mem, module.build_id, null, null, null, filename); + if (module.name.len > 0) { + di.em = Dwarf.ElfModule.load(gpa, .{ + .root_dir = .cwd(), + .sub_path = module.name, + }, module.build_id, null, null, null) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.Overflow => return error.InvalidDebugInfo, + else => |e| return e, + }; + } else { + const path = try std.fs.selfExePathAlloc(gpa); + defer gpa.free(path); + di.em = Dwarf.ElfModule.load(gpa, .{ + .root_dir = .cwd(), + .sub_path = path, + }, module.build_id, null, null, null) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.Overflow => return error.InvalidDebugInfo, + else => |e| return e, + }; + } } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { if (di.em == null) try module.loadLocationInfo(gpa, di); @@ -1247,14 +1255,18 @@ fn applyOffset(base: usize, offset: i64) !usize { } /// Uses `mmap` to map the file at `opt_path` (or, if `null`, the self executable image) into memory. 
-fn mapFileOrSelfExe(opt_path: ?[]const u8) ![]align(std.heap.page_size_min) const u8 { - const file = if (opt_path) |path| - try fs.cwd().openFile(path, .{}) +fn mapDebugInfoFile(opt_path: ?[]const u8) ![]align(std.heap.page_size_min) const u8 { + const open_result = if (opt_path) |path| + fs.cwd().openFile(path, .{}) else - try fs.openSelfExe(.{}); + fs.openSelfExe(.{}); + const file = open_result catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; defer file.close(); - const file_len = math.cast(usize, try file.getEndPos()) orelse return error.FileTooBig; + const file_len = math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; return posix.mmap( null, From 8fdcdb8c69712ebbfbd06e0c18d373eee462fe31 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 16:14:54 +0100 Subject: [PATCH 11/85] the world if Dwarf.ElfModule was like REALLY good: --- lib/std/debug/Dwarf.zig | 323 +---------------------------- lib/std/debug/Dwarf/ElfModule.zig | 328 ++++++++++++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 16 +- 3 files changed, 339 insertions(+), 328 deletions(-) create mode 100644 lib/std/debug/Dwarf/ElfModule.zig diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 5c8ac1b14a0e..73af2d2f42ee 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -8,11 +8,9 @@ //! For unopinionated types and bits, see `std.dwarf`. 
const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); const std = @import("../std.zig"); const Allocator = std.mem.Allocator; -const elf = std.elf; const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; @@ -23,7 +21,6 @@ const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const Path = std.Build.Cache.Path; const ArrayList = std.ArrayList; const Endian = std.builtin.Endian; const Reader = std.Io.Reader; @@ -34,6 +31,7 @@ pub const expression = @import("Dwarf/expression.zig"); pub const abi = @import("Dwarf/abi.zig"); pub const call_frame = @import("Dwarf/call_frame.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); +pub const ElfModule = @import("Dwarf/ElfModule.zig"); /// Useful to temporarily enable while working on this file. const debug_debug_mode = false; @@ -1431,7 +1429,7 @@ pub fn bad() error{InvalidDebugInfo} { return error.InvalidDebugInfo; } -fn invalidDebugInfoDetected() void { +pub fn invalidDebugInfoDetected() void { if (debug_debug_mode) @panic("bad dwarf"); } @@ -1449,317 +1447,6 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { return str[casted_offset..last :0]; } -// MLUGG TODO: i am dubious of this whole thing being here atp. 
look closely and see if it depends on being the self process -pub const ElfModule = struct { - dwarf: Dwarf, - mapped_memory: []align(std.heap.page_size_min) const u8, - external_mapped_memory: ?[]align(std.heap.page_size_min) const u8, - - pub fn deinit(self: *@This(), allocator: Allocator) void { - self.dwarf.deinit(allocator); - std.posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| std.posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, endian: Endian, load_offset: usize, address: usize) !std.debug.Symbol { - // Translate the runtime address into a virtual address into the module - // MLUGG TODO: this clearly tells us that the logic should live near SelfInfo... - const vaddr = address - load_offset; - return self.dwarf.getSymbol(allocator, endian, vaddr); - } - - pub const LoadError = error{ - InvalidDebugInfo, - MissingDebugInfo, - InvalidElfMagic, - InvalidElfVersion, - InvalidElfEndian, - /// TODO: implement this and then remove this error code - UnimplementedDwarfForeignEndian, - /// The debug info may be valid but this implementation uses memory - /// mapping which limits things to usize. If the target debug info is - /// 64-bit and host is 32-bit, there may be debug info that is not - /// supportable using this method. - Overflow, - - PermissionDenied, - LockedMemoryLimitExceeded, - MemoryMappingNotSupported, - } || Allocator.Error || std.fs.File.OpenError || OpenError; - - /// Reads debug info from an ELF file given its path. - /// - /// If the required sections aren't present but a reference to external debug - /// info is, then this this function will recurse to attempt to load the debug - /// sections from an external file. 
- pub fn load( - gpa: Allocator, - elf_file_path: Path, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: ?*Dwarf.SectionArray, - parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, - ) LoadError!ElfModule { - const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { - const elf_file = try elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}); - defer elf_file.close(); - - const file_len = cast( - usize, - elf_file.getEndPos() catch return bad(), - ) orelse return error.Overflow; - - break :mapped std.posix.mmap( - null, - file_len, - std.posix.PROT.READ, - .{ .TYPE = .SHARED }, - elf_file.handle, - 0, - ) catch |err| switch (err) { - error.MappingAlreadyExists => unreachable, - else => |e| return e, - }; - }; - errdefer std.posix.munmap(mapped_mem); - - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = @splat(null); - - // Combine section list. This takes ownership over any owned sections from the parent scope. 
- if (parent_sections) |ps| { - for (ps, §ions) |*parent, *section_elem| { - if (parent.*) |*p| { - section_elem.* = p.*; - p.owned = false; - } - } - } - errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, debug_filename.len + 1, 4); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, endian); - separate_debug_filename = debug_filename; - continue; - } - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |sect, i| { - if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; - } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; - - const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_reader: Reader = .fixed(section_bytes); - const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var decompress: std.compress.flate.Decompress = .init(§ion_reader, .zlib, &.{}); - var decompressed_section: ArrayList(u8) = .empty; - defer decompressed_section.deinit(gpa); - decompress.reader.appendRemainingUnlimited(gpa, &decompressed_section) catch { - invalidDebugInfoDetected(); - continue; - }; - if (chdr.ch_size != decompressed_section.items.len) { - invalidDebugInfoDetected(); - continue; - } - break :blk .{ - .data = try decompressed_section.toOwnedSlice(gpa), - .virtual_address = shdr.sh_addr, - .owned = true, - }; - } else .{ - .data = section_bytes, - .virtual_address = shdr.sh_addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - // $XDG_CACHE_HOME/debuginfod_client//debuginfo - // This only opportunisticly tries to load from the debuginfod cache, but doesn't try to populate it. 
- // One can manually run `debuginfod-find debuginfo PATH` to download the symbols - debuginfod: { - const id = build_id orelse break :debuginfod; - switch (builtin.os.tag) { - .wasi, .windows => break :debuginfod, - else => {}, - } - const id_dir_path: []u8 = p: { - if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| { - break :p try std.fmt.allocPrint(gpa, "{s}/{x}", .{ path, id }); - } - if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { - if (cache_path.len > 0) { - break :p try std.fmt.allocPrint(gpa, "{s}/debuginfod_client/{x}", .{ cache_path, id }); - } - } - if (std.posix.getenv("HOME")) |home_path| { - break :p try std.fmt.allocPrint(gpa, "{s}/.cache/debuginfod_client/{x}", .{ home_path, id }); - } - break :debuginfod; - }; - defer gpa.free(id_dir_path); - if (!std.fs.path.isAbsolute(id_dir_path)) break :debuginfod; - - var id_dir = std.fs.openDirAbsolute(id_dir_path, .{}) catch break :debuginfod; - defer id_dir.close(); - - return load(gpa, .{ - .root_dir = .{ .path = id_dir_path, .handle = id_dir }, - .sub_path = "debuginfo", - }, null, separate_debug_crc, §ions, mapped_mem) catch break :debuginfod; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // /.build-id/<2-character id prefix>/.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{x}", .{id[0..1]}) catch unreachable; - const filename = std.fmt.bufPrint(&filename_buf, "{x}" ++ extension, .{id[1..]}) catch break :blk; - - for (global_debug_directories) |global_directory| { - const path: Path = .{ - .root_dir = .cwd(), - .sub_path = try std.fs.path.join(gpa, &.{ - global_directory, ".build-id", &id_prefix_buf, filename, - }), - }; - defer gpa.free(path.sub_path); - - return load(gpa, path, null, 
separate_debug_crc, §ions, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - separate: { - const separate_filename = separate_debug_filename orelse break :separate; - if (mem.eql(u8, std.fs.path.basename(elf_file_path.sub_path), separate_filename)) - return error.MissingDebugInfo; - - exe_dir: { - const exe_dir_path = try std.fs.path.resolve(gpa, &.{ - elf_file_path.root_dir.path orelse ".", - std.fs.path.dirname(elf_file_path.sub_path) orelse ".", - }); - defer gpa.free(exe_dir_path); - var exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch break :exe_dir; - defer exe_dir.close(); - - // / - if (load( - gpa, - .{ - .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, - .sub_path = separate_filename, - }, - null, - separate_debug_crc, - §ions, - mapped_mem, - )) |em| { - return em; - } else |_| {} - - // /.debug/ - const path: Path = .{ - .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, - .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), - }; - defer gpa.free(path.sub_path); - - if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { - return em; - } else |_| {} - } - - var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; - const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :separate; - - // // - for (global_debug_directories) |global_directory| { - const path: Path = .{ - .root_dir = .cwd(), - .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), - }; - defer gpa.free(path.sub_path); - if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { - return em; - } else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var dwarf: Dwarf = .{ .sections = sections }; - try dwarf.open(gpa, endian); - return .{ - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - .dwarf = dwarf, - }; - } -}; - pub 
fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) !std.debug.Symbol { const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return .{ .name = null, .compile_unit_name = null, .source_location = null }, @@ -1777,12 +1464,6 @@ pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) }; } -pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { - const start = cast(usize, offset) orelse return error.Overflow; - const end = start + (cast(usize, size) orelse return error.Overflow); - return ptr[start..end]; -} - fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { // MLUGG TODO FIX BEFORE MERGE: this function is slightly bogus. addresses have a byte width which is independent of the `dwarf.Format`! return switch (format) { diff --git a/lib/std/debug/Dwarf/ElfModule.zig b/lib/std/debug/Dwarf/ElfModule.zig new file mode 100644 index 000000000000..68ebbb90f4f2 --- /dev/null +++ b/lib/std/debug/Dwarf/ElfModule.zig @@ -0,0 +1,328 @@ +//! A thin wrapper around `Dwarf` which handles loading debug information from an ELF file. Load the +//! info with `load`, then directly access the `dwarf` field before finally `deinit`ing. + +dwarf: Dwarf, + +/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that +/// this memory can be unmapped by `ElfModule.deinit`. +mapped_file: []align(std.heap.page_size_min) const u8, +/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file` +/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must +/// be unmapped by `ElfModule.deinit`. 
+mapped_debug_file: ?[]align(std.heap.page_size_min) const u8, + +pub fn deinit(em: *ElfModule, allocator: Allocator) void { + em.dwarf.deinit(allocator); + std.posix.munmap(em.mapped_file); + if (em.mapped_debug_file) |m| std.posix.munmap(m); +} + +pub const LoadError = error{ + InvalidDebugInfo, + MissingDebugInfo, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfEndian, + /// TODO: implement this and then remove this error code + UnimplementedDwarfForeignEndian, + /// The debug info may be valid but this implementation uses memory + /// mapping which limits things to usize. If the target debug info is + /// 64-bit and host is 32-bit, there may be debug info that is not + /// supportable using this method. + Overflow, + + PermissionDenied, + LockedMemoryLimitExceeded, + MemoryMappingNotSupported, +} || Allocator.Error || std.fs.File.OpenError || Dwarf.OpenError; + +/// Reads debug info from an ELF file given its path. +/// +/// If the required sections aren't present but a reference to external debug +/// info is, then this this function will recurse to attempt to load the debug +/// sections from an external file. 
+pub fn load( + gpa: Allocator, + elf_file_path: Path, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: ?*Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, +) LoadError!ElfModule { + const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { + const elf_file = try elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}); + defer elf_file.close(); + + const file_len = std.math.cast( + usize, + elf_file.getEndPos() catch return Dwarf.bad(), + ) orelse return error.Overflow; + + break :mapped std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ) catch |err| switch (err) { + error.MappingAlreadyExists => unreachable, + else => |e| return e, + }; + }; + errdefer std.posix.munmap(mapped_mem); + + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; + + const shoff = hdr.e_shoff; + const str_section_off = std.math.cast( + usize, + shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx), + ) orelse return error.Overflow; + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(mapped_mem[str_section_off..])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = @splat(null); + + // Combine section list. 
This takes ownership over any owned sections from the parent scope. + if (parent_sections) |ps| { + for (ps, §ions) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } + } + } + errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; + const gnu_debuglink = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, debug_filename.len + 1, 4); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |sect, i| { + if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; + const section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_reader: Reader = .fixed(section_bytes); + const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var decompress: std.compress.flate.Decompress = .init(§ion_reader, .zlib, &.{}); + var decompressed_section: ArrayList(u8) = .empty; + defer decompressed_section.deinit(gpa); + decompress.reader.appendRemainingUnlimited(gpa, &decompressed_section) catch { + Dwarf.invalidDebugInfoDetected(); + continue; + }; + if (chdr.ch_size != decompressed_section.items.len) { + Dwarf.invalidDebugInfoDetected(); + continue; + } + break :blk .{ + .data = try decompressed_section.toOwnedSlice(gpa), + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + // $XDG_CACHE_HOME/debuginfod_client//debuginfo + // This only opportunisticly tries to load from the debuginfod cache, but doesn't try to populate it. 
+ // One can manually run `debuginfod-find debuginfo PATH` to download the symbols + debuginfod: { + const id = build_id orelse break :debuginfod; + switch (builtin.os.tag) { + .wasi, .windows => break :debuginfod, + else => {}, + } + const id_dir_path: []u8 = p: { + if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| { + break :p try std.fmt.allocPrint(gpa, "{s}/{x}", .{ path, id }); + } + if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { + if (cache_path.len > 0) { + break :p try std.fmt.allocPrint(gpa, "{s}/debuginfod_client/{x}", .{ cache_path, id }); + } + } + if (std.posix.getenv("HOME")) |home_path| { + break :p try std.fmt.allocPrint(gpa, "{s}/.cache/debuginfod_client/{x}", .{ home_path, id }); + } + break :debuginfod; + }; + defer gpa.free(id_dir_path); + if (!std.fs.path.isAbsolute(id_dir_path)) break :debuginfod; + + var id_dir = std.fs.openDirAbsolute(id_dir_path, .{}) catch break :debuginfod; + defer id_dir.close(); + + return load(gpa, .{ + .root_dir = .{ .path = id_dir_path, .handle = id_dir }, + .sub_path = "debuginfo", + }, null, separate_debug_crc, §ions, mapped_mem) catch break :debuginfod; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // /.build-id/<2-character id prefix>/.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{x}", .{id[0..1]}) catch unreachable; + const filename = std.fmt.bufPrint(&filename_buf, "{x}" ++ extension, .{id[1..]}) catch break :blk; + + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = .cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ + global_directory, ".build-id", &id_prefix_buf, filename, + }), + }; + defer gpa.free(path.sub_path); + + return load(gpa, path, null, 
separate_debug_crc, §ions, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + separate: { + const separate_filename = separate_debug_filename orelse break :separate; + if (mem.eql(u8, std.fs.path.basename(elf_file_path.sub_path), separate_filename)) + return error.MissingDebugInfo; + + exe_dir: { + const exe_dir_path = try std.fs.path.resolve(gpa, &.{ + elf_file_path.root_dir.path orelse ".", + std.fs.path.dirname(elf_file_path.sub_path) orelse ".", + }); + defer gpa.free(exe_dir_path); + var exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch break :exe_dir; + defer exe_dir.close(); + + // / + if (load( + gpa, + .{ + .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, + .sub_path = separate_filename, + }, + null, + separate_debug_crc, + §ions, + mapped_mem, + )) |em| { + return em; + } else |_| {} + + // /.debug/ + const path: Path = .{ + .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, + .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), + }; + defer gpa.free(path.sub_path); + + if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { + return em; + } else |_| {} + } + + var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; + const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :separate; + + // // + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = .cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), + }; + defer gpa.free(path.sub_path); + if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { + return em; + } else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var dwarf: Dwarf = .{ .sections = sections }; + try dwarf.open(gpa, endian); + return .{ + .mapped_file = parent_mapped_mem orelse mapped_mem, + .mapped_debug_file = if (parent_mapped_mem != null) mapped_mem else null, + .dwarf = dwarf, + }; +} + +const std = 
@import("../../std.zig"); +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const Dwarf = std.debug.Dwarf; +const Path = std.Build.Cache.Path; +const Reader = std.Io.Reader; +const mem = std.mem; +const elf = std.elf; + +const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); + +const ElfModule = @This(); diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index e99a83ce7522..4c6f8e499095 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -456,7 +456,8 @@ const Module = switch (native_os) { if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; } else continue; - const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; sections[section_index] = .{ .data = section_bytes, .virtual_address = @intCast(sect.addr), @@ -508,10 +509,10 @@ const Module = switch (native_os) { gnu_eh_frame: ?[]const u8, const LookupCache = void; const DebugInfo = struct { - em: ?Dwarf.ElfModule, // MLUGG TODO: bad field name (and, frankly, type) + loaded_elf: ?Dwarf.ElfModule, // MLUGG TODO: bad field name unwind: ?Dwarf.Unwind, const init: DebugInfo = .{ - .em = null, + .loaded_elf = null, .unwind = null, }; }; @@ -591,7 +592,7 @@ const Module = switch (native_os) { } fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { if (module.name.len > 0) { - di.em = Dwarf.ElfModule.load(gpa, .{ + di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ .root_dir = .cwd(), .sub_path = module.name, }, module.build_id, null, null, null) catch |err| switch (err) { @@ -602,7 +603,7 @@ const Module = switch (native_os) { } else { const path = try std.fs.selfExePathAlloc(gpa); defer gpa.free(path); - di.em = Dwarf.ElfModule.load(gpa, .{ + di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ .root_dir = .cwd(), .sub_path = 
path, }, module.build_id, null, null, null) catch |err| switch (err) { @@ -613,8 +614,9 @@ const Module = switch (native_os) { } } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.em == null) try module.loadLocationInfo(gpa, di); - return di.em.?.getSymbolAtAddress(gpa, native_endian, module.load_offset, address); + if (di.loaded_elf == null) try module.loadLocationInfo(gpa, di); + const vaddr = address - module.load_offset; + return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); } fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file From e4dbfc109bb32334f8d0dde9277ddd80c9b6f126 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 16:39:45 +0100 Subject: [PATCH 12/85] dont dupe state you silly billy --- lib/std/debug/SelfInfo.zig | 57 +++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 4c6f8e499095..fbb925feef25 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -100,8 +100,6 @@ const Module = switch (native_os) { text_base: usize, load_offset: usize, name: []const u8, - unwind_info: ?[]const u8, - eh_frame: ?[]const u8, fn key(m: *const Module) usize { return m.text_base; } @@ -120,11 +118,11 @@ const Module = switch (native_os) { .ncmds = header.ncmds, .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], }; - const text_segment_cmd, const text_sections = while (it.next()) |load_cmd| { + const text_segment_cmd = while (it.next()) |load_cmd| { if (load_cmd.cmd() != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break .{ segment_cmd, load_cmd.getSections() 
}; + break segment_cmd; } else continue; const seg_start = load_offset + text_segment_cmd.vmaddr; @@ -132,26 +130,12 @@ const Module = switch (native_os) { const seg_end = seg_start + text_segment_cmd.vmsize; if (address < seg_start or address >= seg_end) continue; - // We've found the matching __TEXT segment. This is the image we need, but we must look - // for unwind info in it before returning. - - var result: Module = .{ + // We've found the matching __TEXT segment. This is the image we need. + return .{ .text_base = text_base, .load_offset = load_offset, .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), - .unwind_info = null, - .eh_frame = null, }; - for (text_sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); - result.unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); - result.eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - return result; } return error.MissingDebugInfo; } @@ -270,11 +254,35 @@ const Module = switch (native_os) { }; } fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - if (di.unwind != null) return; _ = gpa; + + const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const sections = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break load_cmd.getSections(); + } else unreachable; + + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; + for (sections) |sect| { + if 
(mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } di.unwind = .{ - .unwind_info = module.unwind_info, - .eh_frame = module.eh_frame, + .unwind_info = unwind_info, + .eh_frame = eh_frame, }; } fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { @@ -362,7 +370,6 @@ const Module = switch (native_os) { const DebugInfo = struct { unwind: ?struct { // Backed by the in-memory sections mapped by the loader - // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadLocationInfo should be the one discovering them! unwind_info: ?[]const u8, eh_frame: ?[]const u8, }, @@ -517,7 +524,7 @@ const Module = switch (native_os) { }; }; fn key(m: Module) usize { - return m.load_offset; // MLUGG TODO: is this technically valid? idk + return m.load_offset; } fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { _ = cache; From b762cd30fd3695e6c54b81d71f9adaf7bac1e5be Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 16:41:09 +0100 Subject: [PATCH 13/85] remove TODOs which are done or which i'm not actually gonna do lol --- lib/std/debug/SelfInfo.zig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index fbb925feef25..30089c1896a6 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -163,7 +163,6 @@ const Module = switch (native_os) { const syms = syms_ptr[0..symtab.nsyms]; const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - // MLUGG TODO: does it really make sense to initCapacity here? 
how many of syms are omitted? var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); defer symbols.deinit(gpa); @@ -438,7 +437,6 @@ const Module = switch (native_os) { const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); // TODO handle tentative (common) symbols - // MLUGG TODO: does initCapacity actually make sense? var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; defer addr_table.deinit(gpa); try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); @@ -516,7 +514,7 @@ const Module = switch (native_os) { gnu_eh_frame: ?[]const u8, const LookupCache = void; const DebugInfo = struct { - loaded_elf: ?Dwarf.ElfModule, // MLUGG TODO: bad field name + loaded_elf: ?Dwarf.ElfModule, unwind: ?Dwarf.Unwind, const init: DebugInfo = .{ .loaded_elf = null, From 1397b95143a799d836f549448485d87a70de391c Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 17:36:56 +0100 Subject: [PATCH 14/85] std.debug.Dwarf: eliminate host pointer size dependency --- lib/std/debug/Dwarf.zig | 146 +++++++++++++++--------------- lib/std/debug/Dwarf/ElfModule.zig | 2 - lib/std/debug/SelfInfo.zig | 2 - 3 files changed, 75 insertions(+), 75 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 73af2d2f42ee..0ba4ab8048a7 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1,14 +1,13 @@ //! Implements parsing, decoding, and caching of DWARF information. //! -//! This API does not assume the current executable is itself the thing being -//! debugged, however, it does assume the debug info has the same CPU -//! architecture and OS as the current executable. It is planned to remove this -//! limitation. +//! This API makes no assumptions about the relationship between the host and +//! the target being debugged. In other words, any DWARF information can be used +//! from any host via this API. Note, however, that the limits of 32-bit +//! 
addressing can cause very large 64-bit binaries to be impossible to open on +//! 32-bit hosts. //! //! For unopinionated types and bits, see `std.dwarf`. -const builtin = @import("builtin"); - const std = @import("../std.zig"); const Allocator = std.mem.Allocator; const mem = std.mem; @@ -57,9 +56,6 @@ pub const Range = struct { pub const Section = struct { data: []const u8, - // Module-relative virtual address. - // Only set if the section data was loaded from disk. - virtual_address: ?usize = null, // If `data` is owned by this Dwarf. owned: bool, @@ -120,6 +116,7 @@ pub const Abbrev = struct { pub const CompileUnit = struct { version: u16, format: Format, + addr_size_bytes: u8, die: Die, pc_range: ?PcRange, @@ -170,7 +167,7 @@ pub const CompileUnit = struct { pub const FormValue = union(enum) { addr: u64, - addrx: usize, + addrx: u64, block: []const u8, udata: u64, data16: *const [16]u8, @@ -182,7 +179,7 @@ pub const FormValue = union(enum) { ref_addr: u64, string: [:0]const u8, strp: u64, - strx: usize, + strx: u64, line_strp: u64, loclistx: u64, rnglistx: u64, @@ -392,12 +389,11 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! const unit_type = try fr.takeByte(); if (unit_type != DW.UT.compile) return bad(); address_size = try fr.takeByte(); - debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); } else { - debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); address_size = try fr.takeByte(); } - if (address_size != @sizeOf(usize)) return bad(); const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); @@ -424,6 +420,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! 
var compile_unit: CompileUnit = .{ .version = version, .format = unit_header.format, + .addr_size_bytes = address_size, .die = undefined, .pc_range = null, @@ -446,6 +443,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! abbrev_table, unit_header.format, endian, + address_size, )) orelse continue; switch (die_obj.tag_id) { @@ -480,6 +478,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! abbrev_table, // wrong abbrev table for different cu unit_header.format, endian, + address_size, )) orelse return bad(); } else if (this_die_obj.getAttr(AT.specification)) |_| { const after_die_offset = fr.seek; @@ -494,6 +493,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! abbrev_table, // wrong abbrev table for different cu unit_header.format, endian, + address_size, )) orelse return bad(); } else { break :x null; @@ -584,12 +584,11 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr const unit_type = try fr.takeByte(); if (unit_type != UT.compile) return bad(); address_size = try fr.takeByte(); - debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); } else { - debug_abbrev_offset = try readAddress(&fr, unit_header.format, endian); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); address_size = try fr.takeByte(); } - if (address_size != @sizeOf(usize)) return bad(); const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); @@ -605,6 +604,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr abbrev_table, unit_header.format, endian, + address_size, )) orelse return bad(); if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad(); @@ -614,6 +614,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr var compile_unit: CompileUnit 
= .{ .version = version, .format = unit_header.format, + .addr_size_bytes = address_size, .pc_range = null, .die = compile_unit_die, .str_offsets_base = if (compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0, @@ -702,15 +703,15 @@ const DebugRangeIterator = struct { .rnglistx => |idx| off: { switch (compile_unit.format) { .@"32" => { - const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); + const offset_loc = compile_unit.rnglists_base + 4 * idx; if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], endian); + const offset = mem.readInt(u32, debug_ranges[@intCast(offset_loc)..][0..4], endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { - const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); + const offset_loc = compile_unit.rnglists_base + 8 * idx; if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], endian); + const offset = mem.readInt(u64, debug_ranges[@intCast(offset_loc)..][0..8], endian); break :off compile_unit.rnglists_base + offset; }, } @@ -743,21 +744,22 @@ const DebugRangeIterator = struct { // Returns the next range in the list, or null if the end was reached. 
pub fn next(self: *@This()) !?PcRange { const endian = self.endian; + const addr_size_bytes = self.compile_unit.addr_size_bytes; switch (self.section_type) { .debug_rnglists => { const kind = try self.fr.takeByte(); switch (kind) { RLE.end_of_list => return null, RLE.base_addressx => { - const index = try self.fr.takeLeb128(usize); + const index = try self.fr.takeLeb128(u64); self.base_address = try self.di.readDebugAddr(endian, self.compile_unit, index); return try self.next(); }, RLE.startx_endx => { - const start_index = try self.fr.takeLeb128(usize); + const start_index = try self.fr.takeLeb128(u64); const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index); - const end_index = try self.fr.takeLeb128(usize); + const end_index = try self.fr.takeLeb128(u64); const end_addr = try self.di.readDebugAddr(endian, self.compile_unit, end_index); return .{ @@ -766,10 +768,10 @@ const DebugRangeIterator = struct { }; }, RLE.startx_length => { - const start_index = try self.fr.takeLeb128(usize); + const start_index = try self.fr.takeLeb128(u64); const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index); - const len = try self.fr.takeLeb128(usize); + const len = try self.fr.takeLeb128(u64); const end_addr = start_addr + len; return .{ @@ -778,8 +780,8 @@ const DebugRangeIterator = struct { }; }, RLE.offset_pair => { - const start_addr = try self.fr.takeLeb128(usize); - const end_addr = try self.fr.takeLeb128(usize); + const start_addr = try self.fr.takeLeb128(u64); + const end_addr = try self.fr.takeLeb128(u64); // This is the only kind that uses the base address return .{ @@ -788,12 +790,12 @@ const DebugRangeIterator = struct { }; }, RLE.base_address => { - self.base_address = try self.fr.takeInt(usize, endian); + self.base_address = try readAddress(&self.fr, endian, addr_size_bytes); return try self.next(); }, RLE.start_end => { - const start_addr = try self.fr.takeInt(usize, endian); - const end_addr = try 
self.fr.takeInt(usize, endian); + const start_addr = try readAddress(&self.fr, endian, addr_size_bytes); + const end_addr = try readAddress(&self.fr, endian, addr_size_bytes); return .{ .start = start_addr, @@ -801,8 +803,8 @@ const DebugRangeIterator = struct { }; }, RLE.start_length => { - const start_addr = try self.fr.takeInt(usize, endian); - const len = try self.fr.takeLeb128(usize); + const start_addr = try readAddress(&self.fr, endian, addr_size_bytes); + const len = try self.fr.takeLeb128(u64); const end_addr = start_addr + len; return .{ @@ -814,12 +816,13 @@ const DebugRangeIterator = struct { } }, .debug_ranges => { - const start_addr = try self.fr.takeInt(usize, endian); - const end_addr = try self.fr.takeInt(usize, endian); + const start_addr = try readAddress(&self.fr, endian, addr_size_bytes); + const end_addr = try readAddress(&self.fr, endian, addr_size_bytes); if (start_addr == 0 and end_addr == 0) return null; - // This entry selects a new value for the base address - if (start_addr == maxInt(usize)) { + // The entry with start_addr = max_representable_address selects a new value for the base address + const max_representable_address = ~@as(u64, 0) >> @intCast(64 - addr_size_bytes); + if (start_addr == max_representable_address) { self.base_address = end_addr; return try self.next(); } @@ -921,6 +924,7 @@ fn parseDie( abbrev_table: *const Abbrev.Table, format: Format, endian: Endian, + addr_size_bytes: u8, ) ScanError!?Die { const abbrev_code = try fr.takeLeb128(u64); if (abbrev_code == 0) return null; @@ -929,7 +933,7 @@ fn parseDie( const attrs = attrs_buf[0..table_entry.attrs.len]; for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = .{ .id = attr.id, - .value = try parseFormValue(fr, attr.form_id, format, endian, attr.payload), + .value = try parseFormValue(fr, attr.form_id, format, endian, addr_size_bytes, attr.payload), }; return .{ .tag_id = table_entry.tag_id, @@ -954,20 +958,16 @@ fn runLineNumberProgram(d: *Dwarf, gpa: 
Allocator, endian: Endian, compile_unit: const version = try fr.takeInt(u16, endian); if (version < 2) return bad(); - const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ + const addr_size_bytes: u8, const seg_size: u8 = if (version >= 5) .{ try fr.takeByte(), try fr.takeByte(), } else .{ - switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, - }, + compile_unit.addr_size_bytes, 0, }; if (seg_size != 0) return bad(); // unsupported - _ = addr_size; // TODO: ignoring this is incorrect, we should use it to decide address lengths - const prologue_length = try readAddress(&fr, unit_header.format, endian); + const prologue_length = try readFormatSizedInt(&fr, unit_header.format, endian); const prog_start_offset = fr.seek + prologue_length; const minimum_instruction_length = try fr.takeByte(); @@ -1036,7 +1036,7 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { e.* = .{ .path = &.{} }; for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, null); + const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null); switch (ent_fmt.content_type_code) { DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), @@ -1068,7 +1068,7 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { e.* = .{ .path = &.{} }; for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, null); + const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null); switch (ent_fmt.content_type_code) { DW.LNCT.path => e.path = 
try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), @@ -1117,8 +1117,7 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: prog.reset(); }, DW.LNE.set_address => { - const addr = try fr.takeInt(usize, endian); - prog.address = addr; + prog.address = try readAddress(&fr, endian, addr_size_bytes); }, DW.LNE.define_file => { const path = try fr.takeSentinel(0); @@ -1150,7 +1149,7 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: prog.basic_block = false; }, DW.LNS.advance_pc => { - const arg = try fr.takeLeb128(usize); + const arg = try fr.takeLeb128(u64); prog.address += arg * minimum_instruction_length; }, DW.LNS.advance_line => { @@ -1258,13 +1257,13 @@ fn readDebugAddr(di: Dwarf, endian: Endian, compile_unit: *const CompileUnit, in const addr_size = debug_addr[compile_unit.addr_base - 2]; const seg_size = debug_addr[compile_unit.addr_base - 1]; - const byte_offset = @as(usize, @intCast(compile_unit.addr_base + (addr_size + seg_size) * index)); + const byte_offset = compile_unit.addr_base + (addr_size + seg_size) * index; if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { - 1 => debug_addr[byte_offset], - 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], endian), - 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], endian), - 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], endian), + 1 => debug_addr[@intCast(byte_offset)], + 2 => mem.readInt(u16, debug_addr[@intCast(byte_offset)..][0..2], endian), + 4 => mem.readInt(u32, debug_addr[@intCast(byte_offset)..][0..4], endian), + 8 => mem.readInt(u64, debug_addr[@intCast(byte_offset)..][0..8], endian), else => bad(), }; } @@ -1274,17 +1273,18 @@ fn parseFormValue( form_id: u64, format: Format, endian: Endian, + addr_size_bytes: u8, implicit_const: ?i64, ) ScanError!FormValue { return switch (form_id) { // DWARF5.pdf page 213: the size of this value is 
encoded in the // compilation unit header as address size. - FORM.addr => .{ .addr = try readAddress(r, nativeFormat(), endian) }, + FORM.addr => .{ .addr = try readAddress(r, endian, addr_size_bytes) }, FORM.addrx1 => .{ .addrx = try r.takeByte() }, FORM.addrx2 => .{ .addrx = try r.takeInt(u16, endian) }, FORM.addrx3 => .{ .addrx = try r.takeInt(u24, endian) }, FORM.addrx4 => .{ .addrx = try r.takeInt(u32, endian) }, - FORM.addrx => .{ .addrx = try r.takeLeb128(usize) }, + FORM.addrx => .{ .addrx = try r.takeLeb128(u64) }, FORM.block1 => .{ .block = try r.take(try r.takeByte()) }, FORM.block2 => .{ .block = try r.take(try r.takeInt(u16, endian)) }, @@ -1301,7 +1301,7 @@ fn parseFormValue( FORM.exprloc => .{ .exprloc = try r.take(try r.takeLeb128(usize)) }, FORM.flag => .{ .flag = (try r.takeByte()) != 0 }, FORM.flag_present => .{ .flag = true }, - FORM.sec_offset => .{ .sec_offset = try readAddress(r, format, endian) }, + FORM.sec_offset => .{ .sec_offset = try readFormatSizedInt(r, format, endian) }, FORM.ref1 => .{ .ref = try r.takeByte() }, FORM.ref2 => .{ .ref = try r.takeInt(u16, endian) }, @@ -1309,18 +1309,18 @@ fn parseFormValue( FORM.ref8 => .{ .ref = try r.takeInt(u64, endian) }, FORM.ref_udata => .{ .ref = try r.takeLeb128(u64) }, - FORM.ref_addr => .{ .ref_addr = try readAddress(r, format, endian) }, + FORM.ref_addr => .{ .ref_addr = try readFormatSizedInt(r, format, endian) }, FORM.ref_sig8 => .{ .ref = try r.takeInt(u64, endian) }, FORM.string => .{ .string = try r.takeSentinel(0) }, - FORM.strp => .{ .strp = try readAddress(r, format, endian) }, + FORM.strp => .{ .strp = try readFormatSizedInt(r, format, endian) }, FORM.strx1 => .{ .strx = try r.takeByte() }, FORM.strx2 => .{ .strx = try r.takeInt(u16, endian) }, FORM.strx3 => .{ .strx = try r.takeInt(u24, endian) }, FORM.strx4 => .{ .strx = try r.takeInt(u32, endian) }, FORM.strx => .{ .strx = try r.takeLeb128(usize) }, - FORM.line_strp => .{ .line_strp = try readAddress(r, format, endian) }, - 
FORM.indirect => parseFormValue(r, try r.takeLeb128(u64), format, endian, implicit_const), + FORM.line_strp => .{ .line_strp = try readFormatSizedInt(r, format, endian) }, + FORM.indirect => parseFormValue(r, try r.takeLeb128(u64), format, endian, addr_size_bytes, implicit_const), FORM.implicit_const => .{ .sdata = implicit_const orelse return bad() }, FORM.loclistx => .{ .loclistx = try r.takeLeb128(u64) }, FORM.rnglistx => .{ .rnglistx = try r.takeLeb128(u64) }, @@ -1464,20 +1464,24 @@ pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) }; } -fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { - // MLUGG TODO FIX BEFORE MERGE: this function is slightly bogus. addresses have a byte width which is independent of the `dwarf.Format`! +/// DWARF5 7.4: "In the 32-bit DWARF format, all values that represent lengths of DWARF sections and +/// offsets relative to the beginning of DWARF sections are represented using four bytes. In the +/// 64-bit DWARF format, all values that represent lengths of DWARF sections and offsets relative to +/// the beginning of DWARF sections are represented using eight bytes". +/// +/// This function is for reading such values. +fn readFormatSizedInt(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { return switch (format) { .@"32" => try r.takeInt(u32, endian), .@"64" => try r.takeInt(u64, endian), }; } -fn nativeFormat() std.dwarf.Format { - // MLUGG TODO FIX BEFORE MERGE: this is nonsensical. this is neither what `dwarf.Format` is for, nor does it make sense to check the NATIVE FUCKING FORMAT - // when parsing ARBITRARY DWARF. 
- return switch (@sizeOf(usize)) { - 4 => .@"32", - 8 => .@"64", - else => @compileError("unsupported @sizeOf(usize)"), +fn readAddress(r: *Reader, endian: Endian, addr_size_bytes: u8) !u64 { + return switch (addr_size_bytes) { + 2 => try r.takeInt(u16, endian), + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), + else => return bad(), }; } diff --git a/lib/std/debug/Dwarf/ElfModule.zig b/lib/std/debug/Dwarf/ElfModule.zig index 68ebbb90f4f2..02b94e580de5 100644 --- a/lib/std/debug/Dwarf/ElfModule.zig +++ b/lib/std/debug/Dwarf/ElfModule.zig @@ -155,12 +155,10 @@ pub fn load( } break :blk .{ .data = try decompressed_section.toOwnedSlice(gpa), - .virtual_address = shdr.sh_addr, .owned = true, }; } else .{ .data = section_bytes, - .virtual_address = shdr.sh_addr, .owned = false, }; } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 30089c1896a6..bb4cb6c7ca0f 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -465,7 +465,6 @@ const Module = switch (native_os) { const section_bytes = mapped_mem[sect.offset..][0..sect.size]; sections[section_index] = .{ .data = section_bytes, - .virtual_address = @intCast(sect.addr), .owned = false, }; } @@ -751,7 +750,6 @@ const Module = switch (native_os) { di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { break :blk .{ .data = try coff_obj.getSectionDataAlloc(section_header, gpa), - .virtual_address = section_header.virtual_address, .owned = true, }; } else null; From ba3f38959a31ace9af1816f16cda6c0717518b7f Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 18:36:05 +0100 Subject: [PATCH 15/85] split SelfInfo into a file per impl --- lib/std/debug/Dwarf/abi.zig | 2 +- lib/std/debug/SelfInfo.zig | 1581 +++------------------- lib/std/debug/SelfInfo/DarwinModule.zig | 801 +++++++++++ lib/std/debug/SelfInfo/ElfModule.zig | 144 ++ lib/std/debug/SelfInfo/WindowsModule.zig | 255 ++++ 5 files changed, 1411 insertions(+), 1372 deletions(-) create mode 100644 lib/std/debug/SelfInfo/DarwinModule.zig create mode 100644 lib/std/debug/SelfInfo/ElfModule.zig create mode 100644 lib/std/debug/SelfInfo/WindowsModule.zig diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index 79f7e87217b9..9fca02e38222 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -7,7 +7,7 @@ const Arch = std.Target.Cpu.Arch; /// Tells whether unwinding for this target is supported by the Dwarf standard. /// -/// See also `std.debug.SelfInfo.supportsUnwinding` which tells whether the Zig +/// See also `std.debug.SelfInfo.supports_unwinding` which tells whether the Zig /// standard library has a working implementation of unwinding for this target. 
pub fn supportsUnwinding(target: *const std.Target) bool { return switch (target.cpu.arch) { diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index bb4cb6c7ca0f..32382ac1e1b7 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -11,18 +11,8 @@ const native_arch = builtin.cpu.arch; const std = @import("../std.zig"); const mem = std.mem; const Allocator = std.mem.Allocator; -const windows = std.os.windows; -const macho = std.macho; -const fs = std.fs; -const coff = std.coff; const assert = std.debug.assert; -const posix = std.posix; -const elf = std.elf; const Dwarf = std.debug.Dwarf; -const Pdb = std.debug.Pdb; -const File = std.fs.File; -const math = std.math; -const testing = std.testing; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; @@ -31,6 +21,7 @@ const SelfInfo = @This(); modules: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), lookup_cache: Module.LookupCache, +/// Indicates whether the `SelfInfo` implementation has support for this target. pub const target_supported: bool = switch (native_os) { .linux, .freebsd, @@ -45,9 +36,39 @@ pub const target_supported: bool = switch (native_os) { else => false, }; +/// Indicates whether unwinding for the host is *implemented* here in the Zig +/// standard library. +/// +/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports +/// unwinding on a target *in theory*. 
+pub const supports_unwinding: bool = switch (builtin.target.cpu.arch) { + .x86 => switch (builtin.target.os.tag) { + .linux, .netbsd, .solaris, .illumos => true, + else => false, + }, + .x86_64 => switch (builtin.target.os.tag) { + .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, + else => false, + }, + .arm, .armeb, .thumb, .thumbeb => switch (builtin.target.os.tag) { + .linux => true, + else => false, + }, + .aarch64, .aarch64_be => switch (builtin.target.os.tag) { + .linux, .netbsd, .freebsd, .macos, .ios => true, + else => false, + }, + // Unwinding is possible on other targets but this implementation does + // not support them...yet! + else => false, +}; +comptime { + if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(&builtin.target)); +} + pub const init: SelfInfo = .{ .modules = .empty, - .lookup_cache = if (Module.LookupCache != void) .init, + .lookup_cache = .init, }; pub fn deinit(self: *SelfInfo) void { @@ -59,19 +80,14 @@ pub fn deinit(self: *SelfInfo) void { self.allocator.destroy(mdi); } self.modules.deinit(self.allocator); - if (native_os == .windows) { - for (self.modules.items) |module| { - self.allocator.free(module.name); - if (module.mapped_file) |mapped_file| mapped_file.deinit(); - } - self.modules.deinit(self.allocator); - } } pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { - comptime assert(target_supported); + comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.load_offset); + self.modules.lockPointers(); + defer self.modules.unlockPointers(); if (!gop.found_existing) gop.value_ptr.* = .init; return module.unwindFrame(gpa, gop.value_ptr, context); } @@ -80,417 +96,39 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. 
comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); const gop = try self.modules.getOrPut(gpa, module.key()); + self.modules.lockPointers(); + defer self.modules.unlockPointers(); if (!gop.found_existing) gop.value_ptr.* = .init; return module.getSymbolAtAddress(gpa, gop.value_ptr, address); } -/// Returns the module name for a given address. -/// This can be called when getModuleForAddress fails, so implementations should provide -/// a path that doesn't rely on any side-effects of a prior successful module lookup. pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); return module.name; } +/// This type contains the target-specific implementation. It must expose the following declarations: +/// +/// * `LookupCache: type` +/// * `LookupCache.init: LookupCache` +/// * `lookup: fn (*LookupCache, Allocator, address: usize) !Module` +/// * `key: fn (*const Module) usize` +/// * `DebugInfo: type` +/// * `DebugInfo.init: DebugInfo` +/// * `getSymbolAtAddress: fn (*const Module, Allocator, *DebugInfo, address: usize) !std.debug.Symbol` +/// +/// If unwinding is supported on this target, it must additionally expose the following declarations: +/// +/// * `unwindFrame: fn (*const Module, Allocator, *DebugInfo, *UnwindContext) !usize` const Module = switch (native_os) { else => {}, // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... - .macos, .ios, .watchos, .tvos, .visionos => struct { - /// The runtime address where __TEXT is loaded. 
- text_base: usize, - load_offset: usize, - name: []const u8, - fn key(m: *const Module) usize { - return m.text_base; - } - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - _ = cache; - _ = gpa; - const image_count = std.c._dyld_image_count(); - for (0..image_count) |image_idx| { - const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; - const text_base = @intFromPtr(header); - if (address < text_base) continue; - const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); - - // Find the __TEXT segment - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const text_segment_cmd = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break segment_cmd; - } else continue; - - const seg_start = load_offset + text_segment_cmd.vmaddr; - assert(seg_start == text_base); - const seg_end = seg_start + text_segment_cmd.vmsize; - if (address < seg_start or address >= seg_end) continue; - - // We've found the matching __TEXT segment. This is the image we need. 
- return .{ - .text_base = text_base, - .load_offset = load_offset, - .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), - }; - } - return error.MissingDebugInfo; - } - fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - const mapped_mem = try mapDebugInfoFile(module.name); - errdefer posix.munmap(mapped_mem); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command = symtab: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - return error.MissingDebugInfo; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. 
symtab.strsize - 1 :0]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) continue; - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @intCast(sym.n_value); - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - symbols.appendAssumeCapacity(last_sym); - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => return error.MissingDebugInfo, - .oso_close => {}, - else => return error.InvalidDebugInfo, - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. 
- mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - di.full = .{ - .mapped_memory = mapped_mem, - .symbols = symbols_slice, - .strings = strings, - .ofiles = .empty, - }; - } - fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - _ = gpa; - - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); - - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break load_cmd.getSections(); - } else unreachable; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - for (sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); - unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); - eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - di.unwind = .{ - .unwind_info = unwind_info, - .eh_frame = eh_frame, - }; - } - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.full == null) try module.loadLocationInfo(gpa, di); - const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{ - .name = null, - .compile_unit_name = null, - .source_location = null, - }; - - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the 
DWARF infos - const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0); - const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0); - - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, - }; - - const o_file: *DebugInfo.OFile = of: { - const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path); - if (!gop.found_existing) { - gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { - defer _ = di.full.?.ofiles.pop().?; - switch (err) { - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return sym_only_result, - else => |e| return e, - } - }; - } - break :of gop.value_ptr; - }; - - const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, - else => |e| return e, - }; - - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, - .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, - native_endian, - std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), - compile_unit, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - }, - .source_location = o_file.dwarf.getLineNumberInfo( - gpa, - native_endian, - compile_unit, - symbol_ofile_vaddr + address_symbol_offset, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } - fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo; - // MLUGG TODO: inline? 
- return unwindFrameMachO( - module.text_base, - module.load_offset, - context, - unwind_info, - di.unwind.?.eh_frame, - ); - } - const LookupCache = void; - const DebugInfo = struct { - unwind: ?struct { - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8, - eh_frame: ?[]const u8, - }, - // MLUGG TODO: awful field name - full: ?struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: [:0]const u8, - // MLUGG TODO: this could use an adapter to just index straight into `strings`! - ofiles: std.StringArrayHashMapUnmanaged(OFile), - }, - - const init: DebugInfo = .{ - .unwind = null, - .full = null, - }; - - const OFile = struct { - dwarf: Dwarf, - // MLUGG TODO: this could use an adapter to just index straight into the strtab! - addr_table: std.StringArrayHashMapUnmanaged(u64), - }; - - fn deinit(di: *DebugInfo, gpa: Allocator) void { - for (di.full.ofiles.values()) |*ofile| { - ofile.dwarf.deinit(gpa); - ofile.addr_table.deinit(gpa); - } - di.full.ofiles.deinit(); - gpa.free(di.full.symbols); - posix.munmap(di.full.mapped_memory); - } - - fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => 
symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; - defer addr_table.deinit(gpa); - try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = addr_table.getOrPutAssumeCapacity(sym_name); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.value_ptr.* = sym.n_value; - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .dwarf = dwarf, - .addr_table = addr_table.move(), - }; - } - }; - }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => @import("SelfInfo/ElfModule.zig"), + .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"), + .uefi, .windows => @import("SelfInfo/WindowsModule.zig"), .wasi, .emscripten => struct { - const LookupCache = void; - const DebugInfo = struct { - const init: DebugInfo = .{}; + const LookupCache = struct { + const init: LookupCache = .{}; }; fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { _ = cache; @@ -498,6 +136,9 @@ const Module = switch (native_os) { _ = address; @panic("TODO implement lookup module for Wasm"); } + const DebugInfo = struct { + const init: DebugInfo = .{}; + }; fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { _ = module; _ = gpa; @@ -506,430 +147,9 @@ const Module = switch (native_os) { unreachable; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - load_offset: usize, - name: []const u8, - build_id: ?[]const u8, - gnu_eh_frame: ?[]const u8, - const 
LookupCache = void; - const DebugInfo = struct { - loaded_elf: ?Dwarf.ElfModule, - unwind: ?Dwarf.Unwind, - const init: DebugInfo = .{ - .loaded_elf = null, - .unwind = null, - }; - }; - fn key(m: Module) usize { - return m.load_offset; - } - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - _ = cache; - _ = gpa; - if (native_os == .haiku) @panic("TODO implement lookup module for Haiku"); - const DlIterContext = struct { - /// input - address: usize, - /// output - module: Module, - - fn callback(info: *posix.dl_phdr_info, size: usize, context: *@This()) !void { - _ = size; - // The base address is too high - if (context.address < info.addr) - return; - - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.module = .{ - .load_offset = info.addr, - // Android libc uses NULL instead of "" to mark the main program - .name = mem.sliceTo(info.name, 0) orelse "", - .build_id = null, - .gnu_eh_frame = null, - }; - break; - } - } else return; - - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); - const name_size = r.takeInt(u32, native_endian) catch continue; - const desc_size = r.takeInt(u32, native_endian) catch continue; - const note_type = r.takeInt(u32, native_endian) catch continue; - const name = r.take(name_size) catch continue; - if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, name, "GNU\x00")) continue; - const desc = r.take(desc_size) catch continue; - context.module.build_id = desc; - }, - 
elf.PT_GNU_EH_FRAME => { - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; - }, - else => {}, - } - } - - // Stop the iteration - return error.Found; - } - }; - var ctx: DlIterContext = .{ - .address = address, - .module = undefined, - }; - posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { - error.Found => return ctx.module, - }; - return error.MissingDebugInfo; - } - fn loadLocationInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - if (module.name.len > 0) { - di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = module.name, - }, module.build_id, null, null, null) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.Overflow => return error.InvalidDebugInfo, - else => |e| return e, - }; - } else { - const path = try std.fs.selfExePathAlloc(gpa); - defer gpa.free(path); - di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = path, - }, module.build_id, null, null, null) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.Overflow => return error.InvalidDebugInfo, - else => |e| return e, - }; - } - } - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.loaded_elf == null) try module.loadLocationInfo(gpa, di); - const vaddr = address - module.load_offset; - return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); - } - fn loadUnwindInfo(module: *const Module, gpa: Allocator, di: *Module.DebugInfo) !void { - const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file - const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; - const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), 
native_endian); - di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); - try di.unwind.?.prepareLookup(gpa, @sizeOf(usize), native_endian); - } - fn unwindFrame(module: *const Module, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - return unwindFrameDwarf(&di.unwind.?, module.load_offset, context, null); - } - }, - .uefi, .windows => struct { - base_address: usize, - size: usize, - name: []const u8, - handle: windows.HMODULE, - fn key(m: Module) usize { - return m.base_address; - } - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - if (lookupInCache(cache, address)) |m| return m; - { - // Check a new module hasn't been loaded - cache.modules.clearRetainingCapacity(); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - return windows.unexpectedError(windows.GetLastError()); - } - defer windows.CloseHandle(handle); - - var entry: windows.MODULEENTRY32 = undefined; - entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &entry) != 0) { - try cache.modules.append(gpa, entry); - while (windows.kernel32.Module32Next(handle, &entry) != 0) { - try cache.modules.append(gpa, entry); - } - } - } - if (lookupInCache(cache, address)) |m| return m; - return error.MissingDebugInfo; - } - fn lookupInCache(cache: *const LookupCache, address: usize) ?Module { - for (cache.modules.items) |*entry| { - const base_address = @intFromPtr(entry.modBaseAddr); - if (address >= base_address and address < base_address + entry.modBaseSize) { - return .{ - .base_address = base_address, - .size = entry.modBaseSize, - .name = std.mem.sliceTo(&entry.szModule, 0), - .handle = entry.hModule, - }; - } - } - return null; - } - fn loadLocationInfo(module: *const Module, gpa: Allocator, 
di: *DebugInfo) !void { - const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); - const mapped = mapped_ptr[0..module.size]; - var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - name_buffer[4..], - windows.PATH_MAX_WIDE, - ); - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, - }; - errdefer coff_file.close(); - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - §ion_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
- windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - var coff_len: usize = 0; - var section_view_ptr: [*]const u8 = undefined; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(§ion_view_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); - const section_view = section_view_ptr[0..coff_len]; - coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; - di.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - di.coff_image_base = coff_obj.getImageBase(); - - if (coff_obj.getSectionByName(".debug_info")) |_| { - di.dwarf = .{}; - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, gpa), - .owned = true, - }; - } else null; - } - - try di.dwarf.?.open(gpa, native_endian); - } - - if (try coff_obj.getPdbPath()) |raw_path| pdb: { - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(gpa); - defer gpa.free(self_dir); - break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) gpa.free(path); - - di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => break :pdb, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); - } - - di.loaded = true; - } - const LookupCache = struct { - modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), - const init: LookupCache = .{ .modules = .empty }; - }; - const DebugInfo = struct { - loaded: bool, - - coff_image_base: u64, - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - fn deinit(mapped: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); - windows.CloseHandle(mapped.section_handle); - mapped.file.close(); - } - }, - - dwarf: ?Dwarf, - - pdb: ?Pdb, - /// Populated iff `pdb != null`; otherwise `&.{}`. 
- coff_section_headers: []coff.SectionHeader, - - const init: DebugInfo = .{ - .loaded = false, - .coff_image_base = undefined, - .mapped_file = null, - .dwarf = null, - .pdb = null, - .coff_section_headers = &.{}, - }; - - fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (di.dwarf) |*dwarf| dwarf.deinit(gpa); - if (di.pdb) |*pdb| pdb.deinit(); - gpa.free(di.coff_section_headers); - if (di.mapped_file) |mapped| mapped.deinit(); - } - - fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.section > di.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. - coff_section = &di.coff_section_headers[sect_contrib.section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.offset; - const vaddr_end = vaddr_start + sect_contrib.size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.module_index; - } - } else { - // we have no information to add to the address - return null; - }; - - const module = try di.pdb.?.getModule(mod_index) orelse return error.InvalidDebugInfo; - - return .{ - .name = di.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ), - .compile_unit_name = fs.path.basename(module.obj_file_name), - .source_location = try di.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ), - }; - } - }; - - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (!di.loaded) try module.loadLocationInfo(gpa, di); - // Translate the runtime address into a virtual address into the module - const vaddr = address - module.base_address; - - if (di.pdb != null) { - if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; - } - - if (di.dwarf) |*dwarf| 
{ - const dwarf_address = vaddr + di.coff_image_base; - return dwarf.getSymbol(gpa, native_endian, dwarf_address); - } - - return error.MissingDebugInfo; - } - }, -}; - -const MachoSymbol = struct { - strx: u32, - addr: u64, - size: u32, - ofile: u32, - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } - /// Assumes that `symbols` is sorted in order of ascending `addr`. - fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - if (symbols.len == 0) return null; // no potential match - if (address < symbols[0].addr) return null; // address is before the lowest-address symbol - var left: usize = 0; - var len: usize = symbols.len; - while (len > 1) { - const mid = left + len / 2; - if (address < symbols[mid].addr) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - return &symbols[left]; - } - - test find { - const symbols: []const MachoSymbol = &.{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, find(symbols, 0)); - try testing.expectEqual(null, find(symbols, 99)); - try testing.expectEqual(&symbols[0], find(symbols, 100).?); - try testing.expectEqual(&symbols[0], find(symbols, 150).?); - try testing.expectEqual(&symbols[0], find(symbols, 199).?); - - try testing.expectEqual(&symbols[1], find(symbols, 200).?); - try testing.expectEqual(&symbols[1], find(symbols, 250).?); - try testing.expectEqual(&symbols[1], find(symbols, 299).?); - - try testing.expectEqual(&symbols[2], find(symbols, 300).?); - try testing.expectEqual(&symbols[2], find(symbols, 301).?); - try testing.expectEqual(&symbols[2], find(symbols, 5000).?); - } }; test { - _ = MachoSymbol; + _ = Module; } pub const UnwindContext = struct { @@ -944,6 +164,7 @@ pub const 
UnwindContext = struct { pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) !UnwindContext { comptime assert(supports_unwinding); + const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; const pc = stripInstructionPtrAuthCode( (try regValueNative(thread_context, ip_reg_num, null)).*, ); @@ -970,7 +191,7 @@ pub const UnwindContext = struct { } pub fn getFp(self: *const UnwindContext) !usize { - return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + return (try regValueNative(self.thread_context, Dwarf.abi.fpRegNum(native_arch, self.reg_context), self.reg_context)).*; } /// Resolves the register rule and places the result into `out` (see regBytes) @@ -1019,7 +240,7 @@ pub const UnwindContext = struct { .register => |register| { const src = try regBytes(context.thread_context, register, context.reg_context); if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); + @memcpy(out, src); }, .expression => |expression| { context.stack_machine.reset(); @@ -1043,553 +264,171 @@ pub const UnwindContext = struct { .architectural => return error.UnimplementedRegisterRule, } } -}; - -/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. -/// This function clears these signature bits to make the pointer usable. -pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (native_arch.isAARCH64()) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : .{ .x16 = true }); - } - - return ptr; -} - -/// Unwind a stack frame using DWARF unwinding info, updating the register context. 
-/// -/// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. -/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter -/// may require lazily loading the data in those sections. -/// -/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info -/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. -fn unwindFrameDwarf( - unwind: *const Dwarf.Unwind, - load_offset: usize, - context: *UnwindContext, - explicit_fde_offset: ?usize, -) !usize { - if (!supports_unwinding) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; - - const pc_vaddr = context.pc - load_offset; - const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( - pc_vaddr, - @sizeOf(usize), - native_endian, - ) orelse return error.MissingDebugInfo; - const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); + /// Unwind a stack frame using DWARF unwinding info, updating the register context. + /// + /// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. + /// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter + /// may require lazily loading the data in those sections. + /// + /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info + /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. 
+ pub fn unwindFrameDwarf( + context: *UnwindContext, + unwind: *const Dwarf.Unwind, + load_offset: usize, + explicit_fde_offset: ?usize, + ) !usize { + if (!supports_unwinding) return error.UnsupportedCpuArchitecture; + if (context.pc == 0) return 0; + + const pc_vaddr = context.pc - load_offset; + + const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); + + // Check if this FDE *actually* includes the address. + if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; + + // Do not set `compile_unit` because the spec states that CFIs + // may not reference other debug sections anyway. + var expression_context: Dwarf.expression.Context = .{ + .format = format, + .thread_context = context.thread_context, + .reg_context = context.reg_context, + .cfa = context.cfa, + }; - // Check if this FDE *actually* includes the address. - if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; + context.vm.reset(); + context.reg_context.eh_frame = cie.version != 4; + context.reg_context.is_macho = native_os.isDarwin(); - // Do not set `compile_unit` because the spec states that CFIs - // may not reference other debug sections anyway. 
- var expression_context: Dwarf.expression.Context = .{ - .format = format, - .thread_context = context.thread_context, - .reg_context = context.reg_context, - .cfa = context.cfa, - }; + const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); + context.cfa = switch (row.cfa.rule) { + .val_offset => |offset| blk: { + const register = row.cfa.register orelse return error.InvalidCFARule; + const value = (try regValueNative(context.thread_context, register, context.reg_context)).*; + break :blk try applyOffset(value, offset); + }, + .expression => |expr| blk: { + context.stack_machine.reset(); + const value = try context.stack_machine.run( + expr, + context.gpa, + expression_context, + context.cfa, + ); - context.vm.reset(); - context.reg_context.eh_frame = cie.version != 4; - context.reg_context.is_macho = native_os.isDarwin(); + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + }, + else => return error.InvalidCFARule, + }; - const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); - context.cfa = switch (row.cfa.rule) { - .val_offset => |offset| blk: { - const register = row.cfa.register orelse return error.InvalidCFARule; - const value = mem.readInt(usize, (try regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); - break :blk try applyOffset(value, offset); - }, - .expression => |expr| blk: { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expr, - context.gpa, - expression_context, - context.cfa, - ); + expression_context.cfa = context.cfa; - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - }, - else => return error.InvalidCFARule, - }; + // Buffering the modifications is done because 
copying the thread context is not portable, + // some implementations (ie. darwin) use internal pointers to the mcontext. + var arena: std.heap.ArenaAllocator = .init(context.gpa); + defer arena.deinit(); + const update_arena = arena.allocator(); - expression_context.cfa = context.cfa; + const RegisterUpdate = struct { + // Backed by thread_context + dest: []u8, + // Backed by arena + src: []const u8, + prev: ?*@This(), + }; - // Buffering the modifications is done because copying the thread context is not portable, - // some implementations (ie. darwin) use internal pointers to the mcontext. - var arena: std.heap.ArenaAllocator = .init(context.gpa); - defer arena.deinit(); - const update_arena = arena.allocator(); + var update_tail: ?*RegisterUpdate = null; + var has_return_address = true; + for (context.vm.rowColumns(row)) |column| { + if (column.register) |register| { + if (register == cie.return_address_register) { + has_return_address = column.rule != .undefined; + } - const RegisterUpdate = struct { - // Backed by thread_context - dest: []u8, - // Backed by arena - src: []const u8, - prev: ?*@This(), - }; + const dest = try regBytes(context.thread_context, register, context.reg_context); + const src = try update_arena.alloc(u8, dest.len); + try context.resolveRegisterRule(column, expression_context, src); - var update_tail: ?*RegisterUpdate = null; - var has_return_address = true; - for (context.vm.rowColumns(row)) |column| { - if (column.register) |register| { - if (register == cie.return_address_register) { - has_return_address = column.rule != .undefined; + const new_update = try update_arena.create(RegisterUpdate); + new_update.* = .{ + .dest = dest, + .src = src, + .prev = update_tail, + }; + update_tail = new_update; } + } - const dest = try regBytes(context.thread_context, register, context.reg_context); - const src = try update_arena.alloc(u8, dest.len); - try context.resolveRegisterRule(column, expression_context, src); + // On all implemented 
architectures, the CFA is defined as being the previous frame's SP + (try regValueNative(context.thread_context, Dwarf.abi.spRegNum(native_arch, context.reg_context), context.reg_context)).* = context.cfa.?; - const new_update = try update_arena.create(RegisterUpdate); - new_update.* = .{ - .dest = dest, - .src = src, - .prev = update_tail, - }; - update_tail = new_update; + while (update_tail) |tail| { + @memcpy(tail.dest, tail.src); + update_tail = tail.prev; } - } - // On all implemented architectures, the CFA is defined as being the previous frame's SP - (try regValueNative(context.thread_context, spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; + if (has_return_address) { + context.pc = stripInstructionPtrAuthCode((try regValueNative( + context.thread_context, + cie.return_address_register, + context.reg_context, + )).*); + } else { + context.pc = 0; + } - while (update_tail) |tail| { - @memcpy(tail.dest, tail.src); - update_tail = tail.prev; + const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; + (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc; + + // The call instruction will have pushed the address of the instruction that follows the call as the return address. + // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in + // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up + // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, + // we subtract one so that the next lookup is guaranteed to land inside the + // + // The exception to this rule is signal frames, where we return execution would be returned to the instruction + // that triggered the handler. 
+ const return_address = context.pc; + if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1; + + return return_address; } - - if (has_return_address) { - context.pc = stripInstructionPtrAuthCode(mem.readInt(usize, (try regBytes( - context.thread_context, - cie.return_address_register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian)); - } else { - context.pc = 0; + /// Since register rules are applied (usually) during a panic, + /// checked addition / subtraction is used so that we can return + /// an error and fall back to FP-based unwinding. + fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); } - - (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc; - - // The call instruction will have pushed the address of the instruction that follows the call as the return address. - // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in - // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up - // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, - // we subtract one so that the next lookup is guaranteed to land inside the - // - // The exception to this rule is signal frames, where we return execution would be returned to the instruction - // that triggered the handler. 
- const return_address = context.pc; - if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1; - - return return_address; -} - -fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { - return Dwarf.abi.fpRegNum(native_arch, reg_context); -} - -fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { - return Dwarf.abi.spRegNum(native_arch, reg_context); -} - -const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; - -/// Tells whether unwinding for the host is implemented. -pub const supports_unwinding = supportsUnwinding(&builtin.target); - -comptime { - if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(&builtin.target)); -} - -/// Tells whether unwinding for this target is *implemented* here in the Zig -/// standard library. -/// -/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports -/// unwinding on that target *in theory*. -pub fn supportsUnwinding(target: *const std.Target) bool { - return switch (target.cpu.arch) { - .x86 => switch (target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm, .armeb, .thumb, .thumbeb => switch (target.os.tag) { - .linux => true, - else => false, - }, - .aarch64, .aarch64_be => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - // Unwinding is possible on other targets but this implementation does - // not support them...yet! - else => false, - }; -} - -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. 
-fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); -} - -/// Uses `mmap` to map the file at `opt_path` (or, if `null`, the self executable image) into memory. -fn mapDebugInfoFile(opt_path: ?[]const u8) ![]align(std.heap.page_size_min) const u8 { - const open_result = if (opt_path) |path| - fs.cwd().openFile(path, .{}) - else - fs.openSelfExe(.{}); - const file = open_result catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, - }; - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; - - return posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); -} - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-fn unwindFrameMachO( - text_base: usize, - load_offset: usize, - context: *UnwindContext, - unwind_info: []const u8, - opt_eh_frame: ?[]const u8, -) !usize { - if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; - const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); - - const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); - if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; - const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); - if (indices.len == 0) return error.MissingUnwindInfo; - - // offset of the PC into the `__TEXT` segment - const pc_text_offset = context.pc - text_base; - - const start_offset: u32, const first_level_offset: u32 = index: { - var left: usize = 0; - var len: usize = indices.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < indices[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } + /// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. + /// This function clears these signature bits to make the pointer usable. + pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch.isAARCH64()) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : .{ .x16 = true }); } - break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; - }; - // An offset of 0 is a sentinel indicating a range does not have unwind info. 
- if (start_offset == 0) return error.MissingUnwindInfo; - - const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; - const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], - ); - - if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; - const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; - const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; - const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < entries[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :entry .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return 
error.InvalidUnwindInfo; - const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; - const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - const entry = entries[left]; - - const function_offset = first_level_offset + entry.funcOffset; - if (entry.encodingIndex < common_encodings.len) { - break :entry .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } - - const local_index = entry.encodingIndex - common_encodings.len; - const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidUnwindInfo; - const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :entry .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; - const encoding: 
macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => ip: { - const frame = encoding.value.x86_64.frame; - - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - const regs: [5]u3 = .{ - frame.reg0, - frame.reg1, - frame.reg2, - frame.reg3, - frame.reg4, - }; - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :ip new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => ip: { - const frameless = encoding.value.x86_64.frameless; - - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const stack_size: usize = stack_size: { - if (encoding.mode.x86_64 == .STACK_IMMD) { - break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); - } - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
- const sub_offset_addr = - text_base + - entry.function_offset + - frameless.stack.indirect.sub_offset; - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = frameless.stack_reg_count; - const ip_ptr = ip_ptr: { - var digits: [6]u3 = undefined; - var accumulator: usize = frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - var registers: [6]u3 = undefined; - var used_indices: [6]bool = @splat(false); - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) 
|used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - registers[i] = @intCast(unused_index + 1); - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :ip_ptr reg_addr; - }; - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :ip new_ip; - }, - .DWARF => { - const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; - return unwindFrameDwarf( - &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - load_offset, - context, - @intCast(encoding.value.x86_64.dwarf), - ); - }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => ip: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - break :ip new_ip; - }, - .DWARF => { - const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; - return unwindFrameDwarf( - &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - load_offset, - context, - 
@intCast(encoding.value.x86_64.dwarf), - ); - }, - .FRAME => ip: { - const frame = encoding.value.arm64.frame; - - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const ip_ptr = fp + @sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.x_reg_pairs, field.name) != 0) { - (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - { - const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - { - const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :ip new_ip; - }, - }, - else => comptime unreachable, // unimplemented - }; - - context.pc = stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} + return ptr; + } +}; diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig new file mode 
100644 index 000000000000..d0cb47281f75 --- /dev/null +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -0,0 +1,801 @@ +/// The runtime address where __TEXT is loaded. +text_base: usize, +load_offset: usize, +name: []const u8, + +pub fn key(m: *const DarwinModule) usize { + return m.text_base; +} + +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !DarwinModule { + _ = cache; + _ = gpa; + const image_count = std.c._dyld_image_count(); + for (0..image_count) |image_idx| { + const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; + const text_base = @intFromPtr(header); + if (address < text_base) continue; + const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); + + // Find the __TEXT segment + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const text_segment_cmd = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break segment_cmd; + } else continue; + + const seg_start = load_offset + text_segment_cmd.vmaddr; + assert(seg_start == text_base); + const seg_end = seg_start + text_segment_cmd.vmsize; + if (address < seg_start or address >= seg_end) continue; + + // We've found the matching __TEXT segment. This is the image we need. 
+ return .{ + .text_base = text_base, + .load_offset = load_offset, + .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), + }; + } + return error.MissingDebugInfo; +} +fn loadLocationInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void { + const mapped_mem = try mapDebugInfoFile(module.name); + errdefer posix.munmap(mapped_mem); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + const symtab: macho.symtab_command = symtab: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + return error.MissingDebugInfo; + }; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; + const strings = mapped_mem[symtab.stroff..][0 .. 
symtab.strsize - 1 :0]; + + var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (sym.n_type.bits.is_stab == 0) continue; + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @intCast(sym.n_value); + }, + else => return error.InvalidDebugInfo, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + symbols.appendAssumeCapacity(last_sym); + }, + else => return error.InvalidDebugInfo, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + }, + else => {}, + } + } + + switch (state) { + .init => return error.MissingDebugInfo, + .oso_close => {}, + else => return error.InvalidDebugInfo, + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. 
+ mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); + + di.full = .{ + .mapped_memory = mapped_mem, + .symbols = symbols_slice, + .strings = strings, + .ofiles = .empty, + }; +} +fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void { + _ = gpa; + + const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const sections = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break load_cmd.getSections(); + } else unreachable; + + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; + for (sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + di.unwind = .{ + .unwind_info = unwind_info, + .eh_frame = eh_frame, + }; +} +pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (di.full == null) try module.loadLocationInfo(gpa, di); + const vaddr = address - module.load_offset; + const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{ + .name = null, + .compile_unit_name = null, + .source_location = null, + }; + + // offset of `address` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if we fail to find 
the DWARF infos + const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0); + const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0); + + // If any information is missing, we can at least return this from now on. + const sym_only_result: std.debug.Symbol = .{ + .name = stab_symbol, + .compile_unit_name = null, + .source_location = null, + }; + + const o_file: *DebugInfo.OFile = of: { + const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path); + if (!gop.found_existing) { + gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { + defer _ = di.full.?.ofiles.pop().?; + switch (err) { + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return sym_only_result, + else => |e| return e, + } + }; + } + break :of gop.value_ptr; + }; + + const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result; + + const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, + else => |e| return e, + }; + + return .{ + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, + .compile_unit_name = compile_unit.die.getAttrString( + &o_file.dwarf, + native_endian, + std.dwarf.AT.name, + o_file.dwarf.section(.debug_str), + compile_unit, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + }, + .source_location = o_file.dwarf.getLineNumberInfo( + gpa, + native_endian, + compile_unit, + symbol_ofile_vaddr + address_symbol_offset, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; +} +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); + const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo; + // MLUGG TODO: inline? 
+ return unwindFrameMachO( + module.text_base, + module.load_offset, + context, + unwind_info, + di.unwind.?.eh_frame, + ); +} +/// Unwind a frame using MachO compact unwind info (from __unwind_info). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. +fn unwindFrameMachO( + text_base: usize, + load_offset: usize, + context: *UnwindContext, + unwind_info: []const u8, + opt_eh_frame: ?[]const u8, +) !usize { + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; + const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); + + const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; + const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); + if (indices.len == 0) return error.MissingUnwindInfo; + + // offset of the PC into the `__TEXT` segment + const pc_text_offset = context.pc - text_base; + + const start_offset: u32, const first_level_offset: u32 = index: { + var left: usize = 0; + var len: usize = indices.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < indices[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; + }; + // An offset of 0 is a sentinel indicating a range does not have unwind info. 
+ if (start_offset == 0) return error.MissingUnwindInfo; + + const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; + const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], + ); + + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; + const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < entries[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :entry .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return 
error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + const entry = entries[left]; + + const function_offset = first_level_offset + entry.funcOffset; + if (entry.encodingIndex < common_encodings.len) { + break :entry .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } + + const local_index = entry.encodingIndex - common_encodings.len; + const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidUnwindInfo; + const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :entry .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; + + const encoding: 
macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => ip: { + const frame = encoding.value.x86_64.frame; + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + const regs: [5]u3 = .{ + frame.reg0, + frame.reg1, + frame.reg2, + frame.reg3, + frame.reg4, + }; + for (regs, 0..) |reg, i| { + if (reg == 0) continue; + const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :ip new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => ip: { + const frameless = encoding.value.x86_64.frameless; + + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size: usize = stack_size: { + if (encoding.mode.x86_64 == .STACK_IMMD) { + break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); + } + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
+ const sub_offset_addr = + text_base + + entry.function_offset + + frameless.stack.indirect.sub_offset; + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = frameless.stack_reg_count; + const ip_ptr = ip_ptr: { + var digits: [6]u3 = undefined; + var accumulator: usize = frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + var registers: [6]u3 = undefined; + var used_indices: [6]bool = @splat(false); + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + registers[i] = @intCast(unused_index + 1); + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :ip_ptr reg_addr; + }; + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; + }, + .DWARF => { + const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + return context.unwindFrameDwarf( + &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), + load_offset, + @intCast(encoding.value.x86_64.dwarf), + ); + }, + }, + .aarch64, .aarch64_be => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => ip: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + break :ip new_ip; + }, + .DWARF => { + const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + return context.unwindFrameDwarf( + &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), + load_offset, + 
@intCast(encoding.value.x86_64.dwarf), + ); + }, + .FRAME => ip: { + const frame = encoding.value.arm64.frame; + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const ip_ptr = fp + @sizeOf(usize); + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; + }, + }, + else => comptime unreachable, // unimplemented + }; + + context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} +/// No cache needed, because `_dyld_get_image_header` etc are already fast. 
+pub const LookupCache = struct { + pub const init: LookupCache = .{}; +}; +pub const DebugInfo = struct { + unwind: ?struct { + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + }, + // MLUGG TODO: awful field name + full: ?struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + // MLUGG TODO: this could use an adapter to just index straight into `strings`! + ofiles: std.StringArrayHashMapUnmanaged(OFile), + }, + + pub const init: DebugInfo = .{ + .unwind = null, + .full = null, + }; + + const OFile = struct { + dwarf: Dwarf, + // MLUGG TODO: this could use an adapter to just index straight into the strtab! + addr_table: std.StringArrayHashMapUnmanaged(u64), + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + for (di.full.ofiles.values()) |*ofile| { + ofile.dwarf.deinit(gpa); + ofile.addr_table.deinit(gpa); + } + di.full.ofiles.deinit(); + gpa.free(di.full.symbols); + posix.munmap(di.full.mapped_memory); + } + + fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapDebugInfoFile(o_file_path); + errdefer posix.munmap(mapped_mem); + + if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => seg_cmd = cmd, + .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, 
+ else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; + const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; + defer addr_table.deinit(gpa); + try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = addr_table.getOrPutAssumeCapacity(sym_name); + if (gop.found_existing) return error.InvalidDebugInfo; + gop.value_ptr.* = sym.n_value; + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + try dwarf.open(gpa, native_endian); + + return .{ + .dwarf = dwarf, + .addr_table = addr_table.move(), + }; + } +}; + +const MachoSymbol = struct { + strx: u32, + addr: u64, + size: u32, + ofile: u32, + fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. 
+ fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const MachoSymbol = &.{ + .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = MachoSymbol; +} + +fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.fpRegNum(builtin.target.cpu.arch, reg_context); +} +fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.spRegNum(builtin.target.cpu.arch, reg_context); +} +const ip_reg_num = Dwarf.abi.ipRegNum(builtin.target.cpu.arch).?; + +/// Uses `mmap` to map the file at `path` into memory. 
+fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; + defer file.close(); + + const file_len = std.math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); +} + +const DarwinModule = @This(); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; +const macho = std.macho; +const mem = std.mem; +const posix = std.posix; +const testing = std.testing; +const UnwindContext = std.debug.SelfInfo.UnwindContext; +const regBytes = Dwarf.abi.regBytes; +const regValueNative = Dwarf.abi.regValueNative; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig new file mode 100644 index 000000000000..41ea72296246 --- /dev/null +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -0,0 +1,144 @@ +load_offset: usize, +name: []const u8, +build_id: ?[]const u8, +gnu_eh_frame: ?[]const u8, + +/// No cache needed, because `dl_iterate_phdr` is already fast. 
+pub const LookupCache = struct { + pub const init: LookupCache = .{}; +}; + +pub const DebugInfo = struct { + loaded_elf: ?Dwarf.ElfModule, + unwind: ?Dwarf.Unwind, + pub const init: DebugInfo = .{ + .loaded_elf = null, + .unwind = null, + }; +}; + +pub fn key(m: ElfModule) usize { + return m.load_offset; +} +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !ElfModule { + _ = cache; + _ = gpa; + if (builtin.target.os.tag == .haiku) @panic("TODO implement lookup module for Haiku"); + const DlIterContext = struct { + /// input + address: usize, + /// output + module: ElfModule, + + fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void { + _ = size; + // The base address is too high + if (context.address < info.addr) + return; + + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + context.module = .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = mem.sliceTo(info.name, 0) orelse "", + .build_id = null, + .gnu_eh_frame = null, + }; + break; + } + } else return; + + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, name, "GNU\x00")) continue; + 
const desc = r.take(desc_size) catch continue; + context.module.build_id = desc; + }, + elf.PT_GNU_EH_FRAME => { + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; + }, + else => {}, + } + } + + // Stop the iteration + return error.Found; + } + }; + var ctx: DlIterContext = .{ + .address = address, + .module = undefined, + }; + std.posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { + error.Found => return ctx.module, + }; + return error.MissingDebugInfo; +} +fn loadLocationInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { + if (module.name.len > 0) { + di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ + .root_dir = .cwd(), + .sub_path = module.name, + }, module.build_id, null, null, null) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.Overflow => return error.InvalidDebugInfo, + else => |e| return e, + }; + } else { + const path = try std.fs.selfExePathAlloc(gpa); + defer gpa.free(path); + di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ + .root_dir = .cwd(), + .sub_path = path, + }, module.build_id, null, null, null) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.Overflow => return error.InvalidDebugInfo, + else => |e| return e, + }; + } +} +pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (di.loaded_elf == null) try module.loadLocationInfo(gpa, di); + const vaddr = address - module.load_offset; + return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); +} +fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { + const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; + const header: 
Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); + di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + try di.unwind.?.prepareLookup(gpa, @sizeOf(usize), native_endian); +} +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); + return context.unwindFrameDwarf(&di.unwind.?, module.load_offset, null); +} + +const ElfModule = @This(); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const elf = std.elf; +const mem = std.mem; +const UnwindContext = std.debug.SelfInfo.UnwindContext; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig new file mode 100644 index 000000000000..ab322c201a21 --- /dev/null +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -0,0 +1,255 @@ +base_address: usize, +size: usize, +name: []const u8, +handle: windows.HMODULE, +pub fn key(m: WindowsModule) usize { + return m.base_address; +} +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !WindowsModule { + if (lookupInCache(cache, address)) |m| return m; + { + // Check a new module hasn't been loaded + cache.modules.clearRetainingCapacity(); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + return windows.unexpectedError(windows.GetLastError()); + } + defer windows.CloseHandle(handle); + + var entry: windows.MODULEENTRY32 = undefined; + entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &entry) != 0) { + try cache.modules.append(gpa, entry); + while (windows.kernel32.Module32Next(handle, &entry) 
!= 0) { + try cache.modules.append(gpa, entry); + } + } + } + if (lookupInCache(cache, address)) |m| return m; + return error.MissingDebugInfo; +} +pub fn getSymbolAtAddress(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + if (!di.loaded) try module.loadLocationInfo(gpa, di); + // Translate the runtime address into a virtual address into the module + const vaddr = address - module.base_address; + + if (di.pdb != null) { + if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; + } + + if (di.dwarf) |*dwarf| { + const dwarf_address = vaddr + di.coff_image_base; + return dwarf.getSymbol(gpa, native_endian, dwarf_address); + } + + return error.MissingDebugInfo; +} +fn lookupInCache(cache: *const LookupCache, address: usize) ?WindowsModule { + for (cache.modules.items) |*entry| { + const base_address = @intFromPtr(entry.modBaseAddr); + if (address >= base_address and address < base_address + entry.modBaseSize) { + return .{ + .base_address = base_address, + .size = entry.modBaseSize, + .name = std.mem.sliceTo(&entry.szModule, 0), + .handle = entry.hModule, + }; + } + } + return null; +} +fn loadLocationInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) !void { + const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); + const mapped = mapped_ptr[0..module.size]; + var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. 
+ if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + name_buffer[4..], + windows.PATH_MAX_WIDE, + ); + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; + errdefer coff_file.close(); + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + §ion_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + var coff_len: usize = 0; + var section_view_ptr: [*]const u8 = undefined; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(§ion_view_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); + const section_view = section_view_ptr[0..coff_len]; + coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; + di.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + di.coff_image_base = coff_obj.getImageBase(); + + if (coff_obj.getSectionByName(".debug_info")) |_| { + di.dwarf = .{}; + + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, gpa), + .owned = true, + }; + } else null; + } + + try di.dwarf.?.open(gpa, native_endian); + } + + if (try coff_obj.getPdbPath()) |raw_path| pdb: { + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(gpa); + defer gpa.free(self_dir); + break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) gpa.free(path); + + di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => break :pdb, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); + + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; + + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); + } + + di.loaded = true; +} +pub const LookupCache = struct { + modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), + pub const init: LookupCache = .{ .modules = .empty }; +}; +pub const DebugInfo = struct { + loaded: bool, + + coff_image_base: u64, + mapped_file: ?struct { + file: fs.File, + section_handle: windows.HANDLE, + section_view: []const u8, + fn deinit(mapped: @This()) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mapped.section_handle); + mapped.file.close(); + } + }, + + dwarf: ?Dwarf, + + pdb: ?Pdb, + /// Populated iff `pdb != null`; otherwise `&.{}`. 
+ coff_section_headers: []coff.SectionHeader, + + pub const init: DebugInfo = .{ + .loaded = false, + .coff_image_base = undefined, + .mapped_file = null, + .dwarf = null, + .pdb = null, + .coff_section_headers = &.{}, + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.dwarf) |*dwarf| dwarf.deinit(gpa); + if (di.pdb) |*pdb| pdb.deinit(); + gpa.free(di.coff_section_headers); + if (di.mapped_file) |mapped| mapped.deinit(); + } + + fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.section > di.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. + coff_section = &di.coff_section_headers[sect_contrib.section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.offset; + const vaddr_end = vaddr_start + sect_contrib.size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.module_index; + } + } else { + // we have no information to add to the address + return null; + }; + + const module = try di.pdb.?.getModule(mod_index) orelse return error.InvalidDebugInfo; + + return .{ + .name = di.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ), + .compile_unit_name = fs.path.basename(module.obj_file_name), + .source_location = try di.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ), + }; + } +}; + +const WindowsModule = @This(); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; +const assert = std.debug.assert; +const coff = std.coff; +const fs = std.fs; +const mem = std.mem; +const windows = std.os.windows; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); From 
665f13b0cde4b9c2e69b139a87d272a67a9489e1 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 18:47:31 +0100 Subject: [PATCH 16/85] SelfInfo deinit magic --- lib/std/debug/SelfInfo.zig | 29 ++++++++++++------------ lib/std/debug/SelfInfo/DarwinModule.zig | 26 ++++++++++----------- lib/std/debug/SelfInfo/ElfModule.zig | 7 +++--- lib/std/debug/SelfInfo/WindowsModule.zig | 19 +++++++++------- 4 files changed, 43 insertions(+), 38 deletions(-) diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 32382ac1e1b7..9ca1b32f4e2c 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -18,7 +18,7 @@ const regValueNative = Dwarf.abi.regValueNative; const SelfInfo = @This(); -modules: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), +modules: std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo), lookup_cache: Module.LookupCache, /// Indicates whether the `SelfInfo` implementation has support for this target. @@ -68,18 +68,18 @@ comptime { pub const init: SelfInfo = .{ .modules = .empty, - .lookup_cache = .init, + .lookup_cache = if (Module.LookupCache != void) .init, }; -pub fn deinit(self: *SelfInfo) void { - // MLUGG TODO: that's amusing, this function is straight-up unused. i... wonder if it even should be used anywhere? perhaps not... so perhaps it should not even exist...???? - var it = self.modules.iterator(); - while (it.next()) |entry| { - const mdi = entry.value_ptr.*; - mdi.deinit(self.allocator); - self.allocator.destroy(mdi); - } - self.modules.deinit(self.allocator); +pub fn deinit(self: *SelfInfo, gpa: Allocator) void { + for (self.modules.values()) |*di| di.deinit(gpa); + self.modules.deinit(gpa); + if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); +} +comptime { + // `std.debug` does not currently utilize `deinit`, as it keeps hold of debug info for the + // whole lifetime of the program. Let's try to avoid it bitrotting. 
+ _ = &deinit; } pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { @@ -110,11 +110,12 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// This type contains the target-specific implementation. It must expose the following declarations: /// -/// * `LookupCache: type` -/// * `LookupCache.init: LookupCache` +/// * `LookupCache: type`, with the following declarations unless `LookupCache == void`: +/// * `init: LookupCache` +/// * `deinit: fn (*LookupCache, Allocator) void` /// * `lookup: fn (*LookupCache, Allocator, address: usize) !Module` /// * `key: fn (*const Module) usize` -/// * `DebugInfo: type` +/// * `DebugInfo: type`, with the following declarations: /// * `DebugInfo.init: DebugInfo` /// * `getSymbolAtAddress: fn (*const Module, Allocator, *DebugInfo, address: usize) !std.debug.Symbol` /// diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index d0cb47281f75..976893e0af66 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -578,9 +578,7 @@ fn unwindFrameMachO( return new_ip; } /// No cache needed, because `_dyld_get_image_header` etc are already fast. -pub const LookupCache = struct { - pub const init: LookupCache = .{}; -}; +pub const LookupCache = void; pub const DebugInfo = struct { unwind: ?struct { // Backed by the in-memory sections mapped by the loader @@ -601,22 +599,24 @@ pub const DebugInfo = struct { .full = null, }; + pub fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.full) |*full| { + for (full.ofiles.values()) |*ofile| { + ofile.dwarf.deinit(gpa); + ofile.addr_table.deinit(gpa); + } + full.ofiles.deinit(gpa); + gpa.free(full.symbols); + posix.munmap(full.mapped_memory); + } + } + const OFile = struct { dwarf: Dwarf, // MLUGG TODO: this could use an adapter to just index straight into the strtab! 
addr_table: std.StringArrayHashMapUnmanaged(u64), }; - fn deinit(di: *DebugInfo, gpa: Allocator) void { - for (di.full.ofiles.values()) |*ofile| { - ofile.dwarf.deinit(gpa); - ofile.addr_table.deinit(gpa); - } - di.full.ofiles.deinit(); - gpa.free(di.full.symbols); - posix.munmap(di.full.mapped_memory); - } - fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { const mapped_mem = try mapDebugInfoFile(o_file_path); errdefer posix.munmap(mapped_mem); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 41ea72296246..4f64b147e1d3 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -4,9 +4,7 @@ build_id: ?[]const u8, gnu_eh_frame: ?[]const u8, /// No cache needed, because `dl_iterate_phdr` is already fast. -pub const LookupCache = struct { - pub const init: LookupCache = .{}; -}; +pub const LookupCache = void; pub const DebugInfo = struct { loaded_elf: ?Dwarf.ElfModule, @@ -15,6 +13,9 @@ pub const DebugInfo = struct { .loaded_elf = null, .unwind = null, }; + pub fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa); + } }; pub fn key(m: ElfModule) usize { diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index ab322c201a21..4f9d98353b50 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -167,6 +167,9 @@ fn loadLocationInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo pub const LookupCache = struct { modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), pub const init: LookupCache = .{ .modules = .empty }; + pub fn deinit(lc: *LookupCache, gpa: Allocator) void { + lc.modules.deinit(gpa); + } }; pub const DebugInfo = struct { loaded: bool, @@ -176,12 +179,6 @@ pub const DebugInfo = struct { file: fs.File, section_handle: windows.HANDLE, section_view: []const u8, - fn deinit(mapped: @This()) void { - const 
process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); - windows.CloseHandle(mapped.section_handle); - mapped.file.close(); - } }, dwarf: ?Dwarf, @@ -199,11 +196,17 @@ pub const DebugInfo = struct { .coff_section_headers = &.{}, }; - fn deinit(di: *DebugInfo, gpa: Allocator) void { + pub fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (!di.loaded) return; if (di.dwarf) |*dwarf| dwarf.deinit(gpa); if (di.pdb) |*pdb| pdb.deinit(); gpa.free(di.coff_section_headers); - if (di.mapped_file) |mapped| mapped.deinit(); + if (di.mapped_file) |mapped| { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mapped.section_handle); + mapped.file.close(); + } } fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { From 4b47a377175c30ae148bae1191ae49e3ace819db Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 2 Sep 2025 23:17:09 +0100 Subject: [PATCH 17/85] stash? more like no --- lib/std/debug/Dwarf/Unwind.zig | 4 +- lib/std/debug/SelfInfo.zig | 8 +- lib/std/debug/SelfInfo/DarwinModule.zig | 200 +++++++++++++----------- lib/std/debug/SelfInfo/ElfModule.zig | 4 +- 4 files changed, 116 insertions(+), 100 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 323bf3987461..d13aa9f48d28 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -553,8 +553,8 @@ fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: }; } /// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. 
-fn readEhPointer(fbr: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { - const offset = try readEhPointerAbs(fbr, enc.type, addr_size_bytes, endian); +fn readEhPointer(r: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { + const offset = try readEhPointerAbs(r, enc.type, addr_size_bytes, endian); const base = switch (enc.rel) { .abs, .aligned => 0, .pcrel => ctx.pc_rel_base, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 9ca1b32f4e2c..77d68fe68ef4 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -76,7 +76,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { self.modules.deinit(gpa); if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } -comptime { +test { // `std.debug` does not currently utilize `deinit`, as it keeps hold of debug info for the // whole lifetime of the program. Let's try to avoid it bitrotting. _ = &deinit; @@ -85,7 +85,7 @@ comptime { pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); - const gop = try self.modules.getOrPut(gpa, module.load_offset); + const gop = try self.modules.getOrPut(gpa, module.key()); self.modules.lockPointers(); defer self.modules.unlockPointers(); if (!gop.found_existing) gop.value_ptr.* = .init; @@ -128,9 +128,7 @@ const Module = switch (native_os) { .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"), .uefi, .windows => @import("SelfInfo/WindowsModule.zig"), .wasi, .emscripten => struct { - const LookupCache = struct { - const init: LookupCache = .{}; - }; + const LookupCache = void; fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { _ = cache; _ = gpa; diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 976893e0af66..e1fd387473c5 
100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -43,7 +43,37 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !DarwinModule } return error.MissingDebugInfo; } -fn loadLocationInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void { +fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { + const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const sections = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break load_cmd.getSections(); + } else unreachable; + + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; + for (sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + return .{ + .unwind_info = unwind_info, + .eh_frame = eh_frame, + }; +} +fn loadFullInfo(module: *const DarwinModule, gpa: Allocator) !DebugInfo.Full { const mapped_mem = try mapDebugInfoFile(module.name); errdefer posix.munmap(mapped_mem); @@ -149,49 +179,19 @@ fn loadLocationInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) // This sort is so that we can binary search later. 
mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - di.full = .{ + return .{ .mapped_memory = mapped_mem, .symbols = symbols_slice, .strings = strings, .ofiles = .empty, }; } -fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void { - _ = gpa; - - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); - - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break load_cmd.getSections(); - } else unreachable; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - for (sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); - unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); - eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - di.unwind = .{ - .unwind_info = unwind_info, - .eh_frame = eh_frame, - }; -} pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.full == null) try module.loadLocationInfo(gpa, di); + if (di.full == null) di.full = try module.loadFullInfo(gpa); + const full = &di.full.?; + const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{ + const symbol = MachoSymbol.find(full.symbols, vaddr) orelse return .{ .name = null, .compile_unit_name = null, .source_location = null, @@ -202,8 +202,7 @@ pub fn getSymbolAtAddress(module: 
*const DarwinModule, gpa: Allocator, di: *Debu // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0); - const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0); + const stab_symbol = mem.sliceTo(full.strings[symbol.strx..], 0); // If any information is missing, we can at least return this from now on. const sym_only_result: std.debug.Symbol = .{ @@ -213,10 +212,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu }; const o_file: *DebugInfo.OFile = of: { - const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path); + const gop = try full.ofiles.getOrPut(gpa, symbol.ofile); if (!gop.found_existing) { + const o_file_path = mem.sliceTo(full.strings[symbol.ofile..], 0); gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { - defer _ = di.full.?.ofiles.pop().?; + defer _ = full.ofiles.pop().?; switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo, @@ -228,7 +228,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu break :of gop.value_ptr; }; - const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result; + const symbol_index = o_file.symbols_by_name.getKeyAdapted( + @as([]const u8, stab_symbol), + @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), + ) orelse return sym_only_result; + const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, @@ -257,28 +261,15 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu }, }; } -pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - if (di.unwind == 
null) try module.loadUnwindInfo(gpa, di); - const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo; - // MLUGG TODO: inline? - return unwindFrameMachO( - module.text_base, - module.load_offset, - context, - unwind_info, - di.unwind.?.eh_frame, - ); -} /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -fn unwindFrameMachO( - text_base: usize, - load_offset: usize, - context: *UnwindContext, - unwind_info: []const u8, - opt_eh_frame: ?[]const u8, -) !usize { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + _ = gpa; + if (di.unwind == null) di.unwind = module.loadUnwindInfo(); + const unwind = &di.unwind.?; + + const unwind_info = unwind.unwind_info orelse return error.MissingUnwindInfo; if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); @@ -288,7 +279,7 @@ fn unwindFrameMachO( if (indices.len == 0) return error.MissingUnwindInfo; // offset of the PC into the `__TEXT` segment - const pc_text_offset = context.pc - text_base; + const pc_text_offset = context.pc - module.text_base; const start_offset: u32, const first_level_offset: u32 = index: { var left: usize = 0; @@ -443,7 +434,7 @@ fn unwindFrameMachO( } // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
const sub_offset_addr = - text_base + + module.text_base + entry.function_offset + frameless.stack.indirect.sub_offset; // `sub_offset_addr` points to the offset of the literal within the instruction @@ -502,11 +493,11 @@ fn unwindFrameMachO( break :ip new_ip; }, .DWARF => { - const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + const eh_frame = unwind.eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; return context.unwindFrameDwarf( &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - load_offset, + module.load_offset, @intCast(encoding.value.x86_64.dwarf), ); }, @@ -521,11 +512,11 @@ fn unwindFrameMachO( break :ip new_ip; }, .DWARF => { - const eh_frame = opt_eh_frame orelse return error.MissingEhFrame; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset; + const eh_frame = unwind.eh_frame orelse return error.MissingEhFrame; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; return context.unwindFrameDwarf( &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - load_offset, + module.load_offset, @intCast(encoding.value.x86_64.dwarf), ); }, @@ -580,19 +571,9 @@ fn unwindFrameMachO( /// No cache needed, because `_dyld_get_image_header` etc are already fast. pub const LookupCache = void; pub const DebugInfo = struct { - unwind: ?struct { - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8, - eh_frame: ?[]const u8, - }, + unwind: ?Unwind, // MLUGG TODO: awful field name - full: ?struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: [:0]const u8, - // MLUGG TODO: this could use an adapter to just index straight into `strings`! 
- ofiles: std.StringArrayHashMapUnmanaged(OFile), - }, + full: ?Full, pub const init: DebugInfo = .{ .unwind = null, @@ -603,7 +584,7 @@ pub const DebugInfo = struct { if (di.full) |*full| { for (full.ofiles.values()) |*ofile| { ofile.dwarf.deinit(gpa); - ofile.addr_table.deinit(gpa); + ofile.symbols_by_name.deinit(gpa); } full.ofiles.deinit(gpa); gpa.free(full.symbols); @@ -611,10 +592,42 @@ pub const DebugInfo = struct { } } + const Unwind = struct { + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + }; + + const Full = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + /// Key is index into `strings` of the file path. + ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile), + }; + const OFile = struct { dwarf: Dwarf, - // MLUGG TODO: this could use an adapter to just index straight into the strtab! - addr_table: std.StringArrayHashMapUnmanaged(u64), + strtab: [:0]const u8, + symtab: []align(1) const macho.nlist_64, + /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed + /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
+ symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), + + const SymbolAdapter = struct { + strtab: [:0]const u8, + symtab: []align(1) const macho.nlist_64, + pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, sym_name)); + } + pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { + _ = b_index; + const b_sym = ctx.symtab[b_sym_index]; + const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); + return mem.eql(u8, a_sym_name, b_sym_name); + } + }; }; fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { @@ -645,17 +658,17 @@ pub const DebugInfo = struct { if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1 :0]; const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); // TODO handle tentative (common) symbols - var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; - defer addr_table.deinit(gpa); - try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab) |sym| { + var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; + defer symbols_by_name.deinit(gpa); + try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab, 0..) 
|sym, sym_index| { if (sym.n_strx == 0) continue; switch (sym.n_type.bits.type) { .undf => continue, // includes tentative symbols @@ -663,9 +676,12 @@ pub const DebugInfo = struct { else => {}, } const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = addr_table.getOrPutAssumeCapacity(sym_name); + const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( + @as([]const u8, sym_name), + @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), + ); if (gop.found_existing) return error.InvalidDebugInfo; - gop.value_ptr.* = sym.n_value; + gop.key_ptr.* = @intCast(sym_index); } var sections: Dwarf.SectionArray = @splat(null); @@ -697,7 +713,9 @@ pub const DebugInfo = struct { return .{ .dwarf = dwarf, - .addr_table = addr_table.move(), + .strtab = strtab, + .symtab = symtab, + .symbols_by_name = symbols_by_name.move(), }; } }; diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 4f64b147e1d3..6c1da9d8da7d 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -92,7 +92,7 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !ElfModule { }; return error.MissingDebugInfo; } -fn loadLocationInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { +fn loadDwarf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { if (module.name.len > 0) { di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ .root_dir = .cwd(), @@ -116,7 +116,7 @@ fn loadLocationInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !v } } pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.loaded_elf == null) try module.loadLocationInfo(gpa, di); + if (di.loaded_elf == null) try module.loadDwarf(gpa, di); const vaddr = address - module.load_offset; return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); } From 5e6a1919c730f7c3ad27b9e5c3ddc8938fc2f43f Mon Sep 17 
00:00:00 2001 From: mlugg Date: Wed, 3 Sep 2025 13:11:25 +0100 Subject: [PATCH 18/85] fix aarch64-macos DWARF unwinding turns out this isn't technically specific to that target at all; other targets just don't emit mid-function 'ret' instructions as much so certain CFI instruction patterns were only seen on aarch64. thanks to jacob for finding the bug <3 --- lib/std/debug/Dwarf/Unwind/VirtualMachine.zig | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig index 66100f5edaee..997af95cbd31 100644 --- a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig +++ b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig @@ -48,7 +48,10 @@ const ColumnRange = struct { }; columns: std.ArrayList(Column) = .empty, -stack: std.ArrayList(ColumnRange) = .empty, +stack: std.ArrayList(struct { + cfa: Column, + columns: ColumnRange, +}) = .empty, current_row: Row = .{}, /// The result of executing the CIE's initial_instructions @@ -205,17 +208,21 @@ pub fn step( column.rule = .{ .register = i.target_register }; }, .remember_state => { - try self.stack.append(gpa, self.current_row.columns); + try self.stack.append(gpa, .{ + .cfa = self.current_row.cfa, + .columns = self.current_row.columns, + }); self.current_row.copy_on_write = true; }, .restore_state => { - const restored_columns = self.stack.pop() orelse return error.InvalidOperation; + const restored = self.stack.pop() orelse return error.InvalidOperation; self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(gpa, restored_columns.len); + try self.columns.ensureUnusedCapacity(gpa, restored.columns.len); + self.current_row.cfa = restored.cfa; self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored_columns.len; - 
self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); + self.current_row.columns.len = restored.columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored.columns.start..][0..restored.columns.len]); }, .def_cfa => |i| { try self.resolveCopyOnWrite(gpa); From dd9cb1beead2d0b9a22decf089aadf51cfe90da8 Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 3 Sep 2025 13:58:41 +0100 Subject: [PATCH 19/85] doc comments --- lib/std/debug/Dwarf/Unwind.zig | 138 ++++++++++++++++++++++----------- lib/std/debug/SelfInfo.zig | 6 +- 2 files changed, 97 insertions(+), 47 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index d13aa9f48d28..01ba96aad4cd 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -1,4 +1,24 @@ -//! MLUGG TODO DOCUMENT THIS +//! Contains state relevant to stack unwinding through the DWARF `.debug_frame` section, or the +//! `.eh_frame` section which is an extension of the former specified by Linux Standard Base Core. +//! Like `Dwarf`, no assumptions are made about the host's relationship to the target of the unwind +//! information -- unwind data for any target can be read by any host. +//! +//! `Unwind` specifically deals with loading the data from CIEs and FDEs in the section, and with +//! performing fast lookups of a program counter's corresponding FDE. The CFI instructions in the +//! CIEs and FDEs can be interpreted by `VirtualMachine`. +//! +//! The typical usage of `Unwind` is as follows: +//! +//! * Initialize with `initEhFrameHdr` or `initSection`, depending on the available data +//! * Call `prepareLookup` to construct a search table if necessary +//! * Call `lookupPc` to find the section offset of the FDE corresponding to a PC +//! * Call `getFde` to load the corresponding FDE and CIE +//! * Check that the PC does indeed fall in that range (`lookupPc` may return a false positive) +//! 
* Interpret the embedded CFI instructions using `VirtualMachine` +//! +//! In some cases, such as when using the "compact unwind" data in Mach-O binaries, the FDE offsets +//! may already be known. In that case, no call to `lookupPc` is necessary, which means the call to +//! `prepareLookup` can also be omitted. pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); @@ -8,7 +28,8 @@ frame_section: struct { /// the binary (e.g. `sh_addr` in an ELF file); the equivalent runtime address may be relocated /// in position-independent binaries. vaddr: u64, - /// The full contents of the section. May have imprecise bounds depending on `section`. + /// The full contents of the section. May have imprecise bounds depending on `section`. This + /// memory is externally managed. /// /// For `.debug_frame`, the slice length is exactly equal to the section length. This is needed /// to know the number of CIEs and FDEs. @@ -22,13 +43,18 @@ frame_section: struct { bytes: []const u8, }, +/// A structure allowing fast lookups of the FDE corresponding to a particular PC. We use a binary +/// search table for the lookup; essentially, a list of all FDEs ordered by PC range. `null` means +/// the lookup data is not yet populated, so `prepareLookup` must be called before `lookupPc`. lookup: ?union(enum) { + /// The `.eh_frame_hdr` section contains a pre-computed search table which we can use. eh_frame_hdr: struct { /// Virtual address of the `.eh_frame_hdr` section. vaddr: u64, table: EhFrameHeader.SearchTable, }, - /// Offsets into `frame_section` of FDEs, sorted by ascending `pc_begin`. + /// There is no pre-computed search table, so we have built one ourselves. + /// Allocated into `gpa` and freed by `deinit`. sorted_fdes: []SortedFdeEntry, }, @@ -39,29 +65,13 @@ const SortedFdeEntry = struct { fde_offset: u64, }; -const Section = enum { debug_frame, eh_frame }; - -/// Initialize with unwind information from the contents of a `.debug_frame` or `.eh_frame` section. 
-/// -/// If the `.eh_frame_hdr` section is available, consider instead using `initEhFrameHdr`. This -/// allows the implementation to use a search table embedded in that section if it is available. -pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const u8) Unwind { - return .{ - .frame_section = .{ - .id = section, - .bytes = section_bytes, - .vaddr = section_vaddr, - }, - .lookup = null, - }; -} +pub const Section = enum { debug_frame, eh_frame }; /// Initialize with unwind information from a header loaded from an `.eh_frame_hdr` section, and a /// pointer to the contents of the `.eh_frame` section. /// -/// This differs from `loadFromSection` because `.eh_frame_hdr` may embed a binary search table, and -/// if it does, this function will use that for address lookups instead of constructing our own -/// search table. +/// `.eh_frame_hdr` may embed a binary search table of FDEs. If it does, we will use that table for +/// PC lookups rather than spending time constructing our own search table. pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_ptr: [*]const u8) Unwind { return .{ .frame_section = .{ @@ -76,6 +86,23 @@ pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_p }; } +/// Initialize with unwind information from the contents of a `.debug_frame` or `.eh_frame` section. +/// +/// If the `.eh_frame_hdr` section is available, consider instead using `initEhFrameHdr`, which +/// allows the implementation to use a search table embedded in that section if it is available. +pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const u8) Unwind { + return .{ + .frame_section = .{ + .id = section, + .bytes = section_bytes, + .vaddr = section_vaddr, + }, + .lookup = null, + }; +} + +/// Technically, it is only necessary to call this if `prepareLookup` has previously been called, +/// since no other function here allocates resources. 
pub fn deinit(unwind: *Unwind, gpa: Allocator) void { if (unwind.lookup) |lookup| switch (lookup) { .eh_frame_hdr => {}, @@ -83,8 +110,12 @@ pub fn deinit(unwind: *Unwind, gpa: Allocator) void { }; } -/// This represents the decoded .eh_frame_hdr header +/// Decoded version of the `.eh_frame_hdr` section. pub const EhFrameHeader = struct { + /// The virtual address (i.e. as given in the binary, before relocations) of the `.eh_frame` + /// section. This value is important when using `.eh_frame_hdr` to find debug information for + /// the current binary, because it allows locating where the `.eh_frame` section is loaded in + /// memory (by adding it to the ELF module's base address). eh_frame_vaddr: u64, search_table: ?SearchTable, @@ -93,6 +124,8 @@ pub const EhFrameHeader = struct { offset: u8, encoding: EH.PE, fde_count: usize, + /// The actual table entries are viewed as a plain byte slice because `encoding` causes the + /// size of entries in the table to vary. entries: []const u8, /// Returns the vaddr of the FDE for `pc`, or `null` if no matching FDE was found. 
@@ -104,7 +137,7 @@ pub const EhFrameHeader = struct { endian: Endian, ) !?u64 { const table_vaddr = eh_frame_hdr_vaddr + table.offset; - const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes); + const entry_size = try entrySize(table.encoding, addr_size_bytes); var left: usize = 0; var len: usize = table.fde_count; while (len > 1) { @@ -131,18 +164,18 @@ pub const EhFrameHeader = struct { }, endian); return fde_ptr; } - }; - pub fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 { - return switch (table_enc.type) { - .absptr => 2 * addr_size_bytes, - .udata2, .sdata2 => 4, - .udata4, .sdata4 => 8, - .udata8, .sdata8 => 16, - .uleb128, .sleb128 => return bad(), // this is a binary search table; all entries must be the same size - _ => return bad(), - }; - } + fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 { + return switch (table_enc.type) { + .absptr => 2 * addr_size_bytes, + .udata2, .sdata2 => 4, + .udata4, .sdata4 => 8, + .udata8, .sdata8 => 16, + .uleb128, .sleb128 => return bad(), // this is a binary search table; all entries must be the same size + _ => return bad(), + }; + } + }; pub fn parse( eh_frame_hdr_vaddr: u64, @@ -169,7 +202,7 @@ pub const EhFrameHeader = struct { const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{ .pc_rel_base = eh_frame_hdr_vaddr + r.seek, }, endian); - const entry_size = try entrySize(table_enc, addr_size_bytes); + const entry_size = try SearchTable.entrySize(table_enc, addr_size_bytes); const bytes_offset = r.seek; const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream; const bytes = try r.take(bytes_len); @@ -188,7 +221,15 @@ pub const EhFrameHeader = struct { } }; -pub const EntryHeader = union(enum) { +/// The shared header of an FDE/CIE, containing a length in bytes (DWARF's "initial length field") +/// and a value which differentiates CIEs from FDEs and maps FDEs to their corresponding CIEs. 
The +/// `.eh_frame` format also includes a third variation, here called `.terminator`, which acts as a +/// sentinel for the whole section. +/// +/// `CommonInformationEntry.parse` and `FrameDescriptionEntry.parse` expect the `EntryHeader` to +/// have been parsed first: they accept data stored in the `EntryHeader`, and only read the bytes +/// following this header. +const EntryHeader = union(enum) { cie: struct { format: Format, /// Remaining bytes in the CIE. These are parseable by `CommonInformationEntry.parse`. @@ -206,7 +247,7 @@ pub const EntryHeader = union(enum) { /// keep track of how many section bytes remain when parsing all entries in `.debug_frame`. terminator, - pub fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader { + fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader { const unit_header = try Dwarf.readUnitHeader(r, endian); if (unit_header.unit_length == 0) return .terminator; @@ -284,7 +325,7 @@ pub const CommonInformationEntry = struct { /// /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. - pub fn parse( + fn parse( cie_bytes: []const u8, section: Section, default_addr_size_bytes: u8, @@ -364,7 +405,7 @@ pub const FrameDescriptionEntry = struct { /// This function expects to read the FDE starting at the PC Begin field. /// The returned struct references memory backed by `fde_bytes`. - pub fn parse( + fn parse( /// The virtual address of the FDE we're parsing, *excluding* its entry header (i.e. the /// address is after the header). If `fde_bytes` is backed by the memory of a loaded /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. @@ -405,6 +446,9 @@ pub const FrameDescriptionEntry = struct { } }; +/// Builds the PC FDE lookup table if it is not already built. It is required to call this function +/// at least once before calling `lookupPc`. 
Once this function is called, memory has been allocated +/// and so `deinit` (matching this `gpa`) is required to free it. pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void { if (unwind.lookup != null) return; @@ -443,22 +487,24 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia .debug_frame => if (saw_terminator) return bad(), // `.debug_frame` uses the section bounds and does not specify a sentinel entry } - const fde_slice = try fde_list.toOwnedSlice(gpa); - errdefer comptime unreachable; - std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { + std.mem.sortUnstable(SortedFdeEntry, fde_list.items, {}, struct { fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { ctx; return a.pc_begin < b.pc_begin; } }.lessThan); - unwind.lookup = .{ .sorted_fdes = fde_slice }; + + // This temporary is necessary to avoid an RLS footgun where `lookup` ends up non-null `undefined` on OOM. + const final_fdes = try fde_list.toOwnedSlice(gpa); + unwind.lookup = .{ .sorted_fdes = final_fdes }; } /// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no /// matching FDE was found. The returned offset can be passed to `getFde` to load the data /// associated with the FDE. /// -/// Before calling this function, `prepareLookup` must return successfully. +/// Before calling this function, `prepareLookup` must return successfully at least once, to ensure +/// that `unwind.lookup` is populated. /// /// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must /// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. @@ -486,6 +532,8 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End return sorted_fdes[first_bad_idx - 1].fde_offset; } +/// Get the FDE at a given offset, as well as its associated CIE. This offset typically comes from +/// `lookupPc`. 
The CFI instructions within can be evaluated with `VirtualMachine`. pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { const section = unwind.frame_section; diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 77d68fe68ef4..9fa57208e160 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -290,8 +290,10 @@ pub const UnwindContext = struct { ) orelse return error.MissingDebugInfo; const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); - // Check if this FDE *actually* includes the address. - if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; + // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives). + if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { + return error.MissingDebugInfo; + } // Do not set `compile_unit` because the spec states that CFIs // may not reference other debug sections anyway. From c895aa7a35b178576b89e600a20af9d16da36dea Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 3 Sep 2025 15:42:33 +0100 Subject: [PATCH 20/85] std.debug.SelfInfo: concrete error sets The downside of this commit is that more precise errors are no longer propagated up. However, these errors were pretty useless in isolation due to them having no context; and regardless, we intentionally swallow most of them in `std.debug` anyway. Therefore, this is better in practice, because it allows `std.debug` to give slightly more useful warnings when handling errors. This commit does that for unwind errors, for instance, which differentiate between the unwind info being corrupt vs missing vs inaccessible vs unsupported. A better solution would be to also include more detailed information via the diagnostics pattern, but this commit is an incremental improvement. 
--- lib/std/debug.zig | 52 +++++++----- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/SelfInfo.zig | 72 ++++++++++++++-- lib/std/debug/SelfInfo/DarwinModule.zig | 100 +++++++++++++---------- lib/std/debug/SelfInfo/ElfModule.zig | 88 ++++++++++++++------ lib/std/debug/SelfInfo/WindowsModule.zig | 19 +++-- 6 files changed, 230 insertions(+), 103 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 505677dcb6bd..d005dd784199 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -766,11 +766,6 @@ pub fn writeStackTrace( } } -pub const UnwindError = if (have_ucontext) - @typeInfo(@typeInfo(@TypeOf(SelfInfo.unwindFrame)).@"fn".return_type.?).error_union.error_set -else - void; - pub const StackIterator = struct { // Skip every frame before this address is found. first_address: ?usize, @@ -783,7 +778,7 @@ pub const StackIterator = struct { unwind_state: if (have_ucontext) ?struct { debug_info: *SelfInfo, dwarf_context: SelfInfo.UnwindContext, - last_error: ?UnwindError = null, + last_error: ?SelfInfo.Error = null, failed: bool = false, } else void = if (have_ucontext) null else {}, @@ -821,7 +816,7 @@ pub const StackIterator = struct { } pub fn getLastError(it: *StackIterator) ?struct { - err: UnwindError, + err: SelfInfo.Error, address: usize, } { if (!have_ucontext) return null; @@ -1037,17 +1032,29 @@ fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, writer: *Writ } } -fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwind_err: UnwindError, tty_config: tty.Config) !void { +fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwind_err: SelfInfo.Error, tty_config: tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(getDebugInfoAllocator(), address) catch |err| switch (err) { - error.MissingDebugInfo => "???", + error.InvalidDebugInfo, error.MissingDebugInfo, error.UnsupportedDebugInfo, error.ReadFailed => "???", error.Unexpected, error.OutOfMemory => |e| 
return e, }; try tty_config.setColor(writer, .dim); - // MLUGG TODO this makes no sense given that MissingUnwindInfo exists? - if (unwind_err == error.MissingDebugInfo) { - try writer.print("Unwind information for `{s}:0x{x}` was not available, trace may be incomplete\n\n", .{ module_name, address }); - } else { - try writer.print("Unwind error at address `{s}:0x{x}` ({}), trace may be incomplete\n\n", .{ module_name, address, unwind_err }); + switch (unwind_err) { + error.Unexpected, error.OutOfMemory => |e| return e, + error.MissingDebugInfo => { + try writer.print("Unwind information for `{s}:0x{x}` was not available, trace may be incomplete\n\n", .{ module_name, address }); + }, + error.InvalidDebugInfo, + error.UnsupportedDebugInfo, + error.ReadFailed, + => { + const caption: []const u8 = switch (unwind_err) { + error.InvalidDebugInfo => "invalid unwind info", + error.UnsupportedDebugInfo => "unsupported unwind info", + error.ReadFailed => "filesystem error", + else => unreachable, + }; + try writer.print("Unwind error at address `{s}:0x{x}` ({s}), trace may be incomplete\n\n", .{ module_name, address, caption }); + }, } try tty_config.setColor(writer, .reset); } @@ -1055,12 +1062,17 @@ fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwi pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) !void { const gpa = getDebugInfoAllocator(); const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => .{ - .name = null, - .compile_unit_name = null, - .source_location = null, + error.MissingDebugInfo, + error.UnsupportedDebugInfo, + error.InvalidDebugInfo, + => .{ .name = null, .compile_unit_name = null, .source_location = null }, + error.ReadFailed => s: { + try tty_config.setColor(writer, .dim); + try writer.print("Failed to read debug info from filesystem, trace may be incomplete\n\n", .{}); + try 
tty_config.setColor(writer, .reset); + break :s .{ .name = null, .compile_unit_name = null, .source_location = null }; }, - else => |e| return e, + error.OutOfMemory, error.Unexpected => |e| return e, }; defer if (symbol.source_location) |sl| gpa.free(sl.file_name); return printLineInfo( @@ -1069,7 +1081,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usi address, symbol.name orelse "???", symbol.compile_unit_name orelse debug_info.getModuleNameForAddress(gpa, address) catch |err| switch (err) { - error.MissingDebugInfo => "???", + error.InvalidDebugInfo, error.MissingDebugInfo, error.UnsupportedDebugInfo, error.ReadFailed => "???", error.Unexpected, error.OutOfMemory => |e| return e, }, tty_config, diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 0ba4ab8048a7..f50b9ed1639d 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1418,7 +1418,7 @@ pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { 4 => 14, // R14 5 => 15, // R15 6 => 6, // RBP - else => error.InvalidUnwindRegisterNumber, + else => error.InvalidRegister, }; } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 9fa57208e160..1f913efeeaf6 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1,8 +1,6 @@ //! Cross-platform abstraction for this binary's own debug information, with a //! goal of minimal code bloat and compilation speed penalty. -// MLUGG TODO: audit use of errors in this file. ideally, introduce some concrete error sets - const builtin = @import("builtin"); const native_os = builtin.os.tag; const native_endian = native_arch.endian(); @@ -21,6 +19,19 @@ const SelfInfo = @This(); modules: std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo), lookup_cache: Module.LookupCache, +pub const Error = error{ + /// The required debug info is invalid or corrupted. + InvalidDebugInfo, + /// The required debug info could not be found. 
+ MissingDebugInfo, + /// The required debug info was found, and may be valid, but is not supported by this implementation. + UnsupportedDebugInfo, + /// The required debug info could not be read from disk due to some IO error. + ReadFailed, + OutOfMemory, + Unexpected, +}; + /// Indicates whether the `SelfInfo` implementation has support for this target. pub const target_supported: bool = switch (native_os) { .linux, @@ -82,7 +93,7 @@ test { _ = &deinit; } -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -92,7 +103,7 @@ pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !us return module.unwindFrame(gpa, gop.value_ptr, context); } -pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { +pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -102,7 +113,7 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std. return module.getSymbolAtAddress(gpa, gop.value_ptr, address); } -pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { +pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); return module.name; @@ -271,12 +282,61 @@ pub const UnwindContext = struct { /// may require lazily loading the data in those sections. 
/// /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info - /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. pub fn unwindFrameDwarf( context: *UnwindContext, unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, + ) Error!usize { + return unwindFrameDwarfInner(context, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { + error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, + + error.UnimplementedArch, + error.UnimplementedOs, + error.ThreadContextNotSupported, + error.UnimplementedRegisterRule, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + error.UnimplementedExpressionCall, + error.UnimplementedOpcode, + error.UnimplementedTypedComparison, + error.UnimplementedTypeConversion, + error.UnknownExpressionOpcode, + => return error.UnsupportedDebugInfo, + + error.InvalidRegister, + error.RegisterContextRequired, + error.ReadFailed, + error.EndOfStream, + error.IncompatibleRegisterSize, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + error.InvalidCFARule, + error.IncompleteExpressionContext, + error.InvalidCFAOpcode, + error.InvalidExpression, + error.InvalidFrameBase, + error.InvalidIntegralTypeSize, + error.InvalidSubExpression, + error.InvalidTypeLength, + error.TruncatedIntegralType, + error.DivisionByZero, + error.InvalidExpressionValue, + error.NoExpressionValue, + error.RegisterSizeMismatch, + error.InvalidCFA, + => return error.InvalidDebugInfo, + }; + } + fn unwindFrameDwarfInner( + context: *UnwindContext, + unwind: *const Dwarf.Unwind, + load_offset: usize, + explicit_fde_offset: ?usize, ) !usize { if (!supports_unwinding) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 
e1fd387473c5..1a38bdd2843e 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -7,7 +7,9 @@ pub fn key(m: *const DarwinModule) usize { return m.text_base; } -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !DarwinModule { +/// No cache needed, because `_dyld_get_image_header` etc are already fast. +pub const LookupCache = void; +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule { _ = cache; _ = gpa; const image_count = std.c._dyld_image_count(); @@ -186,8 +188,11 @@ fn loadFullInfo(module: *const DarwinModule, gpa: Allocator) !DebugInfo.Full { .ofiles = .empty, }; } -pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (di.full == null) di.full = try module.loadFullInfo(gpa); +pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { + if (di.full == null) di.full = module.loadFullInfo(gpa) catch |err| switch (err) { + error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| return e, + else => return error.ReadFailed, + }; const full = &di.full.?; const vaddr = address - module.load_offset; @@ -215,14 +220,9 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu const gop = try full.ofiles.getOrPut(gpa, symbol.ofile); if (!gop.found_existing) { const o_file_path = mem.sliceTo(full.strings[symbol.ofile..], 0); - gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { - defer _ = full.ofiles.pop().?; - switch (err) { - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return sym_only_result, - else => |e| return e, - } + gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch { + _ = full.ofiles.pop().?; + return sym_only_result; }; } break :of gop.value_ptr; @@ -234,10 +234,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, 
gpa: Allocator, di: *Debu ) orelse return sym_only_result; const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result, - else => |e| return e, - }; + const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; return .{ .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, @@ -255,28 +252,44 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu native_endian, compile_unit, symbol_ofile_vaddr + address_symbol_offset, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, + ) catch null, }; } /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { + return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.UnsupportedDebugInfo, + error.ReadFailed, + error.OutOfMemory, + error.Unexpected, + => |e| return e, + error.UnimplementedArch, + error.UnimplementedOs, + error.ThreadContextNotSupported, + => return error.UnsupportedDebugInfo, + error.InvalidRegister, + error.RegisterContextRequired, + error.IncompatibleRegisterSize, + => return error.InvalidDebugInfo, + }; +} +fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { _ = gpa; if (di.unwind == null) di.unwind = module.loadUnwindInfo(); const unwind = &di.unwind.?; - const unwind_info = unwind.unwind_info orelse return error.MissingUnwindInfo; - if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; + const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo; + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo; const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); - if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidDebugInfo; const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); - if (indices.len == 0) return error.MissingUnwindInfo; + if (indices.len == 0) return error.MissingDebugInfo; // offset of the PC into the 
`__TEXT` segment const pc_text_offset = context.pc - module.text_base; @@ -296,15 +309,15 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; }; // An offset of 0 is a sentinel indicating a range does not have unwind info. - if (start_offset == 0) return error.MissingUnwindInfo; + if (start_offset == 0) return error.MissingDebugInfo; const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidDebugInfo; const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], ); - if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidDebugInfo; const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); const entry: struct { @@ -312,15 +325,15 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, raw_encoding: u32, } = switch (kind.*) { .REGULAR => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidDebugInfo; const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); - 
if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], ); - if (entries.len == 0) return error.InvalidUnwindInfo; + if (entries.len == 0) return error.InvalidDebugInfo; var left: usize = 0; var len: usize = entries.len; @@ -339,15 +352,15 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }; }, .COMPRESSED => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidUnwindInfo; + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidDebugInfo; const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], ); - if (entries.len == 0) return error.InvalidUnwindInfo; + if (entries.len == 0) return error.InvalidDebugInfo; var left: usize = 0; var len: usize = entries.len; @@ -372,26 +385,26 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, const local_index = entry.encodingIndex - common_encodings.len; const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < start_offset + page_header.encodingsPageOffset + 
local_encodings_byte_count) return error.InvalidUnwindInfo; + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidDebugInfo; const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + if (local_index >= local_encodings.len) return error.InvalidDebugInfo; break :entry .{ .function_offset = function_offset, .raw_encoding = local_encodings[local_index], }; }, - else => return error.InvalidUnwindInfo, + else => return error.InvalidDebugInfo, }; - if (entry.raw_encoding == 0) return error.NoUnwindInfo; + if (entry.raw_encoding == 0) return error.MissingDebugInfo; const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); const new_ip = switch (builtin.cpu.arch) { .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, + .OLD => return error.UnsupportedDebugInfo, .RBP_FRAME => ip: { const frame = encoding.value.x86_64.frame; @@ -493,7 +506,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, break :ip new_ip; }, .DWARF => { - const eh_frame = unwind.eh_frame orelse return error.MissingEhFrame; + const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; return context.unwindFrameDwarf( &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), @@ -503,7 +516,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }, }, .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, + .OLD => return error.UnsupportedDebugInfo, .FRAMELESS => ip: { const sp = (try 
regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; @@ -512,7 +525,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, break :ip new_ip; }, .DWARF => { - const eh_frame = unwind.eh_frame orelse return error.MissingEhFrame; + const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; return context.unwindFrameDwarf( &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), @@ -568,8 +581,6 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, if (context.pc > 0) context.pc -= 1; return new_ip; } -/// No cache needed, because `_dyld_get_image_header` etc are already fast. -pub const LookupCache = void; pub const DebugInfo = struct { unwind: ?Unwind, // MLUGG TODO: awful field name @@ -785,7 +796,7 @@ const ip_reg_num = Dwarf.abi.ipRegNum(builtin.target.cpu.arch).?; fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, + else => return error.ReadFailed, }; defer file.close(); @@ -812,6 +823,7 @@ const mem = std.mem; const posix = std.posix; const testing = std.testing; const UnwindContext = std.debug.SelfInfo.UnwindContext; +const Error = std.debug.SelfInfo.Error; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 6c1da9d8da7d..25ce1827b7ea 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -21,7 +21,7 @@ pub const DebugInfo = struct { pub fn key(m: ElfModule) usize { return m.load_offset; } -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !ElfModule { +pub fn lookup(cache: 
*LookupCache, gpa: Allocator, address: usize) Error!ElfModule { _ = cache; _ = gpa; if (builtin.target.os.tag == .haiku) @panic("TODO implement lookup module for Haiku"); @@ -92,42 +92,79 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !ElfModule { }; return error.MissingDebugInfo; } -fn loadDwarf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { - if (module.name.len > 0) { - di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ +fn loadDwarf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { + const load_result = if (module.name.len > 0) res: { + break :res Dwarf.ElfModule.load(gpa, .{ .root_dir = .cwd(), .sub_path = module.name, - }, module.build_id, null, null, null) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.Overflow => return error.InvalidDebugInfo, - else => |e| return e, + }, module.build_id, null, null, null); + } else res: { + const path = std.fs.selfExePathAlloc(gpa) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => return error.ReadFailed, }; - } else { - const path = try std.fs.selfExePathAlloc(gpa); defer gpa.free(path); - di.loaded_elf = Dwarf.ElfModule.load(gpa, .{ + break :res Dwarf.ElfModule.load(gpa, .{ .root_dir = .cwd(), .sub_path = path, - }, module.build_id, null, null, null) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.Overflow => return error.InvalidDebugInfo, - else => |e| return e, - }; - } + }, module.build_id, null, null, null); + }; + di.loaded_elf = load_result catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + + error.OutOfMemory, + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.Unexpected, + => |e| return e, + + error.InvalidElfEndian, + error.InvalidElfMagic, + error.InvalidElfVersion, + error.InvalidUtf8, + error.InvalidWtf8, + error.EndOfStream, + error.Overflow, + error.UnimplementedDwarfForeignEndian, // this should be 
impossible as we're looking at the debug info for this process + => return error.InvalidDebugInfo, + + else => return error.ReadFailed, + }; } -pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { +pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { if (di.loaded_elf == null) try module.loadDwarf(gpa, di); const vaddr = address - module.load_offset; - return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr); + return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { + error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; } -fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void { - const section_bytes = module.gnu_eh_frame orelse return error.MissingUnwindInfo; // MLUGG TODO: load from file +fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { + const section_bytes = module.gnu_eh_frame orelse return error.MissingDebugInfo; // MLUGG TODO: load from file + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; - const header: Dwarf.Unwind.EhFrameHeader = try .parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian); - di.unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); - try di.unwind.?.prepareLookup(gpa, @sizeOf(usize), native_endian); + const header = Dwarf.Unwind.EhFrameHeader.parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo => |e| return e, + error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, + error.UnsupportedAddrSize => return 
error.UnsupportedDebugInfo, + }; + + var unwind: Dwarf.Unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + unwind.prepareLookup(gpa, @sizeOf(usize), native_endian) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, + error.EndOfStream, error.Overflow, error.StreamTooLong => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, error.UnsupportedDwarfVersion => return error.UnsupportedDebugInfo, + }; + + di.unwind = unwind; } -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { if (di.unwind == null) try module.loadUnwindInfo(gpa, di); return context.unwindFrameDwarf(&di.unwind.?, module.load_offset, null); } @@ -140,6 +177,7 @@ const Dwarf = std.debug.Dwarf; const elf = std.elf; const mem = std.mem; const UnwindContext = std.debug.SelfInfo.UnwindContext; +const Error = std.debug.SelfInfo.Error; const builtin = @import("builtin"); const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 4f9d98353b50..674c6adae588 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -5,7 +5,7 @@ handle: windows.HMODULE, pub fn key(m: WindowsModule) usize { return m.base_address; } -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !WindowsModule { +pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) std.debug.SelfInfo.Error!WindowsModule { if (lookupInCache(cache, address)) |m| return m; { // Check a new module hasn't been loaded @@ -29,18 +29,23 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !WindowsModul if 
(lookupInCache(cache, address)) |m| return m; return error.MissingDebugInfo; } -pub fn getSymbolAtAddress(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - if (!di.loaded) try module.loadLocationInfo(gpa, di); +pub fn getSymbolAtAddress(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, address: usize) std.debug.SelfInfo.Error!std.debug.Symbol { + if (!di.loaded) module.loadDebugInfo(gpa, di) catch |err| switch (err) { + error.OutOfMemory, error.InvalidDebugInfo, error.MissingDebugInfo, error.Unexpected => |e| return e, + error.FileNotFound => return error.MissingDebugInfo, + error.UnknownPDBVersion => return error.UnsupportedDebugInfo, + else => return error.ReadFailed, + }; // Translate the runtime address into a virtual address into the module const vaddr = address - module.base_address; if (di.pdb != null) { - if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; + if (di.getSymbolFromPdb(vaddr) catch return error.InvalidDebugInfo) |symbol| return symbol; } if (di.dwarf) |*dwarf| { const dwarf_address = vaddr + di.coff_image_base; - return dwarf.getSymbol(gpa, native_endian, dwarf_address); + return dwarf.getSymbol(gpa, native_endian, dwarf_address) catch return error.InvalidDebugInfo; } return error.MissingDebugInfo; @@ -59,7 +64,7 @@ fn lookupInCache(cache: *const LookupCache, address: usize) ?WindowsModule { } return null; } -fn loadLocationInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) !void { +fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) !void { const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); const mapped = mapped_ptr[0..module.size]; var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; @@ -151,7 +156,7 @@ fn loadLocationInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { error.FileNotFound, error.IsDir => break :pdb, - 
else => return err, + else => |e| return e, }; try di.pdb.?.parseInfoStream(); try di.pdb.?.parseDbiStream(); From 405075f7455492717ac6cba505603eb74e4f54b9 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 4 Sep 2025 00:39:38 +0100 Subject: [PATCH 21/85] SelfInfo: load eh_frame/debug_frame from ELF file if eh_frame_hdr omitted --- lib/std/debug/Dwarf/ElfModule.zig | 88 ++++++++++++++++++++++------ lib/std/debug/SelfInfo/ElfModule.zig | 73 +++++++++++++++++------ 2 files changed, 124 insertions(+), 37 deletions(-) diff --git a/lib/std/debug/Dwarf/ElfModule.zig b/lib/std/debug/Dwarf/ElfModule.zig index 02b94e580de5..4d425b1718fb 100644 --- a/lib/std/debug/Dwarf/ElfModule.zig +++ b/lib/std/debug/Dwarf/ElfModule.zig @@ -3,6 +3,13 @@ dwarf: Dwarf, +/// If we encounter a `.eh_frame` section while loading the ELF module, it is stored here and may be +/// used with `Dwarf.Unwind` for call stack unwinding. +eh_frame: ?UnwindSection, +/// If we encounter a `.debug_frame` section while loading the ELF module, it is stored here and may +/// be used with `Dwarf.Unwind` for call stack unwinding. +debug_frame: ?UnwindSection, + /// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that /// this memory can be unmapped by `ElfModule.deinit`. mapped_file: []align(std.heap.page_size_min) const u8, @@ -11,10 +18,18 @@ mapped_file: []align(std.heap.page_size_min) const u8, /// be unmapped by `ElfModule.deinit`. 
mapped_debug_file: ?[]align(std.heap.page_size_min) const u8, -pub fn deinit(em: *ElfModule, allocator: Allocator) void { - em.dwarf.deinit(allocator); +pub const UnwindSection = struct { + vaddr: u64, + bytes: []const u8, + owned: bool, +}; + +pub fn deinit(em: *ElfModule, gpa: Allocator) void { + em.dwarf.deinit(gpa); std.posix.munmap(em.mapped_file); if (em.mapped_debug_file) |m| std.posix.munmap(m); + if (em.eh_frame) |s| if (s.owned) gpa.free(s.bytes); + if (em.debug_frame) |s| if (s.owned) gpa.free(s.bytes); } pub const LoadError = error{ @@ -98,7 +113,6 @@ pub fn load( )[0..hdr.e_shnum]; var sections: Dwarf.SectionArray = @splat(null); - // Combine section list. This takes ownership over any owned sections from the parent scope. if (parent_sections) |ps| { for (ps, §ions) |*parent, *section_elem| { @@ -110,6 +124,12 @@ pub fn load( } errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); + var eh_frame_section: ?UnwindSection = null; + errdefer if (eh_frame_section) |s| if (s.owned) gpa.free(s.bytes); + + var debug_frame_section: ?UnwindSection = null; + errdefer if (debug_frame_section) |s| if (s.owned) gpa.free(s.bytes); + var separate_debug_filename: ?[]const u8 = null; var separate_debug_crc: ?u32 = null; @@ -128,17 +148,35 @@ pub fn load( continue; } - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |sect, i| { - if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + const section_id: union(enum) { + dwarf: Dwarf.Section.Id, + eh_frame, + debug_frame, + } = s: { + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |s| { + if (mem.eql(u8, "." 
++ s.name, name)) { + break :s .{ .dwarf = @enumFromInt(s.value) }; + } + } + if (mem.eql(u8, ".eh_frame", name)) break :s .eh_frame; + if (mem.eql(u8, ".debug_frame", name)) break :s .debug_frame; + continue; + }; + + switch (section_id) { + .dwarf => |i| if (sections[@intFromEnum(i)] != null) continue, + .eh_frame => if (eh_frame_section != null) continue, + .debug_frame => if (debug_frame_section != null) continue, } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; - sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_reader: Reader = .fixed(section_bytes); + const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + + const section_bytes: []const u8, const section_owned: bool = section: { + if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) { + break :section .{ raw_section_bytes, false }; + } + var section_reader: Reader = .fixed(raw_section_bytes); const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue; if (chdr.ch_type != .ZLIB) continue; @@ -153,14 +191,24 @@ pub fn load( Dwarf.invalidDebugInfoDetected(); continue; } - break :blk .{ - .data = try decompressed_section.toOwnedSlice(gpa), - .owned = true, - }; - } else .{ - .data = section_bytes, - .owned = false, + break :section .{ try decompressed_section.toOwnedSlice(gpa), true }; }; + switch (section_id) { + .dwarf => |id| sections[@intFromEnum(id)] = .{ + .data = section_bytes, + .owned = section_owned, + }, + .eh_frame => eh_frame_section = .{ + .vaddr = shdr.sh_addr, + .bytes = section_bytes, + .owned = section_owned, + }, + .debug_frame => debug_frame_section = .{ + .vaddr = shdr.sh_addr, + .bytes = section_bytes, + .owned = section_owned, + }, + } } const missing_debug_info = @@ -305,9 +353,11 @@ 
pub fn load( var dwarf: Dwarf = .{ .sections = sections }; try dwarf.open(gpa, endian); return .{ + .dwarf = dwarf, + .eh_frame = eh_frame_section, + .debug_frame = debug_frame_section, .mapped_file = parent_mapped_mem orelse mapped_mem, .mapped_debug_file = if (parent_mapped_mem != null) mapped_mem else null, - .dwarf = dwarf, }; } diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 25ce1827b7ea..8d06be151ec1 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -8,10 +8,10 @@ pub const LookupCache = void; pub const DebugInfo = struct { loaded_elf: ?Dwarf.ElfModule, - unwind: ?Dwarf.Unwind, + unwind: [2]?Dwarf.Unwind, pub const init: DebugInfo = .{ .loaded_elf = null, - .unwind = null, + .unwind = @splat(null), }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa); @@ -143,30 +143,67 @@ pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugIn => return error.InvalidDebugInfo, }; } -fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { - const section_bytes = module.gnu_eh_frame orelse return error.MissingDebugInfo; // MLUGG TODO: load from file - - const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; - const header = Dwarf.Unwind.EhFrameHeader.parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian) catch |err| switch (err) { - error.ReadFailed => unreachable, // it's all fixed buffers - error.InvalidDebugInfo => |e| return e, - error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, - error.UnsupportedAddrSize => return error.UnsupportedDebugInfo, - }; - - var unwind: Dwarf.Unwind = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); +fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void { unwind.prepareLookup(gpa, @sizeOf(usize), native_endian) catch 
|err| switch (err) { error.ReadFailed => unreachable, // it's all fixed buffers error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, error.EndOfStream, error.Overflow, error.StreamTooLong => return error.InvalidDebugInfo, error.UnsupportedAddrSize, error.UnsupportedDwarfVersion => return error.UnsupportedDebugInfo, }; - - di.unwind = unwind; +} +fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { + var buf: [2]Dwarf.Unwind = undefined; + const unwinds: []Dwarf.Unwind = if (module.gnu_eh_frame) |section_bytes| unwinds: { + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - module.load_offset; + const header = Dwarf.Unwind.EhFrameHeader.parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo => |e| return e, + error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, + error.UnsupportedAddrSize => return error.UnsupportedDebugInfo, + }; + buf[0] = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + break :unwinds buf[0..1]; + } else unwinds: { + // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` + // section, but we'll have to load the binary to get at it. + try module.loadDwarf(gpa, di); + const opt_debug_frame = &di.loaded_elf.?.debug_frame; + const opt_eh_frame = &di.loaded_elf.?.eh_frame; + // If both are present, we can't just pick one -- the info could be split between them. + // `.debug_frame` is likely to be the more complete section, so we'll prioritize that one. 
+ if (opt_debug_frame.*) |*debug_frame| { + buf[0] = .initSection(.debug_frame, debug_frame.vaddr, debug_frame.bytes); + if (opt_eh_frame.*) |*eh_frame| { + buf[1] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); + break :unwinds buf[0..2]; + } + break :unwinds buf[0..1]; + } else if (opt_eh_frame.*) |eh_frame| { + buf[0] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); + break :unwinds buf[0..1]; + } + return error.MissingDebugInfo; + }; + errdefer for (unwinds) |*u| u.deinit(gpa); + for (unwinds) |*u| try prepareUnwindLookup(u, gpa); + switch (unwinds.len) { + 0 => unreachable, + 1 => di.unwind = .{ unwinds[0], null }, + 2 => di.unwind = .{ unwinds[0], unwinds[1] }, + else => unreachable, + } } pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - return context.unwindFrameDwarf(&di.unwind.?, module.load_offset, null); + if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); + std.debug.assert(di.unwind[0] != null); + for (&di.unwind) |*opt_unwind| { + const unwind = &(opt_unwind.* orelse break); + return context.unwindFrameDwarf(unwind, module.load_offset, null) catch |err| switch (err) { + error.MissingDebugInfo => continue, // try the next one + else => |e| return e, + }; + } + return error.MissingDebugInfo; } const ElfModule = @This(); From ba5d9d5a4198e49c6b9c1d02e4f97e24412f8f65 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 4 Sep 2025 01:03:02 +0100 Subject: [PATCH 22/85] remove redundant test turns out this actually has coverage in std.debug --- lib/std/debug/SelfInfo.zig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 1f913efeeaf6..c5e163eef2cc 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -87,11 +87,6 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { self.modules.deinit(gpa); if (Module.LookupCache 
!= void) self.lookup_cache.deinit(gpa); } -test { - // `std.debug` does not currently utilize `deinit`, as it keeps hold of debug info for the - // whole lifetime of the program. Let's try to avoid it bitrotting. - _ = &deinit; -} pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { comptime assert(supports_unwinding); From 67fa5664b71202a2b288aa94b9c1bf10de9bdac3 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 5 Sep 2025 15:17:49 +0100 Subject: [PATCH 23/85] std.posix: mark getcontext as unsupported by default --- lib/std/posix.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 67d337a08189..50a210ece14a 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -47,6 +47,7 @@ else switch (native_os) { .linux => linux, .plan9 => std.os.plan9, else => struct { + pub const getcontext = {}; pub const ucontext_t = void; pub const pid_t = void; pub const pollfd = void; From d4f710791f88c29e08659241e7976c08fe05ba49 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 5 Sep 2025 15:18:18 +0100 Subject: [PATCH 24/85] tweaks --- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/SelfInfo/ElfModule.zig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index f50b9ed1639d..8efba51beed5 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -56,7 +56,7 @@ pub const Range = struct { pub const Section = struct { data: []const u8, - // If `data` is owned by this Dwarf. + /// If `data` is owned by this Dwarf. 
owned: bool, pub const Id = enum { diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 8d06be151ec1..e69600be1411 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -178,7 +178,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro break :unwinds buf[0..2]; } break :unwinds buf[0..1]; - } else if (opt_eh_frame.*) |eh_frame| { + } else if (opt_eh_frame.*) |*eh_frame| { buf[0] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); break :unwinds buf[0..1]; } From 5709369d059ba107accaadb4b977281e2ba843ed Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 5 Sep 2025 19:43:08 +0100 Subject: [PATCH 25/85] std.debug: improve the APIs and stuff --- lib/std/debug.zig | 1106 +++++++++-------------- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/SelfInfo.zig | 17 +- lib/std/debug/SelfInfo/DarwinModule.zig | 6 +- 4 files changed, 456 insertions(+), 675 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index d005dd784199..bc2610fb7cea 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1,19 +1,19 @@ -const builtin = @import("builtin"); const std = @import("std.zig"); const math = std.math; const mem = std.mem; const posix = std.posix; const fs = std.fs; const testing = std.testing; -const root = @import("root"); +const Allocator = mem.Allocator; const File = std.fs.File; const windows = std.os.windows; -const native_arch = builtin.cpu.arch; -const native_os = builtin.os.tag; -const native_endian = native_arch.endian(); const Writer = std.Io.Writer; const tty = std.Io.tty; +const builtin = @import("builtin"); +const native_arch = builtin.cpu.arch; +const native_os = builtin.os.tag; + pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); @@ -156,6 +156,11 @@ pub const Symbol = struct { name: ?[]const u8, compile_unit_name: ?[]const u8, source_location: 
?SourceLocation, + pub const unknown: Symbol = .{ + .name = null, + .compile_unit_name = null, + .source_location = null, + }; }; /// Deprecated because it returns the optimization mode of the standard @@ -186,7 +191,7 @@ pub const sys_can_stack_trace = switch (builtin.cpu.arch) { .wasm64, => native_os == .emscripten and builtin.mode == .Debug, - // `@returnAddress()` is unsupported in LLVM 13. + // `@returnAddress()` is unsupported in LLVM 21. .bpfel, .bpfeb, => false, @@ -234,8 +239,7 @@ pub fn print(comptime fmt: []const u8, args: anytype) void { /// TODO multithreaded awareness /// Marked `inline` to propagate a comptime-known error to callers. pub inline fn getSelfDebugInfo() !*SelfInfo { - if (builtin.strip_debug_info) return error.MissingDebugInfo; - if (!SelfInfo.target_supported) return error.UnsupportedOperatingSystem; + if (!SelfInfo.target_supported) return error.UnsupportedTarget; const S = struct { var self_info: SelfInfo = .init; }; @@ -320,40 +324,11 @@ test dumpHexFallible { try std.testing.expectEqualStrings(expected, aw.written()); } -/// Tries to print the current stack trace to stderr, unbuffered, and ignores any error returned. -pub fn dumpCurrentStackTrace(start_addr: ?usize) void { - const stderr = lockStderrWriter(&.{}); - defer unlockStderrWriter(); - nosuspend dumpCurrentStackTraceToWriter(start_addr, stderr) catch return; -} - -/// Prints the current stack trace to the provided writer. 
-pub fn dumpCurrentStackTraceToWriter(start_addr: ?usize, writer: *Writer) !void { - if (builtin.target.cpu.arch.isWasm()) { - if (native_os == .wasi) { - try writer.writeAll("Unable to dump stack trace: not implemented for Wasm\n"); - } - return; - } - if (builtin.strip_debug_info) { - try writer.writeAll("Unable to dump stack trace: debug info stripped\n"); - return; - } - const debug_info = getSelfDebugInfo() catch |err| { - try writer.print("Unable to dump stack trace: Unable to open debug info: {s}\n", .{@errorName(err)}); - return; - }; - writeCurrentStackTrace(writer, debug_info, tty.detectConfig(.stderr()), start_addr) catch |err| { - try writer.print("Unable to dump stack trace: {s}\n", .{@errorName(err)}); - return; - }; -} - pub const have_ucontext = posix.ucontext_t != void; /// Platform-specific thread state. This contains register state, and on some platforms /// information about the stack. This is not safe to trivially copy, because some platforms -/// use internal pointers within this structure. To make a copy, use `copyContext`. +/// use internal pointers within this structure. After copying, call `relocateContext`. pub const ThreadContext = blk: { if (native_os == .windows) { break :blk windows.CONTEXT; @@ -363,22 +338,12 @@ pub const ThreadContext = blk: { break :blk void; } }; - -/// Copies one context to another, updating any internal pointers -pub fn copyContext(source: *const ThreadContext, dest: *ThreadContext) void { - if (!have_ucontext) return {}; - dest.* = source.*; - relocateContext(dest); -} - -/// Updates any internal pointers in the context to reflect its current location -pub fn relocateContext(context: *ThreadContext) void { - return switch (native_os) { - .macos => { - context.mcontext = &context.__mcontext_data; - }, +/// Updates any internal pointers of a `ThreadContext` after the caller copies it. 
+pub fn relocateContext(dest: *ThreadContext) void { + switch (native_os) { + .macos => dest.mcontext = &dest.__mcontext_data, else => {}, - }; + } } pub const have_getcontext = @TypeOf(posix.system.getcontext) != void; @@ -409,142 +374,6 @@ pub inline fn getContext(context: *ThreadContext) bool { return result; } -/// Tries to print the stack trace starting from the supplied base pointer to stderr, -/// unbuffered, and ignores any error returned. -/// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(context: *ThreadContext, stderr: *Writer) void { - nosuspend { - if (builtin.target.cpu.arch.isWasm()) { - if (native_os == .wasi) { - stderr.print("Unable to dump stack trace: not implemented for Wasm\n", .{}) catch return; - } - return; - } - if (builtin.strip_debug_info) { - stderr.print("Unable to dump stack trace: debug info stripped\n", .{}) catch return; - return; - } - const debug_info = getSelfDebugInfo() catch |err| { - stderr.print("Unable to dump stack trace: Unable to open debug info: {s}\n", .{@errorName(err)}) catch return; - return; - }; - const tty_config = tty.detectConfig(.stderr()); - if (native_os == .windows) { - // On x86_64 and aarch64, the stack will be unwound using RtlVirtualUnwind using the context - // provided by the exception handler. On x86, RtlVirtualUnwind doesn't exist. Instead, a new backtrace - // will be captured and frames prior to the exception will be filtered. - // The caveat is that RtlCaptureStackBackTrace does not include the KiUserExceptionDispatcher frame, - // which is where the IP in `context` points to, so it can't be used as start_addr. - // Instead, start_addr is recovered from the stack. 
- const start_addr = if (builtin.cpu.arch == .x86) @as(*const usize, @ptrFromInt(context.getRegs().bp + 4)).* else null; - writeStackTraceWindows(stderr, debug_info, tty_config, context, start_addr) catch return; - return; - } - - var it = StackIterator.initWithContext(null, debug_info, context, @frameAddress()) catch return; - defer it.deinit(); - - // DWARF unwinding on aarch64-macos is not complete so we need to get pc address from mcontext - const pc_addr = it.unwind_state.?.dwarf_context.pc; - printSourceAtAddress(debug_info, stderr, pc_addr, tty_config) catch return; - - while (it.next()) |return_address| { - printLastUnwindError(&it, debug_info, stderr, tty_config); - - // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, - // therefore, we do a check for `return_address == 0` before subtracting 1 from it to avoid - // an overflow. We do not need to signal `StackIterator` as it will correctly detect this - // condition on the subsequent iteration and return `null` thus terminating the loop. - // same behaviour for x86-windows-msvc - const address = return_address -| 1; - printSourceAtAddress(debug_info, stderr, address, tty_config) catch return; - } else printLastUnwindError(&it, debug_info, stderr, tty_config); - } -} - -/// Returns a slice with the same pointer as addresses, with a potentially smaller len. -/// On Windows, when first_address is not null, we ask for at least 32 stack frames, -/// and then try to find the first address. If addresses.len is more than 32, we -/// capture that many stack frames exactly, and then look for the first address, -/// chopping off the irrelevant frames and shifting so that the returned addresses pointer -/// equals the passed in addresses pointer. 
-pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackTrace) void { - if (native_os == .windows) { - const addrs = stack_trace.instruction_addresses; - const first_addr = first_address orelse { - stack_trace.index = walkStackWindows(addrs[0..], null); - return; - }; - var addr_buf_stack: [32]usize = undefined; - const addr_buf = if (addr_buf_stack.len > addrs.len) addr_buf_stack[0..] else addrs; - const n = walkStackWindows(addr_buf[0..], null); - const first_index = for (addr_buf[0..n], 0..) |addr, i| { - if (addr == first_addr) { - break i; - } - } else { - stack_trace.index = 0; - return; - }; - const end_index = @min(first_index + addrs.len, n); - const slice = addr_buf[first_index..end_index]; - // We use a for loop here because slice and addrs may alias. - for (slice, 0..) |addr, i| { - addrs[i] = addr; - } - stack_trace.index = slice.len; - } else { - if (builtin.cpu.arch == .powerpc64) { - // https://github.com/ziglang/zig/issues/24970 - stack_trace.index = 0; - return; - } - var context: ThreadContext = undefined; - const has_context = getContext(&context); - - var it = (if (has_context) blk: { - break :blk StackIterator.initWithContext(first_address, getSelfDebugInfo() catch break :blk null, &context) catch null; - } else null) orelse StackIterator.init(first_address, null); - defer it.deinit(); - for (stack_trace.instruction_addresses, 0..) |*addr, i| { - addr.* = it.next() orelse { - stack_trace.index = i; - return; - }; - } - stack_trace.index = stack_trace.instruction_addresses.len; - } -} - -/// Tries to print a stack trace to stderr, unbuffered, and ignores any error returned. 
-/// TODO multithreaded awareness -pub fn dumpStackTrace(stack_trace: std.builtin.StackTrace) void { - nosuspend { - if (builtin.target.cpu.arch.isWasm()) { - if (native_os == .wasi) { - const stderr = lockStderrWriter(&.{}); - defer unlockStderrWriter(); - stderr.writeAll("Unable to dump stack trace: not implemented for Wasm\n") catch return; - } - return; - } - const stderr = lockStderrWriter(&.{}); - defer unlockStderrWriter(); - if (builtin.strip_debug_info) { - stderr.writeAll("Unable to dump stack trace: debug info stripped\n") catch return; - return; - } - const debug_info = getSelfDebugInfo() catch |err| { - stderr.print("Unable to dump stack trace: Unable to open debug info: {s}\n", .{@errorName(err)}) catch return; - return; - }; - writeStackTrace(stack_trace, stderr, debug_info, tty.detectConfig(.stderr())) catch |err| { - stderr.print("Unable to dump stack trace: {s}\n", .{@errorName(err)}) catch return; - return; - }; - } -} - /// Invokes detectable illegal behavior when `ok` is `false`. /// /// In Debug and ReleaseSafe modes, calls to this function are always @@ -613,6 +442,24 @@ var panicking = std.atomic.Value(u8).init(0); /// This is used to catch and handle panics triggered by the panic handler. threadlocal var panic_stage: usize = 0; +/// For backends that cannot handle the language features depended on by the +/// default panic handler, we will use a simpler implementation. +const use_trap_panic = switch (builtin.zig_backend) { + .stage2_aarch64, + .stage2_arm, + .stage2_powerpc, + .stage2_riscv64, + .stage2_spirv, + .stage2_wasm, + .stage2_x86, + => true, + .stage2_x86_64 => switch (builtin.target.ofmt) { + .elf, .macho => false, + else => true, + }, + else => false, +}; + /// Dumps a stack trace to standard error, then aborts. 
pub fn defaultPanic( msg: []const u8, @@ -620,8 +467,8 @@ pub fn defaultPanic( ) noreturn { @branchHint(.cold); - // For backends that cannot handle the language features depended on by the - // default panic handler, we have a simpler panic handler: + if (use_trap_panic) @trap(); + switch (builtin.zig_backend) { .stage2_aarch64, .stage2_arm, @@ -686,41 +533,48 @@ pub fn defaultPanic( resetSegfaultHandler(); } - // Note there is similar logic in handleSegfaultPosix and handleSegfaultWindowsExtra. - nosuspend switch (panic_stage) { + // There is very similar logic to the following in `handleSegfault`. + switch (panic_stage) { 0 => { panic_stage = 1; - _ = panicking.fetchAdd(1, .seq_cst); - { + trace: { + const tty_config = tty.detectConfig(.stderr()); + const stderr = lockStderrWriter(&.{}); defer unlockStderrWriter(); if (builtin.single_threaded) { - stderr.print("panic: ", .{}) catch posix.abort(); + stderr.print("panic: ", .{}) catch break :trace; } else { const current_thread_id = std.Thread.getCurrentId(); - stderr.print("thread {} panic: ", .{current_thread_id}) catch posix.abort(); + stderr.print("thread {} panic: ", .{current_thread_id}) catch break :trace; } - stderr.print("{s}\n", .{msg}) catch posix.abort(); + stderr.print("{s}\n", .{msg}) catch break :trace; - if (@errorReturnTrace()) |t| dumpStackTrace(t.*); - dumpCurrentStackTraceToWriter(first_trace_addr orelse @returnAddress(), stderr) catch {}; + if (@errorReturnTrace()) |t| if (t.index > 0) { + stderr.writeAll("error return context:\n") catch break :trace; + writeStackTrace(t, stderr, tty_config) catch break :trace; + stderr.writeAll("\nstack trace:\n") catch break :trace; + }; + writeCurrentStackTrace(.{ + .first_address = first_trace_addr orelse @returnAddress(), + .allow_unsafe_unwind = true, // we're crashing anyway, give it our all! 
+ }, stderr, tty_config) catch break :trace; } waitForOtherThreadToFinishPanicking(); }, 1 => { panic_stage = 2; - // A panic happened while trying to print a previous panic message. // We're still holding the mutex but that's fine as we're going to // call abort(). fs.File.stderr().writeAll("aborting due to recursive panic\n") catch {}; }, else => {}, // Panicked while printing the recursive panic message. - }; + } posix.abort(); } @@ -739,340 +593,307 @@ fn waitForOtherThreadToFinishPanicking() void { } } -pub fn writeStackTrace( - stack_trace: std.builtin.StackTrace, - writer: *Writer, - debug_info: *SelfInfo, - tty_config: tty.Config, -) !void { - if (builtin.strip_debug_info) return error.MissingDebugInfo; - var frame_index: usize = 0; - var frames_left: usize = @min(stack_trace.index, stack_trace.instruction_addresses.len); - - while (frames_left != 0) : ({ - frames_left -= 1; - frame_index = (frame_index + 1) % stack_trace.instruction_addresses.len; - }) { - const return_address = stack_trace.instruction_addresses[frame_index]; - try printSourceAtAddress(debug_info, writer, return_address -| 1, tty_config); - } +pub const StackUnwindOptions = struct { + /// If not `null`, we will ignore all frames up until this return address. This is typically + /// used to omit intermediate handling code (for instance, a panic handler and its machinery) + /// from stack traces. + first_address: ?usize = null, + /// If not `null`, we will unwind from this `ThreadContext` instead of the current top of the + /// stack. The main use case here is printing stack traces from signal handlers, where the + /// kernel provides a `*ThreadContext` of the state before the signal. + context: ?*const ThreadContext = null, + /// If `true`, stack unwinding strategies which may cause crashes are used as a last resort. + /// If `false`, only known-safe mechanisms will be attempted. 
+ allow_unsafe_unwind: bool = false, +}; - if (stack_trace.index > stack_trace.instruction_addresses.len) { - const dropped_frames = stack_trace.index - stack_trace.instruction_addresses.len; +/// Capture and return the current stack trace. The returned `StackTrace` stores its addresses in +/// the given buffer, so `addr_buf` must have a lifetime at least equal to the `StackTrace`. +/// +/// See `writeCurrentStackTrace` to immediately print the trace instead of capturing it. +pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) std.builtin.StackTrace { + var context_buf: ThreadContext = undefined; + var it: StackIterator = .init(options.context, &context_buf); + defer it.deinit(); + if (!it.stratOk(options.allow_unsafe_unwind)) { + return .{ .index = 0, .instruction_addresses = &.{} }; + } + var frame_idx: usize = 0; + var wait_for = options.first_address; + while (true) switch (it.next()) { + .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break, + .end => break, + .frame => |return_address| { + if (wait_for) |target| { + if (return_address != target) continue; + wait_for = null; + } + if (frame_idx < addr_buf.len) addr_buf[frame_idx] = return_address; + frame_idx += 1; + }, + }; + return .{ + .index = frame_idx, + .instruction_addresses = addr_buf[0..@min(frame_idx, addr_buf.len)], + }; +} +/// Write the current stack trace to `writer`, annotated with source locations. +/// +/// See `captureCurrentStackTrace` to capture the trace addresses into a buffer instead of printing. 
+pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_config: tty.Config) Writer.Error!void { + const di_gpa = getDebugInfoAllocator(); + const di = getSelfDebugInfo() catch |err| switch (err) { + error.UnsupportedTarget => { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: debug info unavailable for target\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + }, + }; + var context_buf: ThreadContext = undefined; + var it: StackIterator = .init(options.context, &context_buf); + defer it.deinit(); + if (!it.stratOk(options.allow_unsafe_unwind)) { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: safe unwind unavailable for target\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + } + var wait_for = options.first_address; + var printed_any_frame = false; + while (true) switch (it.next()) { + .switch_to_fp => |unwind_error| { + const module_name = di.getModuleNameForAddress(di_gpa, unwind_error.address) catch "???"; + const caption: []const u8 = switch (unwind_error.err) { + error.MissingDebugInfo => "unwind info unavailable", + error.InvalidDebugInfo => "unwind info invalid", + error.UnsupportedDebugInfo => "unwind info unsupported", + error.ReadFailed => "filesystem error", + error.OutOfMemory => "out of memory", + error.Unexpected => "unexpected error", + }; + if (it.stratOk(options.allow_unsafe_unwind)) { + tty_config.setColor(writer, .dim) catch {}; + try writer.print( + "Unwind error at address `{s}:0x{x}` ({s}), remaining frames may be incorrect\n", + .{ module_name, unwind_error.address, caption }, + ); + tty_config.setColor(writer, .reset) catch {}; + } else { + tty_config.setColor(writer, .dim) catch {}; + try writer.print( + "Unwind error at address `{s}:0x{x}` ({s}), stopping trace early\n", + .{ module_name, unwind_error.address, caption }, + ); + tty_config.setColor(writer, .reset) catch {}; + return; + } + }, 
+ .end => break, + .frame => |return_address| { + if (wait_for) |target| { + if (return_address != target) continue; + wait_for = null; + } + try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); + printed_any_frame = true; + }, + }; + if (!printed_any_frame) return writer.writeAll("(empty stack trace)\n"); +} +/// A thin wrapper around `writeCurrentStackTrace` which writes to stderr and ignores write errors. +pub fn dumpCurrentStackTrace(options: StackUnwindOptions) void { + const tty_config = tty.detectConfig(.stderr()); + const stderr = lockStderrWriter(&.{}); + defer unlockStderrWriter(); + writeCurrentStackTrace(options, stderr, tty_config) catch |err| switch (err) { + error.WriteFailed => {}, + }; +} +/// Write a previously captured stack trace to `writer`, annotated with source locations. +pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_config: tty.Config) Writer.Error!void { + const di_gpa = getDebugInfoAllocator(); + const di = getSelfDebugInfo() catch |err| switch (err) { + error.UnsupportedTarget => { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: debug info unavailable for target\n\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + }, + }; + if (st.index == 0) return writer.writeAll("(empty stack trace)\n"); + const captured_frames = @min(st.index, st.instruction_addresses.len); + for (st.instruction_addresses[0..captured_frames]) |return_address| { + try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); + } + if (st.index > captured_frames) { tty_config.setColor(writer, .bold) catch {}; - try writer.print("({d} additional stack frames skipped...)\n", .{dropped_frames}); + try writer.print("({d} additional stack frames skipped...)\n", .{st.index - captured_frames}); tty_config.setColor(writer, .reset) catch {}; } } +/// A thin wrapper around `writeStackTrace` which writes to stderr and ignores write errors. 
+pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void { + const tty_config = tty.detectConfig(.stderr()); + const stderr = lockStderrWriter(&.{}); + defer unlockStderrWriter(); + writeStackTrace(st, stderr, tty_config) catch |err| switch (err) { + error.WriteFailed => {}, + }; +} -pub const StackIterator = struct { - // Skip every frame before this address is found. - first_address: ?usize, - // Last known value of the frame pointer register. +const StackIterator = union(enum) { + /// Unwinding using debug info (e.g. DWARF CFI). + di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, + /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable. fp: usize, - // When SelfInfo and a register context is available, this iterator can unwind - // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer), - // using DWARF and MachO unwind info. - unwind_state: if (have_ucontext) ?struct { - debug_info: *SelfInfo, - dwarf_context: SelfInfo.UnwindContext, - last_error: ?SelfInfo.Error = null, - failed: bool = false, - } else void = if (have_ucontext) null else {}, - - pub fn init(first_address: ?usize, fp: usize) StackIterator { - if (native_arch.isSPARC()) { + /// It is important that this function is marked `inline` so that it can safely use + /// `@frameAddress` and `getContext` as the caller's stack frame and our own are one + /// and the same. + inline fn init(context_opt: ?*const ThreadContext, context_buf: *ThreadContext) StackIterator { + if (builtin.cpu.arch.isSPARC()) { // Flush all the register windows on stack. 
- asm volatile (if (builtin.cpu.has(.sparc, .v9)) - "flushw" - else - "ta 3" // ST_FLUSH_WINDOWS - ::: .{ .memory = true }); - } - - return .{ - .first_address = first_address, - .fp = fp, - }; - } - - pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *posix.ucontext_t, fp: usize) !StackIterator { - if (SelfInfo.supports_unwinding) { - var iterator = init(first_address, fp); - iterator.unwind_state = .{ - .debug_info = debug_info, - .dwarf_context = try SelfInfo.UnwindContext.init(getDebugInfoAllocator(), context), - }; - return iterator; - } - - return init(first_address, fp); - } - - pub fn deinit(it: *StackIterator) void { - if (have_ucontext and it.unwind_state != null) it.unwind_state.?.dwarf_context.deinit(); - } - - pub fn getLastError(it: *StackIterator) ?struct { - err: SelfInfo.Error, - address: usize, - } { - if (!have_ucontext) return null; - if (it.unwind_state) |*unwind_state| { - if (unwind_state.last_error) |err| { - unwind_state.last_error = null; - return .{ - .err = err, - .address = unwind_state.dwarf_context.pc, - }; + if (builtin.cpu.has(.sparc, .v9)) { + asm volatile ("flushw" ::: .{ .memory = true }); + } else { + asm volatile ("ta 3" ::: .{ .memory = true }); // ST_FLUSH_WINDOWS } } - - return null; - } - - // Offset of the saved BP wrt the frame pointer. - const fp_offset = if (native_arch.isRISCV()) - // On RISC-V the frame pointer points to the top of the saved register - // area, on pretty much every other architecture it points to the stack - // slot where the previous frame pointer is saved. - 2 * @sizeOf(usize) - else if (native_arch.isSPARC()) - // On SPARC the previous frame pointer is stored at 14 slots past %fp+BIAS. - 14 * @sizeOf(usize) - else - 0; - - const fp_bias = if (native_arch.isSPARC()) - // On SPARC frame pointers are biased by a constant. - 2047 - else - 0; - - // Positive offset of the saved PC wrt the frame pointer. 
- const pc_offset = if (native_arch == .powerpc64le) - 2 * @sizeOf(usize) - else - @sizeOf(usize); - - pub fn next(it: *StackIterator) ?usize { - var address = it.nextInternal() orelse return null; - - if (it.first_address) |first_address| { - while (address != first_address) { - address = it.nextInternal() orelse return null; - } - it.first_address = null; + if (context_opt) |context| { + context_buf.* = context.*; + relocateContext(context_buf); + return .{ .di = .init(getDebugInfoAllocator(), context_buf) }; } - - return address; - } - - fn nextInternal(it: *StackIterator) ?usize { - if (have_ucontext) { - if (it.unwind_state) |*unwind_state| { - if (!unwind_state.failed) { - if (unwind_state.dwarf_context.pc == 0) return null; - defer it.fp = unwind_state.dwarf_context.getFp() catch 0; - if (unwind_state.debug_info.unwindFrame(getDebugInfoAllocator(), &unwind_state.dwarf_context)) |return_address| { - return return_address; - } else |err| { - unwind_state.last_error = err; - unwind_state.failed = true; - - // Fall back to fp-based unwinding on the first failure. - // We can't attempt it again for other modules higher in the - // stack because the full register state won't have been unwound. - } - } - } + if (getContext(context_buf)) { + return .{ .di = .init(getDebugInfoAllocator(), context_buf) }; } - - if (builtin.omit_frame_pointer) return null; - - const fp = if (comptime native_arch.isSPARC()) - // On SPARC the offset is positive. (!) - math.add(usize, it.fp, fp_offset) catch return null - else - math.sub(usize, it.fp, fp_offset) catch return null; - - // Sanity check. - if (fp == 0 or !mem.isAligned(fp, @alignOf(usize))) return null; - const new_fp = math.add(usize, @as(*usize, @ptrFromInt(fp)).*, fp_bias) catch - return null; - - // Sanity check: the stack grows down thus all the parent frames must be - // be at addresses that are greater (or equal) than the previous one. 
- // A zero frame pointer often signals this is the last frame, that case - // is gracefully handled by the next call to nextInternal. - if (new_fp != 0 and new_fp < it.fp) return null; - const new_pc = @as(*usize, @ptrFromInt(math.add(usize, fp, pc_offset) catch return null)).*; - - it.fp = new_fp; - - return new_pc; + return .{ .fp = @frameAddress() }; } -}; - -pub fn writeCurrentStackTrace( - writer: *Writer, - debug_info: *SelfInfo, - tty_config: tty.Config, - start_addr: ?usize, -) !void { - if (native_os == .windows) { - var context: ThreadContext = undefined; - assert(getContext(&context)); - return writeStackTraceWindows(writer, debug_info, tty_config, &context, start_addr); + fn deinit(si: *StackIterator) void { + switch (si.*) { + .fp => {}, + .di => |*unwind_context| unwind_context.deinit(), + } } - var context: ThreadContext = undefined; - const has_context = getContext(&context); - - var it = (if (has_context) blk: { - break :blk StackIterator.initWithContext(start_addr, debug_info, &context, @frameAddress()) catch null; - } else null) orelse StackIterator.init(start_addr, @frameAddress()); - defer it.deinit(); - while (it.next()) |return_address| { - printLastUnwindError(&it, debug_info, writer, tty_config); - - // On arm64 macOS, the address of the last frame is 0x0 rather than 0x1 as on x86_64 macOS, - // therefore, we do a check for `return_address == 0` before subtracting 1 from it to avoid - // an overflow. We do not need to signal `StackIterator` as it will correctly detect this - // condition on the subsequent iteration and return `null` thus terminating the loop. - // same behaviour for x86-windows-msvc - const address = return_address -| 1; - try printSourceAtAddress(debug_info, writer, address, tty_config); - } else printLastUnwindError(&it, debug_info, writer, tty_config); -} + /// On aarch64-macos, Apple mandate that the frame pointer is always used. + /// TODO: are there any other architectures with guarantees like this? 
+ const fp_unwind_is_safe = !builtin.omit_frame_pointer and builtin.cpu.arch == .aarch64 and builtin.os.tag.isDarwin(); -pub noinline fn walkStackWindows(addresses: []usize, existing_context: ?*const windows.CONTEXT) usize { - if (builtin.cpu.arch == .x86) { - // RtlVirtualUnwind doesn't exist on x86 - return windows.ntdll.RtlCaptureStackBackTrace(0, addresses.len, @as(**anyopaque, @ptrCast(addresses.ptr)), null); + /// Whether the current unwind strategy is allowed given `allow_unsafe`. + fn stratOk(it: *const StackIterator, allow_unsafe: bool) bool { + return switch (it.*) { + .di => true, + .fp => allow_unsafe or fp_unwind_is_safe, + }; } - const tib = &windows.teb().NtTib; + const Result = union(enum) { + /// A stack frame has been found; this is the corresponding return address. + frame: usize, + /// The end of the stack has been reached. + end, + /// We were using the `.di` strategy, but are now switching to `.fp` due to this error. + switch_to_fp: struct { + address: usize, + err: SelfInfo.Error, + }, + }; + fn next(it: *StackIterator) Result { + switch (it.*) { + .di => |*unwind_context| { + const di = getSelfDebugInfo() catch unreachable; + const di_gpa = getDebugInfoAllocator(); + if (di.unwindFrame(di_gpa, unwind_context)) |ra| { + if (ra == 0) return .end; + return .{ .frame = ra }; + } else |err| { + const bad_pc = unwind_context.pc; + it.* = .{ .fp = unwind_context.getFp() catch 0 }; + return .{ .switch_to_fp = .{ + .address = bad_pc, + .err = err, + } }; + } + }, + .fp => |fp| { + if (fp == 0) return .end; // we reached the "sentinel" base pointer + + const bp_addr = applyOffset(fp, bp_offset) orelse return .end; + const ra_addr = applyOffset(fp, ra_offset) orelse return .end; + + if (bp_addr == 0 or !mem.isAligned(bp_addr, @alignOf(usize)) or + ra_addr == 0 or !mem.isAligned(ra_addr, @alignOf(usize))) + { + // This isn't valid, but it most likely indicates end of stack. 
+ return .end; + } - var context: windows.CONTEXT = undefined; - if (existing_context) |context_ptr| { - context = context_ptr.*; - } else { - context = std.mem.zeroes(windows.CONTEXT); - windows.ntdll.RtlCaptureContext(&context); - } + const bp_ptr: *const usize = @ptrFromInt(bp_addr); + const ra_ptr: *const usize = @ptrFromInt(ra_addr); + const bp = applyOffset(bp_ptr.*, bp_bias) orelse return .end; - var i: usize = 0; - var image_base: windows.DWORD64 = undefined; - var history_table: windows.UNWIND_HISTORY_TABLE = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE); - - while (i < addresses.len) : (i += 1) { - const current_regs = context.getRegs(); - if (windows.ntdll.RtlLookupFunctionEntry(current_regs.ip, &image_base, &history_table)) |runtime_function| { - var handler_data: ?*anyopaque = null; - var establisher_frame: u64 = undefined; - _ = windows.ntdll.RtlVirtualUnwind( - windows.UNW_FLAG_NHANDLER, - image_base, - current_regs.ip, - runtime_function, - &context, - &handler_data, - &establisher_frame, - null, - ); - } else { - // leaf function - context.setIp(@as(*usize, @ptrFromInt(current_regs.sp)).*); - context.setSp(current_regs.sp + @sizeOf(usize)); - } + // The stack grows downards, so `bp > fp` should always hold. If it doesn't, this + // frame is invalid, so we'll treat it as though it we reached end of stack. The + // exception is address 0, which is a graceful end-of-stack signal, in which case + // *this* return address is valid and the *next* iteration will be the last. 
+ if (bp != 0 and bp <= fp) return .end; - const next_regs = context.getRegs(); - if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { - break; + it.fp = bp; + return .{ .frame = ra_ptr.* }; + }, } - - if (next_regs.ip == 0) { - break; - } - - addresses[i] = next_regs.ip; } - return i; -} - -pub fn writeStackTraceWindows( - writer: *Writer, - debug_info: *SelfInfo, - tty_config: tty.Config, - context: *const windows.CONTEXT, - start_addr: ?usize, -) !void { - var addr_buf: [1024]usize = undefined; - const n = walkStackWindows(addr_buf[0..], context); - const addrs = addr_buf[0..n]; - const start_i: usize = if (start_addr) |saddr| blk: { - for (addrs, 0..) |addr, i| { - if (addr == saddr) break :blk i; - } - return; - } else 0; - for (addrs[start_i..]) |addr| { - try printSourceAtAddress(debug_info, writer, addr - 1, tty_config); - } -} + /// Offset of the saved base pointer (previous frame pointer) wrt the frame pointer. + const bp_offset = off: { + // On RISC-V the frame pointer points to the top of the saved register + // area, on pretty much every other architecture it points to the stack + // slot where the previous frame pointer is saved. + if (native_arch.isRISCV()) break :off -2 * @sizeOf(usize); + // On SPARC the previous frame pointer is stored at 14 slots past %fp+BIAS. + if (native_arch.isSPARC()) break :off 14 * @sizeOf(usize); + break :off 0; + }; -fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, writer: *Writer, tty_config: tty.Config) void { - if (!have_ucontext) return; - if (it.getLastError()) |unwind_error| { - printUnwindError(debug_info, writer, unwind_error.address, unwind_error.err, tty_config) catch {}; - } -} + /// Offset of the saved return address wrt the frame pointer. 
+ const ra_offset = off: { + if (native_arch == .powerpc64le) break :off 2 * @sizeOf(usize); + break :off @sizeOf(usize); + }; -fn printUnwindError(debug_info: *SelfInfo, writer: *Writer, address: usize, unwind_err: SelfInfo.Error, tty_config: tty.Config) !void { - const module_name = debug_info.getModuleNameForAddress(getDebugInfoAllocator(), address) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.UnsupportedDebugInfo, error.ReadFailed => "???", - error.Unexpected, error.OutOfMemory => |e| return e, + /// Value to add to a base pointer after loading it from the stack. Yes, SPARC really does this. + const bp_bias = bias: { + if (native_arch.isSPARC()) break :bias 2047; + break :bias 0; }; - try tty_config.setColor(writer, .dim); - switch (unwind_err) { - error.Unexpected, error.OutOfMemory => |e| return e, - error.MissingDebugInfo => { - try writer.print("Unwind information for `{s}:0x{x}` was not available, trace may be incomplete\n\n", .{ module_name, address }); - }, - error.InvalidDebugInfo, - error.UnsupportedDebugInfo, - error.ReadFailed, - => { - const caption: []const u8 = switch (unwind_err) { - error.InvalidDebugInfo => "invalid unwind info", - error.UnsupportedDebugInfo => "unsupported unwind info", - error.ReadFailed => "filesystem error", - else => unreachable, - }; - try writer.print("Unwind error at address `{s}:0x{x}` ({s}), trace may be incomplete\n\n", .{ module_name, address, caption }); - }, + + fn applyOffset(addr: usize, comptime off: comptime_int) ?usize { + if (off >= 0) return math.add(usize, addr, off) catch return null; + return math.sub(usize, addr, -off) catch return null; } - try tty_config.setColor(writer, .reset); -} +}; -pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) !void { - const gpa = getDebugInfoAllocator(); +fn printSourceAtAddress(gpa: Allocator, debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) 
Writer.Error!void { const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) { error.MissingDebugInfo, error.UnsupportedDebugInfo, error.InvalidDebugInfo, - => .{ .name = null, .compile_unit_name = null, .source_location = null }, - error.ReadFailed => s: { - try tty_config.setColor(writer, .dim); + => .unknown, + error.ReadFailed, error.Unexpected => s: { + tty_config.setColor(writer, .dim) catch {}; try writer.print("Failed to read debug info from filesystem, trace may be incomplete\n\n", .{}); - try tty_config.setColor(writer, .reset); - break :s .{ .name = null, .compile_unit_name = null, .source_location = null }; + tty_config.setColor(writer, .reset) catch {}; + break :s .unknown; + }, + error.OutOfMemory => s: { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Ran out of memory loading debug info, trace may be incomplete\n\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + break :s .unknown; }, - error.OutOfMemory, error.Unexpected => |e| return e, }; defer if (symbol.source_location) |sl| gpa.free(sl.file_name); return printLineInfo( @@ -1080,10 +901,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, writer: *Writer, address: usi symbol.source_location, address, symbol.name orelse "???", - symbol.compile_unit_name orelse debug_info.getModuleNameForAddress(gpa, address) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.UnsupportedDebugInfo, error.ReadFailed => "???", - error.Unexpected, error.OutOfMemory => |e| return e, - }, + symbol.compile_unit_name orelse debug_info.getModuleNameForAddress(gpa, address) catch "???", tty_config, ); } @@ -1094,9 +912,9 @@ fn printLineInfo( symbol_name: []const u8, compile_unit_name: []const u8, tty_config: tty.Config, -) !void { +) Writer.Error!void { nosuspend { - try tty_config.setColor(writer, .bold); + tty_config.setColor(writer, .bold) catch {}; if (source_location) |*sl| { try writer.print("{s}:{d}:{d}", .{ 
sl.file_name, sl.line, sl.column }); @@ -1104,11 +922,11 @@ fn printLineInfo( try writer.writeAll("???:?:?"); } - try tty_config.setColor(writer, .reset); + tty_config.setColor(writer, .reset) catch {}; try writer.writeAll(": "); - try tty_config.setColor(writer, .dim); + tty_config.setColor(writer, .dim) catch {}; try writer.print("0x{x} in {s} ({s})", .{ address, symbol_name, compile_unit_name }); - try tty_config.setColor(writer, .reset); + tty_config.setColor(writer, .reset) catch {}; try writer.writeAll("\n"); // Show the matching source code line if possible @@ -1119,21 +937,23 @@ fn printLineInfo( const space_needed = @as(usize, @intCast(sl.column - 1)); try writer.splatByteAll(' ', space_needed); - try tty_config.setColor(writer, .green); + tty_config.setColor(writer, .green) catch {}; try writer.writeAll("^"); - try tty_config.setColor(writer, .reset); + tty_config.setColor(writer, .reset) catch {}; } try writer.writeAll("\n"); } else |err| switch (err) { - error.EndOfFile, error.FileNotFound => {}, - error.BadPathName => {}, - error.AccessDenied => {}, - else => return err, + error.WriteFailed => |e| return e, + else => { + // Ignore everything else. Seeing some lines in the trace without the associated + // source line printed is a far better user experience than interleaving the + // trace with a load of filesystem error crap. The user can always just open the + // source file themselves to see the line. + }, } } } } - fn printLineFromFile(writer: *Writer, source_location: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. @@ -1392,52 +1212,9 @@ fn resetSegfaultHandler() void { } fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopaque) callconv(.c) noreturn { - // Reset to the default handler so that if a segfault happens in this handler it will crash - // the process. 
Also when this handler returns, the original instruction will be repeated - // and the resulting segfault will crash the process rather than continually dump stack traces. - resetSegfaultHandler(); - - const addr = switch (native_os) { - .linux => @intFromPtr(info.fields.sigfault.addr), - .freebsd, .macos => @intFromPtr(info.addr), - .netbsd => @intFromPtr(info.info.reason.fault.addr), - .openbsd => @intFromPtr(info.data.fault.addr), - .solaris, .illumos => @intFromPtr(info.reason.fault.addr), - else => unreachable, - }; - - const code = if (native_os == .netbsd) info.info.code else info.code; - nosuspend switch (panic_stage) { - 0 => { - panic_stage = 1; - _ = panicking.fetchAdd(1, .seq_cst); - - { - lockStdErr(); - defer unlockStdErr(); - - dumpSegfaultInfoPosix(sig, code, addr, ctx_ptr); - } - - waitForOtherThreadToFinishPanicking(); - }, - else => { - // panic mutex already locked - dumpSegfaultInfoPosix(sig, code, addr, ctx_ptr); - }, - }; - - // We cannot allow the signal handler to return because when it runs the original instruction - // again, the memory may be mapped and undefined behavior would occur rather than repeating - // the segfault. So we simply abort here. - posix.abort(); -} - -fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*anyopaque) void { - const stderr = lockStderrWriter(&.{}); - defer unlockStderrWriter(); - _ = switch (sig) { - posix.SIG.SEGV => if (native_arch == .x86_64 and native_os == .linux and code == 128) // SI_KERNEL + if (use_trap_panic) @trap(); + const addr: ?usize, const name: []const u8 = info: { + if (native_os == .linux and native_arch == .x86_64) { // x86_64 doesn't have a full 64-bit virtual address space. // Addresses outside of that address space are non-canonical // and the CPU won't provide the faulting address to us. 
@@ -1445,16 +1222,31 @@ fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*anyopaque) // but can also happen when no addressable memory is involved; // for example when reading/writing model-specific registers // by executing `rdmsr` or `wrmsr` in user-space (unprivileged mode). - stderr.writeAll("General protection exception (no address available)\n") - else - stderr.print("Segmentation fault at address 0x{x}\n", .{addr}), - posix.SIG.ILL => stderr.print("Illegal instruction at address 0x{x}\n", .{addr}), - posix.SIG.BUS => stderr.print("Bus error at address 0x{x}\n", .{addr}), - posix.SIG.FPE => stderr.print("Arithmetic exception at address 0x{x}\n", .{addr}), - else => unreachable, - } catch posix.abort(); - - switch (native_arch) { + const SI_KERNEL = 0x80; + if (sig == posix.SIG.SEGV and info.code == SI_KERNEL) { + break :info .{ null, "General protection exception" }; + } + } + const addr: usize = switch (native_os) { + .linux => @intFromPtr(info.fields.sigfault.addr), + .freebsd, .macos => @intFromPtr(info.addr), + .netbsd => @intFromPtr(info.info.reason.fault.addr), + .openbsd => @intFromPtr(info.data.fault.addr), + .solaris, .illumos => @intFromPtr(info.reason.fault.addr), + else => comptime unreachable, + }; + const name = switch (sig) { + posix.SIG.SEGV => "Segmentation fault", + posix.SIG.ILL => "Illegal instruction", + posix.SIG.BUS => "Bus error", + posix.SIG.FPE => "Arithmetic exception", + else => unreachable, + }; + break :info .{ addr, name }; + }; + + // MLUGG TODO: this doesn't make any sense at all? + const use_context = switch (native_arch) { .x86, .x86_64, .arm, @@ -1463,82 +1255,90 @@ fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*anyopaque) .thumbeb, .aarch64, .aarch64_be, - => { - // Some kernels don't align `ctx_ptr` properly. Handle this defensively. 
- const ctx: *align(1) posix.ucontext_t = @ptrCast(ctx_ptr); - var new_ctx: posix.ucontext_t = ctx.*; - if (builtin.os.tag.isDarwin() and builtin.cpu.arch == .aarch64) { - // The kernel incorrectly writes the contents of `__mcontext_data` right after `mcontext`, - // rather than after the 8 bytes of padding that are supposed to sit between the two. Copy the - // contents to the right place so that the `mcontext` pointer will be correct after the - // `relocateContext` call below. - new_ctx.__mcontext_data = @as(*align(1) extern struct { - onstack: c_int, - sigmask: std.c.sigset_t, - stack: std.c.stack_t, - link: ?*std.c.ucontext_t, - mcsize: u64, - mcontext: *std.c.mcontext_t, - __mcontext_data: std.c.mcontext_t align(@sizeOf(usize)), // Disable padding after `mcontext`. - }, @ptrCast(ctx)).__mcontext_data; - } - relocateContext(&new_ctx); - dumpStackTraceFromBase(&new_ctx, stderr); - }, - else => {}, + => true, + else => false, + }; + if (!have_ucontext or !use_context) return handleSegfault(addr, name, null); + + // Some kernels don't align `ctx_ptr` properly, so we'll copy it into a local buffer. + var copied_ctx: ThreadContext = undefined; + const orig_ctx: *align(1) posix.ucontext_t = @ptrCast(ctx_ptr); + copied_ctx = orig_ctx.*; + if (builtin.os.tag.isDarwin() and builtin.cpu.arch == .aarch64) { + // The kernel incorrectly writes the contents of `__mcontext_data` right after `mcontext`, + // rather than after the 8 bytes of padding that are supposed to sit between the two. Copy the + // contents to the right place so that the `mcontext` pointer will be correct after the + // `relocateContext` call below. + const WrittenContext = extern struct { + onstack: c_int, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + link: ?*std.c.ucontext_t, + mcsize: u64, + mcontext: *std.c.mcontext_t, + __mcontext_data: std.c.mcontext_t align(@sizeOf(usize)), // Disable padding after `mcontext`. 
+ }; + const written_ctx: *align(1) WrittenContext = @ptrCast(ctx_ptr); + copied_ctx.__mcontext_data = written_ctx.__mcontext_data; } + relocateContext(&copied_ctx); + + handleSegfault(addr, name, &copied_ctx); } fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(.winapi) c_long { - switch (info.ExceptionRecord.ExceptionCode) { - windows.EXCEPTION_DATATYPE_MISALIGNMENT => handleSegfaultWindowsExtra(info, 0, "Unaligned Memory Access"), - windows.EXCEPTION_ACCESS_VIOLATION => handleSegfaultWindowsExtra(info, 1, null), - windows.EXCEPTION_ILLEGAL_INSTRUCTION => handleSegfaultWindowsExtra(info, 2, null), - windows.EXCEPTION_STACK_OVERFLOW => handleSegfaultWindowsExtra(info, 0, "Stack Overflow"), + if (use_trap_panic) @trap(); + const name: []const u8, const addr: ?usize = switch (info.ExceptionRecord.ExceptionCode) { + windows.EXCEPTION_DATATYPE_MISALIGNMENT => .{ "Unaligned memory access", null }, + windows.EXCEPTION_ACCESS_VIOLATION => .{ "Segmentation fault", info.ExceptionRecord.ExceptionInformation[1] }, + windows.EXCEPTION_ILLEGAL_INSTRUCTION => .{ "Illegal instruction", info.ContextRecord.getRegs().ip }, + windows.EXCEPTION_STACK_OVERFLOW => .{ "Stack overflow", null }, else => return windows.EXCEPTION_CONTINUE_SEARCH, - } + }; + handleSegfault(addr, name, info.ContextRecord); } -fn handleSegfaultWindowsExtra(info: *windows.EXCEPTION_POINTERS, msg: u8, label: ?[]const u8) noreturn { - // For backends that cannot handle the language features used by this segfault handler, we have a simpler one, - switch (builtin.zig_backend) { - .stage2_x86_64 => if (builtin.target.ofmt == .coff) @trap(), - else => {}, - } - - comptime assert(windows.CONTEXT != void); - nosuspend switch (panic_stage) { +fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadContext) noreturn { + // There is very similar logic to the following in `defaultPanic`. 
+ switch (panic_stage) { 0 => { panic_stage = 1; _ = panicking.fetchAdd(1, .seq_cst); - { + trace: { + const tty_config = tty.detectConfig(.stderr()); + const stderr = lockStderrWriter(&.{}); defer unlockStderrWriter(); - dumpSegfaultInfoWindows(info, msg, label, stderr); + if (addr) |a| { + stderr.print("{s} at address 0x{x}\n", .{ name, a }) catch break :trace; + } else { + stderr.print("{s} (no address available)\n", .{name}) catch break :trace; + } + // MLUGG TODO: for this to work neatly, `ThreadContext` needs to be `noreturn` when not supported! + if (opt_ctx) |context| { + writeCurrentStackTrace(.{ + .context = context, + .allow_unsafe_unwind = true, // we're crashing anyway, give it our all! + }, stderr, tty_config) catch break :trace; + } } - - waitForOtherThreadToFinishPanicking(); }, 1 => { panic_stage = 2; + // A segfault happened while trying to print a previous panic message. + // We're still holding the mutex but that's fine as we're going to + // call abort(). fs.File.stderr().writeAll("aborting due to recursive panic\n") catch {}; }, - else => {}, - }; - posix.abort(); -} - -fn dumpSegfaultInfoWindows(info: *windows.EXCEPTION_POINTERS, msg: u8, label: ?[]const u8, stderr: *Writer) void { - _ = switch (msg) { - 0 => stderr.print("{s}\n", .{label.?}), - 1 => stderr.print("Segmentation fault at address 0x{x}\n", .{info.ExceptionRecord.ExceptionInformation[1]}), - 2 => stderr.print("Illegal instruction at address 0x{x}\n", .{info.ContextRecord.getRegs().ip}), - else => unreachable, - } catch posix.abort(); + else => {}, // Panicked while printing the recursive panic message. + } - dumpStackTraceFromBase(info.ContextRecord, stderr); + // We cannot allow the signal handler to return because when it runs the original instruction + // again, the memory may be mapped and undefined behavior would occur rather than repeating + // the segfault. So we simply abort here. 
+ posix.abort(); } pub fn dumpStackPointerAddr(prefix: []const u8) void { @@ -1549,26 +1349,22 @@ pub fn dumpStackPointerAddr(prefix: []const u8) void { } test "manage resources correctly" { - if (builtin.strip_debug_info) return error.SkipZigTest; - - if (native_os == .wasi) return error.SkipZigTest; - - if (native_os == .windows) { - // https://github.com/ziglang/zig/issues/13963 - return error.SkipZigTest; - } - - // self-hosted debug info is still too buggy - if (builtin.zig_backend != .stage2_llvm) return error.SkipZigTest; - - var discarding: Writer.Discarding = .init(&.{}); - var di = try SelfInfo.open(testing.allocator); + if (!SelfInfo.target_supported) return error.SkipZigTest; + const S = struct { + noinline fn showMyTrace() usize { + return @returnAddress(); + } + }; + var discarding: std.io.Writer.Discarding = .init(&.{}); + var di: SelfInfo = try .open(testing.allocator); defer di.deinit(); - try printSourceAtAddress(&di, &discarding.writer, showMyTrace(), tty.detectConfig(.stderr())); -} - -noinline fn showMyTrace() usize { - return @returnAddress(); + try printSourceAtAddress( + testing.allocator, + &di, + &discarding.writer, + S.showMyTrace(), + tty.detectConfig(.stderr()), + ); } /// This API helps you track where a value originated and where it was mutated, @@ -1615,12 +1411,11 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize if (t.index < size) { t.notes[t.index] = note; - t.addrs[t.index] = [1]usize{0} ** stack_frame_count; - var stack_trace: std.builtin.StackTrace = .{ - .index = 0, - .instruction_addresses = &t.addrs[t.index], - }; - captureStackTrace(addr, &stack_trace); + const addrs = &t.addrs[t.index]; + const st = captureCurrentStackTrace(.{ .first_address = addr }, addrs); + if (st.index < addrs.len) { + @memset(addrs[st.index..], 0); // zero unused frames to indicate end of trace + } } // Keep counting even if the end is reached so that the // user can find out how much more size they need. 
@@ -1634,13 +1429,6 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize const stderr = lockStderrWriter(&.{}); defer unlockStderrWriter(); const end = @min(t.index, size); - const debug_info = getSelfDebugInfo() catch |err| { - stderr.print( - "Unable to dump stack trace: Unable to open debug info: {s}\n", - .{@errorName(err)}, - ) catch return; - return; - }; for (t.addrs[0..end], 0..) |frames_array, i| { stderr.print("{s}:\n", .{t.notes[i]}) catch return; var frames_array_mutable = frames_array; @@ -1649,7 +1437,7 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize .index = frames.len, .instruction_addresses = frames, }; - writeStackTrace(stack_trace, stderr, debug_info, tty_config) catch continue; + writeStackTrace(stack_trace, stderr, tty_config) catch return; } if (t.index > end) { stderr.print("{d} more traces not shown; consider increasing trace size\n", .{ diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 8efba51beed5..e9ed9077af45 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1449,7 +1449,7 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) !std.debug.Symbol { const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return .{ .name = null, .compile_unit_name = null, .source_location = null }, + error.MissingDebugInfo, error.InvalidDebugInfo => return .unknown, else => return err, }; return .{ diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index c5e163eef2cc..a7dea036c468 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -158,7 +158,7 @@ test { } pub const UnwindContext = struct { - gpa: Allocator, + gpa: Allocator, // MLUGG TODO: make unmanaged (also maybe rename this type, DwarfUnwindContext or smth idk) cfa: ?usize, pc: 
usize, thread_context: *std.debug.ThreadContext, @@ -166,22 +166,20 @@ pub const UnwindContext = struct { vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) !UnwindContext { + pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) UnwindContext { comptime assert(supports_unwinding); const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; - const pc = stripInstructionPtrAuthCode( - (try regValueNative(thread_context, ip_reg_num, null)).*, - ); - - const context_copy = try gpa.create(std.debug.ThreadContext); - std.debug.copyContext(thread_context, context_copy); + const raw_pc_ptr = regValueNative(thread_context, ip_reg_num, null) catch { + unreachable; // error means unsupported, in which case `supports_unwinding` should have been `false` + }; + const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*); return .{ .gpa = gpa, .cfa = null, .pc = pc, - .thread_context = context_copy, + .thread_context = thread_context, .reg_context = undefined, .vm = .{}, .stack_machine = .{}, @@ -191,7 +189,6 @@ pub const UnwindContext = struct { pub fn deinit(self: *UnwindContext) void { self.vm.deinit(self.gpa); self.stack_machine.deinit(self.gpa); - self.gpa.destroy(self.thread_context); self.* = undefined; } diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 1a38bdd2843e..1de7c1201566 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -196,11 +196,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu const full = &di.full.?; const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(full.symbols, vaddr) orelse return .{ - .name = null, - .compile_unit_name = null, - .source_location = null, - }; + const symbol = MachoSymbol.find(full.symbols, vaddr) orelse return .unknown; // offset of 
`address` from start of `symbol` const address_symbol_offset = vaddr - symbol.addr; From c2ada49354897f89a2e2030ad1e083a8ee9b7c3b Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 5 Sep 2025 20:40:11 +0100 Subject: [PATCH 26/85] replace usages of old std.debug APIs src/crash_handler.zig is still TODO though, i am planning bigger changes there --- lib/std/Build.zig | 2 +- lib/std/Build/Step.zig | 24 +++---------- lib/std/builtin.zig | 13 +++---- lib/std/heap/debug_allocator.zig | 51 +++++++--------------------- lib/std/os/windows.zig | 4 +-- lib/std/posix.zig | 2 +- lib/std/testing/FailingAllocator.zig | 8 ++--- src/link/MachO.zig | 2 +- 8 files changed, 29 insertions(+), 77 deletions(-) diff --git a/lib/std/Build.zig b/lib/std/Build.zig index 2644b79dcde6..e3006fa25513 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -2195,7 +2195,7 @@ fn dependencyInner( sub_builder.runBuild(bz) catch @panic("unhandled error"); if (sub_builder.validateUserInputDidItFail()) { - std.debug.dumpCurrentStackTrace(@returnAddress()); + std.debug.dumpCurrentStackTrace(.{ .first_address = @returnAddress() }); } } diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index 3002db628f4b..f71a24f2e9a3 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -60,7 +60,7 @@ test_results: TestResults, /// The return address associated with creation of this step that can be useful /// to print along with debugging messages. 
-debug_stack_trace: []usize, +debug_stack_trace: std.builtin.StackTrace, pub const TestResults = struct { fail_count: u32 = 0, @@ -220,16 +220,9 @@ pub fn init(options: StepOptions) Step { .state = .precheck_unstarted, .max_rss = options.max_rss, .debug_stack_trace = blk: { - if (!std.debug.sys_can_stack_trace) break :blk &.{}; - const addresses = arena.alloc(usize, options.owner.debug_stack_frames_count) catch @panic("OOM"); - @memset(addresses, 0); + const addr_buf = arena.alloc(usize, options.owner.debug_stack_frames_count) catch @panic("OOM"); const first_ret_addr = options.first_ret_addr orelse @returnAddress(); - var stack_trace = std.builtin.StackTrace{ - .instruction_addresses = addresses, - .index = 0, - }; - std.debug.captureStackTrace(first_ret_addr, &stack_trace); - break :blk addresses; + break :blk std.debug.captureCurrentStackTrace(.{ .first_address = first_ret_addr }, addr_buf); }, .result_error_msgs = .{}, .result_error_bundle = std.zig.ErrorBundle.empty, @@ -315,18 +308,9 @@ pub fn cast(step: *Step, comptime T: type) ?*T { /// For debugging purposes, prints identifying information about this Step. pub fn dump(step: *Step, w: *std.Io.Writer, tty_config: std.Io.tty.Config) void { - const debug_info = std.debug.getSelfDebugInfo() catch |err| { - w.print("Unable to dump stack trace: Unable to open debug info: {s}\n", .{ - @errorName(err), - }) catch {}; - return; - }; if (step.getStackTrace()) |stack_trace| { w.print("name: '{s}'. 
creation stack trace:\n", .{step.name}) catch {}; - std.debug.writeStackTrace(stack_trace, w, debug_info, tty_config) catch |err| { - w.print("Unable to dump stack trace: {s}\n", .{@errorName(err)}) catch {}; - return; - }; + std.debug.writeStackTrace(stack_trace, w, tty_config) catch {}; } else { const field = "debug_stack_frames_count"; comptime assert(@hasField(Build, field)); diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 2504b8fe2f7b..a3f3e791e6bc 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -38,20 +38,17 @@ pub const StackTrace = struct { index: usize, instruction_addresses: []usize, - pub fn format(self: StackTrace, writer: *std.Io.Writer) std.Io.Writer.Error!void { + pub fn format(st: *const StackTrace, writer: *std.Io.Writer) std.Io.Writer.Error!void { // TODO: re-evaluate whether to use format() methods at all. // Until then, avoid an error when using GeneralPurposeAllocator with WebAssembly // where it tries to call detectTTYConfig here. if (builtin.os.tag == .freestanding) return; - const debug_info = std.debug.getSelfDebugInfo() catch |err| { - return writer.print("\nUnable to print stack trace: Unable to open debug info: {s}\n", .{@errorName(err)}); - }; - const tty_config = std.Io.tty.detectConfig(std.fs.File.stderr()); + // TODO: why on earth are we using stderr's ttyconfig? + // If we want colored output, we should just make a formatter out of `writeStackTrace`. 
+ const tty_config = std.Io.tty.detectConfig(.stderr()); try writer.writeAll("\n"); - std.debug.writeStackTrace(self, writer, debug_info, tty_config) catch |err| { - try writer.print("Unable to print stack trace: {s}\n", .{@errorName(err)}); - }; + try std.debug.writeStackTrace(st, writer, tty_config); } }; diff --git a/lib/std/heap/debug_allocator.zig b/lib/std/heap/debug_allocator.zig index a4b1de5b47b5..fe512f03d600 100644 --- a/lib/std/heap/debug_allocator.zig +++ b/lib/std/heap/debug_allocator.zig @@ -505,23 +505,14 @@ pub fn DebugAllocator(comptime config: Config) type { return if (leaks) .leak else .ok; } - fn collectStackTrace(first_trace_addr: usize, addresses: *[stack_n]usize) void { - if (stack_n == 0) return; - @memset(addresses, 0); - var stack_trace: StackTrace = .{ - .instruction_addresses = addresses, - .index = 0, - }; - std.debug.captureStackTrace(first_trace_addr, &stack_trace); + fn collectStackTrace(first_trace_addr: usize, addr_buf: *[stack_n]usize) void { + const st = std.debug.captureCurrentStackTrace(.{ .first_address = first_trace_addr }, addr_buf); + @memset(addr_buf[@min(st.index, addr_buf.len)..], 0); } fn reportDoubleFree(ret_addr: usize, alloc_stack_trace: StackTrace, free_stack_trace: StackTrace) void { - var addresses: [stack_n]usize = @splat(0); - var second_free_stack_trace: StackTrace = .{ - .instruction_addresses = &addresses, - .index = 0, - }; - std.debug.captureStackTrace(ret_addr, &second_free_stack_trace); + var addr_buf: [stack_n]usize = undefined; + const second_free_stack_trace = std.debug.captureCurrentStackTrace(.{ .first_address = ret_addr }, &addr_buf); log.err("Double free detected. 
Allocation: {f} First free: {f} Second free: {f}", .{ alloc_stack_trace, free_stack_trace, second_free_stack_trace, }); @@ -562,12 +553,8 @@ pub fn DebugAllocator(comptime config: Config) type { } if (config.safety and old_mem.len != entry.value_ptr.bytes.len) { - var addresses: [stack_n]usize = [1]usize{0} ** stack_n; - var free_stack_trace: StackTrace = .{ - .instruction_addresses = &addresses, - .index = 0, - }; - std.debug.captureStackTrace(ret_addr, &free_stack_trace); + var addr_buf: [stack_n]usize = undefined; + const free_stack_trace = std.debug.captureCurrentStackTrace(.{ .first_address = ret_addr }, &addr_buf); log.err("Allocation size {d} bytes does not match free size {d}. Allocation: {f} Free: {f}", .{ entry.value_ptr.bytes.len, old_mem.len, @@ -672,12 +659,8 @@ pub fn DebugAllocator(comptime config: Config) type { } if (config.safety and old_mem.len != entry.value_ptr.bytes.len) { - var addresses: [stack_n]usize = [1]usize{0} ** stack_n; - var free_stack_trace = StackTrace{ - .instruction_addresses = &addresses, - .index = 0, - }; - std.debug.captureStackTrace(ret_addr, &free_stack_trace); + var addr_buf: [stack_n]usize = undefined; + const free_stack_trace = std.debug.captureCurrentStackTrace(.{ .first_address = ret_addr }, &addr_buf); log.err("Allocation size {d} bytes does not match free size {d}. 
Allocation: {f} Free: {f}", .{ entry.value_ptr.bytes.len, old_mem.len, @@ -900,12 +883,8 @@ pub fn DebugAllocator(comptime config: Config) type { if (requested_size == 0) @panic("Invalid free"); const slot_alignment = bucket.log2PtrAligns(slot_count)[slot_index]; if (old_memory.len != requested_size or alignment != slot_alignment) { - var addresses: [stack_n]usize = [1]usize{0} ** stack_n; - var free_stack_trace: StackTrace = .{ - .instruction_addresses = &addresses, - .index = 0, - }; - std.debug.captureStackTrace(return_address, &free_stack_trace); + var addr_buf: [stack_n]usize = undefined; + const free_stack_trace = std.debug.captureCurrentStackTrace(.{ .first_address = return_address }, &addr_buf); if (old_memory.len != requested_size) { log.err("Allocation size {d} bytes does not match free size {d}. Allocation: {f} Free: {f}", .{ requested_size, @@ -999,12 +978,8 @@ pub fn DebugAllocator(comptime config: Config) type { if (requested_size == 0) @panic("Invalid free"); const slot_alignment = bucket.log2PtrAligns(slot_count)[slot_index]; if (memory.len != requested_size or alignment != slot_alignment) { - var addresses: [stack_n]usize = [1]usize{0} ** stack_n; - var free_stack_trace: StackTrace = .{ - .instruction_addresses = &addresses, - .index = 0, - }; - std.debug.captureStackTrace(return_address, &free_stack_trace); + var addr_buf: [stack_n]usize = undefined; + const free_stack_trace = std.debug.captureCurrentStackTrace(.{ .first_address = return_address }, &addr_buf); if (memory.len != requested_size) { log.err("Allocation size {d} bytes does not match free size {d}. 
Allocation: {f} Free: {f}", .{ requested_size, diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 66583d21b218..0a74f7a609e7 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2849,7 +2849,7 @@ pub fn unexpectedError(err: Win32Error) UnexpectedError { std.debug.print("error.Unexpected: GetLastError({d}): {f}\n", .{ err, std.unicode.fmtUtf16Le(buf_wstr[0..len]), }); - std.debug.dumpCurrentStackTrace(@returnAddress()); + std.debug.dumpCurrentStackTrace(.{ .first_address = @returnAddress() }); } return error.Unexpected; } @@ -2863,7 +2863,7 @@ pub fn unexpectedWSAError(err: ws2_32.WinsockError) UnexpectedError { pub fn unexpectedStatus(status: NTSTATUS) UnexpectedError { if (std.posix.unexpected_error_tracing) { std.debug.print("error.Unexpected NTSTATUS=0x{x}\n", .{@intFromEnum(status)}); - std.debug.dumpCurrentStackTrace(@returnAddress()); + std.debug.dumpCurrentStackTrace(.{ .first_address = @returnAddress() }); } return error.Unexpected; } diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 50a210ece14a..14f2935649f7 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -7591,7 +7591,7 @@ pub const UnexpectedError = error{ pub fn unexpectedErrno(err: E) UnexpectedError { if (unexpected_error_tracing) { std.debug.print("unexpected errno: {d}\n", .{@intFromEnum(err)}); - std.debug.dumpCurrentStackTrace(null); + std.debug.dumpCurrentStackTrace(.{}); } return error.Unexpected; } diff --git a/lib/std/testing/FailingAllocator.zig b/lib/std/testing/FailingAllocator.zig index c1f9791e39a9..916dbc665528 100644 --- a/lib/std/testing/FailingAllocator.zig +++ b/lib/std/testing/FailingAllocator.zig @@ -64,12 +64,8 @@ fn alloc( const self: *FailingAllocator = @ptrCast(@alignCast(ctx)); if (self.alloc_index == self.fail_index) { if (!self.has_induced_failure) { - @memset(&self.stack_addresses, 0); - var stack_trace = std.builtin.StackTrace{ - .instruction_addresses = &self.stack_addresses, - .index = 0, - }; - 
std.debug.captureStackTrace(return_address, &stack_trace); + const st = std.debug.captureCurrentStackTrace(return_address, &self.stack_addresses); + @memset(self.stack_addresses[@min(st.index, self.stack_addresses.len)..], 0); self.has_induced_failure = true; } return null; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d819e3909782..fd1119851d41 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -5070,7 +5070,7 @@ pub fn getKernError(err: std.c.kern_return_t) KernE { pub fn unexpectedKernError(err: KernE) std.posix.UnexpectedError { if (std.posix.unexpected_error_tracing) { std.debug.print("unexpected error: {d}\n", .{@intFromEnum(err)}); - std.debug.dumpCurrentStackTrace(null); + std.debug.dumpCurrentStackTrace(.{}); } return error.Unexpected; } From 9859440d83e5ef17d353be39f32f2dc0b9ce0e02 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 5 Sep 2025 21:28:18 +0100 Subject: [PATCH 27/85] add freestanding support IN THEORY untested because this branch has errors rn --- lib/std/debug.zig | 25 +++-- lib/std/debug/SelfInfo.zig | 136 ++++++++++------------- lib/std/debug/SelfInfo/DarwinModule.zig | 1 + lib/std/debug/SelfInfo/ElfModule.zig | 20 ++++ lib/std/debug/SelfInfo/WindowsModule.zig | 1 + 5 files changed, 93 insertions(+), 90 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index bc2610fb7cea..f2d15e3fe882 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -14,6 +14,8 @@ const builtin = @import("builtin"); const native_arch = builtin.cpu.arch; const native_os = builtin.os.tag; +const root = @import("root"); + pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); @@ -942,19 +944,15 @@ fn printLineInfo( tty_config.setColor(writer, .reset) catch {}; } try writer.writeAll("\n"); - } else |err| switch (err) { - error.WriteFailed => |e| return e, - else => { - // Ignore everything else. 
Seeing some lines in the trace without the associated - // source line printed is a far better user experience than interleaving the - // trace with a load of filesystem error crap. The user can always just open the - // source file themselves to see the line. - }, } } } } fn printLineFromFile(writer: *Writer, source_location: SourceLocation) !void { + if (@hasDecl(root, "debug") and @hasDecl(root.debug, "printLineFromFile")) { + return root.debug.printLineFromFile(writer, source_location); + } + // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. var f = try fs.cwd().openFile(source_location.file_name, .{}); @@ -1139,11 +1137,16 @@ test printLineFromFile { /// TODO multithreaded awareness var debug_info_arena: ?std.heap.ArenaAllocator = null; +var debug_info_fba: std.heap.FixedBufferAllocator = .init(&debug_info_fba_buf); +var debug_info_fba_buf: [1024 * 1024 * 4]u8 = undefined; fn getDebugInfoAllocator() mem.Allocator { - if (debug_info_arena == null) { - debug_info_arena = .init(std.heap.page_allocator); + if (false) { + if (debug_info_arena == null) { + debug_info_arena = .init(std.heap.page_allocator); + } + return debug_info_arena.?.allocator(); } - return debug_info_arena.?.allocator(); + return debug_info_fba.allocator(); } /// Whether or not the current target can print useful debug information when a segfault occurs. diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index a7dea036c468..93fa7a60457b 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -14,6 +14,8 @@ const Dwarf = std.debug.Dwarf; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; +const root = @import("root"); + const SelfInfo = @This(); modules: std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo), @@ -33,49 +35,12 @@ pub const Error = error{ }; /// Indicates whether the `SelfInfo` implementation has support for this target. 
-pub const target_supported: bool = switch (native_os) { - .linux, - .freebsd, - .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => true, - else => false, -}; +pub const target_supported: bool = Module != void; -/// Indicates whether unwinding for the host is *implemented* here in the Zig -/// standard library. +/// Indicates whether the `SelfInfo` implementation has support for unwinding on this target. /// -/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports -/// unwinding on a target *in theory*. -pub const supports_unwinding: bool = switch (builtin.target.cpu.arch) { - .x86 => switch (builtin.target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (builtin.target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm, .armeb, .thumb, .thumbeb => switch (builtin.target.os.tag) { - .linux => true, - else => false, - }, - .aarch64, .aarch64_be => switch (builtin.target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - // Unwinding is possible on other targets but this implementation does - // not support them...yet! - else => false, -}; -comptime { - if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(&builtin.target)); -} +/// For whether DWARF unwinding is *theoretically* possible, see `Dwarf.abi.supportsUnwinding`. +pub const supports_unwinding: bool = Module.supports_unwinding; pub const init: SelfInfo = .{ .modules = .empty, @@ -114,48 +79,61 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) return module.name; } -/// This type contains the target-specific implementation. It must expose the following declarations: +/// `void` indicates that `SelfInfo` is not supported for this target. 
/// -/// * `LookupCache: type`, with the following declarations unless `LookupCache == void`: -/// * `init: LookupCache` -/// * `deinit: fn (*LookupCache, Allocator) void` -/// * `lookup: fn (*LookupCache, Allocator, address: usize) !Module` -/// * `key: fn (*const Module) usize` -/// * `DebugInfo: type`, with the following declarations: -/// * `DebugInfo.init: DebugInfo` -/// * `getSymbolAtAddress: fn (*const Module, Allocator, *DebugInfo, address: usize) !std.debug.Symbol` +/// This type contains the target-specific implementation. Logically, a `Module` represents a subset +/// of the executable with its own debug information. This typically corresponds to what ELF calls a +/// module, i.e. a shared library or executable image, but could be anything. For instance, it would +/// be valid to consider the entire application one module, or on the other hand to consider each +/// object file a module. /// -/// If unwinding is supported on this target, it must additionally expose the following declarations: +/// This type must must expose the following declarations: /// -/// * `unwindFrame: fn (*const Module, Allocator, *DebugInfo, *UnwindContext) !usize` -const Module = switch (native_os) { - else => {}, // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... 
- .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => @import("SelfInfo/ElfModule.zig"), - .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"), - .uefi, .windows => @import("SelfInfo/WindowsModule.zig"), - .wasi, .emscripten => struct { - const LookupCache = void; - fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module { - _ = cache; - _ = gpa; - _ = address; - @panic("TODO implement lookup module for Wasm"); - } - const DebugInfo = struct { - const init: DebugInfo = .{}; - }; - fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { - _ = module; - _ = gpa; - _ = di; - _ = address; - unreachable; - } - }, +/// ``` +/// /// Holds state cached by the implementation between calls to `lookup`. +/// /// This may be `void`, in which case the inner declarations can be omitted. +/// pub const LookupCache = struct { +/// pub const init: LookupCache; +/// pub fn deinit(lc: *LookupCache, gpa: Allocator) void; +/// }; +/// /// Holds debug information associated with a particular `Module`. +/// pub const DebugInfo = struct { +/// pub const init: DebugInfo; +/// }; +/// /// Finds the `Module` corresponding to `address`. +/// pub fn lookup(lc: *LookupCache, gpa: Allocator, address: usize) SelfInfo.Error!Module; +/// /// Returns a unique identifier for this `Module`, such as a load address. +/// pub fn key(mod: *const Module) usize; +/// /// Locates and loads location information for the symbol corresponding to `address`. +/// pub fn getSymbolAtAddress( +/// mod: *const Module, +/// gpa: Allocator, +/// di: *DebugInfo, +/// address: usize, +/// ) SelfInfo.Error!std.debug.Symbol; +/// /// Whether a reliable stack unwinding strategy, such as DWARF unwinding, is available. +/// pub const supports_unwinding: bool; +/// /// Only required if `supports_unwinding == true`. 
Unwinds a single stack frame and returns +/// /// the next return address (which may be 0 indicating end of stack). This is currently +/// /// specialized to DWARF unwinding. +/// pub fn unwindFrame( +/// mod: *const Module, +/// gpa: Allocator, +/// di: *DebugInfo, +/// ctx: *SelfInfo.UnwindContext, +/// ) SelfInfo.Error!usize; +/// ``` +const Module: type = Module: { + if (@hasDecl(root, "debug") and @hasDecl(root.debug, "Module")) { + break :Module root.debug.Module; + } + break :Module switch (native_os) { + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => @import("SelfInfo/ElfModule.zig"), + .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"), + .uefi, .windows => @import("SelfInfo/WindowsModule.zig"), + else => void, + }; }; -test { - _ = Module; -} pub const UnwindContext = struct { gpa: Allocator, // MLUGG TODO: make unmanaged (also maybe rename this type, DwarfUnwindContext or smth idk) diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 1de7c1201566..4f94798c63e1 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -251,6 +251,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu ) catch null, }; } +pub const supports_unwinding: bool = true; /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index e69600be1411..b58125031e83 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -205,6 +205,26 @@ pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, con } return error.MissingDebugInfo; } +pub const supports_unwinding: bool = s: { + const archs: []const std.Target.Cpu.Arch = switch (builtin.target.os.tag) { + .linux => &.{ .x86, .x86_64, .arm, .armeb, .thumb, .thumbeb, .aarch64, .aarch64_be }, + .netbsd => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, + .freebsd => &.{ .x86_64, .aarch64, .aarch64_be }, + .openbsd => &.{.x86_64}, + .solaris => &.{ .x86, .x86_64 }, + .illumos => &.{ .x86, .x86_64 }, + else => unreachable, + }; + for (archs) |a| { + if (builtin.target.cpu.arch == a) break :s true; + } + break :s false; +}; +comptime { + if (supports_unwinding) { + std.debug.assert(Dwarf.abi.supportsUnwinding(&builtin.target)); + } +} const ElfModule = @This(); diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 674c6adae588..ccede7efb2bf 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -246,6 +246,7 @@ pub const DebugInfo = struct { }; } }; +pub const supports_unwinding: bool = false; const WindowsModule = @This(); From 253fdfce7064a6ebf70dbb62b465d6564eac0948 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 11:38:25 +0100 Subject: [PATCH 28/85] SelfInfo: be honest about how general unwinding is ...in that it isn't: it's currently very specialized to DWARF unwinding. Also, make a type unmanaged. 
--- lib/std/debug.zig | 11 +++--- lib/std/debug/SelfInfo.zig | 45 +++++++++++++------------ lib/std/debug/SelfInfo/DarwinModule.zig | 15 +++++---- lib/std/debug/SelfInfo/ElfModule.zig | 6 ++-- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index f2d15e3fe882..cfc442562fac 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -747,7 +747,7 @@ pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void { const StackIterator = union(enum) { /// Unwinding using debug info (e.g. DWARF CFI). - di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, + di: if (SelfInfo.supports_unwinding) SelfInfo.DwarfUnwindContext else noreturn, /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable. fp: usize, @@ -766,17 +766,17 @@ const StackIterator = union(enum) { if (context_opt) |context| { context_buf.* = context.*; relocateContext(context_buf); - return .{ .di = .init(getDebugInfoAllocator(), context_buf) }; + return .{ .di = .init(context_buf) }; } if (getContext(context_buf)) { - return .{ .di = .init(getDebugInfoAllocator(), context_buf) }; + return .{ .di = .init(context_buf) }; } return .{ .fp = @frameAddress() }; } fn deinit(si: *StackIterator) void { switch (si.*) { .fp => {}, - .di => |*unwind_context| unwind_context.deinit(), + .di => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()), } } @@ -944,6 +944,9 @@ fn printLineInfo( tty_config.setColor(writer, .reset) catch {}; } try writer.writeAll("\n"); + } else |_| { + // Ignore all errors; it's a better UX to just print the source location without the + // corresponding line number. The user can always open the source file themselves. 
} } } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 93fa7a60457b..f1832f49023b 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -53,7 +53,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *DwarfUnwindContext) Error!usize { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -120,7 +120,7 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// mod: *const Module, /// gpa: Allocator, /// di: *DebugInfo, -/// ctx: *SelfInfo.UnwindContext, +/// ctx: *SelfInfo.DwarfUnwindContext, /// ) SelfInfo.Error!usize; /// ``` const Module: type = Module: { @@ -135,8 +135,7 @@ const Module: type = Module: { }; }; -pub const UnwindContext = struct { - gpa: Allocator, // MLUGG TODO: make unmanaged (also maybe rename this type, DwarfUnwindContext or smth idk) +pub const DwarfUnwindContext = struct { cfa: ?usize, pc: usize, thread_context: *std.debug.ThreadContext, @@ -144,7 +143,7 @@ pub const UnwindContext = struct { vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) UnwindContext { + pub fn init(thread_context: *std.debug.ThreadContext) DwarfUnwindContext { comptime assert(supports_unwinding); const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; @@ -154,7 +153,6 @@ pub const UnwindContext = struct { const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*); return .{ - .gpa = gpa, .cfa = null, .pc = pc, .thread_context = thread_context, @@ -164,19 +162,20 @@ pub const UnwindContext = struct { }; } - pub fn deinit(self: 
*UnwindContext) void { - self.vm.deinit(self.gpa); - self.stack_machine.deinit(self.gpa); + pub fn deinit(self: *DwarfUnwindContext, gpa: Allocator) void { + self.vm.deinit(gpa); + self.stack_machine.deinit(gpa); self.* = undefined; } - pub fn getFp(self: *const UnwindContext) !usize { + pub fn getFp(self: *const DwarfUnwindContext) !usize { return (try regValueNative(self.thread_context, Dwarf.abi.fpRegNum(native_arch, self.reg_context), self.reg_context)).*; } /// Resolves the register rule and places the result into `out` (see regBytes) pub fn resolveRegisterRule( - context: *UnwindContext, + context: *DwarfUnwindContext, + gpa: Allocator, col: Dwarf.Unwind.VirtualMachine.Column, expression_context: std.debug.Dwarf.expression.Context, out: []u8, @@ -224,7 +223,7 @@ pub const UnwindContext = struct { }, .expression => |expression| { context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.gpa, expression_context, context.cfa.?); + const value = try context.stack_machine.run(expression, gpa, expression_context, context.cfa.?); const addr = if (value) |v| blk: { if (v != .generic) return error.InvalidExpressionValue; break :blk v.generic; @@ -235,7 +234,7 @@ pub const UnwindContext = struct { }, .val_expression => |expression| { context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.gpa, expression_context, context.cfa.?); + const value = try context.stack_machine.run(expression, gpa, expression_context, context.cfa.?); if (value) |v| { if (v != .generic) return error.InvalidExpressionValue; mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); @@ -252,13 +251,14 @@ pub const UnwindContext = struct { /// may require lazily loading the data in those sections. 
/// /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info - pub fn unwindFrameDwarf( - context: *UnwindContext, + pub fn unwindFrame( + context: *DwarfUnwindContext, + gpa: Allocator, unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, ) Error!usize { - return unwindFrameDwarfInner(context, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { + return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, error.UnimplementedArch, @@ -302,8 +302,9 @@ pub const UnwindContext = struct { => return error.InvalidDebugInfo, }; } - fn unwindFrameDwarfInner( - context: *UnwindContext, + fn unwindFrameInner( + context: *DwarfUnwindContext, + gpa: Allocator, unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, @@ -338,7 +339,7 @@ pub const UnwindContext = struct { context.reg_context.eh_frame = cie.version != 4; context.reg_context.is_macho = native_os.isDarwin(); - const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); + const row = try context.vm.runTo(gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; @@ -349,7 +350,7 @@ pub const UnwindContext = struct { context.stack_machine.reset(); const value = try context.stack_machine.run( expr, - context.gpa, + gpa, expression_context, context.cfa, ); @@ -366,7 +367,7 @@ pub const UnwindContext = struct { // Buffering the modifications is done because copying the thread context is not portable, // some implementations (ie. darwin) use internal pointers to the mcontext. 
- var arena: std.heap.ArenaAllocator = .init(context.gpa); + var arena: std.heap.ArenaAllocator = .init(gpa); defer arena.deinit(); const update_arena = arena.allocator(); @@ -388,7 +389,7 @@ pub const UnwindContext = struct { const dest = try regBytes(context.thread_context, register, context.reg_context); const src = try update_arena.alloc(u8, dest.len); - try context.resolveRegisterRule(column, expression_context, src); + try context.resolveRegisterRule(gpa, column, expression_context, src); const new_update = try update_arena.create(RegisterUpdate); new_update.* = .{ diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 4f94798c63e1..ce15aed95cd7 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -255,7 +255,7 @@ pub const supports_unwinding: bool = true; /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *DwarfUnwindContext) Error!usize { return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, @@ -274,8 +274,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, => return error.InvalidDebugInfo, }; } -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - _ = gpa; +fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *DwarfUnwindContext) !usize { if (di.unwind == null) di.unwind = module.loadUnwindInfo(); const unwind = &di.unwind.?; @@ -505,7 +504,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, .DWARF => { const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; - return context.unwindFrameDwarf( + return context.unwindFrame( + gpa, &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), module.load_offset, @intCast(encoding.value.x86_64.dwarf), @@ -524,7 +524,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, .DWARF => { const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; - return context.unwindFrameDwarf( + return context.unwindFrame( + gpa, &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), module.load_offset, @intCast(encoding.value.x86_64.dwarf), @@ -574,7 +575,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, else => comptime unreachable, // unimplemented }; - context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip); + context.pc = 
DwarfUnwindContext.stripInstructionPtrAuthCode(new_ip); if (context.pc > 0) context.pc -= 1; return new_ip; } @@ -819,7 +820,7 @@ const macho = std.macho; const mem = std.mem; const posix = std.posix; const testing = std.testing; -const UnwindContext = std.debug.SelfInfo.UnwindContext; +const DwarfUnwindContext = std.debug.SelfInfo.DwarfUnwindContext; const Error = std.debug.SelfInfo.Error; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index b58125031e83..ff37e283b965 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -193,12 +193,12 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro else => unreachable, } } -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *DwarfUnwindContext) Error!usize { if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); std.debug.assert(di.unwind[0] != null); for (&di.unwind) |*opt_unwind| { const unwind = &(opt_unwind.* orelse break); - return context.unwindFrameDwarf(unwind, module.load_offset, null) catch |err| switch (err) { + return context.unwindFrame(gpa, unwind, module.load_offset, null) catch |err| switch (err) { error.MissingDebugInfo => continue, // try the next one else => |e| return e, }; @@ -233,7 +233,7 @@ const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const elf = std.elf; const mem = std.mem; -const UnwindContext = std.debug.SelfInfo.UnwindContext; +const DwarfUnwindContext = std.debug.SelfInfo.DwarfUnwindContext; const Error = std.debug.SelfInfo.Error; const builtin = @import("builtin"); From f1215adedab23535024a28367375a62909de6395 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 11:41:26 +0100 Subject: [PATCH 29/85] 
SelfInfo.DarwinModule: rename field --- lib/std/debug/SelfInfo/DarwinModule.zig | 33 ++++++++++++------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index ce15aed95cd7..0434b4eaaad0 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -75,7 +75,7 @@ fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { .eh_frame = eh_frame, }; } -fn loadFullInfo(module: *const DarwinModule, gpa: Allocator) !DebugInfo.Full { +fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO { const mapped_mem = try mapDebugInfoFile(module.name); errdefer posix.munmap(mapped_mem); @@ -189,21 +189,21 @@ fn loadFullInfo(module: *const DarwinModule, gpa: Allocator) !DebugInfo.Full { }; } pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { - if (di.full == null) di.full = module.loadFullInfo(gpa) catch |err| switch (err) { + if (di.loaded_macho == null) di.loaded_macho = module.loadMachO(gpa) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| return e, else => return error.ReadFailed, }; - const full = &di.full.?; + const loaded_macho = &di.loaded_macho.?; const vaddr = address - module.load_offset; - const symbol = MachoSymbol.find(full.symbols, vaddr) orelse return .unknown; + const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; // offset of `address` from start of `symbol` const address_symbol_offset = vaddr - symbol.addr; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(full.strings[symbol.strx..], 0); + const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); // If any information is missing, we can at least return this 
from now on. const sym_only_result: std.debug.Symbol = .{ @@ -213,11 +213,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu }; const o_file: *DebugInfo.OFile = of: { - const gop = try full.ofiles.getOrPut(gpa, symbol.ofile); + const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile); if (!gop.found_existing) { - const o_file_path = mem.sliceTo(full.strings[symbol.ofile..], 0); + const o_file_path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch { - _ = full.ofiles.pop().?; + _ = loaded_macho.ofiles.pop().?; return sym_only_result; }; } @@ -581,23 +581,22 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, } pub const DebugInfo = struct { unwind: ?Unwind, - // MLUGG TODO: awful field name - full: ?Full, + loaded_macho: ?LoadedMachO, pub const init: DebugInfo = .{ .unwind = null, - .full = null, + .loaded_macho = null, }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (di.full) |*full| { - for (full.ofiles.values()) |*ofile| { + if (di.loaded_macho) |*loaded_macho| { + for (loaded_macho.ofiles.values()) |*ofile| { ofile.dwarf.deinit(gpa); ofile.symbols_by_name.deinit(gpa); } - full.ofiles.deinit(gpa); - gpa.free(full.symbols); - posix.munmap(full.mapped_memory); + loaded_macho.ofiles.deinit(gpa); + gpa.free(loaded_macho.symbols); + posix.munmap(loaded_macho.mapped_memory); } } @@ -607,7 +606,7 @@ pub const DebugInfo = struct { eh_frame: ?[]const u8, }; - const Full = struct { + const LoadedMachO = struct { mapped_memory: []align(std.heap.page_size_min) const u8, symbols: []const MachoSymbol, strings: [:0]const u8, From 202aeacc05a2fc53762c49d18daeefdf0fa5fbec Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 12:52:00 +0100 Subject: [PATCH 30/85] std: fixes --- lib/std/Build/Step.zig | 16 ++-------------- lib/std/Build/Step/CheckObject.zig | 30 ++++++++++++------------------ lib/std/Thread.zig | 2 +- 
lib/std/debug.zig | 2 +- lib/std/macho.zig | 4 ++-- lib/std/start.zig | 2 +- 6 files changed, 19 insertions(+), 37 deletions(-) diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index f71a24f2e9a3..8e9e12248aab 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -275,18 +275,6 @@ pub fn dependOn(step: *Step, other: *Step) void { step.dependencies.append(other) catch @panic("OOM"); } -pub fn getStackTrace(s: *Step) ?std.builtin.StackTrace { - var len: usize = 0; - while (len < s.debug_stack_trace.len and s.debug_stack_trace[len] != 0) { - len += 1; - } - - return if (len == 0) null else .{ - .instruction_addresses = s.debug_stack_trace, - .index = len, - }; -} - fn makeNoOp(step: *Step, options: MakeOptions) anyerror!void { _ = options; @@ -308,9 +296,9 @@ pub fn cast(step: *Step, comptime T: type) ?*T { /// For debugging purposes, prints identifying information about this Step. pub fn dump(step: *Step, w: *std.Io.Writer, tty_config: std.Io.tty.Config) void { - if (step.getStackTrace()) |stack_trace| { + if (step.debug_stack_trace.instruction_addresses.len > 0) { w.print("name: '{s}'. 
creation stack trace:\n", .{step.name}) catch {}; - std.debug.writeStackTrace(stack_trace, w, tty_config) catch {}; + std.debug.writeStackTrace(&step.debug_stack_trace, w, tty_config) catch {}; } else { const field = "debug_stack_frames_count"; comptime assert(@hasField(Build, field)); diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index e65120641f38..56be318a84b2 100644 --- a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -1098,15 +1098,9 @@ const MachODumper = struct { for (ctx.symtab.items) |sym| { const sym_name = ctx.getString(sym.n_strx); if (sym.n_type.bits.is_stab != 0) { - const tt = switch (sym.n_type) { - macho.N_SO => "SO", - macho.N_OSO => "OSO", - macho.N_BNSYM => "BNSYM", - macho.N_ENSYM => "ENSYM", - macho.N_FUN => "FUN", - macho.N_GSYM => "GSYM", - macho.N_STSYM => "STSYM", - else => "UNKNOWN STAB", + const tt = switch (sym.n_type.stab) { + _ => "UNKNOWN STAB", + else => @tagName(sym.n_type.stab), }; try writer.print("{x}", .{sym.n_value}); if (sym.n_sect > 0) { @@ -1114,27 +1108,27 @@ const MachODumper = struct { try writer.print(" ({s},{s})", .{ sect.segName(), sect.sectName() }); } try writer.print(" {s} (stab) {s}\n", .{ tt, sym_name }); - } else if (sym.n_type.type == .sect) { + } else if (sym.n_type.bits.type == .sect) { const sect = ctx.sections.items[sym.n_sect - 1]; try writer.print("{x} ({s},{s})", .{ sym.n_value, sect.segName(), sect.sectName(), }); - if (sym.n_desc & macho.REFERENCED_DYNAMICALLY != 0) try writer.writeAll(" [referenced dynamically]"); + if (sym.n_desc.referenced_dynamically) try writer.writeAll(" [referenced dynamically]"); if (sym.n_desc.weak_def_or_ref_to_weak) try writer.writeAll(" weak"); if (sym.n_desc.weak_ref) try writer.writeAll(" weakref"); - if (sym.ext()) { - if (sym.pext()) try writer.writeAll(" private"); + if (sym.n_type.bits.ext) { + if (sym.n_type.bits.pext) try writer.writeAll(" private"); try writer.writeAll(" external"); - } else if 
(sym.pext()) try writer.writeAll(" (was private external)"); + } else if (sym.n_type.bits.pext) try writer.writeAll(" (was private external)"); try writer.print(" {s}\n", .{sym_name}); } else if (sym.tentative()) { - const alignment = (sym.n_desc >> 8) & 0x0F; + const alignment = (@as(u16, @bitCast(sym.n_desc)) >> 8) & 0x0F; try writer.print(" 0x{x:0>16} (common) (alignment 2^{d})", .{ sym.n_value, alignment }); - if (sym.ext()) try writer.writeAll(" external"); + if (sym.n_type.bits.ext) try writer.writeAll(" external"); try writer.print(" {s}\n", .{sym_name}); - } else if (sym.n_type.type == .undf) { + } else if (sym.n_type.bits.type == .undf) { const ordinal = @divFloor(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); const import_name = blk: { if (ordinal <= 0) { @@ -1154,7 +1148,7 @@ const MachODumper = struct { }; try writer.writeAll("(undefined)"); if (sym.n_desc.weak_ref) try writer.writeAll(" weakref"); - if (sym.ext()) try writer.writeAll(" external"); + if (sym.n_type.bits.ext) try writer.writeAll(" external"); try writer.print(" {s} (from {s})\n", .{ sym_name, import_name, diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig index d377172f083a..a3b382f3726f 100644 --- a/lib/std/Thread.zig +++ b/lib/std/Thread.zig @@ -530,7 +530,7 @@ fn callFn(comptime f: anytype, args: anytype) switch (Impl) { @call(.auto, f, args) catch |err| { std.debug.print("error: {s}\n", .{@errorName(err)}); if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace.*); + std.debug.dumpStackTrace(trace); } }; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index cfc442562fac..c1e2d19fc888 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1443,7 +1443,7 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize .index = frames.len, .instruction_addresses = frames, }; - writeStackTrace(stack_trace, stderr, tty_config) catch return; + writeStackTrace(&stack_trace, stderr, tty_config) catch return; } if (t.index > end) { 
stderr.print("{d} more traces not shown; consider increasing trace size\n", .{ diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 4ebb0cabd855..d541e2d13e77 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -892,7 +892,7 @@ pub const nlist_64 = extern struct { n_desc: packed struct(u16) { _pad0: u3 = 0, arm_thumb_def: bool, - _pad1: u1 = 0, + referenced_dynamically: bool, /// The meaning of this bit is contextual. /// See `N_DESC_DISCARDED` and `N_NO_DEAD_STRIP`. discarded_or_no_dead_strip: bool, @@ -907,7 +907,7 @@ pub const nlist_64 = extern struct { n_value: u64, pub fn tentative(sym: nlist_64) bool { - return sym.n_type.type == .undf and sym.n_value != 0; + return sym.n_type.bits.type == .undf and sym.n_value != 0; } }; diff --git a/lib/std/start.zig b/lib/std/start.zig index 7030616d6d11..0ea5c44c2b5f 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -636,7 +636,7 @@ pub inline fn callMain() u8 { } std.log.err("{s}", .{@errorName(err)}); if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace.*); + std.debug.dumpStackTrace(trace); } return 1; }; From 0c7b2a7bd5433b7e7bcde3bb49d48226dc2adef9 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 14:03:11 +0100 Subject: [PATCH 31/85] fix compiler ftbfs from std.macho and std.dwarf changes --- lib/std/debug/Dwarf/Unwind.zig | 2 +- lib/std/dwarf/EH.zig | 8 +- src/link/Dwarf.zig | 2 +- src/link/Elf/eh_frame.zig | 6 +- src/link/MachO.zig | 4 +- src/link/MachO/InternalObject.zig | 30 +++--- src/link/MachO/Object.zig | 146 +++++++++++++++--------------- src/link/MachO/Symbol.zig | 42 ++++++--- src/link/MachO/Thunk.zig | 4 +- src/link/MachO/ZigObject.zig | 32 +++---- src/link/MachO/eh_frame.zig | 20 ++-- src/link/MachO/file.zig | 16 ++-- 12 files changed, 165 insertions(+), 147 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 01ba96aad4cd..5334988b90d0 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ 
-603,13 +603,13 @@ fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: /// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. fn readEhPointer(r: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { const offset = try readEhPointerAbs(r, enc.type, addr_size_bytes, endian); + if (enc.indirect) return bad(); // GCC extension; not supported const base = switch (enc.rel) { .abs, .aligned => 0, .pcrel => ctx.pc_rel_base, .textrel => ctx.text_rel_base orelse return bad(), .datarel => ctx.data_rel_base orelse return bad(), .funcrel => ctx.function_rel_base orelse return bad(), - .indirect => return bad(), // GCC extension; not supported _ => return bad(), }; return switch (offset) { diff --git a/lib/std/dwarf/EH.zig b/lib/std/dwarf/EH.zig index 8396f36f6aab..97f1ffbfe2f1 100644 --- a/lib/std/dwarf/EH.zig +++ b/lib/std/dwarf/EH.zig @@ -1,6 +1,8 @@ pub const PE = packed struct(u8) { type: Type, rel: Rel, + /// Undocumented GCC extension + indirect: bool = false, /// This is a special encoding which does not correspond to named `type`/`rel` values. pub const omit: PE = @bitCast(@as(u8, 0xFF)); @@ -18,15 +20,15 @@ pub const PE = packed struct(u8) { _, }; - pub const Rel = enum(u4) { + /// The specification considers this a `u4`, but the GCC `indirect` field extension conflicts + /// with that, so we consider it a `u3` instead. 
+ pub const Rel = enum(u3) { abs = 0x0, pcrel = 0x1, textrel = 0x2, datarel = 0x3, funcrel = 0x4, aligned = 0x5, - /// Undocumented GCC extension - indirect = 0x8, _, }; }; diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 8918386b3525..228a8d6a25a8 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -4852,7 +4852,7 @@ fn flushWriterError(dwarf: *Dwarf, pt: Zcu.PerThread) (FlushError || Writer.Erro hw.writeSleb128(dwarf.debug_frame.header.data_alignment_factor) catch unreachable; hw.writeUleb128(dwarf.debug_frame.header.return_address_register) catch unreachable; hw.writeUleb128(1) catch unreachable; - hw.writeByte(DW.EH.PE.pcrel | DW.EH.PE.sdata4) catch unreachable; + hw.writeByte(@bitCast(@as(DW.EH.PE, .{ .type = .sdata4, .rel = .pcrel }))) catch unreachable; hw.writeByte(DW.CFA.def_cfa_sf) catch unreachable; hw.writeUleb128(Register.rsp.dwarfNum()) catch unreachable; hw.writeSleb128(-1) catch unreachable; diff --git a/src/link/Elf/eh_frame.zig b/src/link/Elf/eh_frame.zig index 76756d66d880..326764070ab8 100644 --- a/src/link/Elf/eh_frame.zig +++ b/src/link/Elf/eh_frame.zig @@ -456,10 +456,10 @@ pub fn writeEhFrameRelocs(elf_file: *Elf, relocs: *std.array_list.Managed(elf.El pub fn writeEhFrameHdr(elf_file: *Elf, writer: anytype) !void { try writer.writeByte(1); // version - try writer.writeByte(DW_EH_PE.pcrel | DW_EH_PE.sdata4); // eh_frame_ptr_enc + try writer.writeByte(@bitCast(@as(DW_EH_PE, .{ .type = .sdata4, .rel = .pcrel }))); // eh_frame_ptr_enc // Building the lookup table would be expensive work on every `flush` -- omit it. 
- try writer.writeByte(DW_EH_PE.omit); // fde_count_enc - try writer.writeByte(DW_EH_PE.omit); // table_enc + try writer.writeByte(@bitCast(DW_EH_PE.omit)); // fde_count_enc + try writer.writeByte(@bitCast(DW_EH_PE.omit)); // table_enc const shdrs = elf_file.sections.items(.shdr); const eh_frame_shdr = shdrs[elf_file.section_indexes.eh_frame.?]; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index fd1119851d41..3e7e3a4e8a08 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4149,9 +4149,9 @@ pub const SymtabCtx = struct { pub const null_sym = macho.nlist_64{ .n_strx = 0, - .n_type = 0, + .n_type = @bitCast(@as(u8, 0)), .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig index 00721c470b76..f0c6c1fee730 100644 --- a/src/link/MachO/InternalObject.zig +++ b/src/link/MachO/InternalObject.zig @@ -69,9 +69,9 @@ pub fn initSymbols(self: *InternalObject, macho_file: *MachO) !void { const nlist = obj.symtab.addOneAssumeCapacity(); nlist.* = .{ .n_strx = name.pos, - .n_type = args.type, + .n_type = @bitCast(args.type), .n_sect = 0, - .n_desc = args.desc, + .n_desc = @bitCast(args.desc), .n_value = 0, }; symbol.nlist_idx = nlist_idx; @@ -143,7 +143,7 @@ pub fn resolveSymbols(self: *InternalObject, macho_file: *MachO) !void { } global.* = gop.index; - if (nlist.undf()) continue; + if (nlist.n_type.bits.type == .undf) continue; if (gop.ref.getFile(macho_file) == null) { gop.ref.* = .{ .index = @intCast(i), .file = self.index }; continue; @@ -171,7 +171,7 @@ pub fn resolveBoundarySymbols(self: *InternalObject, macho_file: *MachO) !void { const object = macho_file.getFile(index).?.object; for (object.symbols.items, 0..) 
|sym, i| { const nlist = object.symtab.items(.nlist)[i]; - if (!nlist.undf() or !nlist.ext()) continue; + if (nlist.n_type.bits.type != .undf or !nlist.n_type.bits.ext) continue; const ref = object.getSymbolRef(@intCast(i), macho_file); if (ref.getFile(macho_file) != null) continue; const name = sym.getName(macho_file); @@ -206,9 +206,9 @@ pub fn resolveBoundarySymbols(self: *InternalObject, macho_file: *MachO) !void { const nlist = self.symtab.addOneAssumeCapacity(); nlist.* = .{ .n_strx = name_str.pos, - .n_type = macho.N_SECT, + .n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; sym.nlist_idx = nlist_idx; @@ -226,7 +226,7 @@ pub fn markLive(self: *InternalObject, macho_file: *MachO) void { for (0..self.symbols.items.len) |i| { const nlist = self.symtab.items[i]; - if (!nlist.ext()) continue; + if (!nlist.n_type.bits.ext) continue; const ref = self.getSymbolRef(@intCast(i), macho_file); const file = ref.getFile(macho_file) orelse continue; @@ -273,9 +273,9 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil const nlist = try self.symtab.addOne(gpa); nlist.* = .{ .n_strx = name_str.pos, - .n_type = macho.N_SECT, + .n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }, .n_sect = @intCast(n_sect + 1), - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; sym.nlist_idx = nlist_idx; @@ -326,9 +326,9 @@ fn addObjcSelrefsSection(self: *InternalObject, methname_sym_index: Symbol.Index const nlist = try self.symtab.addOne(gpa); nlist.* = .{ .n_strx = 0, - .n_type = macho.N_SECT, + .n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }, .n_sect = @intCast(n_sect + 1), - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; sym.nlist_idx = nlist_idx; @@ -352,8 +352,8 @@ pub fn resolveObjcMsgSendSymbols(self: *InternalObject, macho_file: *MachO) 
!voi for (object.symbols.items, 0..) |sym, i| { const nlist = object.symtab.items(.nlist)[i]; - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; + if (!nlist.n_type.bits.ext) continue; + if (nlist.n_type.bits.type != .undf) continue; const ref = object.getSymbolRef(@intCast(i), macho_file); if (ref.getFile(macho_file) != null) continue; @@ -381,9 +381,9 @@ pub fn resolveObjcMsgSendSymbols(self: *InternalObject, macho_file: *MachO) !voi const nlist = try self.symtab.addOne(gpa); nlist.* = .{ .n_strx = name_str.pos, - .n_type = macho.N_SECT | macho.N_EXT | macho.N_PEXT, + .n_type = .{ .bits = .{ .ext = true, .type = .sect, .pext = true, .is_stab = 0 } }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; sym.nlist_idx = nlist_idx; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index e2d9564799be..90d96b85d956 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -179,7 +179,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { idx: usize, fn rank(ctx: *const Object, nl: macho.nlist_64) u8 { - if (!nl.ext()) { + if (!nl.n_type.bits.ext) { const name = ctx.getNStrx(nl.n_strx); if (name.len == 0) return 5; if (name[0] == 'l' or name[0] == 'L') return 4; @@ -202,7 +202,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { var nlists = try std.array_list.Managed(NlistIdx).initCapacity(gpa, self.symtab.items(.nlist).len); defer nlists.deinit(); for (self.symtab.items(.nlist), 0..) 
|nlist, i| { - if (nlist.n_type.bits.is_stab != 0 or nlist.n_type.type != .sect) continue; + if (nlist.n_type.bits.is_stab != 0 or nlist.n_type.bits.type != .sect) continue; nlists.appendAssumeCapacity(.{ .nlist = nlist, .idx = i }); } mem.sort(NlistIdx, nlists.items, self, NlistIdx.lessThan); @@ -488,9 +488,9 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m self.symtab.set(nlist_index, .{ .nlist = .{ .n_strx = name_str.pos, - .n_type = macho.N_SECT, + .n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }, .n_sect = @intCast(atom.n_sect + 1), - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = atom.getInputAddress(macho_file), }, .size = atom.size, @@ -555,9 +555,9 @@ fn initFixedSizeLiterals(self: *Object, allocator: Allocator, macho_file: *MachO self.symtab.set(nlist_index, .{ .nlist = .{ .n_strx = name_str.pos, - .n_type = macho.N_SECT, + .n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }, .n_sect = @intCast(atom.n_sect + 1), - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = atom.getInputAddress(macho_file), }, .size = atom.size, @@ -613,9 +613,9 @@ fn initPointerLiterals(self: *Object, allocator: Allocator, macho_file: *MachO) self.symtab.set(nlist_index, .{ .nlist = .{ .n_strx = name_str.pos, - .n_type = macho.N_SECT, + .n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }, .n_sect = @intCast(atom.n_sect + 1), - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = atom.getInputAddress(macho_file), }, .size = atom.size, @@ -805,7 +805,7 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); for (self.symtab.items(.nlist), self.symtab.items(.atom)) |nlist, *atom| { - if (!nlist.n_type.bits.is_stab != 0 and nlist.n_type.type == .sect) { + if (nlist.n_type.bits.is_stab == 0 and nlist.n_type.bits.type == .sect) { const sect = 
self.sections.items(.header)[nlist.n_sect - 1]; const subs = self.sections.items(.subsections)[nlist.n_sect - 1].items; if (nlist.n_value == sect.addr) { @@ -852,30 +852,30 @@ fn initSymbols(self: *Object, allocator: Allocator, macho_file: *MachO) !void { symbol.extra = self.addSymbolExtraAssumeCapacity(.{}); if (self.getAtom(atom_index)) |atom| { - assert(nlist.n_type.type != .abs); + assert(nlist.n_type.bits.type != .abs); symbol.value -= atom.getInputAddress(macho_file); symbol.atom_ref = .{ .index = atom_index, .file = self.index }; } symbol.flags.weak = nlist.n_desc.weak_def_or_ref_to_weak; - symbol.flags.abs = nlist.n_type.type == .abs; + symbol.flags.abs = nlist.n_type.bits.type == .abs; symbol.flags.tentative = nlist.tentative(); symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.n_desc.discarded_or_no_dead_strip; - symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; + symbol.flags.dyn_ref = nlist.n_desc.referenced_dynamically; symbol.flags.interposable = false; // TODO - // symbol.flags.interposable = nlist.ext() and (nlist.n_type.type == .sect or nlist.n_type.type == .abs) and macho_file.base.isDynLib() and macho_file.options.namespace == .flat and !nlist.pext(); + // symbol.flags.interposable = nlist.ext() and (nlist.n_type.bits.type == .sect or nlist.n_type.bits.type == .abs) and macho_file.base.isDynLib() and macho_file.options.namespace == .flat and !nlist.pext(); - if (nlist.n_type.type == .sect and + if (nlist.n_type.bits.type == .sect and self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) { symbol.flags.tlv = true; } - if (nlist.ext()) { - if (nlist.n_type.type == .undf) { + if (nlist.n_type.bits.ext) { + if (nlist.n_type.bits.type == .undf) { symbol.flags.weak_ref = nlist.n_desc.weak_ref; - } else if (nlist.pext() or (nlist.n_desc.weak_def_or_ref_to_weak and nlist.n_desc.weak_ref) or self.hidden) { + } else if (nlist.n_type.bits.pext or (nlist.n_desc.weak_def_or_ref_to_weak 
and nlist.n_desc.weak_ref) or self.hidden) { symbol.visibility = .hidden; } else { symbol.visibility = .global; @@ -919,7 +919,7 @@ fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_f var addr_lookup = std.StringHashMap(u64).init(allocator); defer addr_lookup.deinit(); for (syms) |sym| { - if (sym.n_type.type == .sect and (sym.ext() or sym.pext())) { + if (sym.n_type.bits.type == .sect and (sym.n_type.bits.ext or sym.n_type.bits.pext)) { try addr_lookup.putNoClobber(self.getNStrx(sym.n_strx), sym.n_value); } } @@ -927,41 +927,43 @@ fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_f var i: u32 = start; while (i < end) : (i += 1) { const open = syms[i]; - if (open.n_type != macho.N_SO) { + if (open.n_type.stab != .so) { try macho_file.reportParseError2(self.index, "unexpected symbol stab type 0x{x} as the first entry", .{ - open.n_type, + @intFromEnum(open.n_type.stab), }); return error.MalformedObject; } - while (i < end and syms[i].n_type == macho.N_SO and syms[i].n_sect != 0) : (i += 1) {} + while (i < end and syms[i].n_type.stab == .so and syms[i].n_sect != 0) : (i += 1) {} var sf: StabFile = .{ .comp_dir = i }; // TODO validate i += 3; - while (i < end and syms[i].n_type != macho.N_SO) : (i += 1) { + while (i < end and syms[i].n_type.stab != .so) : (i += 1) { const nlist = syms[i]; var stab: StabFile.Stab = .{}; - switch (nlist.n_type) { - macho.N_BNSYM => { + switch (nlist.n_type.stab) { + .bnsym => { stab.is_func = true; stab.index = sym_lookup.find(nlist.n_value); // TODO validate i += 3; }, - macho.N_GSYM => { + .gsym => { stab.is_func = false; stab.index = sym_lookup.find(addr_lookup.get(self.getNStrx(nlist.n_strx)).?); }, - macho.N_STSYM => { + .stsym => { stab.is_func = false; stab.index = sym_lookup.find(nlist.n_value); }, + _ => { + try macho_file.reportParseError2(self.index, "unhandled symbol stab type 0x{x}", .{@intFromEnum(nlist.n_type.stab)}); + return error.MalformedObject; + }, else => { 
- try macho_file.reportParseError2(self.index, "unhandled symbol stab type 0x{x}", .{ - nlist.n_type, - }); + try macho_file.reportParseError2(self.index, "unhandled symbol stab type '{t}'", .{nlist.n_type.stab}); return error.MalformedObject; }, } @@ -1132,7 +1134,7 @@ fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: Fil fn find(fs: @This(), addr: u64) ?Symbol.Index { for (0..fs.ctx.symbols.items.len) |i| { const nlist = fs.ctx.symtab.items(.nlist)[i]; - if (nlist.ext() and nlist.n_value == addr) return @intCast(i); + if (nlist.n_type.bits.ext and nlist.n_value == addr) return @intCast(i); } return null; } @@ -1242,7 +1244,7 @@ fn parseUnwindRecords(self: *Object, allocator: Allocator, cpu_arch: std.Target. const slice = self.symtab.slice(); for (slice.items(.nlist), slice.items(.atom), slice.items(.size)) |nlist, atom, size| { if (nlist.n_type.bits.is_stab != 0) continue; - if (nlist.n_type.type != .sect) continue; + if (nlist.n_type.bits.type != .sect) continue; const sect = self.sections.items(.header)[nlist.n_sect - 1]; if (sect.isCode() and sect.size > 0) { try superposition.ensureUnusedCapacity(1); @@ -1458,8 +1460,8 @@ pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { const gpa = macho_file.base.comp.gpa; for (self.symtab.items(.nlist), self.symtab.items(.atom), self.globals.items, 0..) 
|nlist, atom_index, *global, i| { - if (!nlist.ext()) continue; - if (nlist.n_type.type == .sect) { + if (!nlist.n_type.bits.ext) continue; + if (nlist.n_type.bits.type == .sect) { const atom = self.getAtom(atom_index).?; if (!atom.isAlive()) continue; } @@ -1473,7 +1475,7 @@ pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { } global.* = gop.index; - if (nlist.n_type.type == .undf and !nlist.tentative()) continue; + if (nlist.n_type.bits.type == .undf and !nlist.tentative()) continue; if (gop.ref.getFile(macho_file) == null) { gop.ref.* = .{ .index = @intCast(i), .file = self.index }; continue; @@ -1495,12 +1497,12 @@ pub fn markLive(self: *Object, macho_file: *MachO) void { for (0..self.symbols.items.len) |i| { const nlist = self.symtab.items(.nlist)[i]; - if (!nlist.ext()) continue; + if (!nlist.n_type.bits.ext) continue; const ref = self.getSymbolRef(@intCast(i), macho_file); const file = ref.getFile(macho_file) orelse continue; const sym = ref.getSymbol(macho_file).?; - const should_keep = nlist.n_type.type == .undf or (nlist.tentative() and !sym.flags.tentative); + const should_keep = nlist.n_type.bits.type == .undf or (nlist.tentative() and !sym.flags.tentative); if (should_keep and file == .object and !file.object.alive) { file.object.alive = true; file.object.markLive(macho_file); @@ -1565,7 +1567,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { const name = try std.fmt.allocPrintSentinel(gpa, "__DATA$__common${s}", .{sym.getName(macho_file)}, 0); defer gpa.free(name); - const alignment = (nlist.n_desc >> 8) & 0x0f; + const alignment = (@as(u16, @bitCast(nlist.n_desc)) >> 8) & 0x0f; const n_sect = try self.addSection(gpa, "__DATA", "__common"); const atom_index = try self.addAtom(gpa, .{ .name = try self.addString(gpa, name), @@ -1589,9 +1591,9 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { sym.visibility = .global; nlist.n_value = 0; - nlist.n_type = macho.N_EXT | 
macho.N_SECT; + nlist.n_type = .{ .bits = .{ .ext = true, .type = .sect, .pext = false, .is_stab = 0 } }; nlist.n_sect = 0; - nlist.n_desc = 0; + nlist.n_desc = @bitCast(@as(u16, 0)); nlist_atom.* = atom_index; } } @@ -1685,7 +1687,7 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void { pub fn updateArSymtab(self: Object, ar_symtab: *Archive.ArSymtab, macho_file: *MachO) error{OutOfMemory}!void { const gpa = macho_file.base.comp.gpa; for (self.symtab.items(.nlist)) |nlist| { - if (!nlist.ext() or (nlist.n_type.type == .undf and !nlist.tentative())) continue; + if (!nlist.n_type.bits.ext or (nlist.n_type.bits.type == .undf and !nlist.tentative())) continue; const off = try ar_symtab.strtab.insert(gpa, self.getNStrx(nlist.n_strx)); try ar_symtab.entries.append(gpa, .{ .off = off, .file = self.index }); } @@ -2028,30 +2030,30 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v ) void { context.symtab.items[index] = .{ .n_strx = 0, - .n_type = macho.N_BNSYM, + .n_type = .{ .stab = .bnsym }, .n_sect = n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = n_value, }; context.symtab.items[index + 1] = .{ .n_strx = n_strx, - .n_type = macho.N_FUN, + .n_type = .{ .stab = .fun }, .n_sect = n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = n_value, }; context.symtab.items[index + 2] = .{ .n_strx = 0, - .n_type = macho.N_FUN, + .n_type = .{ .stab = .fun }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = size, }; context.symtab.items[index + 3] = .{ .n_strx = 0, - .n_type = macho.N_ENSYM, + .n_type = .{ .stab = .ensym }, .n_sect = n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = size, }; } @@ -2068,9 +2070,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_SO comp_dir ctx.symtab.items[index] = .{ .n_strx = n_strx, - .n_type = macho.N_SO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 0, + .n_desc = 
@bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; @@ -2081,9 +2083,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_SO tu_name macho_file.symtab.items[index] = .{ .n_strx = n_strx, - .n_type = macho.N_SO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; @@ -2094,9 +2096,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_OSO path ctx.symtab.items[index] = .{ .n_strx = n_strx, - .n_type = macho.N_OSO, + .n_type = .{ .stab = .oso }, .n_sect = 0, - .n_desc = 1, + .n_desc = @bitCast(@as(u16, 1)), .n_value = self.mtime, }; index += 1; @@ -2159,18 +2161,18 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v } else if (sym.visibility == .global) { ctx.symtab.items[index] = .{ .n_strx = sym_n_strx, - .n_type = macho.N_GSYM, + .n_type = .{ .stab = .gsym }, .n_sect = sym_n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; } else { ctx.symtab.items[index] = .{ .n_strx = sym_n_strx, - .n_type = macho.N_STSYM, + .n_type = .{ .stab = .stsym }, .n_sect = sym_n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = sym_n_value, }; index += 1; @@ -2181,9 +2183,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_SO ctx.symtab.items[index] = .{ .n_strx = 0, - .n_type = macho.N_SO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; } else { @@ -2198,9 +2200,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_SO comp_dir ctx.symtab.items[index] = .{ .n_strx = n_strx, - .n_type = macho.N_SO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; @@ -2211,9 +2213,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_SO 
tu_name ctx.symtab.items[index] = .{ .n_strx = n_strx, - .n_type = macho.N_SO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; @@ -2224,9 +2226,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_OSO path ctx.symtab.items[index] = .{ .n_strx = n_strx, - .n_type = macho.N_OSO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 1, + .n_desc = @bitCast(@as(u16, 1)), .n_value = sf.getOsoModTime(self), }; index += 1; @@ -2254,18 +2256,18 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v } else if (sym.visibility == .global) { ctx.symtab.items[index] = .{ .n_strx = sym_n_strx, - .n_type = macho.N_GSYM, + .n_type = .{ .stab = .gsym }, .n_sect = sym_n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; } else { ctx.symtab.items[index] = .{ .n_strx = sym_n_strx, - .n_type = macho.N_STSYM, + .n_type = .{ .stab = .stsym }, .n_sect = sym_n_sect, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = sym_n_value, }; index += 1; @@ -2276,9 +2278,9 @@ pub fn writeStabs(self: Object, stroff: u32, macho_file: *MachO, ctx: anytype) v // N_SO ctx.symtab.items[index] = .{ .n_strx = 0, - .n_type = macho.N_SO, + .n_type = .{ .stab = .so }, .n_sect = 0, - .n_desc = 0, + .n_desc = @bitCast(@as(u16, 0)), .n_value = 0, }; index += 1; diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 582768ad10c6..abf9dd81bd10 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -36,7 +36,7 @@ pub fn isLocal(symbol: Symbol) bool { pub fn isSymbolStab(symbol: Symbol, macho_file: *MachO) bool { const file = symbol.getFile(macho_file) orelse return false; return switch (file) { - .object => symbol.getNlist(macho_file).stab(), + .object => symbol.getNlist(macho_file).n_type.bits.is_stab != 0, else => false, }; } @@ -233,35 +233,49 @@ pub inline fn setExtra(symbol: Symbol, extra: 
Extra, macho_file: *MachO) void { pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) void { if (symbol.isLocal()) { - out.n_type = if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; + out.n_type = .{ .bits = .{ + .ext = false, + .type = if (symbol.flags.abs) .abs else .sect, + .pext = false, + .is_stab = 0, + } }; out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.getOutputSectionIndex(macho_file) + 1); - out.n_desc = 0; + out.n_desc = @bitCast(@as(u16, 0)); out.n_value = symbol.getAddress(.{ .stubs = false }, macho_file); switch (symbol.visibility) { - .hidden => out.n_type |= macho.N_PEXT, + .hidden => out.n_type.bits.pext = true, else => {}, } } else if (symbol.flags.@"export") { assert(symbol.visibility == .global); - out.n_type = macho.N_EXT; - out.n_type |= if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; + out.n_type = .{ .bits = .{ + .ext = true, + .type = if (symbol.flags.abs) .abs else .sect, + .pext = false, + .is_stab = 0, + } }; out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.getOutputSectionIndex(macho_file) + 1); out.n_value = symbol.getAddress(.{ .stubs = false }, macho_file); - out.n_desc = 0; + out.n_desc = @bitCast(@as(u16, 0)); if (symbol.flags.weak) { - out.n_desc |= macho.N_WEAK_DEF; + out.n_desc.weak_def_or_ref_to_weak = true; } if (symbol.flags.dyn_ref) { - out.n_desc |= macho.REFERENCED_DYNAMICALLY; + out.n_desc.referenced_dynamically = true; } } else { assert(symbol.visibility == .global); - out.n_type = macho.N_EXT; + out.n_type = .{ .bits = .{ + .ext = true, + .type = .undf, + .pext = false, + .is_stab = 0, + } }; out.n_sect = 0; out.n_value = 0; - out.n_desc = 0; + out.n_desc = @bitCast(@as(u16, 0)); // TODO: // const ord: u16 = if (macho_file.options.namespace == .flat) @@ -274,14 +288,14 @@ pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) vo ord else macho.BIND_SPECIAL_DYLIB_SELF; - out.n_desc = macho.N_SYMBOL_RESOLVER * ord; + out.n_desc = 
@bitCast(macho.N_SYMBOL_RESOLVER * ord); if (symbol.flags.weak) { - out.n_desc |= macho.N_WEAK_DEF; + out.n_desc.weak_def_or_ref_to_weak = true; } if (symbol.weakRef(macho_file)) { - out.n_desc |= macho.N_WEAK_REF; + out.n_desc.weak_ref = true; } } } diff --git a/src/link/MachO/Thunk.zig b/src/link/MachO/Thunk.zig index 4588a7f0e0dc..cdb9eb649af4 100644 --- a/src/link/MachO/Thunk.zig +++ b/src/link/MachO/Thunk.zig @@ -56,10 +56,10 @@ pub fn writeSymtab(thunk: Thunk, macho_file: *MachO, ctx: anytype) void { n_strx += @intCast("__thunk".len); ctx.strtab.items[n_strx] = 0; n_strx += 1; - out_sym.n_type = macho.N_SECT; + out_sym.n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }; out_sym.n_sect = @intCast(thunk.out_n_sect + 1); out_sym.n_value = @intCast(thunk.getTargetAddress(ref, macho_file)); - out_sym.n_desc = 0; + out_sym.n_desc = @bitCast(@as(u16, 0)); } } diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 5a0a71f380c8..4a0280244844 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -123,9 +123,9 @@ fn newSymbol(self: *ZigObject, allocator: Allocator, name: MachO.String, args: s self.symtab.set(nlist_idx, .{ .nlist = .{ .n_strx = name.pos, - .n_type = args.type, + .n_type = @bitCast(args.type), .n_sect = 0, - .n_desc = args.desc, + .n_desc = @bitCast(args.desc), .n_value = 0, }, .size = 0, @@ -206,8 +206,8 @@ pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) !void { const gpa = macho_file.base.comp.gpa; for (self.symtab.items(.nlist), self.symtab.items(.atom), self.globals.items, 0..) 
|nlist, atom_index, *global, i| { - if (!nlist.ext()) continue; - if (nlist.sect()) { + if (!nlist.n_type.bits.ext) continue; + if (nlist.n_type.bits.type == .sect) { const atom = self.getAtom(atom_index).?; if (!atom.isAlive()) continue; } @@ -221,7 +221,7 @@ pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) !void { } global.* = gop.index; - if (nlist.undf() and !nlist.tentative()) continue; + if (nlist.n_type.bits.type == .undf and !nlist.tentative()) continue; if (gop.ref.getFile(macho_file) == null) { gop.ref.* = .{ .index = @intCast(i), .file = self.index }; continue; @@ -229,7 +229,7 @@ pub fn resolveSymbols(self: *ZigObject, macho_file: *MachO) !void { if (self.asFile().getSymbolRank(.{ .archive = false, - .weak = nlist.weakDef(), + .weak = nlist.n_desc.weak_def_or_ref_to_weak, .tentative = nlist.tentative(), }) < gop.ref.getSymbol(macho_file).?.getSymbolRank(macho_file)) { gop.ref.* = .{ .index = @intCast(i), .file = self.index }; @@ -243,12 +243,12 @@ pub fn markLive(self: *ZigObject, macho_file: *MachO) void { for (0..self.symbols.items.len) |i| { const nlist = self.symtab.items(.nlist)[i]; - if (!nlist.ext()) continue; + if (!nlist.n_type.bits.ext) continue; const ref = self.getSymbolRef(@intCast(i), macho_file); const file = ref.getFile(macho_file) orelse continue; const sym = ref.getSymbol(macho_file).?; - const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative); + const should_keep = nlist.n_type.bits.type == .undf or (nlist.tentative() and !sym.flags.tentative); if (should_keep and file == .object and !file.object.alive) { file.object.alive = true; file.object.markLive(macho_file); @@ -331,8 +331,8 @@ pub fn claimUnresolved(self: *ZigObject, macho_file: *MachO) void { for (self.symbols.items, 0..) 
|*sym, i| { const nlist = self.symtab.items(.nlist)[i]; - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; + if (!nlist.n_type.bits.ext) continue; + if (nlist.n_type.bits.type != .undf) continue; if (self.getSymbolRef(@intCast(i), macho_file).getFile(macho_file) != null) continue; @@ -974,7 +974,7 @@ fn updateNavCode( atom.setAlive(true); atom.name = sym.name; nlist.n_strx = sym.name.pos; - nlist.n_type = macho.N_SECT; + nlist.n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }; nlist.n_sect = sect_index + 1; self.symtab.items(.size)[sym.nlist_idx] = code.len; @@ -1115,7 +1115,7 @@ fn createTlvDescriptor( atom.name = sym.name; nlist.n_strx = sym.name.pos; nlist.n_sect = sect_index + 1; - nlist.n_type = macho.N_SECT; + nlist.n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }; nlist.n_value = 0; self.symtab.items(.size)[sym.nlist_idx] = size; @@ -1322,7 +1322,7 @@ pub fn updateExports( const global_sym = &self.symbols.items[global_nlist_index]; global_nlist.n_value = nlist.n_value; global_nlist.n_sect = nlist.n_sect; - global_nlist.n_type = macho.N_EXT | macho.N_SECT; + global_nlist.n_type = .{ .bits = .{ .ext = true, .type = .sect, .pext = false, .is_stab = 0 } }; self.symtab.items(.size)[global_nlist_index] = self.symtab.items(.size)[nlist_idx]; self.symtab.items(.atom)[global_nlist_index] = atom_index; global_sym.atom_ref = .{ .index = atom_index, .file = self.index }; @@ -1330,7 +1330,7 @@ pub fn updateExports( switch (exp.opts.linkage) { .internal => { // Symbol should be hidden, or in MachO lingo, private extern. - global_nlist.n_type |= macho.N_PEXT; + global_nlist.n_type.bits.pext = true; global_sym.visibility = .hidden; }, .strong => { @@ -1339,7 +1339,7 @@ pub fn updateExports( .weak => { // Weak linkage is specified as part of n_desc field. // Symbol's n_type is like for a symbol with strong linkage. 
- global_nlist.n_desc |= macho.N_WEAK_DEF; + global_nlist.n_desc.weak_def_or_ref_to_weak = true; global_sym.visibility = .global; global_sym.flags.weak = true; }, @@ -1394,7 +1394,7 @@ fn updateLazySymbol( const nlist = &self.symtab.items(.nlist)[sym.nlist_idx]; nlist.n_strx = name_str.pos; - nlist.n_type = macho.N_SECT; + nlist.n_type = .{ .bits = .{ .ext = false, .type = .sect, .pext = false, .is_stab = 0 } }; nlist.n_sect = output_section_index + 1; self.symtab.items(.size)[sym.nlist_idx] = code.len; diff --git a/src/link/MachO/eh_frame.zig b/src/link/MachO/eh_frame.zig index 86edf973961f..1d3afaa3ac87 100644 --- a/src/link/MachO/eh_frame.zig +++ b/src/link/MachO/eh_frame.zig @@ -26,24 +26,24 @@ pub const Cie = struct { for (aug[1..]) |ch| switch (ch) { 'R' => { - const enc = try reader.takeByte(); - if (enc != DW_EH_PE.pcrel | DW_EH_PE.absptr) { + const enc: DW.EH.PE = @bitCast(try reader.takeByte()); + if (enc != @as(DW.EH.PE, .{ .type = .absptr, .rel = .pcrel })) { @panic("unexpected pointer encoding"); // TODO error } }, 'P' => { - const enc = try reader.takeByte(); - if (enc != DW_EH_PE.pcrel | DW_EH_PE.indirect | DW_EH_PE.sdata4) { + const enc: DW.EH.PE = @bitCast(try reader.takeByte()); + if (enc != @as(DW.EH.PE, .{ .type = .sdata4, .rel = .pcrel, .indirect = true })) { @panic("unexpected personality pointer encoding"); // TODO error } _ = try reader.takeInt(u32, .little); // personality pointer }, 'L' => { - const enc = try reader.takeByte(); - switch (enc & DW_EH_PE.type_mask) { - DW_EH_PE.sdata4 => cie.lsda_size = .p32, - DW_EH_PE.absptr => cie.lsda_size = .p64, - else => unreachable, // TODO error + const enc: DW.EH.PE = @bitCast(try reader.takeByte()); + switch (enc.type) { + .sdata4 => cie.lsda_size = .p32, + .absptr => cie.lsda_size = .p64, + else => @panic("unexpected lsda encoding"), // TODO error } }, else => @panic("unexpected augmentation string"), // TODO error @@ -505,7 +505,7 @@ const Writer = std.Io.Writer; const Allocator = 
std.mem.Allocator; const Atom = @import("Atom.zig"); -const DW_EH_PE = std.dwarf.EH.PE; +const DW = std.dwarf; const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); diff --git a/src/link/MachO/file.zig b/src/link/MachO/file.zig index 577e9374a647..ca3e1c0e8236 100644 --- a/src/link/MachO/file.zig +++ b/src/link/MachO/file.zig @@ -192,21 +192,21 @@ pub const File = union(enum) { assert(file == .object or file == .zig_object); for (file.getSymbols(), file.getNlists(), 0..) |*sym, nlist, i| { - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; + if (!nlist.n_type.bits.ext) continue; + if (nlist.n_type.bits.type != .undf) continue; if (file.getSymbolRef(@intCast(i), macho_file).getFile(macho_file) != null) continue; const is_import = switch (macho_file.undefined_treatment) { .@"error" => false, - .warn, .suppress => nlist.weakRef(), + .warn, .suppress => nlist.n_desc.weak_ref, .dynamic_lookup => true, }; if (is_import) { sym.value = 0; sym.atom_ref = .{ .index = 0, .file = 0 }; sym.flags.weak = false; - sym.flags.weak_ref = nlist.weakRef(); + sym.flags.weak_ref = nlist.n_desc.weak_ref; sym.flags.import = is_import; sym.visibility = .global; @@ -223,13 +223,13 @@ pub const File = union(enum) { assert(file == .object or file == .zig_object); for (file.getSymbols(), file.getNlists(), 0..) |*sym, nlist, i| { - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; + if (!nlist.n_type.bits.ext) continue; + if (nlist.n_type.bits.type != .undf) continue; if (file.getSymbolRef(@intCast(i), macho_file).getFile(macho_file) != null) continue; sym.value = 0; sym.atom_ref = .{ .index = 0, .file = 0 }; - sym.flags.weak_ref = nlist.weakRef(); + sym.flags.weak_ref = nlist.n_desc.weak_ref; sym.flags.import = true; sym.visibility = .global; @@ -247,7 +247,7 @@ pub const File = union(enum) { for (file.getSymbols(), file.getNlists(), 0..) 
|sym, nlist, i| { if (sym.visibility != .global) continue; if (sym.flags.weak) continue; - if (nlist.undf()) continue; + if (nlist.n_type.bits.type == .undf) continue; const ref = file.getSymbolRef(@intCast(i), macho_file); const ref_file = ref.getFile(macho_file) orelse continue; if (ref_file.getIndex() == file.getIndex()) continue; From 3a561da38d42ba331eb67bdb7d86d4a3e9b74533 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 14:31:45 +0100 Subject: [PATCH 32/85] std: doc comments and tweaks --- lib/std/debug.zig | 78 ++++++++++++++++++++++++-------------- lib/std/debug/SelfInfo.zig | 1 + 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index c1e2d19fc888..b8d0cbaab413 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -216,8 +216,12 @@ pub fn unlockStdErr() void { /// /// During the lock, any `std.Progress` information is cleared from the terminal. /// -/// Returns a `Writer` with empty buffer, meaning that it is -/// in fact unbuffered and does not need to be flushed. +/// The lock is recursive, so it is valid for the same thread to call `lockStderrWriter` multiple +/// times. The primary motivation is that this allows the panic handler to safely dump the stack +/// trace and panic message even if the mutex was held at the panic site. +/// +/// The returned `Writer` does not need to be manually flushed: flushing is performed automatically +/// when the matching `unlockStderrWriter` call occurs. pub fn lockStderrWriter(buffer: []u8) *Writer { return std.Progress.lockStderrWriter(buffer); } @@ -348,13 +352,12 @@ pub fn relocateContext(dest: *ThreadContext) void { } } -pub const have_getcontext = @TypeOf(posix.system.getcontext) != void; - /// Capture the current context. The register values in the context will reflect the /// state after the platform `getcontext` function returns. 
/// /// It is valid to call this if the platform doesn't have context capturing support, -/// in that case false will be returned. +/// in that case `false` will be returned. This function is `inline` so that the `false` +/// is comptime-known at the call site in that case. pub inline fn getContext(context: *ThreadContext) bool { if (native_os == .windows) { context.* = std.mem.zeroes(windows.CONTEXT); @@ -362,18 +365,19 @@ pub inline fn getContext(context: *ThreadContext) bool { return true; } - const result = have_getcontext and posix.system.getcontext(context) == 0; - if (native_os == .macos) { - assert(context.mcsize == @sizeOf(std.c.mcontext_t)); + if (@TypeOf(posix.system.getcontext) != void) { + if (posix.system.getcontext(context) != 0) return false; + if (native_os == .macos) { + assert(context.mcsize == @sizeOf(std.c.mcontext_t)); - // On aarch64-macos, the system getcontext doesn't write anything into the pc - // register slot, it only writes lr. This makes the context consistent with - // other aarch64 getcontext implementations which write the current lr - // (where getcontext will return to) into both the lr and pc slot of the context. - if (native_arch == .aarch64) context.mcontext.ss.pc = context.mcontext.ss.lr; + // On aarch64-macos, the system getcontext doesn't write anything into the pc + // register slot, it only writes lr. This makes the context consistent with + // other aarch64 getcontext implementations which write the current lr + // (where getcontext will return to) into both the lr and pc slot of the context. + if (native_arch == .aarch64) context.mcontext.ss.pc = context.mcontext.ss.lr; + } + return true; } - - return result; } /// Invokes detectable illegal behavior when `ok` is `false`. @@ -413,8 +417,8 @@ pub fn panic(comptime format: []const u8, args: anytype) noreturn { panicExtra(@returnAddress(), format, args); } -/// Equivalent to `@panic` but with a formatted message, and with an explicitly -/// provided return address. 
+/// Equivalent to `@panic` but with a formatted message and an explicitly provided return address +/// which will be the first address in the stack trace. pub fn panicExtra( ret_addr: ?usize, comptime format: []const u8, @@ -952,6 +956,7 @@ fn printLineInfo( } } fn printLineFromFile(writer: *Writer, source_location: SourceLocation) !void { + // Allow overriding the target-agnostic source line printing logic by exposing `root.debug.printLineFromFile`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "printLineFromFile")) { return root.debug.printLineFromFile(writer, source_location); } @@ -1139,17 +1144,17 @@ test printLineFromFile { } /// TODO multithreaded awareness -var debug_info_arena: ?std.heap.ArenaAllocator = null; -var debug_info_fba: std.heap.FixedBufferAllocator = .init(&debug_info_fba_buf); -var debug_info_fba_buf: [1024 * 1024 * 4]u8 = undefined; -fn getDebugInfoAllocator() mem.Allocator { - if (false) { - if (debug_info_arena == null) { - debug_info_arena = .init(std.heap.page_allocator); - } - return debug_info_arena.?.allocator(); +fn getDebugInfoAllocator() Allocator { + // Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`. + if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) { + return root.debug.getDebugInfoAllocator(); } - return debug_info_fba.allocator(); + // Otherwise, use a global arena backed by the page allocator + const S = struct { + var arena: ?std.heap.ArenaAllocator = null; + }; + if (S.arena == null) S.arena = .init(std.heap.page_allocator); + return S.arena.?.allocator(); } /// Whether or not the current target can print useful debug information when a segfault occurs. 
@@ -1184,7 +1189,16 @@ pub fn updateSegfaultHandler(act: ?*const posix.Sigaction) void { posix.sigaction(posix.SIG.FPE, act, null); } -/// Attaches a global SIGSEGV handler which calls `@panic("segmentation fault");` +/// Attaches a global handler for several signals which, when triggered, prints output to stderr +/// similar to the default panic handler, with a message containing the type of signal and a stack +/// trace if possible. This implementation does not just call the panic handler, because unwinding +/// the stack (for a stack trace) when a signal is received requires special target-specific logic. +/// +/// The signals for which a handler is installed are: +/// * SIGSEGV (segmentation fault) +/// * SIGILL (illegal instruction) +/// * SIGBUS (bus error) +/// * SIGFPE (arithmetic exception) pub fn attachSegfaultHandler() void { if (!have_segfault_handling_support) { @compileError("segfault handler not supported for this target"); @@ -1305,6 +1319,14 @@ fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(.winapi) c_ } fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadContext) noreturn { + // Allow overriding the target-agnostic segfault handler by exposing `root.debug.handleSegfault`. + if (@hasDecl(root, "debug") and @hasDecl(root.debug, "handleSegfault")) { + return root.debug.handleSegfault(addr, name, opt_ctx); + } + return defaultHandleSegfault(addr, name, opt_ctx); +} + +pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadContext) noreturn { // There is very similar logic to the following in `defaultPanic`. 
switch (panic_stage) { 0 => { diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index f1832f49023b..df5a9e7ad81a 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -124,6 +124,7 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// ) SelfInfo.Error!usize; /// ``` const Module: type = Module: { + // Allow overriding the target-specific `SelfInfo` implementation by exposing `root.debug.Module`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "Module")) { break :Module root.debug.Module; } From d9661e9e05af7a4be31c17cbfbbd8bc44d7c9000 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 14:32:02 +0100 Subject: [PATCH 33/85] compiler: better crash handler Far simpler, because everything which `crash_report.zig` did is now handled pretty well by `std.debug` anyway. All we want is to print some context around panics and segfaults. Using the new ability to override the default segfault handler while still having std handle the target-specific bits for us, that's really simple. 
--- src/Sema.zig | 6 +- src/Zcu/PerThread.zig | 4 + src/crash_report.zig | 568 ++++++++---------------------------------- src/main.zig | 6 +- 4 files changed, 117 insertions(+), 467 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 73c5b13c21f3..81d7ed43cd81 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1130,8 +1130,8 @@ fn analyzeBodyInner( const tags = sema.code.instructions.items(.tag); const datas = sema.code.instructions.items(.data); - var crash_info = crash_report.prepAnalyzeBody(sema, block, body); - crash_info.push(); + var crash_info: crash_report.AnalyzeBody = undefined; + crash_info.push(sema, block, body); defer crash_info.pop(); // We use a while (true) loop here to avoid a redundant way of breaking out of @@ -2632,7 +2632,7 @@ pub fn failWithOwnedErrorMsg(sema: *Sema, block: ?*Block, err_msg: *Zcu.ErrorMsg std.debug.print("compile error during Sema:\n", .{}); var error_bundle = wip_errors.toOwnedBundle("") catch @panic("out of memory"); error_bundle.renderToStdErr(.{ .ttyconf = .no_color }); - crash_report.compilerPanic("unexpected compile error occurred", null); + std.debug.panicExtra(@returnAddress(), "unexpected compile error occurred", .{}); } if (block) |start_block| { diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 32782e7e8952..62b8756c4948 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -28,6 +28,7 @@ const Value = @import("../Value.zig"); const Zcu = @import("../Zcu.zig"); const Compilation = @import("../Compilation.zig"); const codegen = @import("../codegen.zig"); +const crash_report = @import("../crash_report.zig"); const Zir = std.zig.Zir; const Zoir = std.zig.Zoir; const ZonGen = std.zig.ZonGen; @@ -4390,6 +4391,9 @@ pub fn addDependency(pt: Zcu.PerThread, unit: AnalUnit, dependee: InternPool.Dep pub fn runCodegen(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air, out: *@import("../link.zig").ZcuTask.LinkFunc.SharedMir) void { const zcu = pt.zcu; + 
crash_report.CodegenFunc.start(zcu, func_index); + defer crash_report.CodegenFunc.stop(func_index); + var timer = zcu.comp.startTimer(); const success: bool = if (runCodegenInner(pt, func_index, air)) |mir| success: { diff --git a/src/crash_report.zig b/src/crash_report.zig index e1692568bf55..c696c42cfc5b 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -1,34 +1,31 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const build_options = @import("build_options"); -const debug = std.debug; -const print_zir = @import("print_zir.zig"); -const windows = std.os.windows; -const posix = std.posix; -const native_os = builtin.os.tag; - -const Zcu = @import("Zcu.zig"); -const Sema = @import("Sema.zig"); -const InternPool = @import("InternPool.zig"); -const Zir = std.zig.Zir; -const Decl = Zcu.Decl; -const dev = @import("dev.zig"); - -/// To use these crash report diagnostics, publish this panic in your main file -/// and add `pub const enable_segfault_handler = false;` to your `std_options`. -/// You will also need to call initialize() on startup, preferably as the very first operation in your program. -pub const panic = if (build_options.enable_debug_extensions) - std.debug.FullPanic(compilerPanic) -else if (dev.env == .bootstrap) +/// We override the panic implementation to our own one, so we can print our own information before +/// calling the default panic handler. This declaration must be re-exposed from `@import("root")`. +pub const panic = if (dev.env == .bootstrap) std.debug.simple_panic else - std.debug.FullPanic(std.debug.defaultPanic); + std.debug.FullPanic(panicImpl); -/// Install signal handlers to identify crashes and report diagnostics. 
-pub fn initialize() void { - if (build_options.enable_debug_extensions and debug.have_segfault_handling_support) { - attachSegfaultHandler(); - } +/// We let std install its segfault handler, but we override the target-agnostic handler it calls, +/// so we can print our own information before calling the default segfault logic. This declaration +/// must be re-exposed from `@import("root")`. +pub const debug = struct { + pub const handleSegfault = handleSegfaultImpl; +}; + +/// Printed in panic messages when suggesting a command to run, allowing copy-pasting the command. +/// Set by `main` as soon as arguments are known. The value here is a default in case we somehow +/// crash earlier than that. +pub var zig_argv0: []const u8 = "zig"; + +fn handleSegfaultImpl(addr: ?usize, name: []const u8, opt_ctx: ?*std.debug.ThreadContext) noreturn { + @branchHint(.cold); + dumpCrashContext() catch {}; + std.debug.defaultHandleSegfault(addr, name, opt_ctx); +} +fn panicImpl(msg: []const u8, first_trace_addr: ?usize) noreturn { + @branchHint(.cold); + dumpCrashContext() catch {}; + std.debug.defaultPanic(msg, first_trace_addr orelse @returnAddress()); } pub const AnalyzeBody = if (build_options.enable_debug_extensions) struct { @@ -38,63 +35,96 @@ pub const AnalyzeBody = if (build_options.enable_debug_extensions) struct { body: []const Zir.Inst.Index, body_index: usize, - pub fn push(self: *@This()) void { - const head = &zir_state; - debug.assert(self.parent == null); - self.parent = head.*; - head.* = self; - } + threadlocal var current: ?*AnalyzeBody = null; - pub fn pop(self: *@This()) void { - const head = &zir_state; - const old = head.*.?; - debug.assert(old == self); - head.* = old.parent; + pub fn setBodyIndex(ab: *AnalyzeBody, index: usize) void { + ab.body_index = index; } - pub fn setBodyIndex(self: *@This(), index: usize) void { - self.body_index = index; + pub fn push(ab: *AnalyzeBody, sema: *Sema, block: *Sema.Block, body: []const Zir.Inst.Index) void { + ab.* = 
.{ + .parent = current, + .sema = sema, + .block = block, + .body = body, + .body_index = 0, + }; + current = ab; + } + pub fn pop(ab: *AnalyzeBody) void { + std.debug.assert(current.? == ab); // `Sema.analyzeBodyInner` did not match push/pop calls + current = ab.parent; } } else struct { - pub inline fn push(_: @This()) void {} - pub inline fn pop(_: @This()) void {} + // Dummy implementation, with functions marked `inline` to avoid interfering with tail calls. + pub inline fn push(_: AnalyzeBody, _: *Sema, _: *Sema.Block, _: []const Zir.Inst.Index) void {} + pub inline fn pop(_: AnalyzeBody) void {} pub inline fn setBodyIndex(_: @This(), _: usize) void {} }; -threadlocal var zir_state: ?*AnalyzeBody = if (build_options.enable_debug_extensions) null else @compileError("Cannot use zir_state without debug extensions."); +pub const CodegenFunc = if (build_options.enable_debug_extensions) struct { + zcu: *const Zcu, + func_index: InternPool.Index, + threadlocal var current: ?CodegenFunc = null; + pub fn start(zcu: *const Zcu, func_index: InternPool.Index) void { + std.debug.assert(current == null); + current = .{ .zcu = zcu, .func_index = func_index }; + } + pub fn stop(func_index: InternPool.Index) void { + std.debug.assert(current.?.func_index == func_index); + current = null; + } +} else struct { + // Dummy implementation + pub fn start(_: *const Zcu, _: InternPool.Index) void {} + pub fn stop(_: InternPool.Index) void {} +}; -pub fn prepAnalyzeBody(sema: *Sema, block: *Sema.Block, body: []const Zir.Inst.Index) AnalyzeBody { - return if (build_options.enable_debug_extensions) .{ - .parent = null, - .sema = sema, - .block = block, - .body = body, - .body_index = 0, - } else .{}; -} +fn dumpCrashContext() Io.Writer.Error!void { + const S = struct { + /// In the case of recursive panics or segfaults, don't print the context for a second time. + threadlocal var already_dumped = false; + /// TODO: make this unnecessary. 
It exists because `print_zir` currently needs an allocator, + /// but that shouldn't be necessary---it's already only used in one place. + threadlocal var crash_heap: [64 * 1024]u8 = undefined; + }; + if (S.already_dumped) return; + S.already_dumped = true; + + // TODO: this does mean that a different thread could grab the stderr mutex between the context + // and the actual panic printing, which would be quite confusing. + const stderr = std.debug.lockStderrWriter(&.{}); + defer std.debug.unlockStderrWriter(); -fn dumpStatusReport() !void { - const anal = zir_state orelse return; - // Note: We have the panic mutex here, so we can safely use the global crash heap. - var fba = std.heap.FixedBufferAllocator.init(&crash_heap); - const allocator = fba.allocator(); + try stderr.writeAll("Compiler crash context:\n"); - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; + if (CodegenFunc.current) |*cg| { + const func_nav = cg.zcu.funcInfo(cg.func_index).owner_nav; + const func_fqn = cg.zcu.intern_pool.getNav(func_nav).fqn; + try stderr.print("Generating function '{f}'\n\n", .{func_fqn.fmt(&cg.zcu.intern_pool)}); + } else if (AnalyzeBody.current) |anal| { + try dumpCrashContextSema(anal, stderr, &S.crash_heap); + } else { + try stderr.writeAll("(no context)\n\n"); + } +} +fn dumpCrashContextSema(anal: *AnalyzeBody, stderr: *Io.Writer, crash_heap: []u8) Io.Writer.Error!void { const block: *Sema.Block = anal.block; const zcu = anal.sema.pt.zcu; + const comp = zcu.comp; + + var fba: std.heap.FixedBufferAllocator = .init(crash_heap); const file, const src_base_node = Zcu.LazySrcLoc.resolveBaseNode(block.src_base_inst, zcu) orelse { const file = zcu.fileByIndex(block.src_base_inst.resolveFile(&zcu.intern_pool)); - try stderr.print("Analyzing lost instruction in file '{f}'. This should not happen!\n\n", .{file.path.fmt(zcu.comp)}); + try stderr.print("Analyzing lost instruction in file '{f}'. 
This should not happen!\n\n", .{file.path.fmt(comp)}); return; }; - try stderr.writeAll("Analyzing "); - try stderr.print("Analyzing '{f}'\n", .{file.path.fmt(zcu.comp)}); + try stderr.print("Analyzing '{f}'\n", .{file.path.fmt(comp)}); print_zir.renderInstructionContext( - allocator, + fba.allocator(), anal.body, anal.body_index, file, @@ -107,16 +137,16 @@ fn dumpStatusReport() !void { }; try stderr.print( \\ For full context, use the command - \\ zig ast-check -t {f} + \\ {s} ast-check -t {f} \\ \\ - , .{file.path.fmt(zcu.comp)}); + , .{ zig_argv0, file.path.fmt(comp) }); var parent = anal.parent; while (parent) |curr| { fba.reset(); const cur_block_file = zcu.fileByIndex(curr.block.src_base_inst.resolveFile(&zcu.intern_pool)); - try stderr.print(" in {f}\n", .{cur_block_file.path.fmt(zcu.comp)}); + try stderr.print(" in {f}\n", .{cur_block_file.path.fmt(comp)}); _, const cur_block_src_base_node = Zcu.LazySrcLoc.resolveBaseNode(curr.block.src_base_inst, zcu) orelse { try stderr.writeAll(" > [lost instruction; this should not happen]\n"); parent = curr.parent; @@ -124,7 +154,7 @@ fn dumpStatusReport() !void { }; try stderr.writeAll(" > "); print_zir.renderSingleInstruction( - allocator, + fba.allocator(), curr.body[curr.body_index], cur_block_file, cur_block_src_base_node, @@ -142,398 +172,14 @@ fn dumpStatusReport() !void { try stderr.writeByte('\n'); } -var crash_heap: [16 * 4096]u8 = undefined; - -pub fn compilerPanic(msg: []const u8, maybe_ret_addr: ?usize) noreturn { - @branchHint(.cold); - PanicSwitch.preDispatch(); - const ret_addr = maybe_ret_addr orelse @returnAddress(); - const stack_ctx: StackContext = .{ .current = .{ .ret_addr = ret_addr } }; - PanicSwitch.dispatch(@errorReturnTrace(), stack_ctx, msg); -} - -/// Attaches a global SIGSEGV handler -pub fn attachSegfaultHandler() void { - if (!debug.have_segfault_handling_support) { - @compileError("segfault handler not supported for this target"); - } - if (native_os == .windows) { - _ = 
windows.kernel32.AddVectoredExceptionHandler(0, handleSegfaultWindows); - return; - } - const act: posix.Sigaction = .{ - .handler = .{ .sigaction = handleSegfaultPosix }, - .mask = posix.sigemptyset(), - .flags = (posix.SA.SIGINFO | posix.SA.RESTART | posix.SA.RESETHAND), - }; - debug.updateSegfaultHandler(&act); -} - -fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopaque) callconv(.c) noreturn { - // TODO: use alarm() here to prevent infinite loops - PanicSwitch.preDispatch(); - - const addr = switch (native_os) { - .linux => @intFromPtr(info.fields.sigfault.addr), - .freebsd, .macos => @intFromPtr(info.addr), - .netbsd => @intFromPtr(info.info.reason.fault.addr), - .openbsd => @intFromPtr(info.data.fault.addr), - .solaris, .illumos => @intFromPtr(info.reason.fault.addr), - else => @compileError("TODO implement handleSegfaultPosix for new POSIX OS"), - }; - - var err_buffer: [128]u8 = undefined; - const error_msg = switch (sig) { - posix.SIG.SEGV => std.fmt.bufPrint(&err_buffer, "Segmentation fault at address 0x{x}", .{addr}) catch "Segmentation fault", - posix.SIG.ILL => std.fmt.bufPrint(&err_buffer, "Illegal instruction at address 0x{x}", .{addr}) catch "Illegal instruction", - posix.SIG.BUS => std.fmt.bufPrint(&err_buffer, "Bus error at address 0x{x}", .{addr}) catch "Bus error", - else => std.fmt.bufPrint(&err_buffer, "Unknown error (signal {}) at address 0x{x}", .{ sig, addr }) catch "Unknown error", - }; - - const stack_ctx: StackContext = switch (builtin.cpu.arch) { - .x86, - .x86_64, - .arm, - .aarch64, - => StackContext{ .exception = @ptrCast(@alignCast(ctx_ptr)) }, - else => .not_supported, - }; - - PanicSwitch.dispatch(null, stack_ctx, error_msg); -} - -const WindowsSegfaultMessage = union(enum) { - literal: []const u8, - segfault: void, - illegal_instruction: void, -}; - -fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(.winapi) c_long { - switch (info.ExceptionRecord.ExceptionCode) { - 
windows.EXCEPTION_DATATYPE_MISALIGNMENT => handleSegfaultWindowsExtra(info, .{ .literal = "Unaligned Memory Access" }), - windows.EXCEPTION_ACCESS_VIOLATION => handleSegfaultWindowsExtra(info, .segfault), - windows.EXCEPTION_ILLEGAL_INSTRUCTION => handleSegfaultWindowsExtra(info, .illegal_instruction), - windows.EXCEPTION_STACK_OVERFLOW => handleSegfaultWindowsExtra(info, .{ .literal = "Stack Overflow" }), - else => return windows.EXCEPTION_CONTINUE_SEARCH, - } -} - -fn handleSegfaultWindowsExtra(info: *windows.EXCEPTION_POINTERS, comptime msg: WindowsSegfaultMessage) noreturn { - PanicSwitch.preDispatch(); - - const stack_ctx = if (@hasDecl(windows, "CONTEXT")) - StackContext{ .exception = info.ContextRecord } - else ctx: { - const addr = @intFromPtr(info.ExceptionRecord.ExceptionAddress); - break :ctx StackContext{ .current = .{ .ret_addr = addr } }; - }; - - switch (msg) { - .literal => |err| PanicSwitch.dispatch(null, stack_ctx, err), - .segfault => { - const format_item = "Segmentation fault at address 0x{x}"; - var buf: [format_item.len + 32]u8 = undefined; // 32 is arbitrary, but sufficiently large - const to_print = std.fmt.bufPrint(&buf, format_item, .{info.ExceptionRecord.ExceptionInformation[1]}) catch unreachable; - PanicSwitch.dispatch(null, stack_ctx, to_print); - }, - .illegal_instruction => { - const ip: ?usize = switch (stack_ctx) { - .exception => |ex| ex.getRegs().ip, - .current => |cur| cur.ret_addr, - .not_supported => null, - }; - - if (ip) |addr| { - const format_item = "Illegal instruction at address 0x{x}"; - var buf: [format_item.len + 32]u8 = undefined; // 32 is arbitrary, but sufficiently large - const to_print = std.fmt.bufPrint(&buf, format_item, .{addr}) catch unreachable; - PanicSwitch.dispatch(null, stack_ctx, to_print); - } else { - PanicSwitch.dispatch(null, stack_ctx, "Illegal Instruction"); - } - }, - } -} - -const StackContext = union(enum) { - current: struct { - ret_addr: ?usize, - }, - exception: *debug.ThreadContext, - 
not_supported: void, - - pub fn dumpStackTrace(ctx: @This()) void { - switch (ctx) { - .current => |ct| { - debug.dumpCurrentStackTrace(ct.ret_addr); - }, - .exception => |context| { - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; - debug.dumpStackTraceFromBase(context, stderr); - }, - .not_supported => { - std.fs.File.stderr().writeAll("Stack trace not supported on this platform.\n") catch {}; - }, - } - } -}; - -const PanicSwitch = struct { - const RecoverStage = enum { - initialize, - report_stack, - release_mutex, - release_ref_count, - abort, - silent_abort, - }; - - const RecoverVerbosity = enum { - message_and_stack, - message_only, - silent, - }; - - const PanicState = struct { - recover_stage: RecoverStage = .initialize, - recover_verbosity: RecoverVerbosity = .message_and_stack, - panic_ctx: StackContext = undefined, - panic_trace: ?*const std.builtin.StackTrace = null, - awaiting_dispatch: bool = false, - }; - - /// Counter for the number of threads currently panicking. - /// Updated atomically before taking the panic_mutex. - /// In recoverable cases, the program will not abort - /// until all panicking threads have dumped their traces. - var panicking = std.atomic.Value(u8).init(0); - - /// Tracks the state of the current panic. If the code within the - /// panic triggers a secondary panic, this allows us to recover. - threadlocal var panic_state_raw: PanicState = .{}; - - /// The segfault handlers above need to do some work before they can dispatch - /// this switch. Calling preDispatch() first makes that work fault tolerant. - pub fn preDispatch() void { - // TODO: We want segfaults to trigger the panic recursively here, - // but if there is a segfault accessing this TLS slot it will cause an - // infinite loop. We should use `alarm()` to prevent the infinite - // loop and maybe also use a non-thread-local global to detect if - // it's happening and print a message. 
- var panic_state: *volatile PanicState = &panic_state_raw; - if (panic_state.awaiting_dispatch) { - dispatch(null, .{ .current = .{ .ret_addr = null } }, "Panic while preparing callstack"); - } - panic_state.awaiting_dispatch = true; - } - - /// This is the entry point to a panic-tolerant panic handler. - /// preDispatch() *MUST* be called exactly once before calling this. - /// A threadlocal "recover_stage" is updated throughout the process. - /// If a panic happens during the panic, the recover_stage will be - /// used to select a recover* function to call to resume the panic. - /// The recover_verbosity field is used to handle panics while reporting - /// panics within panics. If the panic handler triggers a panic, it will - /// attempt to log an additional stack trace for the secondary panic. If - /// that panics, it will fall back to just logging the panic message. If - /// it can't even do that witout panicing, it will recover without logging - /// anything about the internal panic. Depending on the state, "recover" - /// here may just mean "call abort". 
- pub fn dispatch( - trace: ?*const std.builtin.StackTrace, - stack_ctx: StackContext, - msg: []const u8, - ) noreturn { - var panic_state: *volatile PanicState = &panic_state_raw; - debug.assert(panic_state.awaiting_dispatch); - panic_state.awaiting_dispatch = false; - nosuspend switch (panic_state.recover_stage) { - .initialize => goTo(initPanic, .{ panic_state, trace, stack_ctx, msg }), - .report_stack => goTo(recoverReportStack, .{ panic_state, trace, stack_ctx, msg }), - .release_mutex => goTo(recoverReleaseMutex, .{ panic_state, trace, stack_ctx, msg }), - .release_ref_count => goTo(recoverReleaseRefCount, .{ panic_state, trace, stack_ctx, msg }), - .abort => goTo(recoverAbort, .{ panic_state, trace, stack_ctx, msg }), - .silent_abort => goTo(abort, .{}), - }; - } - - noinline fn initPanic( - state: *volatile PanicState, - trace: ?*const std.builtin.StackTrace, - stack: StackContext, - msg: []const u8, - ) noreturn { - // use a temporary so there's only one volatile store - const new_state = PanicState{ - .recover_stage = .abort, - .panic_ctx = stack, - .panic_trace = trace, - }; - state.* = new_state; - - _ = panicking.fetchAdd(1, .seq_cst); - - state.recover_stage = .release_ref_count; - - std.debug.lockStdErr(); - - state.recover_stage = .release_mutex; - - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; - if (builtin.single_threaded) { - stderr.print("panic: ", .{}) catch goTo(releaseMutex, .{state}); - } else { - const current_thread_id = std.Thread.getCurrentId(); - stderr.print("thread {} panic: ", .{current_thread_id}) catch goTo(releaseMutex, .{state}); - } - stderr.print("{s}\n", .{msg}) catch goTo(releaseMutex, .{state}); - - state.recover_stage = .report_stack; - - dumpStatusReport() catch |err| { - stderr.print("\nIntercepted error.{} while dumping current state. 
Continuing...\n", .{err}) catch {}; - }; - - goTo(reportStack, .{state}); - } - - noinline fn recoverReportStack( - state: *volatile PanicState, - trace: ?*const std.builtin.StackTrace, - stack: StackContext, - msg: []const u8, - ) noreturn { - recover(state, trace, stack, msg); - - state.recover_stage = .release_mutex; - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; - stderr.writeAll("\nOriginal Error:\n") catch {}; - goTo(reportStack, .{state}); - } - - noinline fn reportStack(state: *volatile PanicState) noreturn { - state.recover_stage = .release_mutex; - - if (state.panic_trace) |t| { - debug.dumpStackTrace(t.*); - } - state.panic_ctx.dumpStackTrace(); - - goTo(releaseMutex, .{state}); - } - - noinline fn recoverReleaseMutex( - state: *volatile PanicState, - trace: ?*const std.builtin.StackTrace, - stack: StackContext, - msg: []const u8, - ) noreturn { - recover(state, trace, stack, msg); - goTo(releaseMutex, .{state}); - } - - noinline fn releaseMutex(state: *volatile PanicState) noreturn { - state.recover_stage = .abort; - - std.debug.unlockStdErr(); - - goTo(releaseRefCount, .{state}); - } - - noinline fn recoverReleaseRefCount( - state: *volatile PanicState, - trace: ?*const std.builtin.StackTrace, - stack: StackContext, - msg: []const u8, - ) noreturn { - recover(state, trace, stack, msg); - goTo(releaseRefCount, .{state}); - } - - noinline fn releaseRefCount(state: *volatile PanicState) noreturn { - state.recover_stage = .abort; - - if (panicking.fetchSub(1, .seq_cst) != 1) { - // Another thread is panicking, wait for the last one to finish - // and call abort() - - // Sleep forever without hammering the CPU - var futex = std.atomic.Value(u32).init(0); - while (true) std.Thread.Futex.wait(&futex, 0); - - // This should be unreachable, recurse into recoverAbort. 
- @panic("event.wait() returned"); - } - - goTo(abort, .{}); - } - - noinline fn recoverAbort( - state: *volatile PanicState, - trace: ?*const std.builtin.StackTrace, - stack: StackContext, - msg: []const u8, - ) noreturn { - recover(state, trace, stack, msg); - - state.recover_stage = .silent_abort; - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; - stderr.writeAll("Aborting...\n") catch {}; - goTo(abort, .{}); - } - - noinline fn abort() noreturn { - std.process.abort(); - } - - inline fn goTo(comptime func: anytype, args: anytype) noreturn { - // TODO: Tailcall is broken right now, but eventually this should be used - // to avoid blowing up the stack. It's ok for now though, there are no - // cycles in the state machine so the max stack usage is bounded. - //@call(.always_tail, func, args); - @call(.auto, func, args); - } - - fn recover( - state: *volatile PanicState, - trace: ?*const std.builtin.StackTrace, - stack: StackContext, - msg: []const u8, - ) void { - switch (state.recover_verbosity) { - .message_and_stack => { - // lower the verbosity, and restore it at the end if we don't panic. 
- state.recover_verbosity = .message_only; - - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; - stderr.writeAll("\nPanicked during a panic: ") catch {}; - stderr.writeAll(msg) catch {}; - stderr.writeAll("\nInner panic stack:\n") catch {}; - if (trace) |t| { - debug.dumpStackTrace(t.*); - } - stack.dumpStackTrace(); - - state.recover_verbosity = .message_and_stack; - }, - .message_only => { - state.recover_verbosity = .silent; +const std = @import("std"); +const Io = std.Io; +const Zir = std.zig.Zir; - var stderr_fw = std.fs.File.stderr().writer(&.{}); - const stderr = &stderr_fw.interface; - stderr.writeAll("\nPanicked while dumping inner panic stack: ") catch {}; - stderr.writeAll(msg) catch {}; - stderr.writeByte('\n') catch {}; +const Sema = @import("Sema.zig"); +const Zcu = @import("Zcu.zig"); +const InternPool = @import("InternPool.zig"); +const dev = @import("dev.zig"); +const print_zir = @import("print_zir.zig"); - // If we succeed, restore all the way to dumping the stack. 
- state.recover_verbosity = .message_and_stack; - }, - .silent => {}, - } - } -}; +const build_options = @import("build_options"); diff --git a/src/main.zig b/src/main.zig index 1111031e8771..89a552de0b1b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -43,7 +43,6 @@ const thread_stack_size = 60 << 20; pub const std_options: std.Options = .{ .wasiCwd = wasi_cwd, .logFn = log, - .enable_segfault_handler = false, .log_level = switch (builtin.mode) { .Debug => .debug, @@ -53,6 +52,7 @@ pub const std_options: std.Options = .{ }; pub const panic = crash_report.panic; +pub const debug = crash_report.debug; var wasi_preopens: fs.wasi.Preopens = undefined; pub fn wasi_cwd() std.os.wasi.fd_t { @@ -165,8 +165,6 @@ var debug_allocator: std.heap.DebugAllocator(.{ }) = .init; pub fn main() anyerror!void { - crash_report.initialize(); - const gpa, const is_debug = gpa: { if (build_options.debug_gpa) break :gpa .{ debug_allocator.allocator(), true }; if (native_os == .wasi) break :gpa .{ std.heap.wasm_allocator, false }; @@ -192,6 +190,8 @@ pub fn main() anyerror!void { const args = try process.argsAlloc(arena); + if (args.len > 0) crash_report.zig_argv0 = args[0]; + if (tracy.enable_allocation) { var gpa_tracy = tracy.tracyAllocator(gpa); return mainArgs(gpa_tracy.allocator(), arena, args); From f40fbdb3b300233a2fc880bad77c986259c38b0f Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 15:17:20 +0100 Subject: [PATCH 34/85] link.Elf: restore eh_frame_hdr search table building At least, when there's not a ZigObject. The old behavior was incorrect in the presence of a ZigObject, and this doesn't really mix nicely with incremental compilation anyway; but when the objects are all external, we may as well build the search table. 
---
 src/link/Elf/eh_frame.zig | 71 +++++++++++++++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/src/link/Elf/eh_frame.zig b/src/link/Elf/eh_frame.zig
index 326764070ab8..0b1c92d09899 100644
--- a/src/link/Elf/eh_frame.zig
+++ b/src/link/Elf/eh_frame.zig
@@ -234,7 +234,14 @@ pub fn calcEhFrameSize(elf_file: *Elf) !usize {
     return offset;
 }
 
+fn haveEhFrameHdrSearchTable(elf_file: *Elf) bool {
+    // Search table generation is not implemented for the ZigObject. Also, it would be wasteful to
+    // re-do this work on every single incremental update.
+    return elf_file.zigObjectPtr() == null;
+}
+
 pub fn calcEhFrameHdrSize(elf_file: *Elf) usize {
+    if (!haveEhFrameHdrSearchTable(elf_file)) return 8;
     var count: usize = 0;
     for (elf_file.objects.items) |index| {
         for (elf_file.file(index).?.object.fdes.items) |fde| {
@@ -242,7 +249,7 @@ pub fn calcEhFrameHdrSize(elf_file: *Elf) usize {
             count += 1;
         }
     }
-    return eh_frame_hdr_header_size + count * 8;
+    return 12 + count * 8;
 }
 
 pub fn calcEhFrameRelocs(elf_file: *Elf) usize {
@@ -455,15 +462,23 @@ pub fn writeEhFrameRelocs(elf_file: *Elf, relocs: *std.array_list.Managed(elf.El
 }
 
 pub fn writeEhFrameHdr(elf_file: *Elf, writer: anytype) !void {
+    const endian = elf_file.getTarget().cpu.arch.endian();
+    const have_table = haveEhFrameHdrSearchTable(elf_file);
+
     try writer.writeByte(1); // version
     try writer.writeByte(@bitCast(@as(DW_EH_PE, .{ .type = .sdata4, .rel = .pcrel }))); // eh_frame_ptr_enc
-    // Building the lookup table would be expensive work on every `flush` -- omit it.
- try writer.writeByte(@bitCast(DW_EH_PE.omit)); // fde_count_enc - try writer.writeByte(@bitCast(DW_EH_PE.omit)); // table_enc + if (have_table) { + try writer.writeByte(@bitCast(@as(DW_EH_PE, .{ .type = .udata4, .rel = .abs }))); // fde_count_enc + try writer.writeByte(@bitCast(@as(DW_EH_PE, .{ .type = .sdata4, .rel = .datarel }))); // table_enc + } else { + try writer.writeByte(@bitCast(DW_EH_PE.omit)); // fde_count_enc + try writer.writeByte(@bitCast(DW_EH_PE.omit)); // table_enc + } const shdrs = elf_file.sections.items(.shdr); const eh_frame_shdr = shdrs[elf_file.section_indexes.eh_frame.?]; const eh_frame_hdr_shdr = shdrs[elf_file.section_indexes.eh_frame_hdr.?]; + // eh_frame_ptr try writer.writeInt( u32, @as(u32, @bitCast(@as( @@ -472,9 +487,51 @@ pub fn writeEhFrameHdr(elf_file: *Elf, writer: anytype) !void { ))), .little, ); -} -const eh_frame_hdr_header_size: usize = 12; + if (!have_table) return; + + const gpa = elf_file.base.comp.gpa; + + // This must be an `extern struct` because we will write the bytes directly to the file. + const Entry = extern struct { + first_pc_rel: i32, + fde_addr_rel: i32, + fn lessThan(_: void, lhs: @This(), rhs: @This()) bool { + return lhs.first_pc_rel < rhs.first_pc_rel; + } + }; + // The number of entries was already computed by `calcEhFrameHdrSize`. + const num_fdes: u32 = @intCast(@divExact(eh_frame_hdr_shdr.sh_size - 12, 8)); + try writer.writeInt(u32, num_fdes, endian); + + var entries: std.ArrayList(Entry) = try .initCapacity(gpa, num_fdes); + defer entries.deinit(gpa); + for (elf_file.objects.items) |file_index| { + const object = elf_file.file(file_index).?.object; + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + const relocs = fde.relocs(object); + // Should `relocs.len == 0` be an error? Things are completely broken anyhow in that case... 
+ const rel = relocs[0]; + const ref = object.resolveSymbol(rel.r_sym(), elf_file); + const sym = elf_file.symbol(ref).?; + const fde_addr_abs: i64 = @intCast(fde.address(elf_file)); + const fde_addr_rel: i64 = fde_addr_abs - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr)); + const first_pc_abs: i64 = @as(i64, @intCast(sym.address(.{}, elf_file))) + rel.r_addend; + const first_pc_rel: i64 = first_pc_abs - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr)); + entries.appendAssumeCapacity(.{ + .first_pc_rel = @truncate(first_pc_rel), + .fde_addr_rel = @truncate(fde_addr_rel), + }); + } + } + assert(entries.items.len == num_fdes); + std.mem.sort(Entry, entries.items, {}, Entry.lessThan); + if (endian != builtin.cpu.arch.endian()) { + std.mem.byteSwapAllElements(Entry, entries.items); + } + try writer.writeAll(@ptrCast(entries.items)); +} const x86_64 = struct { fn resolveReloc(rec: anytype, elf_file: *Elf, rel: elf.Elf64_Rela, source: i64, target: i64, data: []u8) !void { @@ -538,3 +595,5 @@ const DW_EH_PE = std.dwarf.EH.PE; const Elf = @import("../Elf.zig"); const Object = @import("Object.zig"); const Symbol = @import("Symbol.zig"); + +const builtin = @import("builtin"); From ac4d633ed691159ea61130182a1b51635a95e228 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 15:31:09 +0100 Subject: [PATCH 35/85] std: fix debug.Info and debug.Coverage --- lib/std/debug/Coverage.zig | 3 ++- lib/std/debug/Dwarf.zig | 4 ++-- lib/std/debug/Info.zig | 13 ++++++++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig index 58e600dc6370..f1621c0e1208 100644 --- a/lib/std/debug/Coverage.zig +++ b/lib/std/debug/Coverage.zig @@ -145,6 +145,7 @@ pub const ResolveAddressesDwarfError = Dwarf.ScanError; pub fn resolveAddressesDwarf( cov: *Coverage, gpa: Allocator, + endian: std.builtin.Endian, /// Asserts the addresses are in ascending order. 
sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. @@ -184,7 +185,7 @@ pub fn resolveAddressesDwarf( if (cu.src_loc_cache == null) { cov.mutex.unlock(); defer cov.mutex.lock(); - d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + d.populateSrcLocCache(gpa, endian, cu) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { out.* = SourceLocation.invalid; continue :next_pc; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index e9ed9077af45..5bc751e1aad7 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -652,7 +652,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr } } -pub fn populateRanges(d: *Dwarf, gpa: Allocator) ScanError!void { +pub fn populateRanges(d: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void { assert(d.ranges.items.len == 0); for (d.compile_unit_list.items, 0..) |*cu, cu_index| { @@ -665,7 +665,7 @@ pub fn populateRanges(d: *Dwarf, gpa: Allocator) ScanError!void { continue; } const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; - var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var iter = DebugRangeIterator.init(ranges_value, d, endian, cu) catch continue; while (try iter.next()) |range| { // Not sure why LLVM thinks it's OK to emit these... 
if (range.start == range.end) continue; diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index e38645e1f93f..bc8efc71b451 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -24,14 +24,15 @@ coverage: *Coverage, pub const LoadError = Dwarf.ElfModule.LoadError; pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - var elf_module = try Dwarf.ElfModule.load(gpa, path, null, null, §ions, null); - try elf_module.dwarf.populateRanges(gpa); + var elf_module = try Dwarf.ElfModule.load(gpa, path, null, null, null, null); + // This is correct because `Dwarf.ElfModule` currently only supports native-endian ELF files. + const endian = @import("builtin").target.cpu.arch.endian(); + try elf_module.dwarf.populateRanges(gpa, endian); var info: Info = .{ .address_map = .{}, .coverage = coverage, }; - try info.address_map.put(gpa, elf_module.base_address, elf_module); + try info.address_map.put(gpa, 0, elf_module); return info; } @@ -58,5 +59,7 @@ pub fn resolveAddresses( assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf); + // This is correct because `Dwarf.ElfModule` currently only supports native-endian ELF files. + const endian = @import("builtin").target.cpu.arch.endian(); + return info.coverage.resolveAddressesDwarf(gpa, endian, sorted_pc_addrs, output, &elf_module.dwarf); } From 1392a7af171c00679b0754775a8f6f54e967eafd Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 20:08:28 +0100 Subject: [PATCH 36/85] std.debug: unwinding on Windows ...using `RtlVirtualUnwind` on x86_64 and aarch64, and `RtaCaptureStackBackTrace` on x86. 
--- lib/std/debug.zig | 29 ++++-- lib/std/debug/Dwarf.zig | 4 +- lib/std/debug/SelfInfo.zig | 28 ++++-- lib/std/debug/SelfInfo/DarwinModule.zig | 8 +- lib/std/debug/SelfInfo/ElfModule.zig | 4 +- lib/std/debug/SelfInfo/WindowsModule.zig | 117 ++++++++++++++++++++++- 6 files changed, 162 insertions(+), 28 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index b8d0cbaab413..eaa8ae3b8b70 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -378,6 +378,8 @@ pub inline fn getContext(context: *ThreadContext) bool { } return true; } + + return false; } /// Invokes detectable illegal behavior when `ok` is `false`. @@ -619,7 +621,9 @@ pub const StackUnwindOptions = struct { /// See `writeCurrentStackTrace` to immediately print the trace instead of capturing it. pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) std.builtin.StackTrace { var context_buf: ThreadContext = undefined; - var it: StackIterator = .init(options.context, &context_buf); + var it = StackIterator.init(options.context, &context_buf) catch { + return .{ .index = 0, .instruction_addresses = &.{} }; + }; defer it.deinit(); if (!it.stratOk(options.allow_unsafe_unwind)) { return .{ .index = 0, .instruction_addresses = &.{} }; @@ -657,7 +661,14 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ }, }; var context_buf: ThreadContext = undefined; - var it: StackIterator = .init(options.context, &context_buf); + var it = StackIterator.init(options.context, &context_buf) catch |err| switch (err) { + error.OutOfMemory => { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: out of memory\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + }, + }; defer it.deinit(); if (!it.stratOk(options.allow_unsafe_unwind)) { tty_config.setColor(writer, .dim) catch {}; @@ -751,14 +762,14 @@ pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void { const StackIterator = union(enum) { /// 
Unwinding using debug info (e.g. DWARF CFI). - di: if (SelfInfo.supports_unwinding) SelfInfo.DwarfUnwindContext else noreturn, + di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable. fp: usize, /// It is important that this function is marked `inline` so that it can safely use /// `@frameAddress` and `getContext` as the caller's stack frame and our own are one /// and the same. - inline fn init(context_opt: ?*const ThreadContext, context_buf: *ThreadContext) StackIterator { + inline fn init(context_opt: ?*const ThreadContext, context_buf: *ThreadContext) error{OutOfMemory}!StackIterator { if (builtin.cpu.arch.isSPARC()) { // Flush all the register windows on stack. if (builtin.cpu.has(.sparc, .v9)) { @@ -770,10 +781,10 @@ const StackIterator = union(enum) { if (context_opt) |context| { context_buf.* = context.*; relocateContext(context_buf); - return .{ .di = .init(context_buf) }; + return .{ .di = try .init(context_buf, getDebugInfoAllocator()) }; } if (getContext(context_buf)) { - return .{ .di = .init(context_buf) }; + return .{ .di = try .init(context_buf, getDebugInfoAllocator()) }; } return .{ .fp = @frameAddress() }; } @@ -816,10 +827,10 @@ const StackIterator = union(enum) { if (ra == 0) return .end; return .{ .frame = ra }; } else |err| { - const bad_pc = unwind_context.pc; - it.* = .{ .fp = unwind_context.getFp() catch 0 }; + const pc = unwind_context.pc; + it.* = .{ .fp = unwind_context.getFp() }; return .{ .switch_to_fp = .{ - .address = bad_pc, + .address = pc, .err = err, } }; } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 5bc751e1aad7..8b8dc7d732eb 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -282,13 +282,13 @@ pub const Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = mem.readInt(u32, 
debug_str_offsets[byte_offset..][0..4], endian); + const offset = mem.readInt(u32, debug_str_offsets[@intCast(byte_offset)..][0..4], endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], endian); + const offset = mem.readInt(u64, debug_str_offsets[@intCast(byte_offset)..][0..8], endian); return getStringGeneric(opt_str, offset); }, } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index df5a9e7ad81a..ef222cc7f446 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -42,6 +42,8 @@ pub const target_supported: bool = Module != void; /// For whether DWARF unwinding is *theoretically* possible, see `Dwarf.abi.supportsUnwinding`. pub const supports_unwinding: bool = Module.supports_unwinding; +pub const UnwindContext = if (supports_unwinding) Module.UnwindContext; + pub const init: SelfInfo = .{ .modules = .empty, .lookup_cache = if (Module.LookupCache != void) .init, @@ -53,7 +55,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *DwarfUnwindContext) Error!usize { +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -113,14 +115,23 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// ) SelfInfo.Error!std.debug.Symbol; /// /// Whether a reliable stack unwinding strategy, such as DWARF unwinding, is available. /// pub const supports_unwinding: bool; +/// /// Only required if `supports_unwinding == true`. 
+/// pub const UnwindContext = struct { +/// /// A PC value inside the function of the last unwound frame. +/// pc: usize, +/// pub fn init(tc: *std.debug.ThreadContext, gpa: Allocator) Allocator.Error!UnwindContext; +/// pub fn deinit(uc: *UnwindContext, gpa: Allocator) void; +/// /// Returns the frame pointer associated with the last unwound stack frame. If the frame +/// /// pointer is unknown, 0 may be returned instead. +/// pub fn getFp(uc: *UnwindContext) usize; +/// }; /// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame and returns -/// /// the next return address (which may be 0 indicating end of stack). This is currently -/// /// specialized to DWARF unwinding. +/// /// the next return address (which may be 0 indicating end of stack). /// pub fn unwindFrame( /// mod: *const Module, /// gpa: Allocator, /// di: *DebugInfo, -/// ctx: *SelfInfo.DwarfUnwindContext, +/// ctx: *UnwindContext, /// ) SelfInfo.Error!usize; /// ``` const Module: type = Module: { @@ -136,6 +147,8 @@ const Module: type = Module: { }; }; +/// An implementation of `UnwindContext` useful for DWARF-based unwinders. The `Module.unwindFrame` +/// implementation should wrap `DwarfUnwindContext.unwindFrame`. 
pub const DwarfUnwindContext = struct { cfa: ?usize, pc: usize, @@ -144,8 +157,9 @@ pub const DwarfUnwindContext = struct { vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - pub fn init(thread_context: *std.debug.ThreadContext) DwarfUnwindContext { + pub fn init(thread_context: *std.debug.ThreadContext, gpa: Allocator) error{}!DwarfUnwindContext { comptime assert(supports_unwinding); + _ = gpa; const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; const raw_pc_ptr = regValueNative(thread_context, ip_reg_num, null) catch { @@ -169,8 +183,8 @@ pub const DwarfUnwindContext = struct { self.* = undefined; } - pub fn getFp(self: *const DwarfUnwindContext) !usize { - return (try regValueNative(self.thread_context, Dwarf.abi.fpRegNum(native_arch, self.reg_context), self.reg_context)).*; + pub fn getFp(self: *const DwarfUnwindContext) usize { + return (regValueNative(self.thread_context, Dwarf.abi.fpRegNum(native_arch, self.reg_context), self.reg_context) catch return 0).*; } /// Resolves the register rule and places the result into `out` (see regBytes) diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 0434b4eaaad0..5bce65b89f0d 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -252,10 +252,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu }; } pub const supports_unwinding: bool = true; +pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *DwarfUnwindContext) Error!usize { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, @@ -274,7 +275,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, => return error.InvalidDebugInfo, }; } -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *DwarfUnwindContext) !usize { +fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { if (di.unwind == null) di.unwind = module.loadUnwindInfo(); const unwind = &di.unwind.?; @@ -575,7 +576,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, else => comptime unreachable, // unimplemented }; - context.pc = DwarfUnwindContext.stripInstructionPtrAuthCode(new_ip); + context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip); if (context.pc > 0) context.pc -= 1; return new_ip; } @@ -819,7 +820,6 @@ const macho = std.macho; const mem = std.mem; const posix = std.posix; const testing = std.testing; -const DwarfUnwindContext = std.debug.SelfInfo.DwarfUnwindContext; const Error = std.debug.SelfInfo.Error; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index ff37e283b965..7a280c0d6ecb 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -193,7 +193,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro else => unreachable, } } -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *DwarfUnwindContext) Error!usize { +pub fn unwindFrame(module: *const ElfModule, 
gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); std.debug.assert(di.unwind[0] != null); for (&di.unwind) |*opt_unwind| { @@ -205,6 +205,7 @@ pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, con } return error.MissingDebugInfo; } +pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; pub const supports_unwinding: bool = s: { const archs: []const std.Target.Cpu.Arch = switch (builtin.target.os.tag) { .linux => &.{ .x86, .x86_64, .arm, .armeb, .thumb, .thumbeb, .aarch64, .aarch64_be }, @@ -233,7 +234,6 @@ const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const elf = std.elf; const mem = std.mem; -const DwarfUnwindContext = std.debug.SelfInfo.DwarfUnwindContext; const Error = std.debug.SelfInfo.Error; const builtin = @import("builtin"); diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index ccede7efb2bf..a0f5deafc5c8 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -102,7 +102,7 @@ fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) ! if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; errdefer windows.CloseHandle(section_handle); var coff_len: usize = 0; - var section_view_ptr: [*]const u8 = undefined; + var section_view_ptr: ?[*]const u8 = null; const map_section_rc = windows.ntdll.NtMapViewOfSection( section_handle, process_handle, @@ -116,8 +116,8 @@ fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) ! 
windows.PAGE_READONLY, ); if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); - const section_view = section_view_ptr[0..coff_len]; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr.?)) == .SUCCESS); + const section_view = section_view_ptr.?[0..coff_len]; coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; di.mapped_file = .{ .file = coff_file, @@ -246,7 +246,116 @@ pub const DebugInfo = struct { }; } }; -pub const supports_unwinding: bool = false; + +pub const supports_unwinding: bool = true; +pub const UnwindContext = switch (builtin.cpu.arch) { + .x86 => struct { + pc: usize, + frames: []usize, + frames_capacity: usize, + next_index: usize, + /// Marked `noinline` to ensure that `RtlCaptureStackBackTrace` includes our caller. + pub noinline fn init(ctx: *windows.CONTEXT, gpa: Allocator) Allocator.Error!UnwindContext { + const frames_buf = try gpa.alloc(usize, 1024); + errdefer comptime unreachable; + const frames_len = windows.ntdll.RtlCaptureStackBackTrace(0, frames_buf.len, @ptrCast(frames_buf.ptr), null); + const regs = ctx.getRegs(); + const first_index = for (frames_buf[0..frames_len], 0..) |ret_addr, idx| { + if (ret_addr == regs.ip) break idx; + } else i: { + // If we were called by an exception handler, `regs.ip` wasn't in the trace because + // RtlCaptureStackBackTrace omits the KiUserExceptionDispatcher frame, which is the + // one in `regs.ip`. In that case, we have to start one frame shallower instead, and + // we can figure out that frame's ip from the context's bp. + const start_addr_ptr: *const usize = @ptrFromInt(regs.bp + 4); + const start_addr = start_addr_ptr.*; + for (frames_buf[0..frames_len], 0..) |ret_addr, idx| { + if (ret_addr == start_addr) break :i idx; + } + // The IP in the context can't be found; return an empty trace. 
+ gpa.free(frames_buf); + return .{ .pc = 0, .frames = &.{}, .frames_capacity = 0, .next_index = 0 }; + }; + return .{ + .pc = @returnAddress(), + .frames = frames_buf[0..frames_len], + .frames_capacity = 0, + .next_index = first_index, + }; + } + pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void { + gpa.free(ctx.frames.ptr[0..ctx.frames_capacity]); + ctx.* = undefined; + } + pub fn getFp(ctx: *UnwindContext) usize { + _ = ctx; + return 0; + } + }, + else => struct { + pc: usize, + cur: windows.CONTEXT, + history_table: windows.UNWIND_HISTORY_TABLE, + pub fn init(ctx: *const windows.CONTEXT, gpa: Allocator) Allocator.Error!UnwindContext { + _ = gpa; + return .{ + .pc = @returnAddress(), + .cur = ctx.*, + .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), + }; + } + pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void { + _ = ctx; + _ = gpa; + } + pub fn getFp(ctx: *UnwindContext) usize { + return ctx.cur.getRegs().bp; + } + }, +}; +pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { + _ = module; + _ = gpa; + _ = di; + + if (builtin.cpu.arch == .x86) { + const i = context.next_index; + if (i == context.frames.len) return 0; + context.next_index += 1; + const ip = context.frames[i]; + context.pc = ip -| 1; + return ip; + } + + const current_regs = context.cur.getRegs(); + var image_base: windows.DWORD64 = undefined; + if (windows.ntdll.RtlLookupFunctionEntry(current_regs.ip, &image_base, &context.history_table)) |runtime_function| { + var handler_data: ?*anyopaque = null; + var establisher_frame: u64 = undefined; + _ = windows.ntdll.RtlVirtualUnwind( + windows.UNW_FLAG_NHANDLER, + image_base, + current_regs.ip, + runtime_function, + &context.cur, + &handler_data, + &establisher_frame, + null, + ); + } else { + // leaf function + context.cur.setIp(@as(*const usize, @ptrFromInt(current_regs.sp)).*); + context.cur.setSp(current_regs.sp + @sizeOf(usize)); + } + + const next_regs = 
context.cur.getRegs(); + const tib = &windows.teb().NtTib; + if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { + return 0; + } + context.pc = next_regs.ip -| 1; + return next_regs.ip; +} const WindowsModule = @This(); From 229f0a01b86a04b354a0d6ad29d3c0e7be05d03e Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 20:46:58 +0100 Subject: [PATCH 37/85] std.debug: handle ThreadContext slightly better It's now user-overrideable, and uses `noreturn` types to neatly stop analysis. --- lib/std/debug.zig | 50 +++++++++++++++++++++---------------- lib/std/debug/Dwarf/abi.zig | 2 +- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index eaa8ae3b8b70..f57fcd80a026 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -330,19 +330,19 @@ test dumpHexFallible { try std.testing.expectEqualStrings(expected, aw.written()); } -pub const have_ucontext = posix.ucontext_t != void; - /// Platform-specific thread state. This contains register state, and on some platforms /// information about the stack. This is not safe to trivially copy, because some platforms /// use internal pointers within this structure. After copying, call `relocateContext`. -pub const ThreadContext = blk: { - if (native_os == .windows) { - break :blk windows.CONTEXT; - } else if (have_ucontext) { - break :blk posix.ucontext_t; - } else { - break :blk void; +pub const ThreadContext = ThreadContext: { + // Allow overriding the target's `ThreadContext` by exposing `root.debug.ThreadContext`. + if (@hasDecl(root, "debug") and @hasDecl(root.debug, "ThreadContext")) { + break :ThreadContext root.debug.ThreadContext; } + + if (native_os == .windows) break :ThreadContext windows.CONTEXT; + if (posix.ucontext_t != void) break :ThreadContext posix.ucontext_t; + + break :ThreadContext noreturn; }; /// Updates any internal pointers of a `ThreadContext` after the caller copies it. 
pub fn relocateContext(dest: *ThreadContext) void { @@ -351,6 +351,10 @@ pub fn relocateContext(dest: *ThreadContext) void { else => {}, } } +/// The value which is placed on the stack to make a copy of a `ThreadContext`. +const ThreadContextBuf = if (ThreadContext == noreturn) void else ThreadContext; +/// The pointer through which a `ThreadContext` is received from callers of stack tracing logic. +const ThreadContextPtr = if (ThreadContext == noreturn) noreturn else *const ThreadContext; /// Capture the current context. The register values in the context will reflect the /// state after the platform `getcontext` function returns. @@ -358,7 +362,12 @@ pub fn relocateContext(dest: *ThreadContext) void { /// It is valid to call this if the platform doesn't have context capturing support, /// in that case `false` will be returned. This function is `inline` so that the `false` /// is comptime-known at the call site in that case. -pub inline fn getContext(context: *ThreadContext) bool { +pub inline fn getContext(context: *ThreadContextBuf) bool { + // Allow overriding the target's `getContext` by exposing `root.debug.getContext`. + if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getContext")) { + return root.debug.getContext(context); + } + if (native_os == .windows) { context.* = std.mem.zeroes(windows.CONTEXT); windows.ntdll.RtlCaptureContext(context); @@ -608,8 +617,8 @@ pub const StackUnwindOptions = struct { first_address: ?usize = null, /// If not `null`, we will unwind from this `ThreadContext` instead of the current top of the /// stack. The main use case here is printing stack traces from signal handlers, where the - /// kernel provides a `*ThreadContext` of the state before the signal. - context: ?*const ThreadContext = null, + /// kernel provides a `*const ThreadContext` of the state before the signal. + context: ?ThreadContextPtr = null, /// If `true`, stack unwinding strategies which may cause crashes are used as a last resort. 
/// If `false`, only known-safe mechanisms will be attempted. allow_unsafe_unwind: bool = false, @@ -620,7 +629,7 @@ pub const StackUnwindOptions = struct { /// /// See `writeCurrentStackTrace` to immediately print the trace instead of capturing it. pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) std.builtin.StackTrace { - var context_buf: ThreadContext = undefined; + var context_buf: ThreadContextBuf = undefined; var it = StackIterator.init(options.context, &context_buf) catch { return .{ .index = 0, .instruction_addresses = &.{} }; }; @@ -660,7 +669,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ return; }, }; - var context_buf: ThreadContext = undefined; + var context_buf: ThreadContextBuf = undefined; var it = StackIterator.init(options.context, &context_buf) catch |err| switch (err) { error.OutOfMemory => { tty_config.setColor(writer, .dim) catch {}; @@ -769,7 +778,7 @@ const StackIterator = union(enum) { /// It is important that this function is marked `inline` so that it can safely use /// `@frameAddress` and `getContext` as the caller's stack frame and our own are one /// and the same. - inline fn init(context_opt: ?*const ThreadContext, context_buf: *ThreadContext) error{OutOfMemory}!StackIterator { + inline fn init(context_opt: ?ThreadContextPtr, context_buf: *ThreadContextBuf) error{OutOfMemory}!StackIterator { if (builtin.cpu.arch.isSPARC()) { // Flush all the register windows on stack. 
if (builtin.cpu.has(.sparc, .v9)) { @@ -1178,7 +1187,7 @@ pub const have_segfault_handling_support = switch (native_os) { .windows, => true, - .freebsd, .openbsd => have_ucontext, + .freebsd, .openbsd => ThreadContext != noreturn, else => false, }; @@ -1289,10 +1298,10 @@ fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopa => true, else => false, }; - if (!have_ucontext or !use_context) return handleSegfault(addr, name, null); + if (ThreadContext == noreturn or !use_context) return handleSegfault(addr, name, null); // Some kernels don't align `ctx_ptr` properly, so we'll copy it into a local buffer. - var copied_ctx: ThreadContext = undefined; + var copied_ctx: ThreadContextBuf = undefined; const orig_ctx: *align(1) posix.ucontext_t = @ptrCast(ctx_ptr); copied_ctx = orig_ctx.*; if (builtin.os.tag.isDarwin() and builtin.cpu.arch == .aarch64) { @@ -1329,7 +1338,7 @@ fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(.winapi) c_ handleSegfault(addr, name, info.ContextRecord); } -fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadContext) noreturn { +fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?ThreadContextPtr) noreturn { // Allow overriding the target-agnostic segfault handler by exposing `root.debug.handleSegfault`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "handleSegfault")) { return root.debug.handleSegfault(addr, name, opt_ctx); @@ -1337,7 +1346,7 @@ fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadContext) nore return defaultHandleSegfault(addr, name, opt_ctx); } -pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadContext) noreturn { +pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?ThreadContextPtr) noreturn { // There is very similar logic to the following in `defaultPanic`. 
switch (panic_stage) { 0 => { @@ -1355,7 +1364,6 @@ pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?*ThreadCo } else { stderr.print("{s} (no address available)\n", .{name}) catch break :trace; } - // MLUGG TODO: for this to work neatly, `ThreadContext` needs to be `noreturn` when not supported! if (opt_ctx) |context| { writeCurrentStackTrace(.{ .context = context, diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index 9fca02e38222..98a84392e641 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -139,7 +139,7 @@ pub fn regBytes( }; } - if (!std.debug.have_ucontext) return error.ThreadContextNotSupported; + if (posix.ucontext_t == void) return error.ThreadContextNotSupported; const ucontext_ptr = thread_context_ptr; return switch (builtin.cpu.arch) { From 2743fdb7ce3471e77caed7adf0e8657fa66716a0 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 8 Sep 2025 20:49:17 +0100 Subject: [PATCH 38/85] std.debug: try removing a probably-redundant condition --- lib/std/debug.zig | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index f57fcd80a026..87af0eefb953 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1285,20 +1285,7 @@ fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopa break :info .{ addr, name }; }; - // MLUGG TODO: this doesn't make any sense at all? - const use_context = switch (native_arch) { - .x86, - .x86_64, - .arm, - .armeb, - .thumb, - .thumbeb, - .aarch64, - .aarch64_be, - => true, - else => false, - }; - if (ThreadContext == noreturn or !use_context) return handleSegfault(addr, name, null); + if (ThreadContext == noreturn) return handleSegfault(addr, name, null); // Some kernels don't align `ctx_ptr` properly, so we'll copy it into a local buffer. 
var copied_ctx: ThreadContextBuf = undefined; From e6adddf80c5c24ed89e6dc9e5bbaa3b88dd5ed69 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 9 Sep 2025 08:48:52 +0100 Subject: [PATCH 39/85] small reasonable change --- lib/std/debug/SelfInfo/WindowsModule.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index a0f5deafc5c8..fc728935dacb 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -142,7 +142,7 @@ fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) ! try di.dwarf.?.open(gpa, native_endian); } - if (try coff_obj.getPdbPath()) |raw_path| pdb: { + if (coff_obj.getPdbPath() catch return error.InvalidDebugInfo) |raw_path| pdb: { const path = blk: { if (fs.path.isAbsolute(raw_path)) { break :blk raw_path; From f7980487395b660d5c568ba57891ab371a27102d Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 9 Sep 2025 08:55:11 +0100 Subject: [PATCH 40/85] std.debug: don't include dumpCurrentStackTrace frame If it's not given, we should set `first_address` to the return address of `dumpCurrentStackTrace` to avoid the call to `writeCurrentStackTrace` appearing in the trace. However, we must only do that if no `context` is given; if there's a context then we're starting the stack unwind elsewhere. 
--- lib/std/debug.zig | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 87af0eefb953..2b6028ca82d1 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -732,7 +732,15 @@ pub fn dumpCurrentStackTrace(options: StackUnwindOptions) void { const tty_config = tty.detectConfig(.stderr()); const stderr = lockStderrWriter(&.{}); defer unlockStderrWriter(); - writeCurrentStackTrace(options, stderr, tty_config) catch |err| switch (err) { + writeCurrentStackTrace(.{ + .first_address = a: { + if (options.first_address) |a| break :a a; + if (options.context != null) break :a null; + break :a @returnAddress(); // don't include this frame in the trace + }, + .context = options.context, + .allow_unsafe_unwind = options.allow_unsafe_unwind, + }, stderr, tty_config) catch |err| switch (err) { error.WriteFailed => {}, }; } From c1a30bd0d876330ce7a241fc297c66577ae7e6aa Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 9 Sep 2025 14:20:49 +0100 Subject: [PATCH 41/85] std: replace debug.Dwarf.ElfModule with debug.ElfFile This abstraction isn't really tied to DWARF at all! Really, we're just loading some information from an ELF file which is useful for debugging. That *includes* DWARF, but it also includes other information. For instance, the other change here: Now, if DWARF information is missing, `debug.SelfInfo.ElfModule` will name symbols by finding a matching symtab entry. We actually already do this on Mach-O, so it makes obvious sense to do the same on ELF! This change is what motivated the restructuring to begin with. The symtab work is derived from #22077. 
Co-authored-by: geemili --- lib/std/debug.zig | 1 + lib/std/debug/Dwarf.zig | 1 - lib/std/debug/Dwarf/ElfModule.zig | 376 ------------------- lib/std/debug/ElfFile.zig | 536 +++++++++++++++++++++++++++ lib/std/debug/Info.zig | 33 +- lib/std/debug/SelfInfo.zig | 1 + lib/std/debug/SelfInfo/ElfModule.zig | 100 +++-- 7 files changed, 627 insertions(+), 421 deletions(-) delete mode 100644 lib/std/debug/Dwarf/ElfModule.zig create mode 100644 lib/std/debug/ElfFile.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 2b6028ca82d1..b7d877bfaf17 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -18,6 +18,7 @@ const root = @import("root"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); +pub const ElfFile = @import("debug/ElfFile.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 8b8dc7d732eb..ebfb15e6e2bd 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -30,7 +30,6 @@ pub const expression = @import("Dwarf/expression.zig"); pub const abi = @import("Dwarf/abi.zig"); pub const call_frame = @import("Dwarf/call_frame.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); -pub const ElfModule = @import("Dwarf/ElfModule.zig"); /// Useful to temporarily enable while working on this file. const debug_debug_mode = false; diff --git a/lib/std/debug/Dwarf/ElfModule.zig b/lib/std/debug/Dwarf/ElfModule.zig deleted file mode 100644 index 4d425b1718fb..000000000000 --- a/lib/std/debug/Dwarf/ElfModule.zig +++ /dev/null @@ -1,376 +0,0 @@ -//! A thin wrapper around `Dwarf` which handles loading debug information from an ELF file. Load the -//! info with `load`, then directly access the `dwarf` field before finally `deinit`ing. 
- -dwarf: Dwarf, - -/// If we encounter a `.eh_frame` section while loading the ELF module, it is stored here and may be -/// used with `Dwarf.Unwind` for call stack unwinding. -eh_frame: ?UnwindSection, -/// If we encounter a `.debug_frame` section while loading the ELF module, it is stored here and may -/// be used with `Dwarf.Unwind` for call stack unwinding. -debug_frame: ?UnwindSection, - -/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that -/// this memory can be unmapped by `ElfModule.deinit`. -mapped_file: []align(std.heap.page_size_min) const u8, -/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file` -/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must -/// be unmapped by `ElfModule.deinit`. -mapped_debug_file: ?[]align(std.heap.page_size_min) const u8, - -pub const UnwindSection = struct { - vaddr: u64, - bytes: []const u8, - owned: bool, -}; - -pub fn deinit(em: *ElfModule, gpa: Allocator) void { - em.dwarf.deinit(gpa); - std.posix.munmap(em.mapped_file); - if (em.mapped_debug_file) |m| std.posix.munmap(m); - if (em.eh_frame) |s| if (s.owned) gpa.free(s.bytes); - if (em.debug_frame) |s| if (s.owned) gpa.free(s.bytes); -} - -pub const LoadError = error{ - InvalidDebugInfo, - MissingDebugInfo, - InvalidElfMagic, - InvalidElfVersion, - InvalidElfEndian, - /// TODO: implement this and then remove this error code - UnimplementedDwarfForeignEndian, - /// The debug info may be valid but this implementation uses memory - /// mapping which limits things to usize. If the target debug info is - /// 64-bit and host is 32-bit, there may be debug info that is not - /// supportable using this method. - Overflow, - - PermissionDenied, - LockedMemoryLimitExceeded, - MemoryMappingNotSupported, -} || Allocator.Error || std.fs.File.OpenError || Dwarf.OpenError; - -/// Reads debug info from an ELF file given its path. 
-/// -/// If the required sections aren't present but a reference to external debug -/// info is, then this this function will recurse to attempt to load the debug -/// sections from an external file. -pub fn load( - gpa: Allocator, - elf_file_path: Path, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: ?*Dwarf.SectionArray, - parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, -) LoadError!ElfModule { - const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { - const elf_file = try elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}); - defer elf_file.close(); - - const file_len = std.math.cast( - usize, - elf_file.getEndPos() catch return Dwarf.bad(), - ) orelse return error.Overflow; - - break :mapped std.posix.mmap( - null, - file_len, - std.posix.PROT.READ, - .{ .TYPE = .SHARED }, - elf_file.handle, - 0, - ) catch |err| switch (err) { - error.MappingAlreadyExists => unreachable, - else => |e| return e, - }; - }; - errdefer std.posix.munmap(mapped_mem); - - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; - - const shoff = hdr.e_shoff; - const str_section_off = std.math.cast( - usize, - shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx), - ) orelse return error.Overflow; - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(mapped_mem[str_section_off..])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - 
@ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = @splat(null); - // Combine section list. This takes ownership over any owned sections from the parent scope. - if (parent_sections) |ps| { - for (ps, §ions) |*parent, *section_elem| { - if (parent.*) |*p| { - section_elem.* = p.*; - p.owned = false; - } - } - } - errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); - - var eh_frame_section: ?UnwindSection = null; - errdefer if (eh_frame_section) |s| if (s.owned) gpa.free(s.bytes); - - var debug_frame_section: ?UnwindSection = null; - errdefer if (debug_frame_section) |s| if (s.owned) gpa.free(s.bytes); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; - const gnu_debuglink = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, debug_filename.len + 1, 4); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, endian); - separate_debug_filename = debug_filename; - continue; - } - - const section_id: union(enum) { - dwarf: Dwarf.Section.Id, - eh_frame, - debug_frame, - } = s: { - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |s| { - if (mem.eql(u8, "." 
++ s.name, name)) { - break :s .{ .dwarf = @enumFromInt(s.value) }; - } - } - if (mem.eql(u8, ".eh_frame", name)) break :s .eh_frame; - if (mem.eql(u8, ".debug_frame", name)) break :s .debug_frame; - continue; - }; - - switch (section_id) { - .dwarf => |i| if (sections[@intFromEnum(i)] != null) continue, - .eh_frame => if (eh_frame_section != null) continue, - .debug_frame => if (debug_frame_section != null) continue, - } - - if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; - const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; - - const section_bytes: []const u8, const section_owned: bool = section: { - if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) { - break :section .{ raw_section_bytes, false }; - } - var section_reader: Reader = .fixed(raw_section_bytes); - const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var decompress: std.compress.flate.Decompress = .init(§ion_reader, .zlib, &.{}); - var decompressed_section: ArrayList(u8) = .empty; - defer decompressed_section.deinit(gpa); - decompress.reader.appendRemainingUnlimited(gpa, &decompressed_section) catch { - Dwarf.invalidDebugInfoDetected(); - continue; - }; - if (chdr.ch_size != decompressed_section.items.len) { - Dwarf.invalidDebugInfoDetected(); - continue; - } - break :section .{ try decompressed_section.toOwnedSlice(gpa), true }; - }; - switch (section_id) { - .dwarf => |id| sections[@intFromEnum(id)] = .{ - .data = section_bytes, - .owned = section_owned, - }, - .eh_frame => eh_frame_section = .{ - .vaddr = shdr.sh_addr, - .bytes = section_bytes, - .owned = section_owned, - }, - .debug_frame => debug_frame_section = .{ - .vaddr = shdr.sh_addr, - .bytes = section_bytes, - .owned = section_owned, - }, - } - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null 
or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - // $XDG_CACHE_HOME/debuginfod_client//debuginfo - // This only opportunisticly tries to load from the debuginfod cache, but doesn't try to populate it. - // One can manually run `debuginfod-find debuginfo PATH` to download the symbols - debuginfod: { - const id = build_id orelse break :debuginfod; - switch (builtin.os.tag) { - .wasi, .windows => break :debuginfod, - else => {}, - } - const id_dir_path: []u8 = p: { - if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| { - break :p try std.fmt.allocPrint(gpa, "{s}/{x}", .{ path, id }); - } - if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { - if (cache_path.len > 0) { - break :p try std.fmt.allocPrint(gpa, "{s}/debuginfod_client/{x}", .{ cache_path, id }); - } - } - if (std.posix.getenv("HOME")) |home_path| { - break :p try std.fmt.allocPrint(gpa, "{s}/.cache/debuginfod_client/{x}", .{ home_path, id }); - } - break :debuginfod; - }; - defer gpa.free(id_dir_path); - if (!std.fs.path.isAbsolute(id_dir_path)) break :debuginfod; - - var id_dir = std.fs.openDirAbsolute(id_dir_path, .{}) catch break :debuginfod; - defer id_dir.close(); - - return load(gpa, .{ - .root_dir = .{ .path = id_dir_path, .handle = id_dir }, - .sub_path = "debuginfo", - }, null, separate_debug_crc, §ions, mapped_mem) catch break :debuginfod; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // /.build-id/<2-character id prefix>/.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var 
id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{x}", .{id[0..1]}) catch unreachable; - const filename = std.fmt.bufPrint(&filename_buf, "{x}" ++ extension, .{id[1..]}) catch break :blk; - - for (global_debug_directories) |global_directory| { - const path: Path = .{ - .root_dir = .cwd(), - .sub_path = try std.fs.path.join(gpa, &.{ - global_directory, ".build-id", &id_prefix_buf, filename, - }), - }; - defer gpa.free(path.sub_path); - - return load(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - separate: { - const separate_filename = separate_debug_filename orelse break :separate; - if (mem.eql(u8, std.fs.path.basename(elf_file_path.sub_path), separate_filename)) - return error.MissingDebugInfo; - - exe_dir: { - const exe_dir_path = try std.fs.path.resolve(gpa, &.{ - elf_file_path.root_dir.path orelse ".", - std.fs.path.dirname(elf_file_path.sub_path) orelse ".", - }); - defer gpa.free(exe_dir_path); - var exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch break :exe_dir; - defer exe_dir.close(); - - // / - if (load( - gpa, - .{ - .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, - .sub_path = separate_filename, - }, - null, - separate_debug_crc, - §ions, - mapped_mem, - )) |em| { - return em; - } else |_| {} - - // /.debug/ - const path: Path = .{ - .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, - .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), - }; - defer gpa.free(path.sub_path); - - if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { - return em; - } else |_| {} - } - - var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; - const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :separate; - - // // - for (global_debug_directories) |global_directory| { - const path: Path = .{ - .root_dir = 
.cwd(), - .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), - }; - defer gpa.free(path.sub_path); - if (load(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |em| { - return em; - } else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var dwarf: Dwarf = .{ .sections = sections }; - try dwarf.open(gpa, endian); - return .{ - .dwarf = dwarf, - .eh_frame = eh_frame_section, - .debug_frame = debug_frame_section, - .mapped_file = parent_mapped_mem orelse mapped_mem, - .mapped_debug_file = if (parent_mapped_mem != null) mapped_mem else null, - }; -} - -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; -const Dwarf = std.debug.Dwarf; -const Path = std.Build.Cache.Path; -const Reader = std.Io.Reader; -const mem = std.mem; -const elf = std.elf; - -const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); - -const ElfModule = @This(); diff --git a/lib/std/debug/ElfFile.zig b/lib/std/debug/ElfFile.zig new file mode 100644 index 000000000000..b8f1bdf615c7 --- /dev/null +++ b/lib/std/debug/ElfFile.zig @@ -0,0 +1,536 @@ +//! A helper type for loading an ELF file and collecting its DWARF debug information, unwind +//! information, and symbol table. + +is_64: bool, +endian: Endian, + +/// This is `null` iff any of the required DWARF sections were missing. `ElfFile.load` does *not* +/// call `Dwarf.open`, `Dwarf.scanAllFunctions`, etc; that is the caller's responsibility. +dwarf: ?Dwarf, + +/// If non-`null`, describes the `.eh_frame` section, which can be used with `Dwarf.Unwind`. +eh_frame: ?UnwindSection, +/// If non-`null`, describes the `.debug_frame` section, which can be used with `Dwarf.Unwind`. +debug_frame: ?UnwindSection, + +/// If non-`null`, this is the contents of the `.strtab` section. +strtab: ?[]const u8, +/// If non-`null`, describes the `.symtab` section. 
+symtab: ?SymtabSection, + +/// Binary search table lazily populated by `searchSymtab`. +symbol_search_table: ?[]u64, + +/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that +/// this memory can be unmapped by `ElfFile.deinit`. +mapped_file: []align(std.heap.page_size_min) const u8, +/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file` +/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must +/// be unmapped by `ElfFile.deinit`. +mapped_debug_file: ?[]align(std.heap.page_size_min) const u8, + +arena: std.heap.ArenaAllocator.State, + +pub const UnwindSection = struct { + vaddr: u64, + bytes: []const u8, +}; +pub const SymtabSection = struct { + entry_size: u64, + bytes: []const u8, +}; + +pub const DebugInfoSearchPaths = struct { + /// The location of a debuginfod client directory, which acts as a search path for build IDs. If + /// given, we can load from this directory opportunistically, but make no effort to populate it. + /// To avoid allocation when building the search paths, this is given as two components which + /// will be concatenated. + debuginfod_client: ?[2][]const u8, + /// All "global debug directories" on the system. These are used as search paths for both debug + /// links and build IDs. On typical systems this is just "/usr/lib/debug". + global_debug: []const []const u8, + /// The path to the dirname of the ELF file, which acts as a search path for debug links. 
+ exe_dir: ?[]const u8, + + pub const none: DebugInfoSearchPaths = .{ + .debuginfod_client = null, + .global_debug = &.{}, + .exe_dir = null, + }; + + pub fn native(exe_path: []const u8) DebugInfoSearchPaths { + return .{ + .debuginfod_client = p: { + if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |p| { + break :p .{ p, "" }; + } + if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { + break :p .{ cache_path, "/debuginfod_client" }; + } + if (std.posix.getenv("HOME")) |home_path| { + break :p .{ home_path, "/.cache/debuginfod_client" }; + } + break :p null; + }, + .global_debug = &.{ + "/usr/lib/debug", + }, + .exe_dir = std.fs.path.dirname(exe_path) orelse ".", + }; + } +}; + +pub fn deinit(ef: *ElfFile, gpa: Allocator) void { + if (ef.dwarf) |*dwarf| dwarf.deinit(gpa); + if (ef.symbol_search_table) |t| gpa.free(t); + var arena = ef.arena.promote(gpa); + arena.deinit(); + + std.posix.munmap(ef.mapped_file); + if (ef.mapped_debug_file) |m| std.posix.munmap(m); + + ef.* = undefined; +} + +pub const LoadError = error{ + OutOfMemory, + Overflow, + TruncatedElfFile, + InvalidCompressedSection, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfClass, + InvalidElfEndian, + // The remaining errors all occur when attemping to stat or mmap a file. 
+ SystemResources, + MemoryMappingNotSupported, + AccessDenied, + LockedMemoryLimitExceeded, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + Unexpected, +}; + +pub fn load( + gpa: Allocator, + elf_file: std.fs.File, + opt_build_id: ?[]const u8, + di_search_paths: *const DebugInfoSearchPaths, +) LoadError!ElfFile { + var arena_instance: std.heap.ArenaAllocator = .init(gpa); + errdefer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + var result = loadInner(arena, elf_file, null) catch |err| switch (err) { + error.CrcMismatch => unreachable, // we passed crc as null + else => |e| return e, + }; + errdefer std.posix.munmap(result.mapped_mem); + + // `loadInner` did most of the work, but we might need to load an external debug info file + + const di_mapped_mem: ?[]align(std.heap.page_size_min) const u8 = load_di: { + if (result.sections.get(.debug_info) != null and + result.sections.get(.debug_abbrev) != null and + result.sections.get(.debug_str) != null and + result.sections.get(.debug_line) != null) + { + // The info is already loaded from this file alone! + break :load_di null; + } + + // We're missing some debug info---let's try and load it from a separate file. 
+ + build_id: { + const build_id = opt_build_id orelse break :build_id; + if (build_id.len < 3) break :build_id; + + for (di_search_paths.global_debug) |global_debug| { + if (try loadSeparateDebugFile(arena, &result, null, "{s}/.build-id/{x}/{x}.debug", .{ + global_debug, + build_id[0..1], + build_id[1..], + })) |mapped| break :load_di mapped; + } + + if (di_search_paths.debuginfod_client) |components| { + if (try loadSeparateDebugFile(arena, &result, null, "{s}{s}/{x}/debuginfo", .{ + components[0], + components[1], + build_id, + })) |mapped| break :load_di mapped; + } + } + + debug_link: { + const section = result.sections.get(.gnu_debuglink) orelse break :debug_link; + const debug_filename = std.mem.sliceTo(section.bytes, 0); + const crc_offset = std.mem.alignForward(usize, debug_filename.len + 1, 4); + if (section.bytes.len < crc_offset + 4) break :debug_link; + const debug_crc = std.mem.readInt(u32, section.bytes[crc_offset..][0..4], result.endian); + + const exe_dir = di_search_paths.exe_dir orelse break :debug_link; + + if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}", .{ + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/.debug/{s}", .{ + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + for (di_search_paths.global_debug) |global_debug| { + // This looks like a bug; it isn't. They really do embed the absolute path to the + // exe's dirname, *under* the global debug path. 
+ if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}/{s}", .{ + global_debug, + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + } + } + + break :load_di null; + }; + errdefer comptime unreachable; + + return .{ + .is_64 = result.is_64, + .endian = result.endian, + .dwarf = dwarf: { + if (result.sections.get(.debug_info) == null or + result.sections.get(.debug_abbrev) == null or + result.sections.get(.debug_str) == null or + result.sections.get(.debug_line) == null) + { + break :dwarf null; // debug info not present + } + var sections: Dwarf.SectionArray = @splat(null); + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| { + if (result.sections.get(@field(Section.Id, f.name))) |s| { + sections[f.value] = .{ .data = s.bytes, .owned = false }; + } + } + break :dwarf .{ .sections = sections }; + }, + .eh_frame = if (result.sections.get(.eh_frame)) |s| .{ + .vaddr = s.header.sh_addr, + .bytes = s.bytes, + } else null, + .debug_frame = if (result.sections.get(.debug_frame)) |s| .{ + .vaddr = s.header.sh_addr, + .bytes = s.bytes, + } else null, + .strtab = if (result.sections.get(.strtab)) |s| s.bytes else null, + .symtab = if (result.sections.get(.symtab)) |s| .{ + .entry_size = s.header.sh_entsize, + .bytes = s.bytes, + } else null, + .symbol_search_table = null, + .mapped_file = result.mapped_mem, + .mapped_debug_file = di_mapped_mem, + .arena = arena_instance.state, + }; +} + +pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{ + NoSymtab, + NoStrtab, + BadSymtab, + OutOfMemory, +}!std.debug.Symbol { + const symtab = ef.symtab orelse return error.NoSymtab; + const strtab = ef.strtab orelse return error.NoStrtab; + + if (symtab.bytes.len % symtab.entry_size != 0) return error.BadSymtab; + + const swap_endian = ef.endian != @import("builtin").cpu.arch.endian(); + + switch (ef.is_64) { + inline true, false => |is_64| { + const Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym; + if (symtab.entry_size != 
@sizeOf(Sym)) return error.BadSymtab; + const symbols: []align(1) const Sym = @ptrCast(symtab.bytes); + if (ef.symbol_search_table == null) { + ef.symbol_search_table = try buildSymbolSearchTable(gpa, ef.endian, Sym, symbols); + } + const search_table = ef.symbol_search_table.?; + const SearchContext = struct { + swap_endian: bool, + target: u64, + symbols: []align(1) const Sym, + fn predicate(ctx: @This(), sym_index: u64) bool { + // We need to return `true` for the first N items, then `false` for the rest -- + // the index we'll get out is the first `false` one. So, we'll return `true` iff + // the target address is after the *end* of this symbol. This synchronizes with + // the logic in `buildSymbolSearchTable` which sorts by *end* address. + var sym = ctx.symbols[sym_index]; + if (ctx.swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + const sym_end = sym.st_value + sym.st_size; + return ctx.target >= sym_end; + } + }; + const sym_index_index = std.sort.partitionPoint(u64, search_table, @as(SearchContext, .{ + .swap_endian = swap_endian, + .target = vaddr, + .symbols = symbols, + }), SearchContext.predicate); + if (sym_index_index == search_table.len) return .unknown; + var sym = symbols[search_table[sym_index_index]]; + if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + if (vaddr < sym.st_value or vaddr >= sym.st_value + sym.st_size) return .unknown; + return .{ + .name = std.mem.sliceTo(strtab[sym.st_name..], 0), + .compile_unit_name = null, + .source_location = null, + }; + }, + } +} + +fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, symbols: []align(1) const Sym) error{ + OutOfMemory, + BadSymtab, +}![]u64 { + var result: std.ArrayList(u64) = .empty; + defer result.deinit(gpa); + + const swap_endian = endian != @import("builtin").cpu.arch.endian(); + + for (symbols, 0..) 
|sym_orig, sym_index| { + var sym = sym_orig; + if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + if (sym.st_name == 0) continue; + if (sym.st_shndx == elf.SHN_UNDEF) continue; + try result.append(gpa, sym_index); + } + + const SortContext = struct { + swap_endian: bool, + symbols: []align(1) const Sym, + fn lessThan(ctx: @This(), lhs_sym_index: u64, rhs_sym_index: u64) bool { + // We sort by *end* address, not start address. This matches up with logic in `searchSymtab`. + var lhs_sym = ctx.symbols[lhs_sym_index]; + var rhs_sym = ctx.symbols[rhs_sym_index]; + if (ctx.swap_endian) { + std.mem.byteSwapAllFields(Sym, &lhs_sym); + std.mem.byteSwapAllFields(Sym, &rhs_sym); + } + const lhs_val = lhs_sym.st_value + lhs_sym.st_size; + const rhs_val = rhs_sym.st_value + rhs_sym.st_size; + return lhs_val < rhs_val; + } + }; + std.mem.sort(u64, result.items, @as(SortContext, .{ + .swap_endian = swap_endian, + .symbols = symbols, + }), SortContext.lessThan); + + return result.toOwnedSlice(gpa); +} + +/// Only used locally, during `load`. +const Section = struct { + header: elf.Elf64_Shdr, + bytes: []const u8, + const Id = enum { + // DWARF sections: see `Dwarf.Section.Id`. + debug_info, + debug_abbrev, + debug_str, + debug_str_offsets, + debug_line, + debug_line_str, + debug_ranges, + debug_loclists, + debug_rnglists, + debug_addr, + debug_names, + // Then anything else we're interested in. 
+ gnu_debuglink, + eh_frame, + debug_frame, + symtab, + strtab, + }; + const Array = std.enums.EnumArray(Section.Id, ?Section); +}; + +fn loadSeparateDebugFile(arena: Allocator, main_loaded: *LoadInnerResult, opt_crc: ?u32, comptime fmt: []const u8, args: anytype) Allocator.Error!?[]align(std.heap.page_size_min) const u8 { + const path = try std.fmt.allocPrint(arena, fmt, args); + const elf_file = std.fs.cwd().openFile(path, .{}) catch return null; + defer elf_file.close(); + + const result = loadInner(arena, elf_file, opt_crc) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.CrcMismatch => return null, + else => return null, + }; + errdefer comptime unreachable; + + const have_debug_sections = inline for (@as([]const []const u8, &.{ + "debug_info", + "debug_abbrev", + "debug_str", + "debug_line", + })) |name| { + const s = @field(Section.Id, name); + if (main_loaded.sections.get(s) == null and result.sections.get(s) != null) { + break false; + } + } else true; + + if (result.is_64 != main_loaded.is_64 or + result.endian != main_loaded.endian or + !have_debug_sections) + { + std.posix.munmap(result.mapped_mem); + return null; + } + + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| { + const id = @field(Section.Id, f.name); + if (main_loaded.sections.get(id) == null) { + main_loaded.sections.set(id, result.sections.get(id)); + } + } + + return result.mapped_mem; +} + +const LoadInnerResult = struct { + is_64: bool, + endian: Endian, + sections: Section.Array, + mapped_mem: []align(std.heap.page_size_min) const u8, +}; +fn loadInner( + arena: Allocator, + elf_file: std.fs.File, + opt_crc: ?u32, +) (LoadError || error{CrcMismatch})!LoadInnerResult { + const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { + const file_len = std.math.cast( + usize, + elf_file.getEndPos() catch |err| switch (err) { + error.PermissionDenied => unreachable, // not asking for PROT_EXEC + else => |e| return e, + }, + ) orelse return 
error.Overflow; + + break :mapped std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ) catch |err| switch (err) { + error.MappingAlreadyExists => unreachable, // not using FIXED_NOREPLACE + error.PermissionDenied => unreachable, // not asking for PROT_EXEC + else => |e| return e, + }; + }; + + if (opt_crc) |crc| { + if (std.hash.crc.Crc32.hash(mapped_mem) != crc) { + return error.CrcMismatch; + } + } + errdefer std.posix.munmap(mapped_mem); + + var fr: std.Io.Reader = .fixed(mapped_mem); + + const header = elf.Header.read(&fr) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.TruncatedElfFile, + + error.InvalidElfMagic, + error.InvalidElfVersion, + error.InvalidElfClass, + error.InvalidElfEndian, + => |e| return e, + }; + const endian = header.endian; + + const shstrtab_shdr_off = try std.math.add( + u64, + header.shoff, + try std.math.mul(u64, header.shstrndx, header.shentsize), + ); + fr.seek = std.math.cast(usize, shstrtab_shdr_off) orelse return error.Overflow; + const shstrtab: []const u8 = if (header.is_64) shstrtab: { + const shdr = fr.takeStruct(elf.Elf64_Shdr, endian) catch return error.TruncatedElfFile; + if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile; + break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + } else shstrtab: { + const shdr = fr.takeStruct(elf.Elf32_Shdr, endian) catch return error.TruncatedElfFile; + if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile; + break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + }; + + var sections: Section.Array = .initFill(null); + + var it = header.iterateSectionHeadersBuffer(mapped_mem); + while (it.next() catch return error.TruncatedElfFile) |shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + if (shdr.sh_name > shstrtab.len) return 
error.TruncatedElfFile;
+ const name = std.mem.sliceTo(shstrtab[@intCast(shdr.sh_name)..], 0);
+
+ const section_id: Section.Id = inline for (@typeInfo(Section.Id).@"enum".fields) |s| {
+ if (std.mem.eql(u8, "." ++ s.name, name)) {
+ break @enumFromInt(s.value);
+ }
+ } else continue;
+
+ if (sections.get(section_id) != null) continue;
+
+ if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
+ const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
+ const section_bytes: []const u8 = bytes: {
+ if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) break :bytes raw_section_bytes;
+
+ var section_reader: std.Io.Reader = .fixed(raw_section_bytes);
+ const ch_type: elf.COMPRESS, const ch_size: u64 = if (header.is_64) ch: {
+ const chdr = section_reader.takeStruct(elf.Elf64_Chdr, endian) catch return error.InvalidCompressedSection;
+ break :ch .{ chdr.ch_type, chdr.ch_size };
+ } else ch: {
+ const chdr = section_reader.takeStruct(elf.Elf32_Chdr, endian) catch return error.InvalidCompressedSection;
+ break :ch .{ chdr.ch_type, chdr.ch_size };
+ };
+ if (ch_type != .ZLIB) {
+ // The compression algorithm is unsupported, but don't make that a hard error; the
+ // file might still be valid, and we might still be okay without this section.
+ continue;
+ }
+
+ const buf = try arena.alloc(u8, ch_size);
+ var fw: std.Io.Writer = .fixed(buf);
+ var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{});
+ const n = decompress.reader.streamRemaining(&fw) catch |err| switch (err) {
+ // If a write failed, then `buf` filled up, so `ch_size` was incorrect
+ error.WriteFailed => return error.InvalidCompressedSection,
+ // If a read failed, flate expected the section to have more data
+ error.ReadFailed => return error.InvalidCompressedSection,
+ };
+ // It's also an error if the data is shorter than expected.
+ if (n != buf.len) return error.InvalidCompressedSection; + break :bytes buf; + }; + sections.set(section_id, .{ .header = shdr, .bytes = section_bytes }); + } + + return .{ + .is_64 = header.is_64, + .endian = endian, + .sections = sections, + .mapped_mem = mapped_mem, + }; +} + +const std = @import("std"); +const Endian = std.builtin.Endian; +const Dwarf = std.debug.Dwarf; +const ElfFile = @This(); +const Allocator = std.mem.Allocator; +const elf = std.elf; diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index bc8efc71b451..74119a3ea40c 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -9,7 +9,7 @@ const std = @import("../std.zig"); const Allocator = std.mem.Allocator; const Path = std.Build.Cache.Path; -const Dwarf = std.debug.Dwarf; +const ElfFile = std.debug.ElfFile; const assert = std.debug.assert; const Coverage = std.debug.Coverage; const SourceLocation = std.debug.Coverage.SourceLocation; @@ -17,28 +17,35 @@ const SourceLocation = std.debug.Coverage.SourceLocation; const Info = @This(); /// Sorted by key, ascending. -address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), +address_map: std.AutoArrayHashMapUnmanaged(u64, ElfFile), /// Externally managed, outlives this `Info` instance. coverage: *Coverage, -pub const LoadError = Dwarf.ElfModule.LoadError; +pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || std.debug.Dwarf.ScanError || error{MissingDebugInfo}; pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { - var elf_module = try Dwarf.ElfModule.load(gpa, path, null, null, null, null); - // This is correct because `Dwarf.ElfModule` currently only supports native-endian ELF files. 
- const endian = @import("builtin").target.cpu.arch.endian(); - try elf_module.dwarf.populateRanges(gpa, endian); + var file = try path.root_dir.handle.openFile(path.sub_path, .{}); + defer file.close(); + + var elf_file: ElfFile = try .load(gpa, file, null, &.none); + errdefer elf_file.deinit(gpa); + + if (elf_file.dwarf == null) return error.MissingDebugInfo; + try elf_file.dwarf.?.open(gpa, elf_file.endian); + try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); + var info: Info = .{ .address_map = .{}, .coverage = coverage, }; - try info.address_map.put(gpa, 0, elf_module); + try info.address_map.put(gpa, 0, elf_file); + errdefer comptime unreachable; // elf_file is owned by the map now return info; } pub fn deinit(info: *Info, gpa: Allocator) void { - for (info.address_map.values()) |*elf_module| { - elf_module.dwarf.deinit(gpa); + for (info.address_map.values()) |*elf_file| { + elf_file.dwarf.?.deinit(gpa); } info.address_map.deinit(gpa); info.* = undefined; @@ -58,8 +65,6 @@ pub fn resolveAddresses( ) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); - const elf_module = &info.address_map.values()[0]; - // This is correct because `Dwarf.ElfModule` currently only supports native-endian ELF files. 
- const endian = @import("builtin").target.cpu.arch.endian(); - return info.coverage.resolveAddressesDwarf(gpa, endian, sorted_pc_addrs, output, &elf_module.dwarf); + const elf_file = &info.address_map.values()[0]; + return info.coverage.resolveAddressesDwarf(gpa, elf_file.endian, sorted_pc_addrs, output, &elf_file.dwarf.?); } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index ef222cc7f446..d838a1a6c180 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -78,6 +78,7 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); + if (module.name.len == 0) return error.MissingDebugInfo; return module.name; } diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 7a280c0d6ecb..7871f1012fb6 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -7,10 +7,12 @@ gnu_eh_frame: ?[]const u8, pub const LookupCache = void; pub const DebugInfo = struct { - loaded_elf: ?Dwarf.ElfModule, + loaded_elf: ?ElfFile, + scanned_dwarf: bool, unwind: [2]?Dwarf.Unwind, pub const init: DebugInfo = .{ .loaded_elf = null, + .scanned_dwarf = false, .unwind = @splat(null), }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { @@ -92,55 +94,92 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!ElfModu }; return error.MissingDebugInfo; } -fn loadDwarf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { +fn loadElf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { + std.debug.assert(di.loaded_elf == null); + std.debug.assert(!di.scanned_dwarf); + const load_result = if (module.name.len > 0) res: { - break :res Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = 
module.name, - }, module.build_id, null, null, null); + var file = std.fs.cwd().openFile(module.name, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res ElfFile.load(gpa, file, module.build_id, &.native(module.name)); } else res: { const path = std.fs.selfExePathAlloc(gpa) catch |err| switch (err) { error.OutOfMemory => |e| return e, else => return error.ReadFailed, }; defer gpa.free(path); - break :res Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = path, - }, module.build_id, null, null, null); + var file = std.fs.cwd().openFile(path, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res ElfFile.load(gpa, file, module.build_id, &.native(path)); }; di.loaded_elf = load_result catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.OutOfMemory, - error.InvalidDebugInfo, - error.MissingDebugInfo, error.Unexpected, => |e| return e, - error.InvalidElfEndian, + error.Overflow, + error.TruncatedElfFile, + error.InvalidCompressedSection, error.InvalidElfMagic, error.InvalidElfVersion, - error.InvalidUtf8, - error.InvalidWtf8, - error.EndOfStream, - error.Overflow, - error.UnimplementedDwarfForeignEndian, // this should be impossible as we're looking at the debug info for this process + error.InvalidElfClass, + error.InvalidElfEndian, => return error.InvalidDebugInfo, - else => return error.ReadFailed, + error.SystemResources, + error.MemoryMappingNotSupported, + error.AccessDenied, + error.LockedMemoryLimitExceeded, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + => return error.ReadFailed, }; + + const matches_native = + di.loaded_elf.?.endian == native_endian and + di.loaded_elf.?.is_64 == (@sizeOf(usize) == 8); + + if (!matches_native) { + di.loaded_elf.?.deinit(gpa); + di.loaded_elf = null; + return error.InvalidDebugInfo; + } } pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { 
- if (di.loaded_elf == null) try module.loadDwarf(gpa, di); + if (di.loaded_elf == null) try module.loadElf(gpa, di); const vaddr = address - module.load_offset; - return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, - error.ReadFailed, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - => return error.InvalidDebugInfo, + if (di.loaded_elf.?.dwarf) |*dwarf| { + if (!di.scanned_dwarf) { + dwarf.open(gpa, native_endian) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.ReadFailed, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + di.scanned_dwarf = true; + } + return dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + } + // When there's no DWARF available, fall back to searching the symtab. + return di.loaded_elf.?.searchSymtab(gpa, vaddr) catch |err| switch (err) { + error.NoSymtab, error.NoStrtab => return error.MissingDebugInfo, + error.BadSymtab => return error.InvalidDebugInfo, + error.OutOfMemory => |e| return e, }; } fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void { @@ -166,7 +205,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro } else unwinds: { // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` // section, but we'll have to load the binary to get at it. 
- try module.loadDwarf(gpa, di); + try module.loadElf(gpa, di); const opt_debug_frame = &di.loaded_elf.?.debug_frame; const opt_eh_frame = &di.loaded_elf.?.eh_frame; // If both are present, we can't just pick one -- the info could be split between them. @@ -232,6 +271,7 @@ const ElfModule = @This(); const std = @import("../../std.zig"); const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; +const ElfFile = std.debug.ElfFile; const elf = std.elf; const mem = std.mem; const Error = std.debug.SelfInfo.Error; From bfbbda77517ec83ef7cfacced651acc9d85b8bb0 Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 10 Sep 2025 12:53:03 +0100 Subject: [PATCH 42/85] compiler: fix new panic handler in release builds --- lib/std/debug.zig | 4 ++-- src/crash_report.zig | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index b7d877bfaf17..ae8ec072dfe0 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -355,7 +355,7 @@ pub fn relocateContext(dest: *ThreadContext) void { /// The value which is placed on the stack to make a copy of a `ThreadContext`. const ThreadContextBuf = if (ThreadContext == noreturn) void else ThreadContext; /// The pointer through which a `ThreadContext` is received from callers of stack tracing logic. -const ThreadContextPtr = if (ThreadContext == noreturn) noreturn else *const ThreadContext; +pub const ThreadContextPtr = if (ThreadContext == noreturn) noreturn else *const ThreadContext; /// Capture the current context. The register values in the context will reflect the /// state after the platform `getcontext` function returns. @@ -1297,7 +1297,7 @@ fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopa if (ThreadContext == noreturn) return handleSegfault(addr, name, null); // Some kernels don't align `ctx_ptr` properly, so we'll copy it into a local buffer. 
- var copied_ctx: ThreadContextBuf = undefined; + var copied_ctx: posix.ucontext_t = undefined; const orig_ctx: *align(1) posix.ucontext_t = @ptrCast(ctx_ptr); copied_ctx = orig_ctx.*; if (builtin.os.tag.isDarwin() and builtin.cpu.arch == .aarch64) { diff --git a/src/crash_report.zig b/src/crash_report.zig index c696c42cfc5b..8c686459fee1 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -17,7 +17,7 @@ pub const debug = struct { /// crash earlier than that. pub var zig_argv0: []const u8 = "zig"; -fn handleSegfaultImpl(addr: ?usize, name: []const u8, opt_ctx: ?*std.debug.ThreadContext) noreturn { +fn handleSegfaultImpl(addr: ?usize, name: []const u8, opt_ctx: ?std.debug.ThreadContextPtr) noreturn { @branchHint(.cold); dumpCrashContext() catch {}; std.debug.defaultHandleSegfault(addr, name, opt_ctx); @@ -56,6 +56,7 @@ pub const AnalyzeBody = if (build_options.enable_debug_extensions) struct { current = ab.parent; } } else struct { + const current: ?noreturn = null; // Dummy implementation, with functions marked `inline` to avoid interfering with tail calls. 
pub inline fn push(_: AnalyzeBody, _: *Sema, _: *Sema.Block, _: []const Zir.Inst.Index) void {} pub inline fn pop(_: AnalyzeBody) void {} @@ -75,6 +76,7 @@ pub const CodegenFunc = if (build_options.enable_debug_extensions) struct { current = null; } } else struct { + const current: ?noreturn = null; // Dummy implementation pub fn start(_: *const Zcu, _: InternPool.Index) void {} pub fn stop(_: InternPool.Index) void {} From 1123741fd5fc6545daf10e2bcdcad74ec148f61b Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 10 Sep 2025 12:53:33 +0100 Subject: [PATCH 43/85] Dwarf: use 'gpa' terminology --- lib/std/debug/Dwarf.zig | 56 ++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index ebfb15e6e2bd..dfe9bd8dd387 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -79,8 +79,8 @@ pub const Abbrev = struct { has_children: bool, attrs: []Attr, - fn deinit(abbrev: *Abbrev, allocator: Allocator) void { - allocator.free(abbrev.attrs); + fn deinit(abbrev: *Abbrev, gpa: Allocator) void { + gpa.free(abbrev.attrs); abbrev.* = undefined; } @@ -96,11 +96,11 @@ pub const Abbrev = struct { offset: u64, abbrevs: []Abbrev, - fn deinit(table: *Table, allocator: Allocator) void { + fn deinit(table: *Table, gpa: Allocator) void { for (table.abbrevs) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } - allocator.free(table.abbrevs); + gpa.free(table.abbrevs); table.* = undefined; } @@ -213,8 +213,8 @@ pub const Die = struct { value: FormValue, }; - fn deinit(self: *Die, allocator: Allocator) void { - allocator.free(self.attrs); + fn deinit(self: *Die, gpa: Allocator) void { + gpa.free(self.attrs); self.* = undefined; } @@ -368,7 +368,7 @@ pub const ScanError = error{ StreamTooLong, } || Allocator.Error; -fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError!void { +fn scanAllFunctions(di: *Dwarf, gpa: Allocator, endian: Endian) 
ScanError!void { var fr: Reader = .fixed(di.section(.debug_info).?); var this_unit_offset: u64 = 0; @@ -394,7 +394,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! address_size = try fr.takeByte(); } - const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); + const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset); var max_attrs: usize = 0; var zig_padding_abbrev_code: u7 = 0; @@ -409,8 +409,8 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! } } } - const attrs_buf = try allocator.alloc(Die.Attr, max_attrs * 3); - defer allocator.free(attrs_buf); + const attrs_buf = try gpa.alloc(Die.Attr, max_attrs * 3); + defer gpa.free(attrs_buf); var attrs_bufs: [3][]Die.Attr = undefined; for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs]; @@ -510,7 +510,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! else => return bad(), }; - try di.func_list.append(allocator, .{ + try di.func_list.append(gpa, .{ .name = fn_name, .pc_range = .{ .start = low_pc, @@ -535,7 +535,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! while (try iter.next()) |range| { range_added = true; - try di.func_list.append(allocator, .{ + try di.func_list.append(gpa, .{ .name = fn_name, .pc_range = .{ .start = range.start, @@ -546,7 +546,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! } if (fn_name != null and !range_added) { - try di.func_list.append(allocator, .{ + try di.func_list.append(gpa, .{ .name = fn_name, .pc_range = null, }); @@ -560,11 +560,11 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError! 
} } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanError!void { +fn scanAllCompileUnits(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void { var fr: Reader = .fixed(di.section(.debug_info).?); var this_unit_offset: u64 = 0; - var attrs_buf = std.array_list.Managed(Die.Attr).init(allocator); + var attrs_buf = std.array_list.Managed(Die.Attr).init(gpa); defer attrs_buf.deinit(); while (this_unit_offset < fr.buffer.len) { @@ -589,7 +589,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr address_size = try fr.takeByte(); } - const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); + const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset); var max_attrs: usize = 0; for (abbrev_table.abbrevs) |abbrev| { @@ -608,7 +608,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad(); - compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs); + compile_unit_die.attrs = try gpa.dupe(Die.Attr, compile_unit_die.attrs); var compile_unit: CompileUnit = .{ .version = version, @@ -645,7 +645,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator, endian: Endian) ScanErr } }; - try di.compile_unit_list.append(allocator, compile_unit); + try di.compile_unit_list.append(gpa, compile_unit); this_unit_offset += next_offset; } @@ -855,32 +855,32 @@ pub fn findCompileUnit(di: *const Dwarf, endian: Endian, target_address: u64) !* /// Gets an already existing AbbrevTable given the abbrev_offset, or if not found, /// seeks in the stream and parses it. 
-fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const Abbrev.Table { +fn getAbbrevTable(di: *Dwarf, gpa: Allocator, abbrev_offset: u64) !*const Abbrev.Table { for (di.abbrev_table_list.items) |*table| { if (table.offset == abbrev_offset) { return table; } } try di.abbrev_table_list.append( - allocator, - try di.parseAbbrevTable(allocator, abbrev_offset), + gpa, + try di.parseAbbrevTable(gpa, abbrev_offset), ); return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1]; } -fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { +fn parseAbbrevTable(di: *Dwarf, gpa: Allocator, offset: u64) !Abbrev.Table { var fr: Reader = .fixed(di.section(.debug_abbrev).?); fr.seek = cast(usize, offset) orelse return bad(); - var abbrevs = std.array_list.Managed(Abbrev).init(allocator); + var abbrevs = std.array_list.Managed(Abbrev).init(gpa); defer { for (abbrevs.items) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } abbrevs.deinit(); } - var attrs = std.array_list.Managed(Abbrev.Attr).init(allocator); + var attrs = std.array_list.Managed(Abbrev.Attr).init(gpa); defer attrs.deinit(); while (true) { @@ -1446,7 +1446,7 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { return str[casted_offset..last :0]; } -pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) !std.debug.Symbol { +pub fn getSymbol(di: *Dwarf, gpa: Allocator, endian: Endian, address: u64) !std.debug.Symbol { const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return .unknown, else => return err, @@ -1456,7 +1456,7 @@ pub fn getSymbol(di: *Dwarf, allocator: Allocator, endian: Endian, address: u64) .compile_unit_name = compile_unit.die.getAttrString(di, endian, std.dwarf.AT.name, di.section(.debug_str), compile_unit) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, }, - 
.source_location = di.getLineNumberInfo(allocator, endian, compile_unit, address) catch |err| switch (err) { + .source_location = di.getLineNumberInfo(gpa, endian, compile_unit, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, else => return err, }, From 4e45362529e05ba1be44fab48bc3469f5bb6492d Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 10 Sep 2025 16:41:10 +0100 Subject: [PATCH 44/85] link.Elf: fix static PIE We mustn't emit the DT_PLTGOT entry in `.dynamic` in a statically-linked PIE, because there's no dl to relocate it (and `std.pie.relocate`, or the PIE relocator in libc, won't touch it). In that case, there cannot be any PLT entries, so there's no point emitting the `.got.plt` section at all. If we just don't create that section, `link.Elf` already knows not to add the DT_PLTGOT entry to `.dynamic`. Co-authored-by: Jacob Young --- lib/std/dynamic_library.zig | 3 +-- src/link/Elf.zig | 39 +++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/std/dynamic_library.zig b/lib/std/dynamic_library.zig index bac31f57601c..9504242cd578 100644 --- a/lib/std/dynamic_library.zig +++ b/lib/std/dynamic_library.zig @@ -95,8 +95,7 @@ pub fn get_DYNAMIC() ?[*]const elf.Dyn { pub fn linkmap_iterator(phdrs: []const elf.Phdr) error{InvalidExe}!LinkMap.Iterator { _ = phdrs; const _DYNAMIC = get_DYNAMIC() orelse { - // No PT_DYNAMIC means this is either a statically-linked program or a - // badly corrupted dynamically-linked one. + // No PT_DYNAMIC means this is a statically-linked non-PIE program. 
return .{ .current = null }; }; diff --git a/src/link/Elf.zig b/src/link/Elf.zig index f64e9c337b57..de506700fa04 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1884,6 +1884,16 @@ fn initSyntheticSections(self: *Elf) !void { const ptr_size = self.ptrWidthBytes(); const shared_objects = self.shared_objects.values(); + const is_exe_or_dyn_lib = switch (comp.config.output_mode) { + .Exe => true, + .Lib => comp.config.link_mode == .dynamic, + .Obj => false, + }; + const have_dynamic_linker = comp.config.link_mode == .dynamic and is_exe_or_dyn_lib and !target.dynamic_linker.eql(.none); + + const needs_interp = have_dynamic_linker and + (comp.config.link_libc or comp.root_mod.resolved_target.is_explicit_dynamic_linker); + const needs_eh_frame = blk: { if (self.zigObjectPtr()) |zo| if (zo.eh_frame_index != null) break :blk true; @@ -1891,6 +1901,7 @@ fn initSyntheticSections(self: *Elf) !void { if (self.file(index).?.object.cies.items.len > 0) break true; } else false; }; + if (needs_eh_frame) { if (self.section_indexes.eh_frame == null) { self.section_indexes.eh_frame = self.sectionByName(".eh_frame") orelse try self.addSection(.{ @@ -1922,13 +1933,17 @@ fn initSyntheticSections(self: *Elf) !void { }); } - if (self.section_indexes.got_plt == null) { - self.section_indexes.got_plt = try self.addSection(.{ - .name = try self.insertShString(".got.plt"), - .type = elf.SHT_PROGBITS, - .flags = elf.SHF_ALLOC | elf.SHF_WRITE, - .addralign = @alignOf(u64), - }); + if (have_dynamic_linker) { + if (self.section_indexes.got_plt == null) { + self.section_indexes.got_plt = try self.addSection(.{ + .name = try self.insertShString(".got.plt"), + .type = elf.SHT_PROGBITS, + .flags = elf.SHF_ALLOC | elf.SHF_WRITE, + .addralign = @alignOf(u64), + }); + } + } else { + assert(self.plt.symbols.items.len == 0); } const needs_rela_dyn = blk: { @@ -1989,16 +2004,6 @@ fn initSyntheticSections(self: *Elf) !void { }); } - const is_exe_or_dyn_lib = switch (comp.config.output_mode) { - 
.Exe => true, - .Lib => comp.config.link_mode == .dynamic, - .Obj => false, - }; - const have_dynamic_linker = comp.config.link_mode == .dynamic and is_exe_or_dyn_lib and !target.dynamic_linker.eql(.none); - - const needs_interp = have_dynamic_linker and - (comp.config.link_libc or comp.root_mod.resolved_target.is_explicit_dynamic_linker); - if (needs_interp and self.section_indexes.interp == null) { self.section_indexes.interp = try self.addSection(.{ .name = try self.insertShString(".interp"), From 02a0ade138c035a7d45bd1ed26fe49dfec2942d2 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 11 Sep 2025 13:32:15 +0100 Subject: [PATCH 45/85] std.debug: never attempt FP unwind under fomit-frame-pointer --- lib/std/debug.zig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index ae8ec072dfe0..56769826b1e8 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -815,13 +815,17 @@ const StackIterator = union(enum) { /// On aarch64-macos, Apple mandate that the frame pointer is always used. /// TODO: are there any other architectures with guarantees like this? - const fp_unwind_is_safe = !builtin.omit_frame_pointer and builtin.cpu.arch == .aarch64 and builtin.os.tag.isDarwin(); + const fp_unwind_is_safe = builtin.cpu.arch == .aarch64 and builtin.os.tag.isDarwin(); /// Whether the current unwind strategy is allowed given `allow_unsafe`. fn stratOk(it: *const StackIterator, allow_unsafe: bool) bool { return switch (it.*) { .di => true, - .fp => allow_unsafe or fp_unwind_is_safe, + // If we omitted frame pointers from *this* compilation, FP unwinding would crash + // immediately regardless of anything. But FPs could also be omitted from a different + // linked object, so it's not guaranteed to be safe, unless the target specifically + // requires it. 
+ .fp => !builtin.omit_frame_pointer and (fp_unwind_is_safe or allow_unsafe), }; } From 7601b397ef95b57c05e9a4ed3670782d2dddf84c Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 11 Sep 2025 13:34:02 +0100 Subject: [PATCH 46/85] fix bad merge The API of `std.debug.Pdb` changed. --- lib/std/debug/SelfInfo/WindowsModule.zig | 27 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index fc728935dacb..d1fd070ec738 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -154,17 +154,29 @@ fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) ! }; defer if (path.ptr != raw_path.ptr) gpa.free(path); - di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { + const pdb_file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound, error.IsDir => break :pdb, else => |e| return e, }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); + errdefer pdb_file.close(); - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + const pdb_reader = try gpa.create(std.fs.File.Reader); + errdefer gpa.destroy(pdb_reader); + + pdb_reader.* = pdb_file.reader(try gpa.alloc(u8, 4096)); + errdefer gpa.free(pdb_reader.interface.buffer); + + var pdb: Pdb = try .init(gpa, pdb_reader); + errdefer pdb.deinit(); + try pdb.parseInfoStream(); + try pdb.parseDbiStream(); + + if (!mem.eql(u8, &coff_obj.guid, &pdb.guid) or coff_obj.age != pdb.age) return error.InvalidDebugInfo; di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); + + di.pdb = pdb; } di.loaded = true; @@ -204,7 +216,12 @@ pub const DebugInfo = struct { pub fn deinit(di: *DebugInfo, gpa: Allocator) void { if (!di.loaded) return; if (di.dwarf) |*dwarf| dwarf.deinit(gpa); - if (di.pdb) |*pdb| pdb.deinit(); + if (di.pdb) |*pdb| { + pdb.file_reader.file.close(); + 
gpa.free(pdb.file_reader.interface.buffer); + gpa.destroy(pdb.file_reader); + pdb.deinit(); + } gpa.free(di.coff_section_headers); if (di.mapped_file) |mapped| { const process_handle = windows.GetCurrentProcess(); From 9901b9389ed963ff262d1ce4973029a570035f19 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 11 Sep 2025 13:42:51 +0100 Subject: [PATCH 47/85] std: fix 32-bit build and some unsafe casts --- lib/std/debug/Dwarf/Unwind.zig | 33 +++++++++++++++++----------- lib/std/debug/ElfFile.zig | 16 +++++++------- lib/std/debug/SelfInfo/ElfModule.zig | 21 +++++++++--------- lib/std/elf.zig | 3 ++- 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 5334988b90d0..2eaa89c40425 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -433,7 +433,7 @@ pub const FrameDescriptionEntry = struct { .lsb_z => { // There is augmentation data, but it's irrelevant to us -- it // only contains the LSDA pointer, which we don't care about. 
- const aug_data_len = try r.takeLeb128(u64); + const aug_data_len = try r.takeLeb128(usize); _ = try r.discardAll(aug_data_len); }, } @@ -463,17 +463,20 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia switch (try EntryHeader.read(&r, entry_offset, section.id, endian)) { .cie => |cie_info| { // Ignore CIEs for now; we'll parse them when we read a corresponding FDE - try r.discardAll(cie_info.bytes_len); + try r.discardAll(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream); continue; }, .fde => |fde_info| { - var cie_r: Reader = .fixed(section.bytes[fde_info.cie_offset..]); + if (fde_info.cie_offset > section.bytes.len) return error.EndOfStream; + var cie_r: Reader = .fixed(section.bytes[@intCast(fde_info.cie_offset)..]); const cie_info = switch (try EntryHeader.read(&cie_r, fde_info.cie_offset, section.id, endian)) { .cie => |cie_info| cie_info, .fde, .terminator => return bad(), // this is meant to be a CIE }; - const cie: CommonInformationEntry = try .parse(try cie_r.take(cie_info.bytes_len), section.id, addr_size_bytes); - const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(fde_info.bytes_len), cie, endian); + const cie_bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; + const fde_bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; + const cie: CommonInformationEntry = try .parse(try cie_r.take(cie_bytes_len), section.id, addr_size_bytes); + const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(fde_bytes_len), cie, endian); try fde_list.append(gpa, .{ .pc_begin = fde.pc_begin, .fde_offset = entry_offset, @@ -537,27 +540,29 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { const section = unwind.frame_section; - var 
fde_reader: Reader = .fixed(section.bytes[fde_offset..]); + if (fde_offset > section.bytes.len) return error.EndOfStream; + var fde_reader: Reader = .fixed(section.bytes[@intCast(fde_offset)..]); const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section.id, endian)) { .fde => |info| info, .cie, .terminator => return bad(), // This is meant to be an FDE }; const cie_offset = fde_info.cie_offset; - var cie_reader: Reader = .fixed(section.bytes[cie_offset..]); + if (cie_offset > section.bytes.len) return error.EndOfStream; + var cie_reader: Reader = .fixed(section.bytes[@intCast(cie_offset)..]); const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section.id, endian)) { .cie => |info| info, .fde, .terminator => return bad(), // This is meant to be a CIE }; const cie: CommonInformationEntry = try .parse( - try cie_reader.take(cie_info.bytes_len), + try cie_reader.take(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream), section.id, addr_size_bytes, ); const fde: FrameDescriptionEntry = try .parse( section.vaddr + fde_offset + fde_reader.seek, - try fde_reader.take(fde_info.bytes_len), + try fde_reader.take(cast(usize, fde_info.bytes_len) orelse return error.EndOfStream), cie, endian, ); @@ -566,9 +571,8 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia } const EhPointerContext = struct { - // The address of the pointer field itself + /// The address of the pointer field itself pc_rel_base: u64, - // These relative addressing modes are only used in specific cases, and // might not be available / required in all parsing contexts data_rel_base: ?u64 = null, @@ -604,7 +608,7 @@ fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: fn readEhPointer(r: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { const offset = try readEhPointerAbs(r, enc.type, addr_size_bytes, endian); if (enc.indirect) return bad(); // GCC extension; 
not supported - const base = switch (enc.rel) { + const base: u64 = switch (enc.rel) { .abs, .aligned => 0, .pcrel => ctx.pc_rel_base, .textrel => ctx.text_rel_base orelse return bad(), @@ -613,7 +617,10 @@ fn readEhPointer(r: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerCont _ => return bad(), }; return switch (offset) { - .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(base)))), + .signed => |s| if (s >= 0) + try std.math.add(u64, base, @intCast(s)) + else + try std.math.sub(u64, base, @intCast(-s)), // absptr can actually contain signed values in some cases (aarch64 MachO) .unsigned => |u| u +% base, }; diff --git a/lib/std/debug/ElfFile.zig b/lib/std/debug/ElfFile.zig index b8f1bdf615c7..5be5ee55c508 100644 --- a/lib/std/debug/ElfFile.zig +++ b/lib/std/debug/ElfFile.zig @@ -19,7 +19,7 @@ strtab: ?[]const u8, symtab: ?SymtabSection, /// Binary search table lazily populated by `searchSymtab`. -symbol_search_table: ?[]u64, +symbol_search_table: ?[]usize, /// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that /// this memory can be unmapped by `ElfFile.deinit`. @@ -259,7 +259,7 @@ pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{ swap_endian: bool, target: u64, symbols: []align(1) const Sym, - fn predicate(ctx: @This(), sym_index: u64) bool { + fn predicate(ctx: @This(), sym_index: usize) bool { // We need to return `true` for the first N items, then `false` for the rest -- // the index we'll get out is the first `false` one. So, we'll return `true` iff // the target address is after the *end* of this symbol. 
This synchronizes with @@ -270,7 +270,7 @@ pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{ return ctx.target >= sym_end; } }; - const sym_index_index = std.sort.partitionPoint(u64, search_table, @as(SearchContext, .{ + const sym_index_index = std.sort.partitionPoint(usize, search_table, @as(SearchContext, .{ .swap_endian = swap_endian, .target = vaddr, .symbols = symbols, @@ -291,8 +291,8 @@ pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{ fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, symbols: []align(1) const Sym) error{ OutOfMemory, BadSymtab, -}![]u64 { - var result: std.ArrayList(u64) = .empty; +}![]usize { + var result: std.ArrayList(usize) = .empty; defer result.deinit(gpa); const swap_endian = endian != @import("builtin").cpu.arch.endian(); @@ -308,7 +308,7 @@ fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, sy const SortContext = struct { swap_endian: bool, symbols: []align(1) const Sym, - fn lessThan(ctx: @This(), lhs_sym_index: u64, rhs_sym_index: u64) bool { + fn lessThan(ctx: @This(), lhs_sym_index: usize, rhs_sym_index: usize) bool { // We sort by *end* address, not start address. This matches up with logic in `searchSymtab`. 
var lhs_sym = ctx.symbols[lhs_sym_index]; var rhs_sym = ctx.symbols[rhs_sym_index]; @@ -321,7 +321,7 @@ fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, sy return lhs_val < rhs_val; } }; - std.mem.sort(u64, result.items, @as(SortContext, .{ + std.mem.sort(usize, result.items, @as(SortContext, .{ .swap_endian = swap_endian, .symbols = symbols, }), SortContext.lessThan); @@ -504,7 +504,7 @@ fn loadInner( continue; } - const buf = try arena.alloc(u8, ch_size); + const buf = try arena.alloc(u8, std.math.cast(usize, ch_size) orelse return error.Overflow); var fw: std.Io.Writer = .fixed(buf); var decompress: std.compress.flate.Decompress = .init(§ion_reader, .zlib, &.{}); const n = decompress.reader.streamRemaining(&fw) catch |err| switch (err) { diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 7871f1012fb6..8a0acf8bb06f 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -200,7 +200,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, error.UnsupportedAddrSize => return error.UnsupportedDebugInfo, }; - buf[0] = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(module.load_offset + header.eh_frame_vaddr)); + buf[0] = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(@as(usize, @intCast(module.load_offset + header.eh_frame_vaddr)))); break :unwinds buf[0..1]; } else unwinds: { // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` @@ -208,20 +208,19 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro try module.loadElf(gpa, di); const opt_debug_frame = &di.loaded_elf.?.debug_frame; const opt_eh_frame = &di.loaded_elf.?.eh_frame; + var i: usize = 0; // If both are present, we can't just pick one -- the info could be split between them. 
// `.debug_frame` is likely to be the more complete section, so we'll prioritize that one. if (opt_debug_frame.*) |*debug_frame| { - buf[0] = .initSection(.debug_frame, debug_frame.vaddr, debug_frame.bytes); - if (opt_eh_frame.*) |*eh_frame| { - buf[1] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); - break :unwinds buf[0..2]; - } - break :unwinds buf[0..1]; - } else if (opt_eh_frame.*) |*eh_frame| { - buf[0] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); - break :unwinds buf[0..1]; + buf[i] = .initSection(.debug_frame, debug_frame.vaddr, debug_frame.bytes); + i += 1; + } + if (opt_eh_frame.*) |*eh_frame| { + buf[i] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); + i += 1; } - return error.MissingDebugInfo; + if (i == 0) return error.MissingDebugInfo; + break :unwinds buf[0..i]; }; errdefer for (unwinds) |*u| u.deinit(gpa); for (unwinds) |*u| try prepareUnwindLookup(u, gpa); diff --git a/lib/std/elf.zig b/lib/std/elf.zig index 43b542c09e04..3b0c08500312 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -744,7 +744,8 @@ pub const SectionHeaderBufferIterator = struct { const size: u64 = if (it.elf_header.is_64) @sizeOf(Elf64_Shdr) else @sizeOf(Elf32_Shdr); const offset = it.elf_header.shoff + size * it.index; - var reader = std.Io.Reader.fixed(it.buf[offset..]); + if (offset > it.buf.len) return error.EndOfStream; + var reader = std.Io.Reader.fixed(it.buf[@intCast(offset)..]); return takeShdr(&reader, it.elf_header); } From a12ce28224f475bd97dc92bc8314ffff60fd6dd2 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 11 Sep 2025 16:50:48 +0100 Subject: [PATCH 48/85] std: fix os.linux.x86.syscall6 It was possible for `arg6` to be passed as an operand relative to esp. In that case, the `push` at the top clobbered esp and hence made the reference to arg6 invalid. This was manifesting in this branch as broken stack traces on x86-linux due to an `mmap2` syscall accidentally passing the page offset as non-zero! 
This commit fixes a bug introduced in cb0e6d8aa. --- lib/std/os/linux/x86.zig | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/std/os/linux/x86.zig b/lib/std/os/linux/x86.zig index 0ffe9c2956b7..eed85a2ad0be 100644 --- a/lib/std/os/linux/x86.zig +++ b/lib/std/os/linux/x86.zig @@ -80,28 +80,32 @@ pub fn syscall6( arg5: usize, arg6: usize, ) usize { - // arg5/arg6 are passed via memory as we're out of registers if ebp is used as frame pointer, or - // if we're compiling with PIC. We push arg5/arg6 on the stack before changing ebp/esp as the - // compiler may reference arg5/arg6 as an offset relative to ebp/esp. + // arg6 can't be passed to asm in a register because ebp might be reserved as the frame pointer + // and there are no more GPRs available; so we'll need a memory operand for it. Adding that + // memory operand means that on PIC we might need a reference to the GOT, which in turn needs + // *its* own GPR, so we need to pass another arg in memory too! This is surprisingly hard to get + // right, because we can't touch esp or ebp until we're done with the memory input (as that + // input could be relative to esp or ebp). + const args56: [2]usize = .{ arg5, arg6 }; return asm volatile ( - \\ push %[arg5] - \\ push %[arg6] - \\ push %%edi + \\ push %[args56] \\ push %%ebp - \\ mov 12(%%esp), %%edi - \\ mov 8(%%esp), %%ebp + \\ mov 4(%%esp), %%ebp + \\ mov %%edi, 4(%%esp) + \\ // The saved %edi and %ebp are on the stack, and %ebp points to `args56`. + \\ // Prepare the last two args, syscall, then pop the saved %ebp and %edi. 
+ \\ mov (%%ebp), %%edi + \\ mov 4(%%ebp), %%ebp \\ int $0x80 \\ pop %%ebp \\ pop %%edi - \\ add $8, %%esp : [ret] "={eax}" (-> usize), : [number] "{eax}" (@intFromEnum(number)), [arg1] "{ebx}" (arg1), [arg2] "{ecx}" (arg2), [arg3] "{edx}" (arg3), [arg4] "{esi}" (arg4), - [arg5] "rm" (arg5), - [arg6] "rm" (arg6), + [args56] "rm" (&args56), : .{ .memory = true }); } From cedd9de64f183e4ce088654f8a9ed978cbdc8962 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 11 Sep 2025 20:07:55 +0100 Subject: [PATCH 49/85] std.debug.Dwarf: fix names of inlined functions --- lib/std/debug/Dwarf.zig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index dfe9bd8dd387..9a70746b0ad3 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -348,7 +348,13 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { - for (di.func_list.items) |*func| { + // Iterate the function list backwards so that we see child DIEs before their parents. This is + // important because `DW_TAG_inlined_subroutine` DIEs will have a range which is a sub-range of + // their caller, and we want to return the callee's name, not the caller's. + var i: usize = di.func_list.items.len; + while (i > 0) { + i -= 1; + const func = &di.func_list.items[i]; if (func.pc_range) |range| { if (address >= range.start and address < range.end) { return func.name; From 1a8a8c610d9d5256df25090b0c8ca47cbe94ef1b Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 9 Sep 2025 22:45:39 +0100 Subject: [PATCH 50/85] tests: split up and enhance stack trace tests Previously, the `test-stack-traces` step was essentially just testing error traces, and even there we didn't have much coverage. This commit solves that by splitting the "stack trace" tests into two separate harnesses: the "stack trace" tests are for actual stack traces (i.e. 
involving stack unwinding), while the "error trace" tests are specifically for error return traces. The "stack trace" tests will test different configurations of: * `-lc` * `-fPIE` * `-fomit-frame-pointer` * `-fllvm` * unwind tables (currently disabled) * strip debug info (currently disabled) The main goal there is to test *stack unwinding* under different conditions. Meanwhile, the "error trace" tests will test different configurations of `-O` and `-fllvm`; the main goal here, aside from checking that error traces themselves do not miscompile, is to check whether debug info is still working even in optimized builds. Of course, aggressive optimizations *can* thwart debug info no matter what, so as before, there is a way to disable cases for specific targets / optimize modes. The program which converts stack traces into a more validatable format by removing things like addresses (previously `check-stack-trace.zig`, now `convert-stack-trace.zig`) has been rewritten and simplified. Also, thanks to various fixes in this branch, several workarounds have become unnecessary: for instance, we don't need to ignore the function name printed in stack traces in release modes, because `std.debug.Dwarf` now uses the correct DIE for inlined functions! Neither `test-stack-traces` nor `test-error-traces` does general foreign architecture testing, because it seems that (at least for now) external executors often aren't particularly good at handling stack tracing correctly (looking at you, Wine). Generally, they just test the native target (this matches the old behavior of `test-stack-traces`). However, there is one exception: when on an x86_64 or aarch64 host, we will also test the 32-bit version (x86 or arm) if the OS supports it, because such executables can be trivially tested without an external executor. Oh, also, I wrote a bunch of stack trace tests. Previously there was, erm, *one* test in `test-stack-traces` which wasn't for error traces. Now there are a good few! 
--- build.zig | 3 +- lib/std/debug/SelfInfo.zig | 2 +- test/error_traces.zig | 430 ++++++++++++++ test/src/ErrorTrace.zig | 126 ++++ test/src/StackTrace.zig | 208 +++++-- test/src/check-stack-trace.zig | 88 --- test/src/convert-stack-trace.zig | 104 ++++ test/stack_traces.zig | 960 +++++-------------------------- test/tests.zig | 71 ++- 9 files changed, 1032 insertions(+), 960 deletions(-) create mode 100644 test/error_traces.zig create mode 100644 test/src/ErrorTrace.zig delete mode 100644 test/src/check-stack-trace.zig create mode 100644 test/src/convert-stack-trace.zig diff --git a/build.zig b/build.zig index 8611afabdef3..c57c3e713ad1 100644 --- a/build.zig +++ b/build.zig @@ -563,7 +563,8 @@ pub fn build(b: *std.Build) !void { .skip_release = skip_release, })); test_step.dependOn(tests.addLinkTests(b, enable_macos_sdk, enable_ios_sdk, enable_symlinks_windows)); - test_step.dependOn(tests.addStackTraceTests(b, test_filters, optimization_modes)); + test_step.dependOn(tests.addStackTraceTests(b, test_filters, skip_non_native)); + test_step.dependOn(tests.addErrorTraceTests(b, test_filters, optimization_modes, skip_non_native)); test_step.dependOn(tests.addCliTests(b)); if (tests.addDebuggerTests(b, .{ .test_filters = test_filters, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index d838a1a6c180..38027dbb5882 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -355,7 +355,7 @@ pub const DwarfUnwindContext = struct { context.reg_context.eh_frame = cie.version != 4; context.reg_context.is_macho = native_os.isDarwin(); - const row = try context.vm.runTo(gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); + const row = try context.vm.runTo(gpa, pc_vaddr, cie, fde, @sizeOf(usize), native_endian); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; diff --git a/test/error_traces.zig b/test/error_traces.zig new file 
mode 100644 index 000000000000..ea84f14ce5a8 --- /dev/null +++ b/test/error_traces.zig @@ -0,0 +1,430 @@ +pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { + cases.addCase(.{ + .name = "return", + .source = + \\pub fn main() !void { + \\ return error.TheSkyIsFalling; + \\} + , + .expect_error = "TheSkyIsFalling", + .expect_trace = + \\source.zig:2:5: [address] in main + \\ return error.TheSkyIsFalling; + \\ ^ + , + }); + + cases.addCase(.{ + .name = "try return", + .source = + \\fn foo() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\pub fn main() !void { + \\ try foo(); + \\} + , + .expect_error = "TheSkyIsFalling", + .expect_trace = + \\source.zig:2:5: [address] in foo + \\ return error.TheSkyIsFalling; + \\ ^ + \\source.zig:6:5: [address] in main + \\ try foo(); + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + cases.addCase(.{ + .name = "non-error return pops error trace", + .source = + \\fn bar() !void { + \\ return error.UhOh; + \\} + \\ + \\fn foo() !void { + \\ bar() catch { + \\ return; // non-error result: success + \\ }; + \\} + \\ + \\pub fn main() !void { + \\ try foo(); + \\ return error.UnrelatedError; + \\} + , + .expect_error = "UnrelatedError", + .expect_trace = + \\source.zig:13:5: [address] in main + \\ return error.UnrelatedError; + \\ ^ + , + }); + + cases.addCase(.{ + .name = "continue in while loop", + .source = + \\fn foo() !void { + \\ return error.UhOh; + \\} + \\ + \\pub fn main() !void { + \\ var i: usize = 0; + \\ while (i < 3) : (i += 1) { + \\ foo() catch continue; + \\ } + \\ return error.UnrelatedError; + \\} + , + .expect_error = "UnrelatedError", + .expect_trace = + \\source.zig:10:5: [address] in main + \\ return error.UnrelatedError; + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .linux }, + .{ .x86, .linux }, + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "try return + handled 
catch/if-else", + .source = + \\fn foo() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\pub fn main() !void { + \\ foo() catch {}; // should not affect error trace + \\ if (foo()) |_| {} else |_| { + \\ // should also not affect error trace + \\ } + \\ try foo(); + \\} + , + .expect_error = "TheSkyIsFalling", + .expect_trace = + \\source.zig:2:5: [address] in foo + \\ return error.TheSkyIsFalling; + \\ ^ + \\source.zig:10:5: [address] in main + \\ try foo(); + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "break from inline loop pops error return trace", + .source = + \\fn foo() !void { return error.FooBar; } + \\ + \\pub fn main() !void { + \\ comptime var i: usize = 0; + \\ b: inline while (i < 5) : (i += 1) { + \\ foo() catch { + \\ break :b; // non-error break, success + \\ }; + \\ } + \\ // foo() was successfully handled, should not appear in trace + \\ + \\ return error.BadTime; + \\} + , + .expect_error = "BadTime", + .expect_trace = + \\source.zig:12:5: [address] in main + \\ return error.BadTime; + \\ ^ + , + }); + + cases.addCase(.{ + .name = "catch and re-throw error", + .source = + \\fn foo() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\pub fn main() !void { + \\ return foo() catch error.AndMyCarIsOutOfGas; + \\} + , + .expect_error = "AndMyCarIsOutOfGas", + .expect_trace = + \\source.zig:2:5: [address] in foo + \\ return error.TheSkyIsFalling; + \\ ^ + \\source.zig:6:5: [address] in main + \\ return foo() catch error.AndMyCarIsOutOfGas; + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "errors stored in var do not contribute to error trace", + .source = + \\fn foo() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\pub fn main() !void { + \\ // Once an error is stored in a variable, it is popped from the trace + \\ var x = foo(); + \\ x = {}; + \\ + \\ 
// As a result, this error trace will still be clean + \\ return error.SomethingUnrelatedWentWrong; + \\} + , + .expect_error = "SomethingUnrelatedWentWrong", + .expect_trace = + \\source.zig:11:5: [address] in main + \\ return error.SomethingUnrelatedWentWrong; + \\ ^ + , + }); + + cases.addCase(.{ + .name = "error stored in const has trace preserved for duration of block", + .source = + \\fn foo() !void { return error.TheSkyIsFalling; } + \\fn bar() !void { return error.InternalError; } + \\fn baz() !void { return error.UnexpectedReality; } + \\ + \\pub fn main() !void { + \\ const x = foo(); + \\ const y = b: { + \\ if (true) + \\ break :b bar(); + \\ + \\ break :b {}; + \\ }; + \\ x catch {}; + \\ y catch {}; + \\ // foo()/bar() error traces not popped until end of block + \\ + \\ { + \\ const z = baz(); + \\ z catch {}; + \\ // baz() error trace still alive here + \\ } + \\ // baz() error trace popped, foo(), bar() still alive + \\ return error.StillUnresolved; + \\} + , + .expect_error = "StillUnresolved", + .expect_trace = + \\source.zig:1:18: [address] in foo + \\fn foo() !void { return error.TheSkyIsFalling; } + \\ ^ + \\source.zig:2:18: [address] in bar + \\fn bar() !void { return error.InternalError; } + \\ ^ + \\source.zig:23:5: [address] in main + \\ return error.StillUnresolved; + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "error passed to function has its trace preserved for duration of the call", + .source = + \\pub fn expectError(expected_error: anyerror, actual_error: anyerror!void) !void { + \\ actual_error catch |err| { + \\ if (err == expected_error) return {}; + \\ }; + \\ return error.TestExpectedError; + \\} + \\ + \\fn alwaysErrors() !void { return error.ThisErrorShouldNotAppearInAnyTrace; } + \\fn foo() !void { return error.Foo; } + \\ + \\pub fn main() !void { + \\ try expectError(error.ThisErrorShouldNotAppearInAnyTrace, alwaysErrors()); + \\ try 
expectError(error.ThisErrorShouldNotAppearInAnyTrace, alwaysErrors()); + \\ try expectError(error.Foo, foo()); + \\ + \\ // Only the error trace for this failing check should appear: + \\ try expectError(error.Bar, foo()); + \\} + , + .expect_error = "TestExpectedError", + .expect_trace = + \\source.zig:9:18: [address] in foo + \\fn foo() !void { return error.Foo; } + \\ ^ + \\source.zig:5:5: [address] in expectError + \\ return error.TestExpectedError; + \\ ^ + \\source.zig:17:5: [address] in main + \\ try expectError(error.Bar, foo()); + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "try return from within catch", + .source = + \\fn foo() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\fn bar() !void { + \\ return error.AndMyCarIsOutOfGas; + \\} + \\ + \\pub fn main() !void { + \\ foo() catch { // error trace should include foo() + \\ try bar(); + \\ }; + \\} + , + .expect_error = "AndMyCarIsOutOfGas", + .expect_trace = + \\source.zig:2:5: [address] in foo + \\ return error.TheSkyIsFalling; + \\ ^ + \\source.zig:6:5: [address] in bar + \\ return error.AndMyCarIsOutOfGas; + \\ ^ + \\source.zig:11:9: [address] in main + \\ try bar(); + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "try return from within if-else", + .source = + \\fn foo() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\fn bar() !void { + \\ return error.AndMyCarIsOutOfGas; + \\} + \\ + \\pub fn main() !void { + \\ if (foo()) |_| {} else |_| { // error trace should include foo() + \\ try bar(); + \\ } + \\} + , + .expect_error = "AndMyCarIsOutOfGas", + .expect_trace = + \\source.zig:2:5: [address] in foo + \\ return error.TheSkyIsFalling; + \\ ^ + \\source.zig:6:5: [address] in bar + \\ return error.AndMyCarIsOutOfGas; + \\ ^ + \\source.zig:11:9: [address] in main + \\ try bar(); + \\ ^ + , + 
.disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "try try return return", + .source = + \\fn foo() !void { + \\ try bar(); + \\} + \\ + \\fn bar() !void { + \\ return make_error(); + \\} + \\ + \\fn make_error() !void { + \\ return error.TheSkyIsFalling; + \\} + \\ + \\pub fn main() !void { + \\ try foo(); + \\} + , + .expect_error = "TheSkyIsFalling", + .expect_trace = + \\source.zig:10:5: [address] in make_error + \\ return error.TheSkyIsFalling; + \\ ^ + \\source.zig:6:5: [address] in bar + \\ return make_error(); + \\ ^ + \\source.zig:2:5: [address] in foo + \\ try bar(); + \\ ^ + \\source.zig:14:5: [address] in main + \\ try foo(); + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); + + cases.addCase(.{ + .name = "error union switch with call operand", + .source = + \\pub fn main() !void { + \\ try foo(); + \\ return error.TheSkyIsFalling; + \\} + \\ + \\noinline fn failure() error{ Fatal, NonFatal }!void { + \\ return error.NonFatal; + \\} + \\ + \\fn foo() error{Fatal}!void { + \\ return failure() catch |err| switch (err) { + \\ error.Fatal => return error.Fatal, + \\ error.NonFatal => return, + \\ }; + \\} + , + .expect_error = "TheSkyIsFalling", + .expect_trace = + \\source.zig:3:5: [address] in main + \\ return error.TheSkyIsFalling; + \\ ^ + , + .disable_trace_optimized = &.{ + .{ .x86_64, .linux }, + .{ .x86, .linux }, + .{ .x86_64, .windows }, + .{ .x86, .windows }, + }, + }); +} diff --git a/test/src/ErrorTrace.zig b/test/src/ErrorTrace.zig new file mode 100644 index 000000000000..ca726eaea379 --- /dev/null +++ b/test/src/ErrorTrace.zig @@ -0,0 +1,126 @@ +b: *std.Build, +step: *Step, +test_filters: []const []const u8, +targets: []const std.Build.ResolvedTarget, +optimize_modes: []const OptimizeMode, +convert_exe: *std.Build.Step.Compile, + +pub const Case = struct { + name: []const u8, + source: []const u8, + expect_error: 
[]const u8, + expect_trace: []const u8, + /// On these arch/OS pairs we will not test the error trace on optimized LLVM builds because the + /// optimizations break the error trace. We will test the binary with error tracing disabled, + /// just to ensure that the expected error is still returned from `main`. + disable_trace_optimized: []const DisableConfig = &.{}, + + pub const DisableConfig = struct { std.Target.Cpu.Arch, std.Target.Os.Tag }; + pub const Backend = enum { llvm, selfhosted }; +}; + +pub fn addCase(self: *ErrorTrace, case: Case) void { + for (self.targets) |*target| { + const triple: ?[]const u8 = if (target.query.isNative()) null else t: { + break :t target.query.zigTriple(self.b.graph.arena) catch @panic("OOM"); + }; + for (self.optimize_modes) |optimize| { + self.addCaseConfig(case, target, triple, optimize, .llvm); + } + if (shouldTestNonLlvm(&target.result)) { + for (self.optimize_modes) |optimize| { + self.addCaseConfig(case, target, triple, optimize, .selfhosted); + } + } + } +} + +fn shouldTestNonLlvm(target: *const std.Target) bool { + return switch (target.cpu.arch) { + .x86_64 => switch (target.ofmt) { + .elf => true, + else => false, + }, + else => false, + }; +} + +fn addCaseConfig( + self: *ErrorTrace, + case: Case, + target: *const std.Build.ResolvedTarget, + triple: ?[]const u8, + optimize: OptimizeMode, + backend: Case.Backend, +) void { + const b = self.b; + + const error_tracing: bool = tracing: { + if (optimize == .Debug) break :tracing true; + if (backend != .llvm) break :tracing true; + for (case.disable_trace_optimized) |disable| { + const d_arch, const d_os = disable; + if (target.result.cpu.arch == d_arch and target.result.os.tag == d_os) { + // This particular configuration cannot do error tracing in optimized LLVM builds. 
+ break :tracing false; + } + } + break :tracing true; + }; + + const annotated_case_name = b.fmt("check {s} ({s}{s}{s} {s})", .{ + case.name, + triple orelse "", + if (triple != null) " " else "", + @tagName(optimize), + @tagName(backend), + }); + if (self.test_filters.len > 0) { + for (self.test_filters) |test_filter| { + if (mem.indexOf(u8, annotated_case_name, test_filter)) |_| break; + } else return; + } + + const write_files = b.addWriteFiles(); + const source_zig = write_files.add("source.zig", case.source); + const exe = b.addExecutable(.{ + .name = "test", + .root_module = b.createModule(.{ + .root_source_file = source_zig, + .optimize = optimize, + .target = target.*, + .error_tracing = error_tracing, + .strip = false, + }), + .use_llvm = switch (backend) { + .llvm => true, + .selfhosted => false, + }, + }); + exe.bundle_ubsan_rt = false; + + const run = b.addRunArtifact(exe); + run.removeEnvironmentVariable("CLICOLOR_FORCE"); + run.setEnvironmentVariable("NO_COLOR", "1"); + run.expectExitCode(1); + run.expectStdOutEqual(""); + + const expected_stderr = switch (error_tracing) { + true => b.fmt("error: {s}\n{s}\n", .{ case.expect_error, case.expect_trace }), + false => b.fmt("error: {s}\n", .{case.expect_error}), + }; + + const check_run = b.addRunArtifact(self.convert_exe); + check_run.setName(annotated_case_name); + check_run.addFileArg(run.captureStdErr(.{})); + check_run.expectStdOutEqual(expected_stderr); + + self.step.dependOn(&check_run.step); +} + +const ErrorTrace = @This(); +const std = @import("std"); +const builtin = @import("builtin"); +const Step = std.Build.Step; +const OptimizeMode = std.builtin.OptimizeMode; +const mem = std.mem; diff --git a/test/src/StackTrace.zig b/test/src/StackTrace.zig index 9b51f4e4b2c3..e200957fbefe 100644 --- a/test/src/StackTrace.zig +++ b/test/src/StackTrace.zig @@ -1,75 +1,164 @@ b: *std.Build, step: *Step, -test_index: usize, test_filters: []const []const u8, -optimize_modes: []const OptimizeMode, -check_exe: 
*std.Build.Step.Compile, +targets: []const std.Build.ResolvedTarget, +convert_exe: *std.Build.Step.Compile, const Config = struct { name: []const u8, source: []const u8, - Debug: ?PerMode = null, - ReleaseSmall: ?PerMode = null, - ReleaseSafe: ?PerMode = null, - ReleaseFast: ?PerMode = null, - - const PerMode = struct { - expect: []const u8, - exclude_arch: []const std.Target.Cpu.Arch = &.{}, - exclude_os: []const std.Target.Os.Tag = &.{}, - error_tracing: ?bool = null, - }; + /// Whether this test case expects to have unwind tables / frame pointers. + unwind: enum { + /// This case assumes that some unwind strategy, safe or unsafe, is available. + any, + /// This case assumes that no unwinding strategy is available. + none, + /// This case assumes that a safe unwind strategy, like DWARF unwinding, is available. + safe, + /// This case assumes that at most, unsafe FP unwinding is available. + no_safe, + }, + /// If `true`, the expected exit code is that of the default panic handler, rather than 0. + expect_panic: bool, + /// When debug info is not stripped, stdout is expected to **contain** (not equal!) this string. + expect: []const u8, + /// When debug info *is* stripped, stdout is expected to **contain** (not equal!) this string. 
+ expect_strip: []const u8, }; pub fn addCase(self: *StackTrace, config: Config) void { - self.addCaseInner(config, true); - if (shouldTestNonLlvm(&self.b.graph.host.result)) { - self.addCaseInner(config, false); + for (self.targets) |*target| { + addCaseTarget( + self, + config, + target, + if (target.query.isNative()) null else t: { + break :t target.query.zigTriple(self.b.graph.arena) catch @panic("OOM"); + }, + ); } } - -fn addCaseInner(self: *StackTrace, config: Config, use_llvm: bool) void { - if (config.Debug) |per_mode| - self.addExpect(config.name, config.source, .Debug, use_llvm, per_mode); - - if (config.ReleaseSmall) |per_mode| - self.addExpect(config.name, config.source, .ReleaseSmall, use_llvm, per_mode); - - if (config.ReleaseFast) |per_mode| - self.addExpect(config.name, config.source, .ReleaseFast, use_llvm, per_mode); - - if (config.ReleaseSafe) |per_mode| - self.addExpect(config.name, config.source, .ReleaseSafe, use_llvm, per_mode); -} - -fn shouldTestNonLlvm(target: *const std.Target) bool { - return switch (target.cpu.arch) { - .x86_64 => switch (target.ofmt) { - .elf => !target.os.tag.isBSD(), +fn addCaseTarget( + self: *StackTrace, + config: Config, + target: *const std.Build.ResolvedTarget, + triple: ?[]const u8, +) void { + const both_backends = switch (target.result.cpu.arch) { + .x86_64 => switch (target.result.ofmt) { + .elf => true, else => false, }, else => false, }; + const both_pie = switch (target.result.os.tag) { + .fuchsia, .openbsd => false, + else => true, + }; + const both_libc = switch (target.result.os.tag) { + .freebsd, .netbsd => false, + else => !target.result.requiresLibC(), + }; + + // On aarch64-macos, FP unwinding is blessed by Apple to always be reliable, and std.debug knows this. 
+ const fp_unwind_is_safe = target.result.cpu.arch == .aarch64 and target.result.os.tag.isDarwin(); + + const use_llvm_vals: []const bool = if (both_backends) &.{ true, false } else &.{true}; + const pie_vals: []const ?bool = if (both_pie) &.{ true, false } else &.{null}; + const link_libc_vals: []const ?bool = if (both_libc) &.{ true, false } else &.{null}; + const strip_debug_vals: []const bool = &.{ true, false }; + + const UnwindInfo = packed struct(u2) { + tables: bool, + fp: bool, + const none: @This() = .{ .tables = false, .fp = false }; + const both: @This() = .{ .tables = true, .fp = true }; + const only_tables: @This() = .{ .tables = true, .fp = false }; + const only_fp: @This() = .{ .tables = false, .fp = true }; + }; + const unwind_info_vals: []const UnwindInfo = switch (config.unwind) { + .none => &.{.none}, + .any => &.{ .only_tables, .only_fp, .both }, + .safe => if (fp_unwind_is_safe) &.{ .only_tables, .only_fp, .both } else &.{ .only_tables, .both }, + .no_safe => if (fp_unwind_is_safe) &.{.none} else &.{ .none, .only_fp }, + }; + + for (use_llvm_vals) |use_llvm| { + for (pie_vals) |pie| { + for (link_libc_vals) |link_libc| { + for (strip_debug_vals) |strip_debug| { + for (unwind_info_vals) |unwind_info| { + self.addCaseInstance( + target, + triple, + config.name, + config.source, + use_llvm, + pie, + link_libc, + strip_debug, + !unwind_info.tables, + !unwind_info.fp, + config.expect_panic, + if (strip_debug) config.expect_strip else config.expect, + ); + } + } + } + } + } } -fn addExpect( +fn addCaseInstance( self: *StackTrace, + target: *const std.Build.ResolvedTarget, + triple: ?[]const u8, name: []const u8, source: []const u8, - optimize_mode: OptimizeMode, use_llvm: bool, - mode_config: Config.PerMode, + pie: ?bool, + link_libc: ?bool, + strip_debug: bool, + strip_unwind: bool, + omit_frame_pointer: bool, + expect_panic: bool, + expect_stderr: []const u8, ) void { - for (mode_config.exclude_arch) |tag| if (tag == builtin.cpu.arch) return; - 
for (mode_config.exclude_os) |tag| if (tag == builtin.os.tag) return; - const b = self.b; - const annotated_case_name = b.fmt("check {s} ({s} {s})", .{ - name, @tagName(optimize_mode), if (use_llvm) "llvm" else "selfhosted", + + if (strip_debug) { + // To enable this coverage, one of two things needs to happen: + // * The compiler needs to gain the ability to strip only debug info (not symbols) + // * `std.Build.Step.ObjCopy` needs to be un-regressed + return; + } + + if (strip_unwind) { + // To enable this coverage, `std.Build.Step.ObjCopy` needs to be un-regressed and gain the + // ability to remove individual sections. `-fno-unwind-tables` is insufficient because it + // does not prevent `.debug_frame` from being emitted. If we could, we would remove the + // following sections: + // * `.eh_frame`, `.eh_frame_hdr`, `.debug_frame` (Linux) + // * `__TEXT,__eh_frame`, `__TEXT,__unwind_info` (macOS) + return; + } + + const annotated_case_name = b.fmt("check {s} ({s}{s}{s}{s}{s}{s}{s}{s})", .{ + name, + triple orelse "", + if (triple != null) " " else "", + if (use_llvm) "llvm" else "selfhosted", + if (pie == true) " pie" else "", + if (link_libc == true) " libc" else "", + if (strip_debug) " strip" else "", + if (strip_unwind) " no_unwind" else "", + if (omit_frame_pointer) " no_fp" else "", }); - for (self.test_filters) |test_filter| { - if (mem.indexOf(u8, annotated_case_name, test_filter)) |_| break; - } else if (self.test_filters.len > 0) return; + if (self.test_filters.len > 0) { + for (self.test_filters) |test_filter| { + if (mem.indexOf(u8, annotated_case_name, test_filter)) |_| break; + } else return; + } const write_files = b.addWriteFiles(); const source_zig = write_files.add("source.zig", source); @@ -77,27 +166,34 @@ fn addExpect( .name = "test", .root_module = b.createModule(.{ .root_source_file = source_zig, - .optimize = optimize_mode, - .target = b.graph.host, - .error_tracing = mode_config.error_tracing, + .optimize = .Debug, + .target = target.*, + 
.omit_frame_pointer = omit_frame_pointer, + .link_libc = link_libc, + .unwind_tables = if (strip_unwind) .none else null, + // make panics single-threaded so that they don't include a thread ID + .single_threaded = expect_panic, }), .use_llvm = use_llvm, }); + exe.pie = pie; exe.bundle_ubsan_rt = false; const run = b.addRunArtifact(exe); run.removeEnvironmentVariable("CLICOLOR_FORCE"); run.setEnvironmentVariable("NO_COLOR", "1"); - run.expectExitCode(1); + run.addCheck(.{ .expect_term = term: { + if (!expect_panic) break :term .{ .Exited = 0 }; + if (target.result.os.tag == .windows) break :term .{ .Exited = 3 }; + break :term .{ .Signal = 6 }; + } }); run.expectStdOutEqual(""); - const check_run = b.addRunArtifact(self.check_exe); + const check_run = b.addRunArtifact(self.convert_exe); check_run.setName(annotated_case_name); check_run.addFileArg(run.captureStdErr(.{})); - check_run.addArgs(&.{ - @tagName(optimize_mode), - }); - check_run.expectStdOutEqual(mode_config.expect); + check_run.expectExitCode(0); + check_run.addCheck(.{ .expect_stdout_match = expect_stderr }); self.step.dependOn(&check_run.step); } diff --git a/test/src/check-stack-trace.zig b/test/src/check-stack-trace.zig deleted file mode 100644 index 411a2ab53e6c..000000000000 --- a/test/src/check-stack-trace.zig +++ /dev/null @@ -1,88 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("std"); -const mem = std.mem; -const fs = std.fs; - -pub fn main() !void { - var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena_instance.deinit(); - const arena = arena_instance.allocator(); - - const args = try std.process.argsAlloc(arena); - - const input_path = args[1]; - const optimize_mode_text = args[2]; - - const input_bytes = try std.fs.cwd().readFileAlloc(input_path, arena, .limited(5 * 1024 * 1024)); - const optimize_mode = std.meta.stringToEnum(std.builtin.OptimizeMode, optimize_mode_text).?; - - var stderr = input_bytes; - - // process result - // - keep 
only basename of source file path - // - replace address with symbolic string - // - replace function name with symbolic string when optimize_mode != .Debug - // - skip empty lines - const got: []const u8 = got_result: { - var buf = std.array_list.Managed(u8).init(arena); - defer buf.deinit(); - if (stderr.len != 0 and stderr[stderr.len - 1] == '\n') stderr = stderr[0 .. stderr.len - 1]; - var it = mem.splitScalar(u8, stderr, '\n'); - process_lines: while (it.next()) |line| { - if (line.len == 0) continue; - - // offset search past `[drive]:` on windows - var pos: usize = if (builtin.os.tag == .windows) 2 else 0; - // locate delims/anchor - const delims = [_][]const u8{ ":", ":", ":", " in ", "(", ")" }; - var marks = [_]usize{0} ** delims.len; - for (delims, 0..) |delim, i| { - marks[i] = mem.indexOfPos(u8, line, pos, delim) orelse { - // unexpected pattern: emit raw line and cont - try buf.appendSlice(line); - try buf.appendSlice("\n"); - continue :process_lines; - }; - pos = marks[i] + delim.len; - } - // locate source basename - pos = mem.lastIndexOfScalar(u8, line[0..marks[0]], fs.path.sep) orelse { - // unexpected pattern: emit raw line and cont - try buf.appendSlice(line); - try buf.appendSlice("\n"); - continue :process_lines; - }; - // end processing if source basename changes - if (!mem.eql(u8, "source.zig", line[pos + 1 .. marks[0]])) break; - // emit substituted line - try buf.appendSlice(line[pos + 1 .. marks[2] + delims[2].len]); - try buf.appendSlice(" [address]"); - if (optimize_mode == .Debug) { - try buf.appendSlice(line[marks[3] .. marks[4] + delims[4].len]); - - const file_name = line[marks[4] + delims[4].len .. marks[5]]; - // The LLVM backend currently uses the object file name in the debug info here. - // This actually violates the DWARF specification (DWARF5 § 3.1.1, lines 24-27). - // The self-hosted backend uses the root Zig source file of the module (in compilance with the spec). 
- if (std.mem.eql(u8, file_name, "test") or - std.mem.eql(u8, file_name, "test_zcu.obj") or - std.mem.endsWith(u8, file_name, ".zig")) - { - try buf.appendSlice("[main_file]"); - } else { - // Something unexpected; include it verbatim. - try buf.appendSlice(file_name); - } - - try buf.appendSlice(line[marks[5]..]); - } else { - try buf.appendSlice(line[marks[3] .. marks[3] + delims[3].len]); - try buf.appendSlice("[function]"); - } - try buf.appendSlice("\n"); - } - break :got_result try buf.toOwnedSlice(); - }; - - try std.fs.File.stdout().writeAll(got); -} diff --git a/test/src/convert-stack-trace.zig b/test/src/convert-stack-trace.zig new file mode 100644 index 000000000000..b42d15a3e80c --- /dev/null +++ b/test/src/convert-stack-trace.zig @@ -0,0 +1,104 @@ +//! Accepts a stack trace in a file (whose path is given as argv[1]), and removes all +//! non-reproducible information from it, including addresses, module names, and file +//! paths. All module names are removed, file paths become just their basename, and +//! addresses are replaced with a fixed string. So, lines like this: +//! +//! /something/foo.zig:1:5: 0x12345678 in bar (main.o) +//! doThing(); +//! ^ +//! ???:?:?: 0x12345678 in qux (other.o) +//! ???:?:?: 0x12345678 in ??? (???) +//! +//! ...are turned into lines like this: +//! +//! foo.zig:1:5: [address] in bar +//! doThing(); +//! ^ +//! ???:?:?: [address] in qux +//! ???:?:?: [address] in ??? +//! +//! Additionally, lines reporting unwind errors are removed: +//! +//! Unwind error at address `/proc/self/exe:0x1016533` (unwind info unavailable), remaining frames may be incorrect +//! +//! With these transformations, the test harness can safely do string comparisons. 
+ +pub fn main() !void { + var arena_instance: std.heap.ArenaAllocator = .init(std.heap.page_allocator); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + const args = try std.process.argsAlloc(arena); + if (args.len != 2) std.process.fatal("usage: convert-stack-trace path/to/test/output", .{}); + + var read_buf: [1024]u8 = undefined; + var write_buf: [1024]u8 = undefined; + + const in_file = try std.fs.cwd().openFile(args[1], .{}); + defer in_file.close(); + + const out_file: std.fs.File = .stdout(); + + var in_fr = in_file.reader(&read_buf); + var out_fw = out_file.writer(&write_buf); + + const w = &out_fw.interface; + + while (in_fr.interface.takeDelimiterInclusive('\n')) |in_line| { + if (std.mem.startsWith(u8, in_line, "Unwind error at address `")) { + // Remove these lines from the output. + continue; + } + + const src_col_end = std.mem.indexOf(u8, in_line, ": 0x") orelse { + try w.writeAll(in_line); + continue; + }; + const src_row_end = std.mem.lastIndexOfScalar(u8, in_line[0..src_col_end], ':') orelse { + try w.writeAll(in_line); + continue; + }; + const src_path_end = std.mem.lastIndexOfScalar(u8, in_line[0..src_row_end], ':') orelse { + try w.writeAll(in_line); + continue; + }; + + const addr_end = std.mem.indexOfPos(u8, in_line, src_col_end, " in ") orelse { + try w.writeAll(in_line); + continue; + }; + const symbol_end = std.mem.indexOfPos(u8, in_line, addr_end, " (") orelse { + try w.writeAll(in_line); + continue; + }; + if (!std.mem.endsWith(u8, std.mem.trimEnd(u8, in_line, "\n"), ")")) { + try w.writeAll(in_line); + continue; + } + + // Where '_' is a placeholder for an arbitrary string, we now know the line looks like: + // + // _:_:_: 0x_ in _ (_) + // + // That seems good enough to assume it's a stack trace frame! We'll rewrite it to: + // + // _:_:_: [address] in _ + // + // ...with that first '_' being replaced by its basename. 
+ + const src_path = in_line[0..src_path_end]; + const basename_start = if (std.mem.lastIndexOfAny(u8, src_path, "/\\")) |i| i + 1 else 0; + const symbol_start = addr_end + " in ".len; + try w.writeAll(in_line[basename_start..src_col_end]); + try w.writeAll(": [address] in "); + try w.writeAll(in_line[symbol_start..symbol_end]); + try w.writeByte('\n'); + } else |err| switch (err) { + error.EndOfStream => {}, + else => |e| return e, + } + + try w.flush(); +} + +const std = @import("std"); diff --git a/test/stack_traces.zig b/test/stack_traces.zig index d2523daf52dd..d0f1acc08b28 100644 --- a/test/stack_traces.zig +++ b/test/stack_traces.zig @@ -1,878 +1,224 @@ -const std = @import("std"); -const os = std.os; -const tests = @import("tests.zig"); - -pub fn addCases(cases: *tests.StackTracesContext) void { +pub fn addCases(cases: *@import("tests.zig").StackTracesContext) void { cases.addCase(.{ - .name = "return", + .name = "simple panic", .source = - \\pub fn main() !void { - \\ return error.TheSkyIsFalling; + \\pub fn main() void { + \\ foo(); \\} - , - .Debug = .{ - .expect = - \\error: TheSkyIsFalling - \\source.zig:2:5: [address] in main ([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: TheSkyIsFalling - \\source.zig:2:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - }); - - cases.addCase(.{ - .name = "try return", - .source = - \\fn foo() !void { - \\ return error.TheSkyIsFalling; + \\fn foo() void { + \\ @panic("oh no"); \\} \\ - \\pub fn main() !void { - \\ try foo(); - \\} , - .Debug = .{ - .expect = - \\error: TheSkyIsFalling - \\source.zig:2:5: [address] in foo ([main_file]) - \\ return 
error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in main ([main_file]) - \\ try foo(); - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - }, - .expect = - \\error: TheSkyIsFalling - \\source.zig:2:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in [function] - \\ try foo(); - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - }); - cases.addCase(.{ - .name = "non-error return pops error trace", - .source = - \\fn bar() !void { - \\ return error.UhOh; - \\} + .unwind = .any, + .expect_panic = true, + .expect = + \\panic: oh no + \\source.zig:5:5: [address] in foo + \\ @panic("oh no"); + \\ ^ + \\source.zig:2:8: [address] in main + \\ foo(); + \\ ^ \\ - \\fn foo() !void { - \\ bar() catch { - \\ return; // non-error result: success - \\ }; - \\} + , + .expect_strip = + \\panic: oh no + \\???:?:?: [address] in source.foo + \\???:?:?: [address] in source.main \\ - \\pub fn main() !void { - \\ try foo(); - \\ return error.UnrelatedError; - \\} , - .Debug = .{ - .expect = - \\error: UnrelatedError - \\source.zig:13:5: [address] in main ([main_file]) - \\ return error.UnrelatedError; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: UnrelatedError - \\source.zig:13:5: [address] in [function] - \\ return error.UnrelatedError; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: UnrelatedError - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: UnrelatedError - \\ - , - }, }); cases.addCase(.{ - .name = "continue in while loop", + .name = "simple panic with no unwind strategy", .source = - \\fn foo() !void { - \\ return error.UhOh; + \\pub fn main() void { + \\ foo(); \\} - \\ - \\pub fn main() 
!void { - \\ var i: usize = 0; - \\ while (i < 3) : (i += 1) { - \\ foo() catch continue; - \\ } - \\ return error.UnrelatedError; + \\fn foo() void { + \\ @panic("oh no"); \\} + \\ , - .Debug = .{ - .expect = - \\error: UnrelatedError - \\source.zig:10:5: [address] in main ([main_file]) - \\ return error.UnrelatedError; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: UnrelatedError - \\source.zig:10:5: [address] in [function] - \\ return error.UnrelatedError; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: UnrelatedError - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: UnrelatedError - \\ - , - }, + .unwind = .none, + .expect_panic = true, + .expect = "panic: oh no", + .expect_strip = "panic: oh no", }); cases.addCase(.{ - .name = "try return + handled catch/if-else", + .name = "dump current trace", .source = - \\fn foo() !void { - \\ return error.TheSkyIsFalling; + \\pub fn main() void { + \\ foo(bar()); \\} - \\ - \\pub fn main() !void { - \\ foo() catch {}; // should not affect error trace - \\ if (foo()) |_| {} else |_| { - \\ // should also not affect error trace - \\ } - \\ try foo(); + \\fn bar() void { + \\ qux(123); \\} + \\fn foo(_: void) void {} + \\fn qux(x: u32) void { + \\ std.debug.dumpCurrentStackTrace(.{}); + \\ _ = x; + \\} + \\const std = @import("std"); + \\ , - .Debug = .{ - .expect = - \\error: TheSkyIsFalling - \\source.zig:2:5: [address] in foo ([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:10:5: [address] in main ([main_file]) - \\ try foo(); - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: TheSkyIsFalling - \\source.zig:2:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:10:5: [address] in [function] - \\ try foo(); - \\ ^ - 
\\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - }); - - cases.addCase(.{ - .name = "break from inline loop pops error return trace", - .source = - \\fn foo() !void { return error.FooBar; } + .unwind = .safe, + .expect_panic = false, + .expect = + \\source.zig:9:36: [address] in qux + \\ std.debug.dumpCurrentStackTrace(.{}); + \\ ^ + \\source.zig:5:8: [address] in bar + \\ qux(123); + \\ ^ + \\source.zig:2:12: [address] in main + \\ foo(bar()); + \\ ^ \\ - \\pub fn main() !void { - \\ comptime var i: usize = 0; - \\ b: inline while (i < 5) : (i += 1) { - \\ foo() catch { - \\ break :b; // non-error break, success - \\ }; - \\ } - \\ // foo() was successfully handled, should not appear in trace + , + .expect_strip = + \\???:?:?: [address] in source.qux + \\???:?:?: [address] in source.bar + \\???:?:?: [address] in source.main \\ - \\ return error.BadTime; - \\} , - .Debug = .{ - .expect = - \\error: BadTime - \\source.zig:12:5: [address] in main ([main_file]) - \\ return error.BadTime; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: BadTime - \\source.zig:12:5: [address] in [function] - \\ return error.BadTime; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: BadTime - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: BadTime - \\ - , - }, }); cases.addCase(.{ - .name = "catch and re-throw error", + .name = "dump current trace with no unwind strategy", .source = - \\fn foo() !void { - \\ return error.TheSkyIsFalling; + \\pub fn main() void { + \\ foo(bar()); \\} - \\ - \\pub fn main() !void { - \\ return foo() catch error.AndMyCarIsOutOfGas; + \\fn bar() void { + \\ qux(123); \\} - , - .Debug = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\source.zig:2:5: [address] in foo 
([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in main ([main_file]) - \\ return foo() catch error.AndMyCarIsOutOfGas; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: AndMyCarIsOutOfGas - \\source.zig:2:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in [function] - \\ return foo() catch error.AndMyCarIsOutOfGas; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\ - , - }, - }); - - cases.addCase(.{ - .name = "errors stored in var do not contribute to error trace", - .source = - \\fn foo() !void { - \\ return error.TheSkyIsFalling; + \\fn foo(_: void) void {} + \\fn qux(x: u32) void { + \\ std.debug.print("pre\n", .{}); + \\ std.debug.dumpCurrentStackTrace(.{}); + \\ std.debug.print("post\n", .{}); + \\ _ = x; \\} + \\const std = @import("std"); \\ - \\pub fn main() !void { - \\ // Once an error is stored in a variable, it is popped from the trace - \\ var x = foo(); - \\ x = {}; - \\ - \\ // As a result, this error trace will still be clean - \\ return error.SomethingUnrelatedWentWrong; - \\} , - .Debug = .{ - .expect = - \\error: SomethingUnrelatedWentWrong - \\source.zig:11:5: [address] in main ([main_file]) - \\ return error.SomethingUnrelatedWentWrong; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: SomethingUnrelatedWentWrong - \\source.zig:11:5: [address] in [function] - \\ return error.SomethingUnrelatedWentWrong; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: SomethingUnrelatedWentWrong - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: SomethingUnrelatedWentWrong - \\ - , - 
}, + .unwind = .no_safe, + .expect_panic = false, + .expect = "pre\npost\n", + .expect_strip = "pre\npost\n", }); cases.addCase(.{ - .name = "error stored in const has trace preserved for duration of block", + .name = "dump captured trace", .source = - \\fn foo() !void { return error.TheSkyIsFalling; } - \\fn bar() !void { return error.InternalError; } - \\fn baz() !void { return error.UnexpectedReality; } - \\ - \\pub fn main() !void { - \\ const x = foo(); - \\ const y = b: { - \\ if (true) - \\ break :b bar(); - \\ - \\ break :b {}; - \\ }; - \\ x catch {}; - \\ y catch {}; - \\ // foo()/bar() error traces not popped until end of block - \\ - \\ { - \\ const z = baz(); - \\ z catch {}; - \\ // baz() error trace still alive here - \\ } - \\ // baz() error trace popped, foo(), bar() still alive - \\ return error.StillUnresolved; + \\pub fn main() void { + \\ var stack_trace_buf: [8]usize = undefined; + \\ dumpIt(&captureIt(&stack_trace_buf)); \\} - , - .Debug = .{ - .expect = - \\error: StillUnresolved - \\source.zig:1:18: [address] in foo ([main_file]) - \\fn foo() !void { return error.TheSkyIsFalling; } - \\ ^ - \\source.zig:2:18: [address] in bar ([main_file]) - \\fn bar() !void { return error.InternalError; } - \\ ^ - \\source.zig:23:5: [address] in main ([main_file]) - \\ return error.StillUnresolved; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - .linux, // defeated by aggressive inlining - }, - .expect = - \\error: StillUnresolved - \\source.zig:1:18: [address] in [function] - \\fn foo() !void { return error.TheSkyIsFalling; } - \\ ^ - \\source.zig:2:18: [address] in [function] - \\fn bar() !void { return error.InternalError; } - \\ ^ - \\source.zig:23:5: [address] in [function] - \\ return error.StillUnresolved; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: StillUnresolved - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: StillUnresolved - \\ - , - }, - }); - - 
cases.addCase(.{ - .name = "error passed to function has its trace preserved for duration of the call", - .source = - \\pub fn expectError(expected_error: anyerror, actual_error: anyerror!void) !void { - \\ actual_error catch |err| { - \\ if (err == expected_error) return {}; - \\ }; - \\ return error.TestExpectedError; + \\fn captureIt(buf: []usize) std.builtin.StackTrace { + \\ return captureItInner(buf); \\} + \\fn dumpIt(st: *const std.builtin.StackTrace) void { + \\ std.debug.dumpStackTrace(st); + \\} + \\fn captureItInner(buf: []usize) std.builtin.StackTrace { + \\ return std.debug.captureCurrentStackTrace(.{}, buf); + \\} + \\const std = @import("std"); \\ - \\fn alwaysErrors() !void { return error.ThisErrorShouldNotAppearInAnyTrace; } - \\fn foo() !void { return error.Foo; } + , + .unwind = .safe, + .expect_panic = false, + .expect = + \\source.zig:12:46: [address] in captureItInner + \\ return std.debug.captureCurrentStackTrace(.{}, buf); + \\ ^ + \\source.zig:6:26: [address] in captureIt + \\ return captureItInner(buf); + \\ ^ + \\source.zig:3:22: [address] in main + \\ dumpIt(&captureIt(&stack_trace_buf)); + \\ ^ \\ - \\pub fn main() !void { - \\ try expectError(error.ThisErrorShouldNotAppearInAnyTrace, alwaysErrors()); - \\ try expectError(error.ThisErrorShouldNotAppearInAnyTrace, alwaysErrors()); - \\ try expectError(error.Foo, foo()); + , + .expect_strip = + \\???:?:?: [address] in source.captureItInner + \\???:?:?: [address] in source.captureIt + \\???:?:?: [address] in source.main \\ - \\ // Only the error trace for this failing check should appear: - \\ try expectError(error.Bar, foo()); - \\} , - .Debug = .{ - .expect = - \\error: TestExpectedError - \\source.zig:9:18: [address] in foo ([main_file]) - \\fn foo() !void { return error.Foo; } - \\ ^ - \\source.zig:5:5: [address] in expectError ([main_file]) - \\ return error.TestExpectedError; - \\ ^ - \\source.zig:17:5: [address] in main ([main_file]) - \\ try expectError(error.Bar, foo()); - \\ ^ - 
\\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - }, - .expect = - \\error: TestExpectedError - \\source.zig:9:18: [address] in [function] - \\fn foo() !void { return error.Foo; } - \\ ^ - \\source.zig:5:5: [address] in [function] - \\ return error.TestExpectedError; - \\ ^ - \\source.zig:17:5: [address] in [function] - \\ try expectError(error.Bar, foo()); - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: TestExpectedError - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: TestExpectedError - \\ - , - }, }); cases.addCase(.{ - .name = "try return from within catch", + .name = "dump captured trace with no unwind strategy", .source = - \\fn foo() !void { - \\ return error.TheSkyIsFalling; + \\pub fn main() void { + \\ var stack_trace_buf: [8]usize = undefined; + \\ dumpIt(&captureIt(&stack_trace_buf)); \\} - \\ - \\fn bar() !void { - \\ return error.AndMyCarIsOutOfGas; + \\fn captureIt(buf: []usize) std.builtin.StackTrace { + \\ return captureItInner(buf); \\} - \\ - \\pub fn main() !void { - \\ foo() catch { // error trace should include foo() - \\ try bar(); - \\ }; + \\fn dumpIt(st: *const std.builtin.StackTrace) void { + \\ std.debug.dumpStackTrace(st); \\} - , - .Debug = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\source.zig:2:5: [address] in foo ([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in bar ([main_file]) - \\ return error.AndMyCarIsOutOfGas; - \\ ^ - \\source.zig:11:9: [address] in main ([main_file]) - \\ try bar(); - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - }, - .expect = - \\error: AndMyCarIsOutOfGas - \\source.zig:2:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in [function] - \\ return error.AndMyCarIsOutOfGas; - \\ ^ - \\source.zig:11:9: [address] in [function] - \\ try bar(); - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - 
.expect = - \\error: AndMyCarIsOutOfGas - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\ - , - }, - }); - - cases.addCase(.{ - .name = "try return from within if-else", - .source = - \\fn foo() !void { - \\ return error.TheSkyIsFalling; - \\} - \\ - \\fn bar() !void { - \\ return error.AndMyCarIsOutOfGas; + \\fn captureItInner(buf: []usize) std.builtin.StackTrace { + \\ return std.debug.captureCurrentStackTrace(.{}, buf); \\} + \\const std = @import("std"); \\ - \\pub fn main() !void { - \\ if (foo()) |_| {} else |_| { // error trace should include foo() - \\ try bar(); - \\ } - \\} , - .Debug = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\source.zig:2:5: [address] in foo ([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in bar ([main_file]) - \\ return error.AndMyCarIsOutOfGas; - \\ ^ - \\source.zig:11:9: [address] in main ([main_file]) - \\ try bar(); - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - }, - .expect = - \\error: AndMyCarIsOutOfGas - \\source.zig:2:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in [function] - \\ return error.AndMyCarIsOutOfGas; - \\ ^ - \\source.zig:11:9: [address] in [function] - \\ try bar(); - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: AndMyCarIsOutOfGas - \\ - , - }, + .unwind = .no_safe, + .expect_panic = false, + .expect = "(empty stack trace)\n", + .expect_strip = "(empty stack trace)\n", }); cases.addCase(.{ - .name = "try try return return", + .name = "dump captured trace on thread", .source = - \\fn foo() !void { - \\ try bar(); + \\pub fn main() !void { + \\ var stack_trace_buf: [8]usize = undefined; + \\ const t = try std.Thread.spawn(.{}, threadMain, .{&stack_trace_buf}); + \\ t.join(); \\} - \\ - \\fn bar() !void { - \\ return make_error(); + 
\\fn threadMain(stack_trace_buf: []usize) void { + \\ dumpIt(&captureIt(stack_trace_buf)); \\} - \\ - \\fn make_error() !void { - \\ return error.TheSkyIsFalling; + \\fn captureIt(buf: []usize) std.builtin.StackTrace { + \\ return captureItInner(buf); \\} - \\ - \\pub fn main() !void { - \\ try foo(); + \\fn dumpIt(st: *const std.builtin.StackTrace) void { + \\ std.debug.dumpStackTrace(st); + \\} + \\fn captureItInner(buf: []usize) std.builtin.StackTrace { + \\ return std.debug.captureCurrentStackTrace(.{}, buf); \\} - , - .Debug = .{ - .expect = - \\error: TheSkyIsFalling - \\source.zig:10:5: [address] in make_error ([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in bar ([main_file]) - \\ return make_error(); - \\ ^ - \\source.zig:2:5: [address] in foo ([main_file]) - \\ try bar(); - \\ ^ - \\source.zig:14:5: [address] in main ([main_file]) - \\ try foo(); - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .windows, // TODO - }, - .expect = - \\error: TheSkyIsFalling - \\source.zig:10:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\source.zig:6:5: [address] in [function] - \\ return make_error(); - \\ ^ - \\source.zig:2:5: [address] in [function] - \\ try bar(); - \\ ^ - \\source.zig:14:5: [address] in [function] - \\ try foo(); - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - }); - - cases.addCase(.{ - .name = "dumpCurrentStackTrace", - .source = \\const std = @import("std"); \\ - \\fn bar() void { - \\ std.debug.dumpCurrentStackTrace(@returnAddress()); - \\} - \\fn foo() void { - \\ bar(); - \\} - \\pub fn main() u8 { - \\ foo(); - \\ return 1; - \\} , - .Debug = .{ - // std.debug.sys_can_stack_trace - .exclude_arch = &.{ - .loongarch32, - .loongarch64, - .mips, - .mipsel, - .mips64, - .mips64el, - .s390x, - }, - .exclude_os = &.{ - .freebsd, - 
.openbsd, // integer overflow - .windows, // TODO intermittent failures - }, - .expect = - \\source.zig:7:8: [address] in foo ([main_file]) - \\ bar(); - \\ ^ - \\source.zig:10:8: [address] in main ([main_file]) - \\ foo(); - \\ ^ - \\ - , - }, - }); - cases.addCase(.{ - .name = "error union switch with call operand", - .source = - \\pub fn main() !void { - \\ try foo(); - \\ return error.TheSkyIsFalling; - \\} + .unwind = .safe, + .expect_panic = false, + .expect = + \\source.zig:16:46: [address] in captureItInner + \\ return std.debug.captureCurrentStackTrace(.{}, buf); + \\ ^ + \\source.zig:10:26: [address] in captureIt + \\ return captureItInner(buf); + \\ ^ + \\source.zig:7:22: [address] in threadMain + \\ dumpIt(&captureIt(stack_trace_buf)); + \\ ^ \\ - \\noinline fn failure() error{ Fatal, NonFatal }!void { - \\ return error.NonFatal; - \\} + , + .expect_strip = + \\???:?:?: [address] in source.captureItInner + \\???:?:?: [address] in source.captureIt + \\???:?:?: [address] in source.threadMain \\ - \\fn foo() error{Fatal}!void { - \\ return failure() catch |err| switch (err) { - \\ error.Fatal => return error.Fatal, - \\ error.NonFatal => return, - \\ }; - \\} , - .Debug = .{ - .expect = - \\error: TheSkyIsFalling - \\source.zig:3:5: [address] in main ([main_file]) - \\ return error.TheSkyIsFalling; - \\ ^ - \\ - , - }, - .ReleaseSafe = .{ - .exclude_os = &.{ - .freebsd, - .windows, // TODO - .linux, // defeated by aggressive inlining - .macos, // Broken in LLVM 20. 
- }, - .expect = - \\error: TheSkyIsFalling - \\source.zig:3:5: [address] in [function] - \\ return error.TheSkyIsFalling; - \\ ^ - \\ - , - .error_tracing = true, - }, - .ReleaseFast = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, - .ReleaseSmall = .{ - .expect = - \\error: TheSkyIsFalling - \\ - , - }, }); } diff --git a/test/tests.zig b/test/tests.zig index 318e39eb5ce0..b327e94a8f3b 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -6,11 +6,13 @@ const OptimizeMode = std.builtin.OptimizeMode; const Step = std.Build.Step; // Cases +const error_traces = @import("error_traces.zig"); const stack_traces = @import("stack_traces.zig"); const llvm_ir = @import("llvm_ir.zig"); const libc = @import("libc.zig"); // Implementations +pub const ErrorTracesContext = @import("src/ErrorTrace.zig"); pub const StackTracesContext = @import("src/StackTrace.zig"); pub const DebuggerContext = @import("src/Debugger.zig"); pub const LlvmIrContext = @import("src/LlvmIr.zig"); @@ -1857,28 +1859,53 @@ const c_abi_targets = blk: { }; }; +/// For stack trace tests, we only test native, because external executors are pretty unreliable at +/// stack tracing. However, if there's a 32-bit equivalent target which the host can trivially run, +/// we may as well at least test that! 
+fn nativeAndCompatible32bit(b: *std.Build, skip_non_native: bool) []const std.Build.ResolvedTarget { + const host = b.graph.host.result; + const only_native = (&b.graph.host)[0..1]; + if (skip_non_native) return only_native; + const arch32: std.Target.Cpu.Arch = switch (host.cpu.arch) { + .x86_64 => .x86, + .aarch64 => .arm, + .aarch64_be => .armeb, + else => return only_native, + }; + switch (host.os.tag) { + .windows => if (arch32.isArm()) return only_native, + .macos, .freebsd => if (arch32 == .x86) return only_native, + .linux, .netbsd => {}, + else => return only_native, + } + return b.graph.arena.dupe(std.Build.ResolvedTarget, &.{ + b.graph.host, + b.resolveTargetQuery(.{ .cpu_arch = arch32, .os_tag = host.os.tag }), + }) catch @panic("OOM"); +} + pub fn addStackTraceTests( b: *std.Build, test_filters: []const []const u8, - optimize_modes: []const OptimizeMode, + skip_non_native: bool, ) *Step { - const check_exe = b.addExecutable(.{ - .name = "check-stack-trace", + const convert_exe = b.addExecutable(.{ + .name = "convert-stack-trace", .root_module = b.createModule(.{ - .root_source_file = b.path("test/src/check-stack-trace.zig"), + .root_source_file = b.path("test/src/convert-stack-trace.zig"), .target = b.graph.host, .optimize = .Debug, }), }); const cases = b.allocator.create(StackTracesContext) catch @panic("OOM"); + cases.* = .{ .b = b, .step = b.step("test-stack-traces", "Run the stack trace tests"), - .test_index = 0, .test_filters = test_filters, - .optimize_modes = optimize_modes, - .check_exe = check_exe, + .targets = nativeAndCompatible32bit(b, skip_non_native), + .convert_exe = convert_exe, }; stack_traces.addCases(cases); @@ -1886,6 +1913,36 @@ pub fn addStackTraceTests( return cases.step; } +pub fn addErrorTraceTests( + b: *std.Build, + test_filters: []const []const u8, + optimize_modes: []const OptimizeMode, + skip_non_native: bool, +) *Step { + const convert_exe = b.addExecutable(.{ + .name = "convert-stack-trace", + .root_module = 
b.createModule(.{ + .root_source_file = b.path("test/src/convert-stack-trace.zig"), + .target = b.graph.host, + .optimize = .Debug, + }), + }); + + const cases = b.allocator.create(ErrorTracesContext) catch @panic("OOM"); + cases.* = .{ + .b = b, + .step = b.step("test-error-traces", "Run the error trace tests"), + .test_filters = test_filters, + .targets = nativeAndCompatible32bit(b, skip_non_native), + .optimize_modes = optimize_modes, + .convert_exe = convert_exe, + }; + + error_traces.addCases(cases); + + return cases.step; +} + fn compilerHasPackageManager(b: *std.Build) bool { // We can only use dependencies if the compiler was built with support for package management. // (zig2 doesn't support it, but we still need to construct a build graph to build stage3.) From e6eccc3c8f075b6feed6e089564fd6495758dd9d Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 12 Sep 2025 03:09:37 +0100 Subject: [PATCH 51/85] SelfInfo: remove x86-windows unwinding path Turns out that RtlCaptureStackBackTrace is actually just doing FP (ebp) unwinding under the hood, making this logic completely redundant with our own FP-walking implementation; see added comment for details. --- lib/std/debug/SelfInfo/WindowsModule.zig | 120 +++++++++-------------- 1 file changed, 46 insertions(+), 74 deletions(-) diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index d1fd070ec738..4bbc220c5ba0 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -264,86 +264,58 @@ pub const DebugInfo = struct { } }; -pub const supports_unwinding: bool = true; -pub const UnwindContext = switch (builtin.cpu.arch) { - .x86 => struct { - pc: usize, - frames: []usize, - frames_capacity: usize, - next_index: usize, - /// Marked `noinline` to ensure that `RtlCaptureStackBackTrace` includes our caller. 
- pub noinline fn init(ctx: *windows.CONTEXT, gpa: Allocator) Allocator.Error!UnwindContext { - const frames_buf = try gpa.alloc(usize, 1024); - errdefer comptime unreachable; - const frames_len = windows.ntdll.RtlCaptureStackBackTrace(0, frames_buf.len, @ptrCast(frames_buf.ptr), null); - const regs = ctx.getRegs(); - const first_index = for (frames_buf[0..frames_len], 0..) |ret_addr, idx| { - if (ret_addr == regs.ip) break idx; - } else i: { - // If we were called by an exception handler, `regs.ip` wasn't in the trace because - // RtlCaptureStackBackTrace omits the KiUserExceptionDispatcher frame, which is the - // one in `regs.ip`. In that case, we have to start one frame shallower instead, and - // we can figure out that frame's ip from the context's bp. - const start_addr_ptr: *const usize = @ptrFromInt(regs.bp + 4); - const start_addr = start_addr_ptr.*; - for (frames_buf[0..frames_len], 0..) |ret_addr, idx| { - if (ret_addr == start_addr) break :i idx; - } - // The IP in the context can't be found; return an empty trace. 
- gpa.free(frames_buf);
- return .{ .pc = 0, .frames = &.{}, .frames_capacity = 0, .next_index = 0 };
- };
- return .{
- .pc = @returnAddress(),
- .frames = frames_buf[0..frames_len],
- .frames_capacity = 0,
- .next_index = first_index,
- };
- }
- pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void {
- gpa.free(ctx.frames.ptr[0..ctx.frames_capacity]);
- ctx.* = undefined;
- }
- pub fn getFp(ctx: *UnwindContext) usize {
- _ = ctx;
- return 0;
- }
- },
- else => struct {
- pc: usize,
- cur: windows.CONTEXT,
- history_table: windows.UNWIND_HISTORY_TABLE,
- pub fn init(ctx: *const windows.CONTEXT, gpa: Allocator) Allocator.Error!UnwindContext {
- _ = gpa;
- return .{
- .pc = @returnAddress(),
- .cur = ctx.*,
- .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE),
- };
- }
- pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void {
- _ = ctx;
- _ = gpa;
- }
- pub fn getFp(ctx: *UnwindContext) usize {
- return ctx.cur.getRegs().bp;
- }
- },
+pub const supports_unwinding: bool = switch (builtin.cpu.arch) {
+ else => true,
+ // On x86, `RtlVirtualUnwind` does not exist. We could in theory use `RtlCaptureStackBackTrace`
+ // instead, but on x86, it turns out that function is just... doing FP unwinding with ebp!
It's + // hard to find implementation details to confirm that, but the most authoritative source I have + // is an entry in the LLVM mailing list from 2020/08/16 which contains this quote: + // + // > x86 doesn't have what most architectures would consider an "unwinder" in the sense of + // > restoring registers; there is simply a linked list of frames that participate in SEH and + // > that desire to be called for a dynamic unwind operation, so RtlCaptureStackBackTrace + // > assumes that EBP-based frames are in use and walks an EBP-based frame chain on x86 - not + // > all x86 code is written with EBP-based frames so while even though we generally build the + // > OS that way, you might always run the risk of encountering external code that uses EBP as a + // > general purpose register for which such an unwind attempt for a stack trace would fail. + // + // Regardless, it's easy to effectively confirm this hypothesis just by compiling some code with + // `-fomit-frame-pointer -OReleaseFast` and observing that `RtlCaptureStackBackTrace` returns an + // empty trace when it's called in such an application. Note that without `-OReleaseFast` or + // similar, LLVM seems reluctant to ever clobber ebp, so you'll get a trace returned which just + // contains all of the kernel32/ntdll frames but none of your own. Don't be deceived---this is + // just coincidental! + // + // Anyway, the point is, the only stack walking primitive on x86-windows is FP unwinding. We + // *could* ask Microsoft to do that for us with `RtlCaptureStackBackTrace`... but better to just + // use our existing FP unwinder in `std.debug`! 
+ .x86 => false, +}; +pub const UnwindContext = struct { + pc: usize, + cur: windows.CONTEXT, + history_table: windows.UNWIND_HISTORY_TABLE, + pub fn init(ctx: *const windows.CONTEXT, gpa: Allocator) Allocator.Error!UnwindContext { + _ = gpa; + return .{ + .pc = @returnAddress(), + .cur = ctx.*, + .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), + }; + } + pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void { + _ = ctx; + _ = gpa; + } + pub fn getFp(ctx: *UnwindContext) usize { + return ctx.cur.getRegs().bp; + } }; pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { _ = module; _ = gpa; _ = di; - if (builtin.cpu.arch == .x86) { - const i = context.next_index; - if (i == context.frames.len) return 0; - context.next_index += 1; - const ip = context.frames[i]; - context.pc = ip -| 1; - return ip; - } - const current_regs = context.cur.getRegs(); var image_base: windows.DWORD64 = undefined; if (windows.ntdll.RtlLookupFunctionEntry(current_regs.ip, &image_base, &context.history_table)) |runtime_function| { From cf13b40946d5fbac1eb8963418a19b12a69023e8 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 12 Sep 2025 03:15:12 +0100 Subject: [PATCH 52/85] test-stack-traces: don't try to strip unwind tables on x86-windows --- test/src/StackTrace.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/src/StackTrace.zig b/test/src/StackTrace.zig index e200957fbefe..5735d89adee2 100644 --- a/test/src/StackTrace.zig +++ b/test/src/StackTrace.zig @@ -62,6 +62,8 @@ fn addCaseTarget( // On aarch64-macos, FP unwinding is blessed by Apple to always be reliable, and std.debug knows this. const fp_unwind_is_safe = target.result.cpu.arch == .aarch64 and target.result.os.tag.isDarwin(); + // On x86-windows, only FP unwinding is available. 
+ const supports_unwind_tables = target.result.os.tag != .windows or target.result.cpu.arch != .x86; const use_llvm_vals: []const bool = if (both_backends) &.{ true, false } else &.{true}; const pie_vals: []const ?bool = if (both_pie) &.{ true, false } else &.{null}; @@ -88,6 +90,7 @@ fn addCaseTarget( for (link_libc_vals) |link_libc| { for (strip_debug_vals) |strip_debug| { for (unwind_info_vals) |unwind_info| { + if (unwind_info.tables and !supports_unwind_tables) continue; self.addCaseInstance( target, triple, @@ -97,7 +100,7 @@ fn addCaseTarget( pie, link_libc, strip_debug, - !unwind_info.tables, + !unwind_info.tables and supports_unwind_tables, !unwind_info.fp, config.expect_panic, if (strip_debug) config.expect_strip else config.expect, From 344ab62b3fa4fc286b76a51ac47f0b8363339bee Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 12 Sep 2025 03:24:24 +0100 Subject: [PATCH 53/85] std.debug: don't attempt SelfInfo unwinding when unsupported --- lib/std/debug.zig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 56769826b1e8..1fdf4f249585 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -678,6 +678,12 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ tty_config.setColor(writer, .reset) catch {}; return; }, + error.CannotUnwindFromContext => { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: context unwind unavailable for target\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + }, }; defer it.deinit(); if (!it.stratOk(options.allow_unsafe_unwind)) { @@ -787,7 +793,7 @@ const StackIterator = union(enum) { /// It is important that this function is marked `inline` so that it can safely use /// `@frameAddress` and `getContext` as the caller's stack frame and our own are one /// and the same. 
- inline fn init(context_opt: ?ThreadContextPtr, context_buf: *ThreadContextBuf) error{OutOfMemory}!StackIterator {
+ inline fn init(context_opt: ?ThreadContextPtr, context_buf: *ThreadContextBuf) error{ OutOfMemory, CannotUnwindFromContext }!StackIterator {
 if (builtin.cpu.arch.isSPARC()) {
 // Flush all the register windows on stack.
 if (builtin.cpu.has(.sparc, .v9)) {
@@ -797,11 +803,12 @@ const StackIterator = union(enum) {
 }
 }
 if (context_opt) |context| {
+ if (!SelfInfo.supports_unwinding) return error.CannotUnwindFromContext;
 context_buf.* = context.*;
 relocateContext(context_buf);
 return .{ .di = try .init(context_buf, getDebugInfoAllocator()) };
 }
- if (getContext(context_buf)) {
+ if (SelfInfo.supports_unwinding and getContext(context_buf)) {
 return .{ .di = try .init(context_buf, getDebugInfoAllocator()) };
 }
 return .{ .fp = @frameAddress() };

From 5f0073896977fe4a177b4b7817ce2b59160ab29d Mon Sep 17 00:00:00 2001
From: mlugg
Date: Fri, 12 Sep 2025 03:28:19 +0100
Subject: [PATCH 54/85] test-stack-traces: fix on x86-windows

---
 test/src/convert-stack-trace.zig | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/src/convert-stack-trace.zig b/test/src/convert-stack-trace.zig
index b42d15a3e80c..c7cd01a46002 100644
--- a/test/src/convert-stack-trace.zig
+++ b/test/src/convert-stack-trace.zig
@@ -20,6 +20,7 @@
 //! Additionally, lines reporting unwind errors are removed:
 //!
 //! Unwind error at address `/proc/self/exe:0x1016533` (unwind info unavailable), remaining frames may be incorrect
+//! Cannot print stack trace: safe unwind unavailable for target
 //!
 //! With these transformations, the test harness can safely do string comparisons.
@@ -45,7 +46,9 @@ pub fn main() !void { const w = &out_fw.interface; while (in_fr.interface.takeDelimiterInclusive('\n')) |in_line| { - if (std.mem.startsWith(u8, in_line, "Unwind error at address `")) { + if (std.mem.eql(u8, in_line, "Cannot print stack trace: safe unwind unavailable for target\n") or + std.mem.startsWith(u8, in_line, "Unwind error at address `")) + { // Remove these lines from the output. continue; } From 51d08f4b9b051d66534b77462a3bfb9bace9f1fb Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 10:29:20 +0100 Subject: [PATCH 55/85] fix compile errors and minor bugs --- lib/compiler/test_runner.zig | 8 ++++---- lib/std/debug.zig | 26 ++++++++++++++++---------- lib/std/debug/SelfInfo.zig | 6 +++--- lib/std/start.zig | 6 ++++-- lib/std/testing/FailingAllocator.zig | 2 +- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 5e7bbd294c64..42c708cbfc7f 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -140,7 +140,7 @@ fn mainServer() !void { else => { fail = true; if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace.*); + std.debug.dumpStackTrace(trace); } }, }; @@ -182,7 +182,7 @@ fn mainServer() !void { error.SkipZigTest => return, else => { if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace.*); + std.debug.dumpStackTrace(trace); } std.debug.print("failed with error.{t}\n", .{err}); std.process.exit(1); @@ -261,7 +261,7 @@ fn mainTerminal() void { std.debug.print("FAIL ({t})\n", .{err}); } if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace.*); + std.debug.dumpStackTrace(trace); } test_node.end(); }, @@ -398,7 +398,7 @@ pub fn fuzz( error.SkipZigTest => return, else => { std.debug.lockStdErr(); - if (@errorReturnTrace()) |trace| std.debug.dumpStackTrace(trace.*); + if (@errorReturnTrace()) |trace| std.debug.dumpStackTrace(trace); std.debug.print("failed with error.{t}\n", .{err}); 
std.process.exit(1); }, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 1fdf4f249585..a94b61901613 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -754,6 +754,10 @@ pub fn dumpCurrentStackTrace(options: StackUnwindOptions) void { /// Write a previously captured stack trace to `writer`, annotated with source locations. pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_config: tty.Config) Writer.Error!void { + // Fetch `st.index` straight away. Aside from avoiding redundant loads, this prevents issues if + // `st` is `@errorReturnTrace()` and errors are encountered while writing the stack trace. + const n_frames = st.index; + if (n_frames == 0) return writer.writeAll("(empty stack trace)\n"); const di_gpa = getDebugInfoAllocator(); const di = getSelfDebugInfo() catch |err| switch (err) { error.UnsupportedTarget => { @@ -763,14 +767,13 @@ pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_c return; }, }; - if (st.index == 0) return writer.writeAll("(empty stack trace)\n"); - const captured_frames = @min(st.index, st.instruction_addresses.len); + const captured_frames = @min(n_frames, st.instruction_addresses.len); for (st.instruction_addresses[0..captured_frames]) |return_address| { try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); } - if (st.index > captured_frames) { + if (n_frames > captured_frames) { tty_config.setColor(writer, .bold) catch {}; - try writer.print("({d} additional stack frames skipped...)\n", .{st.index - captured_frames}); + try writer.print("({d} additional stack frames skipped...)\n", .{n_frames - captured_frames}); tty_config.setColor(writer, .reset) catch {}; } } @@ -853,7 +856,7 @@ const StackIterator = union(enum) { const di = getSelfDebugInfo() catch unreachable; const di_gpa = getDebugInfoAllocator(); if (di.unwindFrame(di_gpa, unwind_context)) |ra| { - if (ra == 0) return .end; + if (ra <= 1) return .end; return .{ .frame = ra }; } 
else |err| { const pc = unwind_context.pc; @@ -888,7 +891,9 @@ const StackIterator = union(enum) { if (bp != 0 and bp <= fp) return .end; it.fp = bp; - return .{ .frame = ra_ptr.* }; + const ra = ra_ptr.*; + if (ra <= 1) return .end; + return .{ .frame = ra }; }, } } @@ -1409,11 +1414,12 @@ test "manage resources correctly" { return @returnAddress(); } }; - var discarding: std.io.Writer.Discarding = .init(&.{}); - var di: SelfInfo = try .open(testing.allocator); - defer di.deinit(); + const gpa = std.testing.allocator; + var discarding: std.Io.Writer.Discarding = .init(&.{}); + var di: SelfInfo = .init; + defer di.deinit(gpa); try printSourceAtAddress( - testing.allocator, + gpa, &di, &discarding.writer, S.showMyTrace(), diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 38027dbb5882..c3243edeb92d 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -18,8 +18,8 @@ const root = @import("root"); const SelfInfo = @This(); -modules: std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo), -lookup_cache: Module.LookupCache, +modules: if (target_supported) std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo) else void, +lookup_cache: if (target_supported) Module.LookupCache else void, pub const Error = error{ /// The required debug info is invalid or corrupted. @@ -40,7 +40,7 @@ pub const target_supported: bool = Module != void; /// Indicates whether the `SelfInfo` implementation has support for unwinding on this target. /// /// For whether DWARF unwinding is *theoretically* possible, see `Dwarf.abi.supportsUnwinding`. 
-pub const supports_unwinding: bool = Module.supports_unwinding; +pub const supports_unwinding: bool = target_supported and Module.supports_unwinding; pub const UnwindContext = if (supports_unwinding) Module.UnwindContext; diff --git a/lib/std/start.zig b/lib/std/start.zig index 0ea5c44c2b5f..13ce744a2ac7 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -635,8 +635,10 @@ pub inline fn callMain() u8 { else => {}, } std.log.err("{s}", .{@errorName(err)}); - if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace); + if (native_os != .freestanding) { + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace); + } } return 1; }; diff --git a/lib/std/testing/FailingAllocator.zig b/lib/std/testing/FailingAllocator.zig index 916dbc665528..6476725a2f70 100644 --- a/lib/std/testing/FailingAllocator.zig +++ b/lib/std/testing/FailingAllocator.zig @@ -64,7 +64,7 @@ fn alloc( const self: *FailingAllocator = @ptrCast(@alignCast(ctx)); if (self.alloc_index == self.fail_index) { if (!self.has_induced_failure) { - const st = std.debug.captureCurrentStackTrace(return_address, &self.stack_addresses); + const st = std.debug.captureCurrentStackTrace(.{ .first_address = return_address }, &self.stack_addresses); @memset(self.stack_addresses[@min(st.index, self.stack_addresses.len)..], 0); self.has_induced_failure = true; } From 4cb84f8e486426e66e17cc91dd880d9bdcabc680 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 10:29:36 +0100 Subject: [PATCH 56/85] test-standalone: update for std.debug changes --- test/standalone/coff_dwarf/build.zig | 13 ++-- test/standalone/coff_dwarf/main.zig | 39 ++++++----- test/standalone/stack_iterator/unwind.zig | 60 ++++++++-------- .../stack_iterator/unwind_freestanding.zig | 68 ++++++++++--------- 4 files changed, 96 insertions(+), 84 deletions(-) diff --git a/test/standalone/coff_dwarf/build.zig b/test/standalone/coff_dwarf/build.zig index d1433195473e..cd3a53ec8c30 100644 --- 
a/test/standalone/coff_dwarf/build.zig +++ b/test/standalone/coff_dwarf/build.zig @@ -1,9 +1,10 @@ const std = @import("std"); -const builtin = @import("builtin"); /// This tests the path where DWARF information is embedded in a COFF binary pub fn build(b: *std.Build) void { - switch (builtin.cpu.arch) { + const host = b.graph.host; + + switch (host.result.cpu.arch) { .aarch64, .x86, .x86_64, @@ -15,10 +16,10 @@ pub fn build(b: *std.Build) void { b.default_step = test_step; const optimize: std.builtin.OptimizeMode = .Debug; - const target = if (builtin.os.tag == .windows) - b.standardTargetOptions(.{}) - else - b.resolveTargetQuery(.{ .os_tag = .windows }); + const target = switch (host.result.os.tag) { + .windows => host, + else => b.resolveTargetQuery(.{ .os_tag = .windows }), + }; const exe = b.addExecutable(.{ .name = "main", diff --git a/test/standalone/coff_dwarf/main.zig b/test/standalone/coff_dwarf/main.zig index ce74876f08ff..411dbd913b6f 100644 --- a/test/standalone/coff_dwarf/main.zig +++ b/test/standalone/coff_dwarf/main.zig @@ -1,27 +1,34 @@ const std = @import("std"); -const assert = std.debug.assert; -const testing = std.testing; +const fatal = std.process.fatal; extern fn add(a: u32, b: u32, addr: *usize) u32; -pub fn main() !void { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - defer assert(gpa.deinit() == .ok); - const allocator = gpa.allocator(); +pub fn main() void { + var debug_alloc_inst: std.heap.DebugAllocator(.{}) = .init; + defer std.debug.assert(debug_alloc_inst.deinit() == .ok); + const gpa = debug_alloc_inst.allocator(); - var debug_info = try std.debug.SelfInfo.open(allocator); - defer debug_info.deinit(); + var di: std.debug.SelfInfo = .init; + defer di.deinit(gpa); var add_addr: usize = undefined; _ = add(1, 2, &add_addr); - const module = try debug_info.getModuleForAddress(add_addr); - const symbol = try module.getSymbolAtAddress(allocator, add_addr); - defer if (symbol.source_location) |sl| 
allocator.free(sl.file_name); + const symbol = di.getSymbolAtAddress(gpa, add_addr) catch |err| fatal("failed to get symbol: {t}", .{err}); + defer if (symbol.source_location) |sl| gpa.free(sl.file_name); - try testing.expectEqualStrings("add", symbol.name); - try testing.expect(symbol.source_location != null); - try testing.expectEqualStrings("shared_lib.c", std.fs.path.basename(symbol.source_location.?.file_name)); - try testing.expectEqual(@as(u64, 3), symbol.source_location.?.line); - try testing.expectEqual(@as(u64, 0), symbol.source_location.?.column); + if (symbol.name == null) fatal("failed to resolve symbol name", .{}); + if (symbol.compile_unit_name == null) fatal("failed to resolve compile unit", .{}); + if (symbol.source_location == null) fatal("failed to resolve source location", .{}); + + if (!std.mem.eql(u8, symbol.name.?, "add")) { + fatal("incorrect symbol name '{s}'", .{symbol.name.?}); + } + const sl = &symbol.source_location.?; + if (!std.mem.eql(u8, std.fs.path.basename(sl.file_name), "shared_lib.c")) { + fatal("incorrect file name '{s}'", .{sl.file_name}); + } + if (sl.line != 3 or sl.column != 0) { + fatal("incorrect line/column :{d}:{d}", .{ sl.line, sl.column }); + } } diff --git a/test/standalone/stack_iterator/unwind.zig b/test/standalone/stack_iterator/unwind.zig index 8e06e832cbd5..d7df31830056 100644 --- a/test/standalone/stack_iterator/unwind.zig +++ b/test/standalone/stack_iterator/unwind.zig @@ -1,27 +1,19 @@ const std = @import("std"); const builtin = @import("builtin"); -const debug = std.debug; -const testing = std.testing; +const fatal = std.process.fatal; -noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[0] = @returnAddress(); - - var context: debug.ThreadContext = undefined; - testing.expect(debug.getContext(&context)) catch @panic("failed to getContext"); - - const debug_info = debug.getSelfDebugInfo() catch 
@panic("failed to openSelfDebugInfo"); - var it = debug.StackIterator.initWithContext(expected[0], debug_info, &context, @frameAddress()) catch @panic("failed to initWithContext"); - defer it.deinit(); - - for (unwound) |*addr| { - if (it.next()) |return_address| addr.* = return_address; - } + return std.debug.captureCurrentStackTrace(.{ + .first_address = @returnAddress(), + .allow_unsafe_unwind = true, + }, addr_buf); } -noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame2(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { // Exercise different __unwind_info / DWARF CFI encodings by forcing some registers to be restored if (builtin.target.ofmt != .c) { - switch (builtin.cpu.arch) { + switch (builtin.target.cpu.arch) { .x86 => { if (builtin.omit_frame_pointer) { asm volatile ( @@ -67,10 +59,10 @@ noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { } expected[1] = @returnAddress(); - frame3(expected, unwound); + return frame3(expected, addr_buf); } -noinline fn frame1(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[2] = @returnAddress(); // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding @@ -78,22 +70,32 @@ noinline fn frame1(expected: *[4]usize, unwound: *[4]usize) void { var pad: [std.math.maxInt(u8) * @sizeOf(usize) + 1]u8 = undefined; _ = std.mem.doNotOptimizeAway(&pad); - frame2(expected, unwound); + return frame2(expected, addr_buf); } -noinline fn frame0(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame0(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[3] = @returnAddress(); - frame1(expected, unwound); + return frame1(expected, addr_buf); } -pub fn main() !void { - // Disabled until the DWARF unwinder bugs on .aarch64 are solved - if (builtin.omit_frame_pointer and comptime builtin.target.os.tag.isDarwin() and 
builtin.cpu.arch == .aarch64) return; - - if (!std.debug.have_ucontext or !std.debug.have_getcontext) return; +pub fn main() void { + if (std.posix.ucontext_t == void and builtin.omit_frame_pointer) { + // Stack unwinding is impossible. + return; + } var expected: [4]usize = undefined; - var unwound: [4]usize = undefined; - frame0(&expected, &unwound); - try testing.expectEqual(expected, unwound); + var addr_buf: [4]usize = undefined; + const trace = frame0(&expected, &addr_buf); + // There may be *more* than 4 frames (due to the caller of `main`); that's okay. + if (trace.index < 4) { + fatal("expected at least 4 frames, got '{d}':\n{f}", .{ trace.index, &trace }); + } + if (!std.mem.eql(usize, trace.instruction_addresses, &expected)) { + const expected_trace: std.builtin.StackTrace = .{ + .index = 4, + .instruction_addresses = &expected, + }; + fatal("expected trace:\n{f}\nactual trace:\n{f}", .{ &expected_trace, &trace }); + } } diff --git a/test/standalone/stack_iterator/unwind_freestanding.zig b/test/standalone/stack_iterator/unwind_freestanding.zig index f655507b32d5..866f73d9bdf3 100644 --- a/test/standalone/stack_iterator/unwind_freestanding.zig +++ b/test/standalone/stack_iterator/unwind_freestanding.zig @@ -1,26 +1,21 @@ -/// Test StackIterator on 'freestanding' target. Based on unwind.zig. +//! Test StackIterator on 'freestanding' target. Based on unwind.zig. 
+ const std = @import("std"); -const builtin = @import("builtin"); -const debug = std.debug; -noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[0] = @returnAddress(); - - var it = debug.StackIterator.init(@returnAddress(), @frameAddress()); - defer it.deinit(); - - // Save StackIterator's frame addresses into `unwound`: - for (unwound) |*addr| { - if (it.next()) |return_address| addr.* = return_address; - } + return std.debug.captureCurrentStackTrace(.{ + .first_address = @returnAddress(), + .allow_unsafe_unwind = true, + }, addr_buf); } -noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame2(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[1] = @returnAddress(); - frame3(expected, unwound); + return frame3(expected, addr_buf); } -noinline fn frame1(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[2] = @returnAddress(); // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding @@ -28,37 +23,44 @@ noinline fn frame1(expected: *[4]usize, unwound: *[4]usize) void { var pad: [std.math.maxInt(u8) * @sizeOf(usize) + 1]u8 = undefined; _ = std.mem.doNotOptimizeAway(&pad); - frame2(expected, unwound); + return frame2(expected, addr_buf); } -noinline fn frame0(expected: *[4]usize, unwound: *[4]usize) void { +noinline fn frame0(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { expected[3] = @returnAddress(); - frame1(expected, unwound); + return frame1(expected, addr_buf); } -// No-OS entrypoint +/// No-OS entrypoint export fn _start() callconv(.withStackAlign(.c, 1)) noreturn { var expected: [4]usize = undefined; - var unwound: [4]usize = undefined; - frame0(&expected, &unwound); + var addr_buf: [4]usize = undefined; + const trace = frame0(&expected, &addr_buf); - 
// Verify result (no std.testing in freestanding) - var missed: c_int = 0; - for (expected, unwound) |expectFA, actualFA| { - if (expectFA != actualFA) { - missed += 1; - } - } + // Since we can't print from this freestanding test, we'll just use the exit + // code to communicate error conditions. + + // Unlike `unwind.zig`, we can expect *exactly* 4 frames, as we are the + // actual entry point and the frame pointer will be 0 on entry. + if (trace.index != 4) exit(1); + if (trace.instruction_addresses[0] != expected[0]) exit(2); + if (trace.instruction_addresses[1] != expected[1]) exit(3); + if (trace.instruction_addresses[2] != expected[2]) exit(4); + if (trace.instruction_addresses[3] != expected[3]) exit(5); + + exit(0); +} - // Need to compile with the target OS as "freestanding" or "other" to - // exercise the StackIterator code, but when run as a regression test - // need to actually exit. So assume we're running on x86_64-linux ... +fn exit(code: u8) noreturn { + // We are intentionally compiling with the target OS being "freestanding" to + // exercise std.debug, but we still need to exit the process somehow; so do + // the appropriate x86_64-linux syscall. 
asm volatile ( \\movl $60, %%eax \\syscall : - : [missed] "{edi}" (missed), + : [code] "{edi}" (code), : .{ .edi = true, .eax = true }); - while (true) {} // unreached + unreachable; } From d28966785652822b7b763044ce4cb2421247a8f4 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 12:16:11 +0100 Subject: [PATCH 57/85] std.debug.Pdb: fix leak --- lib/std/debug/Pdb.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig index 008aad6ab674..66010ba3770f 100644 --- a/lib/std/debug/Pdb.zig +++ b/lib/std/debug/Pdb.zig @@ -171,6 +171,7 @@ pub fn parseInfoStream(self: *Pdb) !void { const string_table_index = str_tab_index: { const name_bytes_len = try reader.takeInt(u32, .little); const name_bytes = try reader.readAlloc(gpa, name_bytes_len); + defer gpa.free(name_bytes); const HashTableHeader = extern struct { size: u32, From f5c8d80e08fac9dcf2eb88941d68b44640a4c9b4 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 12:54:31 +0100 Subject: [PATCH 58/85] windows_bat_args: fix path handling The input path could be cwd-relative, in which case it must be modified before it is written into the batch script. Also, remove usage of deprecated `GeneralPurposeAllocator` alias, rename `allocator` to `gpa`, use unmanaged `ArrayList`. 
--- test/standalone/windows_bat_args/fuzz.zig | 75 ++++++----- test/standalone/windows_bat_args/test.zig | 149 +++++++++++----------- 2 files changed, 117 insertions(+), 107 deletions(-) diff --git a/test/standalone/windows_bat_args/fuzz.zig b/test/standalone/windows_bat_args/fuzz.zig index 650494a26828..8b9895b52dd6 100644 --- a/test/standalone/windows_bat_args/fuzz.zig +++ b/test/standalone/windows_bat_args/fuzz.zig @@ -3,14 +3,14 @@ const builtin = @import("builtin"); const Allocator = std.mem.Allocator; pub fn main() anyerror!void { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - defer if (gpa.deinit() == .leak) @panic("found memory leaks"); - const allocator = gpa.allocator(); + var debug_alloc_inst: std.heap.DebugAllocator(.{}) = .init; + defer std.debug.assert(debug_alloc_inst.deinit() == .ok); + const gpa = debug_alloc_inst.allocator(); - var it = try std.process.argsWithAllocator(allocator); + var it = try std.process.argsWithAllocator(gpa); defer it.deinit(); _ = it.next() orelse unreachable; // skip binary name - const child_exe_path = it.next() orelse unreachable; + const child_exe_path_orig = it.next() orelse unreachable; const iterations: u64 = iterations: { const arg = it.next() orelse "0"; @@ -42,58 +42,63 @@ pub fn main() anyerror!void { try tmp.dir.setAsCwd(); defer tmp.parent_dir.setAsCwd() catch {}; - var buf = try std.array_list.Managed(u8).initCapacity(allocator, 128); - defer buf.deinit(); - try buf.appendSlice("@echo off\n"); - try buf.append('"'); - try buf.appendSlice(child_exe_path); - try buf.append('"'); + // `child_exe_path_orig` might be relative; make it relative to our new cwd. + const child_exe_path = try std.fs.path.resolve(gpa, &.{ "..\\..\\..", child_exe_path_orig }); + defer gpa.free(child_exe_path); + + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(gpa); + try buf.print(gpa, + \\@echo off + \\"{s}" + , .{child_exe_path}); + // Trailing newline intentionally omitted above so we can add args. 
const preamble_len = buf.items.len; - try buf.appendSlice(" %*"); + try buf.appendSlice(gpa, " %*"); try tmp.dir.writeFile(.{ .sub_path = "args1.bat", .data = buf.items }); buf.shrinkRetainingCapacity(preamble_len); - try buf.appendSlice(" %1 %2 %3 %4 %5 %6 %7 %8 %9"); + try buf.appendSlice(gpa, " %1 %2 %3 %4 %5 %6 %7 %8 %9"); try tmp.dir.writeFile(.{ .sub_path = "args2.bat", .data = buf.items }); buf.shrinkRetainingCapacity(preamble_len); - try buf.appendSlice(" \"%~1\" \"%~2\" \"%~3\" \"%~4\" \"%~5\" \"%~6\" \"%~7\" \"%~8\" \"%~9\""); + try buf.appendSlice(gpa, " \"%~1\" \"%~2\" \"%~3\" \"%~4\" \"%~5\" \"%~6\" \"%~7\" \"%~8\" \"%~9\""); try tmp.dir.writeFile(.{ .sub_path = "args3.bat", .data = buf.items }); buf.shrinkRetainingCapacity(preamble_len); var i: u64 = 0; while (iterations == 0 or i < iterations) { - const rand_arg = try randomArg(allocator, rand); - defer allocator.free(rand_arg); + const rand_arg = try randomArg(gpa, rand); + defer gpa.free(rand_arg); - try testExec(allocator, &.{rand_arg}, null); + try testExec(gpa, &.{rand_arg}, null); i += 1; } } -fn testExec(allocator: std.mem.Allocator, args: []const []const u8, env: ?*std.process.EnvMap) !void { - try testExecBat(allocator, "args1.bat", args, env); - try testExecBat(allocator, "args2.bat", args, env); - try testExecBat(allocator, "args3.bat", args, env); +fn testExec(gpa: std.mem.Allocator, args: []const []const u8, env: ?*std.process.EnvMap) !void { + try testExecBat(gpa, "args1.bat", args, env); + try testExecBat(gpa, "args2.bat", args, env); + try testExecBat(gpa, "args3.bat", args, env); } -fn testExecBat(allocator: std.mem.Allocator, bat: []const u8, args: []const []const u8, env: ?*std.process.EnvMap) !void { - var argv = try std.array_list.Managed([]const u8).initCapacity(allocator, 1 + args.len); - defer argv.deinit(); - argv.appendAssumeCapacity(bat); - argv.appendSliceAssumeCapacity(args); +fn testExecBat(gpa: std.mem.Allocator, bat: []const u8, args: []const []const u8, env: 
?*std.process.EnvMap) !void { + const argv = try gpa.alloc([]const u8, 1 + args.len); + defer gpa.free(argv); + argv[0] = bat; + @memcpy(argv[1..], args); const can_have_trailing_empty_args = std.mem.eql(u8, bat, "args3.bat"); const result = try std.process.Child.run(.{ - .allocator = allocator, + .allocator = gpa, .env_map = env, - .argv = argv.items, + .argv = argv, }); - defer allocator.free(result.stdout); - defer allocator.free(result.stderr); + defer gpa.free(result.stdout); + defer gpa.free(result.stderr); try std.testing.expectEqualStrings("", result.stderr); var it = std.mem.splitScalar(u8, result.stdout, '\x00'); @@ -109,7 +114,7 @@ fn testExecBat(allocator: std.mem.Allocator, bat: []const u8, args: []const []co } } -fn randomArg(allocator: Allocator, rand: std.Random) ![]const u8 { +fn randomArg(gpa: Allocator, rand: std.Random) ![]const u8 { const Choice = enum { backslash, quote, @@ -121,8 +126,8 @@ fn randomArg(allocator: Allocator, rand: std.Random) ![]const u8 { }; const choices = rand.uintAtMostBiased(u16, 256); - var buf = try std.array_list.Managed(u8).initCapacity(allocator, choices); - errdefer buf.deinit(); + var buf: std.ArrayList(u8) = try .initCapacity(gpa, choices); + errdefer buf.deinit(gpa); var last_codepoint: u21 = 0; for (0..choices) |_| { @@ -149,12 +154,12 @@ fn randomArg(allocator: Allocator, rand: std.Random) ![]const u8 { continue; } } - try buf.ensureUnusedCapacity(4); + try buf.ensureUnusedCapacity(gpa, 4); const unused_slice = buf.unusedCapacitySlice(); const len = std.unicode.wtf8Encode(codepoint, unused_slice) catch unreachable; buf.items.len += len; last_codepoint = codepoint; } - return buf.toOwnedSlice(); + return buf.toOwnedSlice(gpa); } diff --git a/test/standalone/windows_bat_args/test.zig b/test/standalone/windows_bat_args/test.zig index 19efcc52446b..4690d983f3a1 100644 --- a/test/standalone/windows_bat_args/test.zig +++ b/test/standalone/windows_bat_args/test.zig @@ -1,14 +1,14 @@ const std = @import("std"); pub fn 
main() anyerror!void { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - defer if (gpa.deinit() == .leak) @panic("found memory leaks"); - const allocator = gpa.allocator(); + var debug_alloc_inst: std.heap.DebugAllocator(.{}) = .init; + defer std.debug.assert(debug_alloc_inst.deinit() == .ok); + const gpa = debug_alloc_inst.allocator(); - var it = try std.process.argsWithAllocator(allocator); + var it = try std.process.argsWithAllocator(gpa); defer it.deinit(); _ = it.next() orelse unreachable; // skip binary name - const child_exe_path = it.next() orelse unreachable; + const child_exe_path_orig = it.next() orelse unreachable; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); @@ -16,62 +16,67 @@ pub fn main() anyerror!void { try tmp.dir.setAsCwd(); defer tmp.parent_dir.setAsCwd() catch {}; - var buf = try std.array_list.Managed(u8).initCapacity(allocator, 128); - defer buf.deinit(); - try buf.appendSlice("@echo off\n"); - try buf.append('"'); - try buf.appendSlice(child_exe_path); - try buf.append('"'); + // `child_exe_path_orig` might be relative; make it relative to our new cwd. + const child_exe_path = try std.fs.path.resolve(gpa, &.{ "..\\..\\..", child_exe_path_orig }); + defer gpa.free(child_exe_path); + + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(gpa); + try buf.print(gpa, + \\@echo off + \\"{s}" + , .{child_exe_path}); + // Trailing newline intentionally omitted above so we can add args. 
const preamble_len = buf.items.len; - try buf.appendSlice(" %*"); + try buf.appendSlice(gpa, " %*"); try tmp.dir.writeFile(.{ .sub_path = "args1.bat", .data = buf.items }); buf.shrinkRetainingCapacity(preamble_len); - try buf.appendSlice(" %1 %2 %3 %4 %5 %6 %7 %8 %9"); + try buf.appendSlice(gpa, " %1 %2 %3 %4 %5 %6 %7 %8 %9"); try tmp.dir.writeFile(.{ .sub_path = "args2.bat", .data = buf.items }); buf.shrinkRetainingCapacity(preamble_len); - try buf.appendSlice(" \"%~1\" \"%~2\" \"%~3\" \"%~4\" \"%~5\" \"%~6\" \"%~7\" \"%~8\" \"%~9\""); + try buf.appendSlice(gpa, " \"%~1\" \"%~2\" \"%~3\" \"%~4\" \"%~5\" \"%~6\" \"%~7\" \"%~8\" \"%~9\""); try tmp.dir.writeFile(.{ .sub_path = "args3.bat", .data = buf.items }); buf.shrinkRetainingCapacity(preamble_len); // Test cases are from https://github.com/rust-lang/rust/blob/master/tests/ui/std/windows-bat-args.rs - try testExecError(error.InvalidBatchScriptArg, allocator, &.{"\x00"}); - try testExecError(error.InvalidBatchScriptArg, allocator, &.{"\n"}); - try testExecError(error.InvalidBatchScriptArg, allocator, &.{"\r"}); - try testExec(allocator, &.{ "a", "b" }, null); - try testExec(allocator, &.{ "c is for cat", "d is for dog" }, null); - try testExec(allocator, &.{ "\"", " \"" }, null); - try testExec(allocator, &.{ "\\", "\\" }, null); - try testExec(allocator, &.{">file.txt"}, null); - try testExec(allocator, &.{"whoami.exe"}, null); - try testExec(allocator, &.{"&a.exe"}, null); - try testExec(allocator, &.{"&echo hello "}, null); - try testExec(allocator, &.{ "&echo hello", "&whoami", ">file.txt" }, null); - try testExec(allocator, &.{"!TMP!"}, null); - try testExec(allocator, &.{"key=value"}, null); - try testExec(allocator, &.{"\"key=value\""}, null); - try testExec(allocator, &.{"key = value"}, null); - try testExec(allocator, &.{"key=[\"value\"]"}, null); - try testExec(allocator, &.{ "", "a=b" }, null); - try testExec(allocator, &.{"key=\"foo bar\""}, null); - try testExec(allocator, &.{"key=[\"my_value]"}, 
null); - try testExec(allocator, &.{"key=[\"my_value\",\"other-value\"]"}, null); - try testExec(allocator, &.{"key\\=value"}, null); - try testExec(allocator, &.{"key=\"&whoami\""}, null); - try testExec(allocator, &.{"key=\"value\"=5"}, null); - try testExec(allocator, &.{"key=[\">file.txt\"]"}, null); - try testExec(allocator, &.{"%hello"}, null); - try testExec(allocator, &.{"%PATH%"}, null); - try testExec(allocator, &.{"%%cd:~,%"}, null); - try testExec(allocator, &.{"%PATH%PATH%"}, null); - try testExec(allocator, &.{"\">file.txt"}, null); - try testExec(allocator, &.{"abc\"&echo hello"}, null); - try testExec(allocator, &.{"123\">file.txt"}, null); - try testExec(allocator, &.{"\"&echo hello&whoami.exe"}, null); - try testExec(allocator, &.{ "\"hello^\"world\"", "hello &echo oh no >file.txt" }, null); - try testExec(allocator, &.{"&whoami.exe"}, null); + try testExecError(error.InvalidBatchScriptArg, gpa, &.{"\x00"}); + try testExecError(error.InvalidBatchScriptArg, gpa, &.{"\n"}); + try testExecError(error.InvalidBatchScriptArg, gpa, &.{"\r"}); + try testExec(gpa, &.{ "a", "b" }, null); + try testExec(gpa, &.{ "c is for cat", "d is for dog" }, null); + try testExec(gpa, &.{ "\"", " \"" }, null); + try testExec(gpa, &.{ "\\", "\\" }, null); + try testExec(gpa, &.{">file.txt"}, null); + try testExec(gpa, &.{"whoami.exe"}, null); + try testExec(gpa, &.{"&a.exe"}, null); + try testExec(gpa, &.{"&echo hello "}, null); + try testExec(gpa, &.{ "&echo hello", "&whoami", ">file.txt" }, null); + try testExec(gpa, &.{"!TMP!"}, null); + try testExec(gpa, &.{"key=value"}, null); + try testExec(gpa, &.{"\"key=value\""}, null); + try testExec(gpa, &.{"key = value"}, null); + try testExec(gpa, &.{"key=[\"value\"]"}, null); + try testExec(gpa, &.{ "", "a=b" }, null); + try testExec(gpa, &.{"key=\"foo bar\""}, null); + try testExec(gpa, &.{"key=[\"my_value]"}, null); + try testExec(gpa, &.{"key=[\"my_value\",\"other-value\"]"}, null); + try testExec(gpa, &.{"key\\=value"}, 
null); + try testExec(gpa, &.{"key=\"&whoami\""}, null); + try testExec(gpa, &.{"key=\"value\"=5"}, null); + try testExec(gpa, &.{"key=[\">file.txt\"]"}, null); + try testExec(gpa, &.{"%hello"}, null); + try testExec(gpa, &.{"%PATH%"}, null); + try testExec(gpa, &.{"%%cd:~,%"}, null); + try testExec(gpa, &.{"%PATH%PATH%"}, null); + try testExec(gpa, &.{"\">file.txt"}, null); + try testExec(gpa, &.{"abc\"&echo hello"}, null); + try testExec(gpa, &.{"123\">file.txt"}, null); + try testExec(gpa, &.{"\"&echo hello&whoami.exe"}, null); + try testExec(gpa, &.{ "\"hello^\"world\"", "hello &echo oh no >file.txt" }, null); + try testExec(gpa, &.{"&whoami.exe"}, null); // Ensure that trailing space and . characters can't lead to unexpected bat/cmd script execution. // In many Windows APIs (including CreateProcess), trailing space and . characters are stripped @@ -89,17 +94,17 @@ pub fn main() anyerror!void { // > "args1.bat .. " // '"args1.bat .. "' is not recognized as an internal or external command, // operable program or batch file. - try std.testing.expectError(error.FileNotFound, testExecBat(allocator, "args1.bat .. ", &.{"abc"}, null)); + try std.testing.expectError(error.FileNotFound, testExecBat(gpa, "args1.bat .. ", &.{"abc"}, null)); const absolute_with_trailing = blk: { - const absolute_path = try std.fs.realpathAlloc(allocator, "args1.bat"); - defer allocator.free(absolute_path); - break :blk try std.mem.concat(allocator, u8, &.{ absolute_path, " .. " }); + const absolute_path = try std.fs.realpathAlloc(gpa, "args1.bat"); + defer gpa.free(absolute_path); + break :blk try std.mem.concat(gpa, u8, &.{ absolute_path, " .. 
" }); }; - defer allocator.free(absolute_with_trailing); - try std.testing.expectError(error.FileNotFound, testExecBat(allocator, absolute_with_trailing, &.{"abc"}, null)); + defer gpa.free(absolute_with_trailing); + try std.testing.expectError(error.FileNotFound, testExecBat(gpa, absolute_with_trailing, &.{"abc"}, null)); var env = env: { - var env = try std.process.getEnvMap(allocator); + var env = try std.process.getEnvMap(gpa); errdefer env.deinit(); // No escaping try env.put("FOO", "123"); @@ -110,37 +115,37 @@ pub fn main() anyerror!void { break :env env; }; defer env.deinit(); - try testExec(allocator, &.{"%FOO%"}, &env); + try testExec(gpa, &.{"%FOO%"}, &env); // Ensure that none of the `>file.txt`s have caused file.txt to be created try std.testing.expectError(error.FileNotFound, tmp.dir.access("file.txt", .{})); } -fn testExecError(err: anyerror, allocator: std.mem.Allocator, args: []const []const u8) !void { - return std.testing.expectError(err, testExec(allocator, args, null)); +fn testExecError(err: anyerror, gpa: std.mem.Allocator, args: []const []const u8) !void { + return std.testing.expectError(err, testExec(gpa, args, null)); } -fn testExec(allocator: std.mem.Allocator, args: []const []const u8, env: ?*std.process.EnvMap) !void { - try testExecBat(allocator, "args1.bat", args, env); - try testExecBat(allocator, "args2.bat", args, env); - try testExecBat(allocator, "args3.bat", args, env); +fn testExec(gpa: std.mem.Allocator, args: []const []const u8, env: ?*std.process.EnvMap) !void { + try testExecBat(gpa, "args1.bat", args, env); + try testExecBat(gpa, "args2.bat", args, env); + try testExecBat(gpa, "args3.bat", args, env); } -fn testExecBat(allocator: std.mem.Allocator, bat: []const u8, args: []const []const u8, env: ?*std.process.EnvMap) !void { - var argv = try std.array_list.Managed([]const u8).initCapacity(allocator, 1 + args.len); - defer argv.deinit(); - argv.appendAssumeCapacity(bat); - argv.appendSliceAssumeCapacity(args); +fn 
testExecBat(gpa: std.mem.Allocator, bat: []const u8, args: []const []const u8, env: ?*std.process.EnvMap) !void { + const argv = try gpa.alloc([]const u8, 1 + args.len); + defer gpa.free(argv); + argv[0] = bat; + @memcpy(argv[1..], args); const can_have_trailing_empty_args = std.mem.eql(u8, bat, "args3.bat"); const result = try std.process.Child.run(.{ - .allocator = allocator, + .allocator = gpa, .env_map = env, - .argv = argv.items, + .argv = argv, }); - defer allocator.free(result.stdout); - defer allocator.free(result.stderr); + defer gpa.free(result.stdout); + defer gpa.free(result.stderr); try std.testing.expectEqualStrings("", result.stderr); var it = std.mem.splitScalar(u8, result.stdout, '\x00'); From 2fefe0e4b39ab6f86e5af423304b8a7b2da5fe72 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 17:54:31 +0100 Subject: [PATCH 59/85] tests: fix 32-bit compatible arch selection --- test/tests.zig | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/test/tests.zig b/test/tests.zig index b327e94a8f3b..a6bd061f38a9 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -1866,18 +1866,26 @@ fn nativeAndCompatible32bit(b: *std.Build, skip_non_native: bool) []const std.Bu const host = b.graph.host.result; const only_native = (&b.graph.host)[0..1]; if (skip_non_native) return only_native; - const arch32: std.Target.Cpu.Arch = switch (host.cpu.arch) { - .x86_64 => .x86, - .aarch64 => .arm, - .aarch64_be => .armeb, + const arch32: std.Target.Cpu.Arch = switch (host.os.tag) { + .windows => switch (host.cpu.arch) { + .x86_64 => .x86, + .aarch64 => .thumb, + .aarch64_be => .thumbeb, + else => return only_native, + }, + .freebsd => switch (host.cpu.arch) { + .aarch64 => .arm, + .aarch64_be => .armeb, + else => return only_native, + }, + .linux, .netbsd => switch (host.cpu.arch) { + .x86_64 => .x86, + .aarch64 => .arm, + .aarch64_be => .armeb, + else => return only_native, + }, else => return only_native, }; - switch 
(host.os.tag) { - .windows => if (arch32.isArm()) return only_native, - .macos, .freebsd => if (arch32 == .x86) return only_native, - .linux, .netbsd => {}, - else => return only_native, - } return b.graph.arena.dupe(std.Build.ResolvedTarget, &.{ b.graph.host, b.resolveTargetQuery(.{ .cpu_arch = arch32, .os_tag = host.os.tag }), From e9c0d43c5b6fc5065ac2e22e162b7d7476f5097d Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 17:40:42 +0100 Subject: [PATCH 60/85] test-error-traces: skip some more optimized traces --- test/error_traces.zig | 25 +++++++++++++++++++------ test/src/ErrorTrace.zig | 4 ++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/test/error_traces.zig b/test/error_traces.zig index ea84f14ce5a8..d30a1b6e58fa 100644 --- a/test/error_traces.zig +++ b/test/error_traces.zig @@ -37,6 +37,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); cases.addCase(.{ @@ -86,12 +88,6 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { \\ return error.UnrelatedError; \\ ^ , - .disable_trace_optimized = &.{ - .{ .x86_64, .linux }, - .{ .x86, .linux }, - .{ .x86_64, .windows }, - .{ .x86, .windows }, - }, }); cases.addCase(.{ @@ -121,6 +117,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); @@ -172,6 +170,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); @@ -242,6 +242,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, 
.macos }, }, }); @@ -282,6 +284,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); @@ -317,6 +321,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); @@ -352,6 +358,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); @@ -392,6 +400,8 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); @@ -423,8 +433,11 @@ pub fn addCases(cases: *@import("tests.zig").ErrorTracesContext) void { .disable_trace_optimized = &.{ .{ .x86_64, .linux }, .{ .x86, .linux }, + .{ .aarch64, .linux }, .{ .x86_64, .windows }, .{ .x86, .windows }, + .{ .x86_64, .macos }, + .{ .aarch64, .macos }, }, }); } diff --git a/test/src/ErrorTrace.zig b/test/src/ErrorTrace.zig index ca726eaea379..d8ff3803321c 100644 --- a/test/src/ErrorTrace.zig +++ b/test/src/ErrorTrace.zig @@ -13,6 +13,9 @@ pub const Case = struct { /// On these arch/OS pairs we will not test the error trace on optimized LLVM builds because the /// optimizations break the error trace. We will test the binary with error tracing disabled, /// just to ensure that the expected error is still returned from `main`. + /// + /// LLVM ReleaseSmall builds always have the trace disabled regardless of this field, because it + /// seems that LLVM is particularly good at optimizing traces away in those. 
disable_trace_optimized: []const DisableConfig = &.{}, pub const DisableConfig = struct { std.Target.Cpu.Arch, std.Target.Os.Tag }; @@ -58,6 +61,7 @@ fn addCaseConfig( const error_tracing: bool = tracing: { if (optimize == .Debug) break :tracing true; if (backend != .llvm) break :tracing true; + if (optimize == .ReleaseSmall) break :tracing false; for (case.disable_trace_optimized) |disable| { const d_arch, const d_os = disable; if (target.result.cpu.arch == d_arch and target.result.os.tag == d_os) { From 604fb3001d6480d1687ddeb988c64c8028d5e55a Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 13 Sep 2025 18:02:57 +0100 Subject: [PATCH 61/85] std.start: also don't print error trace targeting `.other` This only matters if `callMain` is called by a user, since `std.start` will never itself call `callMain` when `target.os.tag == .other`. However, it *is* a valid use case for a user to call `std.start.callMain` in their own startup logic, so this makes sense. --- lib/std/start.zig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/std/start.zig b/lib/std/start.zig index 13ce744a2ac7..60ddae43ac95 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -635,10 +635,11 @@ pub inline fn callMain() u8 { else => {}, } std.log.err("{s}", .{@errorName(err)}); - if (native_os != .freestanding) { - if (@errorReturnTrace()) |trace| { + switch (native_os) { + .freestanding, .other => {}, + else => if (@errorReturnTrace()) |trace| { std.debug.dumpStackTrace(trace); - } + }, } return 1; }; From b578cca022f4c9ce94439e2ee795639b3a23c8f5 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 Sep 2025 18:23:14 +0100 Subject: [PATCH 62/85] link.Dwarf: i just fixed error union values, s'nothin' else to it --- src/link/Dwarf.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 228a8d6a25a8..3aeffb03653f 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -4064,6 +4064,7 @@ fn 
updateLazyValue( }, .error_union => |error_union| { try wip_nav.abbrevCode(.aggregate_comptime_value); + try wip_nav.refType(.fromInterned(error_union.ty)); var err_buf: [4]u8 = undefined; const err_bytes = err_buf[0 .. std.math.divCeil(u17, zcu.errorSetBits(), 8) catch unreachable]; dwarf.writeInt(err_bytes, switch (error_union.val) { @@ -4101,7 +4102,6 @@ fn updateLazyValue( try diw.writeUleb128(err_bytes.len); try diw.writeAll(err_bytes); } - try wip_nav.refType(.fromInterned(error_union.ty)); try diw.writeUleb128(@intFromEnum(AbbrevCode.null)); }, .enum_literal => |enum_literal| { From a18fd41064493e742eacebc88e2afeadd54ff6f0 Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 17 Sep 2025 18:38:11 +0100 Subject: [PATCH 63/85] std: rework/remove ucontext_t Our usage of `ucontext_t` in the standard library was kind of problematic. We unnecessarily mimiced libc-specific structures, and our `getcontext` implementation was overkill for our use case of stack tracing. This commit introduces a new namespace, `std.debug.cpu_context`, which contains "context" types for various architectures (currently x86, x86_64, ARM, and AARCH64) containing the general-purpose CPU registers; the ones needed in practice for stack unwinding. Each implementation has a function `current` which populates the structure using inline assembly. The structure is user-overrideable, though that should only be necessary if the standard library does not have an implementation for the *architecture*: that is to say, none of this is OS-dependent. Of course, in POSIX signal handlers, we get a `ucontext_t` from the kernel. The function `std.debug.cpu_context.fromPosixSignalContext` converts this to a `std.debug.cpu_context.Native` with a big ol' target switch. This functionality is not exposed from `std.c` or `std.posix`, and neither are `ucontext_t`, `mcontext_t`, or `getcontext`. 
The rationale is that these types and functions do not conform to a specific ABI, and in fact tend to get updated over time based on CPU features and extensions; in addition, different libcs use different structures which are "partially compatible" with the kernel structure. Overall, it's a mess, but all we need is the kernel context, so we can just define a kernel-compatible structure as long as we don't claim C compatibility by putting it in `std.c` or `std.posix`. This change resulted in a few nice `std.debug` simplifications, but nothing too noteworthy. However, the main benefit of this change is that DWARF unwinding---sometimes necessary for collecting stack traces reliably---now requires far less target-specific integration. Also fix a bug I noticed in `PageAllocator` (I found this due to a bug in my distro's QEMU distribution; thanks, broken QEMU patch!) and I think a couple of minor bugs in `std.debug`. Resolves: #23801 Resolves: #23802 --- lib/std/c.zig | 207 ----- lib/std/c/darwin.zig | 101 --- lib/std/c/dragonfly.zig | 40 - lib/std/c/haiku.zig | 263 ------ lib/std/c/openbsd.zig | 47 - lib/std/debug.zig | 136 +-- lib/std/debug/Dwarf.zig | 57 +- lib/std/debug/Dwarf/abi.zig | 351 -------- lib/std/debug/Dwarf/expression.zig | 160 ++-- lib/std/debug/SelfInfo.zig | 229 ++--- lib/std/debug/SelfInfo/DarwinModule.zig | 57 +- lib/std/debug/SelfInfo/ElfModule.zig | 3 +- lib/std/debug/SelfInfo/WindowsModule.zig | 40 +- lib/std/debug/cpu_context.zig | 1019 ++++++++++++++++++++++ lib/std/heap/PageAllocator.zig | 2 +- lib/std/os/freebsd.zig | 73 ++ lib/std/os/linux.zig | 2 - lib/std/os/linux/aarch64.zig | 3 - lib/std/os/linux/arm.zig | 3 - lib/std/os/linux/hexagon.zig | 3 - lib/std/os/linux/loongarch64.zig | 3 - lib/std/os/linux/m68k.zig | 3 - lib/std/os/linux/mips.zig | 3 - lib/std/os/linux/mips64.zig | 3 - lib/std/os/linux/powerpc.zig | 3 - lib/std/os/linux/powerpc64.zig | 3 - lib/std/os/linux/riscv32.zig | 3 - lib/std/os/linux/riscv64.zig | 3 - 
lib/std/os/linux/s390x.zig | 3 - lib/std/os/linux/sparc64.zig | 3 - lib/std/os/linux/x86.zig | 14 - lib/std/os/linux/x86_64.zig | 95 -- lib/std/posix.zig | 4 - 33 files changed, 1416 insertions(+), 1523 deletions(-) delete mode 100644 lib/std/debug/Dwarf/abi.zig create mode 100644 lib/std/debug/cpu_context.zig diff --git a/lib/std/c.zig b/lib/std/c.zig index b2b851c43aa4..d1affab2075b 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -7035,205 +7035,6 @@ pub const timezone = switch (native_os) { else => void, }; -pub const ucontext_t = switch (native_os) { - .linux => linux.ucontext_t, // std.os.linux.ucontext_t is currently glibc-compatible, but it should probably not be. - .emscripten => emscripten.ucontext_t, - .macos, .ios, .tvos, .watchos, .visionos => extern struct { - onstack: c_int, - sigmask: sigset_t, - stack: stack_t, - link: ?*ucontext_t, - mcsize: u64, - mcontext: *mcontext_t, - __mcontext_data: mcontext_t, - }, - .freebsd => extern struct { - sigmask: sigset_t, - mcontext: mcontext_t, - link: ?*ucontext_t, - stack: stack_t, - flags: c_int, - __spare__: [4]c_int, - }, - .solaris, .illumos => extern struct { - flags: u64, - link: ?*ucontext_t, - sigmask: sigset_t, - stack: stack_t, - mcontext: mcontext_t, - brand_data: [3]?*anyopaque, - filler: [2]i64, - }, - .netbsd => extern struct { - flags: u32, - link: ?*ucontext_t, - sigmask: sigset_t, - stack: stack_t, - mcontext: mcontext_t, - __pad: [ - switch (builtin.cpu.arch) { - .x86 => 4, - .mips, .mipsel, .mips64, .mips64el => 14, - .arm, .armeb, .thumb, .thumbeb => 1, - .sparc, .sparc64 => if (@sizeOf(usize) == 4) 43 else 8, - else => 0, - } - ]u32, - }, - .dragonfly => extern struct { - sigmask: sigset_t, - mcontext: mcontext_t, - link: ?*ucontext_t, - stack: stack_t, - cofunc: ?*fn (?*ucontext_t, ?*anyopaque) void, - arg: ?*void, - _spare: [4]c_int, - }, - // https://github.com/SerenityOS/serenity/blob/87eac0e424cff4a1f941fb704b9362a08654c24d/Kernel/API/POSIX/ucontext.h#L19-L24 - .haiku, .serenity => 
extern struct { - link: ?*ucontext_t, - sigmask: sigset_t, - stack: stack_t, - mcontext: mcontext_t, - }, - .openbsd => openbsd.ucontext_t, - else => void, -}; -pub const mcontext_t = switch (native_os) { - .linux => linux.mcontext_t, - .emscripten => emscripten.mcontext_t, - .macos, .ios, .tvos, .watchos, .visionos => darwin.mcontext_t, - .freebsd => switch (builtin.cpu.arch) { - .x86_64 => extern struct { - onstack: u64, - rdi: u64, - rsi: u64, - rdx: u64, - rcx: u64, - r8: u64, - r9: u64, - rax: u64, - rbx: u64, - rbp: u64, - r10: u64, - r11: u64, - r12: u64, - r13: u64, - r14: u64, - r15: u64, - trapno: u32, - fs: u16, - gs: u16, - addr: u64, - flags: u32, - es: u16, - ds: u16, - err: u64, - rip: u64, - cs: u64, - rflags: u64, - rsp: u64, - ss: u64, - len: u64, - fpformat: u64, - ownedfp: u64, - fpstate: [64]u64 align(16), - fsbase: u64, - gsbase: u64, - xfpustate: u64, - xfpustate_len: u64, - spare: [4]u64, - }, - .aarch64 => extern struct { - gpregs: extern struct { - x: [30]u64, - lr: u64, - sp: u64, - elr: u64, - spsr: u32, - _pad: u32, - }, - fpregs: extern struct { - q: [32]u128, - sr: u32, - cr: u32, - flags: u32, - _pad: u32, - }, - flags: u32, - _pad: u32, - _spare: [8]u64, - }, - else => struct {}, - }, - .solaris, .illumos => extern struct { - gregs: [28]u64, - fpregs: solaris.fpregset_t, - }, - .netbsd => switch (builtin.cpu.arch) { - .aarch64, .aarch64_be => extern struct { - gregs: [35]u64, - fregs: [528]u8 align(16), - spare: [8]u64, - }, - .x86 => extern struct { - gregs: [19]u32, - fpregs: [161]u32, - mc_tlsbase: u32, - }, - .x86_64 => extern struct { - gregs: [26]u64, - mc_tlsbase: u64, - fpregs: [512]u8 align(8), - }, - else => struct {}, - }, - .dragonfly => dragonfly.mcontext_t, - .haiku => haiku.mcontext_t, - .serenity => switch (native_arch) { - // https://github.com/SerenityOS/serenity/blob/200e91cd7f1ec5453799a2720d4dc114a59cc289/Kernel/Arch/aarch64/mcontext.h#L15-L19 - .aarch64 => extern struct { - x: [31]u64, - sp: u64, - pc: u64, - 
}, - // https://github.com/SerenityOS/serenity/blob/66f8d0f031ef25c409dbb4fecaa454800fecae0f/Kernel/Arch/riscv64/mcontext.h#L15-L18 - .riscv64 => extern struct { - x: [31]u64, - pc: u64, - }, - // https://github.com/SerenityOS/serenity/blob/7b9ea3efdec9f86a1042893e8107d0b23aad8727/Kernel/Arch/x86_64/mcontext.h#L15-L40 - .x86_64 => extern struct { - rax: u64, - rcx: u64, - rdx: u64, - rbx: u64, - rsp: u64, - rbp: u64, - rsi: u64, - rdi: u64, - rip: u64, - r8: u64, - r9: u64, - r10: u64, - r11: u64, - r12: u64, - r13: u64, - r14: u64, - r15: u64, - rflags: u64, - cs: u32, - ss: u32, - ds: u32, - es: u32, - fs: u32, - gs: u32, - }, - else => struct {}, - }, - else => void, -}; - pub const user_desc = switch (native_os) { .linux => linux.user_desc, else => void, @@ -11238,13 +11039,6 @@ pub const LC = enum(c_int) { pub extern "c" fn setlocale(category: LC, locale: ?[*:0]const u8) ?[*:0]const u8; -pub const getcontext = if (builtin.target.abi.isAndroid() or builtin.target.os.tag == .openbsd or builtin.target.os.tag == .haiku) -{} // libc does not implement getcontext - else if (native_os == .linux and builtin.target.abi.isMusl()) - linux.getcontext - else - private.getcontext; - pub const max_align_t = if (native_abi == .msvc or native_abi == .itanium) f64 else if (native_os.isDarwin()) @@ -11668,7 +11462,6 @@ const private = struct { extern "c" fn shm_open(name: [*:0]const u8, flag: c_int, mode: mode_t) c_int; extern "c" fn pthread_setname_np(thread: pthread_t, name: [*:0]const u8) c_int; - extern "c" fn getcontext(ucp: *ucontext_t) c_int; extern "c" fn getrandom(buf_ptr: [*]u8, buf_len: usize, flags: c_uint) isize; extern "c" fn getentropy(buffer: [*]u8, size: usize) c_int; diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 2d3376b858ab..2ad979ecf2dc 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -348,107 +348,6 @@ pub const VM = struct { pub const exception_type_t = c_int; -pub const mcontext_t = switch (native_arch) { - .aarch64 => 
extern struct { - es: exception_state, - ss: thread_state, - ns: neon_state, - }, - .x86_64 => extern struct { - es: exception_state, - ss: thread_state, - fs: float_state, - }, - else => @compileError("unsupported arch"), -}; - -pub const exception_state = switch (native_arch) { - .aarch64 => extern struct { - far: u64, // Virtual Fault Address - esr: u32, // Exception syndrome - exception: u32, // Number of arm exception taken - }, - .x86_64 => extern struct { - trapno: u16, - cpu: u16, - err: u32, - faultvaddr: u64, - }, - else => @compileError("unsupported arch"), -}; - -pub const thread_state = switch (native_arch) { - .aarch64 => extern struct { - /// General purpose registers - regs: [29]u64, - /// Frame pointer x29 - fp: u64, - /// Link register x30 - lr: u64, - /// Stack pointer x31 - sp: u64, - /// Program counter - pc: u64, - /// Current program status register - cpsr: u32, - __pad: u32, - }, - .x86_64 => extern struct { - rax: u64, - rbx: u64, - rcx: u64, - rdx: u64, - rdi: u64, - rsi: u64, - rbp: u64, - rsp: u64, - r8: u64, - r9: u64, - r10: u64, - r11: u64, - r12: u64, - r13: u64, - r14: u64, - r15: u64, - rip: u64, - rflags: u64, - cs: u64, - fs: u64, - gs: u64, - }, - else => @compileError("unsupported arch"), -}; - -pub const neon_state = extern struct { - q: [32]u128, - fpsr: u32, - fpcr: u32, -}; - -pub const float_state = extern struct { - reserved: [2]c_int, - fcw: u16, - fsw: u16, - ftw: u8, - rsrv1: u8, - fop: u16, - ip: u32, - cs: u16, - rsrv2: u16, - dp: u32, - ds: u16, - rsrv3: u16, - mxcsr: u32, - mxcsrmask: u32, - stmm: [8]stmm_reg, - xmm: [16]xmm_reg, - rsrv4: [96]u8, - reserved1: c_int, -}; - -pub const stmm_reg = [16]u8; -pub const xmm_reg = [16]u8; - pub extern "c" fn NSVersionOfRunTimeLibrary(library_name: [*:0]const u8) u32; pub extern "c" fn _NSGetExecutablePath(buf: [*:0]u8, bufsize: *u32) c_int; pub extern "c" fn _dyld_image_count() u32; diff --git a/lib/std/c/dragonfly.zig b/lib/std/c/dragonfly.zig index 
5a68ae4a5efd..40fb9c8b836b 100644 --- a/lib/std/c/dragonfly.zig +++ b/lib/std/c/dragonfly.zig @@ -13,46 +13,6 @@ pub extern "c" fn ptrace(request: c_int, pid: pid_t, addr: caddr_t, data: c_int) pub extern "c" fn umtx_sleep(ptr: *const volatile c_int, value: c_int, timeout: c_int) c_int; pub extern "c" fn umtx_wakeup(ptr: *const volatile c_int, count: c_int) c_int; -pub const mcontext_t = extern struct { - onstack: register_t, // XXX - sigcontext compat. - rdi: register_t, - rsi: register_t, - rdx: register_t, - rcx: register_t, - r8: register_t, - r9: register_t, - rax: register_t, - rbx: register_t, - rbp: register_t, - r10: register_t, - r11: register_t, - r12: register_t, - r13: register_t, - r14: register_t, - r15: register_t, - xflags: register_t, - trapno: register_t, - addr: register_t, - flags: register_t, - err: register_t, - rip: register_t, - cs: register_t, - rflags: register_t, - rsp: register_t, // machine state - ss: register_t, - - len: c_uint, // sizeof(mcontext_t) - fpformat: c_uint, - ownedfp: c_uint, - reserved: c_uint, - unused: [8]c_uint, - - // NOTE! 64-byte aligned as of here. Also must match savefpu structure. - fpregs: [256]c_int align(64), -}; - -pub const register_t = isize; - pub const E = enum(u16) { /// No error occurred. 
SUCCESS = 0, diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig index e62e38716427..81cc3bc32500 100644 --- a/lib/std/c/haiku.zig +++ b/lib/std/c/haiku.zig @@ -273,269 +273,6 @@ pub const E = enum(i32) { pub const status_t = i32; -pub const mcontext_t = switch (builtin.cpu.arch) { - .arm, .thumb => extern struct { - r0: u32, - r1: u32, - r2: u32, - r3: u32, - r4: u32, - r5: u32, - r6: u32, - r7: u32, - r8: u32, - r9: u32, - r10: u32, - r11: u32, - r12: u32, - r13: u32, - r14: u32, - r15: u32, - cpsr: u32, - }, - .aarch64 => extern struct { - x: [10]u64, - lr: u64, - sp: u64, - elr: u64, - spsr: u64, - fp_q: [32]u128, - fpsr: u32, - fpcr: u32, - }, - .m68k => extern struct { - pc: u32, - d0: u32, - d1: u32, - d2: u32, - d3: u32, - d4: u32, - d5: u32, - d6: u32, - d7: u32, - a0: u32, - a1: u32, - a2: u32, - a3: u32, - a4: u32, - a5: u32, - a6: u32, - a7: u32, - ccr: u8, - f0: f64, - f1: f64, - f2: f64, - f3: f64, - f4: f64, - f5: f64, - f6: f64, - f7: f64, - f8: f64, - f9: f64, - f10: f64, - f11: f64, - f12: f64, - f13: f64, - }, - .mipsel => extern struct { - r0: u32, - }, - .powerpc => extern struct { - pc: u32, - r0: u32, - r1: u32, - r2: u32, - r3: u32, - r4: u32, - r5: u32, - r6: u32, - r7: u32, - r8: u32, - r9: u32, - r10: u32, - r11: u32, - r12: u32, - f0: f64, - f1: f64, - f2: f64, - f3: f64, - f4: f64, - f5: f64, - f6: f64, - f7: f64, - f8: f64, - f9: f64, - f10: f64, - f11: f64, - f12: f64, - f13: f64, - reserved: u32, - fpscr: u32, - ctr: u32, - xer: u32, - cr: u32, - msr: u32, - lr: u32, - }, - .riscv64 => extern struct { - x: [31]u64, - pc: u64, - f: [32]f64, - fcsr: u64, - }, - .sparc64 => extern struct { - g1: u64, - g2: u64, - g3: u64, - g4: u64, - g5: u64, - g6: u64, - g7: u64, - o0: u64, - o1: u64, - o2: u64, - o3: u64, - o4: u64, - o5: u64, - sp: u64, - o7: u64, - l0: u64, - l1: u64, - l2: u64, - l3: u64, - l4: u64, - l5: u64, - l6: u64, - l7: u64, - i0: u64, - i1: u64, - i2: u64, - i3: u64, - i4: u64, - i5: u64, - fp: u64, - i7: u64, - }, - 
.x86 => extern struct { - pub const old_extended_regs = extern struct { - control: u16, - reserved1: u16, - status: u16, - reserved2: u16, - tag: u16, - reserved3: u16, - eip: u32, - cs: u16, - opcode: u16, - datap: u32, - ds: u16, - reserved4: u16, - fp_mmx: [8][10]u8, - }; - - pub const fp_register = extern struct { value: [10]u8, reserved: [6]u8 }; - - pub const xmm_register = extern struct { value: [16]u8 }; - - pub const new_extended_regs = extern struct { - control: u16, - status: u16, - tag: u16, - opcode: u16, - eip: u32, - cs: u16, - reserved1: u16, - datap: u32, - ds: u16, - reserved2: u16, - mxcsr: u32, - reserved3: u32, - fp_mmx: [8]fp_register, - xmmx: [8]xmm_register, - reserved4: [224]u8, - }; - - pub const extended_regs = extern struct { - state: extern union { - old_format: old_extended_regs, - new_format: new_extended_regs, - }, - format: u32, - }; - - eip: u32, - eflags: u32, - eax: u32, - ecx: u32, - edx: u32, - esp: u32, - ebp: u32, - reserved: u32, - xregs: extended_regs, - edi: u32, - esi: u32, - ebx: u32, - }, - .x86_64 => extern struct { - pub const fp_register = extern struct { - value: [10]u8, - reserved: [6]u8, - }; - - pub const xmm_register = extern struct { - value: [16]u8, - }; - - pub const fpu_state = extern struct { - control: u16, - status: u16, - tag: u16, - opcode: u16, - rip: u64, - rdp: u64, - mxcsr: u32, - mscsr_mask: u32, - - fp_mmx: [8]fp_register, - xmm: [16]xmm_register, - reserved: [96]u8, - }; - - pub const xstate_hdr = extern struct { - bv: u64, - xcomp_bv: u64, - reserved: [48]u8, - }; - - pub const savefpu = extern struct { - fxsave: fpu_state, - xstate: xstate_hdr, - ymm: [16]xmm_register, - }; - - rax: u64, - rbx: u64, - rcx: u64, - rdx: u64, - rdi: u64, - rsi: u64, - rbp: u64, - r8: u64, - r9: u64, - r10: u64, - r11: u64, - r12: u64, - r13: u64, - r14: u64, - r15: u64, - rsp: u64, - rip: u64, - rflags: u64, - fpu: savefpu, - }, - else => void, -}; - pub const DirEnt = extern struct { /// device dev: dev_t, diff 
--git a/lib/std/c/openbsd.zig b/lib/std/c/openbsd.zig index 27e60b530db1..1dd65dca7b2f 100644 --- a/lib/std/c/openbsd.zig +++ b/lib/std/c/openbsd.zig @@ -144,53 +144,6 @@ pub const TCIO = enum(u32) { ION = 4, }; -pub const ucontext_t = switch (builtin.cpu.arch) { - .x86_64 => extern struct { - sc_rdi: c_long, - sc_rsi: c_long, - sc_rdx: c_long, - sc_rcx: c_long, - sc_r8: c_long, - sc_r9: c_long, - sc_r10: c_long, - sc_r11: c_long, - sc_r12: c_long, - sc_r13: c_long, - sc_r14: c_long, - sc_r15: c_long, - sc_rbp: c_long, - sc_rbx: c_long, - sc_rax: c_long, - sc_gs: c_long, - sc_fs: c_long, - sc_es: c_long, - sc_ds: c_long, - sc_trapno: c_long, - sc_err: c_long, - sc_rip: c_long, - sc_cs: c_long, - sc_rflags: c_long, - sc_rsp: c_long, - sc_ss: c_long, - - sc_fpstate: *anyopaque, // struct fxsave64 * - __sc_unused: c_int, - sc_mask: c_int, - sc_cookie: c_long, - }, - .aarch64 => extern struct { - __sc_unused: c_int, - sc_mask: c_int, - sc_sp: c_ulong, - sc_lr: c_ulong, - sc_elr: c_ulong, - sc_spsr: c_ulong, - sc_x: [30]c_ulong, - sc_cookie: c_long, - }, - else => @compileError("missing ucontext_t type definition"), -}; - pub const E = enum(u16) { /// No error occurred. SUCCESS = 0, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index a94b61901613..088152d8733c 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -22,6 +22,7 @@ pub const ElfFile = @import("debug/ElfFile.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); +pub const cpu_context = @import("debug/cpu_context.zig"); pub const simple_panic = @import("debug/simple_panic.zig"); pub const no_panic = @import("debug/no_panic.zig"); @@ -331,66 +332,8 @@ test dumpHexFallible { try std.testing.expectEqualStrings(expected, aw.written()); } -/// Platform-specific thread state. This contains register state, and on some platforms -/// information about the stack. 
This is not safe to trivially copy, because some platforms -/// use internal pointers within this structure. After copying, call `relocateContext`. -pub const ThreadContext = ThreadContext: { - // Allow overriding the target's `ThreadContext` by exposing `root.debug.ThreadContext`. - if (@hasDecl(root, "debug") and @hasDecl(root.debug, "ThreadContext")) { - break :ThreadContext root.debug.ThreadContext; - } - - if (native_os == .windows) break :ThreadContext windows.CONTEXT; - if (posix.ucontext_t != void) break :ThreadContext posix.ucontext_t; - - break :ThreadContext noreturn; -}; -/// Updates any internal pointers of a `ThreadContext` after the caller copies it. -pub fn relocateContext(dest: *ThreadContext) void { - switch (native_os) { - .macos => dest.mcontext = &dest.__mcontext_data, - else => {}, - } -} -/// The value which is placed on the stack to make a copy of a `ThreadContext`. -const ThreadContextBuf = if (ThreadContext == noreturn) void else ThreadContext; -/// The pointer through which a `ThreadContext` is received from callers of stack tracing logic. -pub const ThreadContextPtr = if (ThreadContext == noreturn) noreturn else *const ThreadContext; - -/// Capture the current context. The register values in the context will reflect the -/// state after the platform `getcontext` function returns. -/// -/// It is valid to call this if the platform doesn't have context capturing support, -/// in that case `false` will be returned. This function is `inline` so that the `false` -/// is comptime-known at the call site in that case. -pub inline fn getContext(context: *ThreadContextBuf) bool { - // Allow overriding the target's `getContext` by exposing `root.debug.getContext`. 
- if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getContext")) { - return root.debug.getContext(context); - } - - if (native_os == .windows) { - context.* = std.mem.zeroes(windows.CONTEXT); - windows.ntdll.RtlCaptureContext(context); - return true; - } - - if (@TypeOf(posix.system.getcontext) != void) { - if (posix.system.getcontext(context) != 0) return false; - if (native_os == .macos) { - assert(context.mcsize == @sizeOf(std.c.mcontext_t)); - - // On aarch64-macos, the system getcontext doesn't write anything into the pc - // register slot, it only writes lr. This makes the context consistent with - // other aarch64 getcontext implementations which write the current lr - // (where getcontext will return to) into both the lr and pc slot of the context. - if (native_arch == .aarch64) context.mcontext.ss.pc = context.mcontext.ss.lr; - } - return true; - } - - return false; -} +/// The pointer through which a `cpu_context.Native` is received from callers of stack tracing logic. +pub const CpuContextPtr = if (cpu_context.Native == noreturn) noreturn else *const cpu_context.Native; /// Invokes detectable illegal behavior when `ok` is `false`. /// @@ -616,10 +559,10 @@ pub const StackUnwindOptions = struct { /// used to omit intermediate handling code (for instance, a panic handler and its machinery) /// from stack traces. first_address: ?usize = null, - /// If not `null`, we will unwind from this `ThreadContext` instead of the current top of the - /// stack. The main use case here is printing stack traces from signal handlers, where the - /// kernel provides a `*const ThreadContext` of the state before the signal. - context: ?ThreadContextPtr = null, + /// If not `null`, we will unwind from this `cpu_context.Native` instead of the current top of + /// the stack. The main use case here is printing stack traces from signal handlers, where the + /// kernel provides a `*const cpu_context.Native` of the state before the signal. 
+ context: ?CpuContextPtr = null, /// If `true`, stack unwinding strategies which may cause crashes are used as a last resort. /// If `false`, only known-safe mechanisms will be attempted. allow_unsafe_unwind: bool = false, @@ -630,8 +573,7 @@ pub const StackUnwindOptions = struct { /// /// See `writeCurrentStackTrace` to immediately print the trace instead of capturing it. pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) std.builtin.StackTrace { - var context_buf: ThreadContextBuf = undefined; - var it = StackIterator.init(options.context, &context_buf) catch { + var it = StackIterator.init(options.context) catch { return .{ .index = 0, .instruction_addresses = &.{} }; }; defer it.deinit(); @@ -670,14 +612,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ return; }, }; - var context_buf: ThreadContextBuf = undefined; - var it = StackIterator.init(options.context, &context_buf) catch |err| switch (err) { - error.OutOfMemory => { - tty_config.setColor(writer, .dim) catch {}; - try writer.print("Cannot print stack trace: out of memory\n", .{}); - tty_config.setColor(writer, .reset) catch {}; - return; - }, + var it = StackIterator.init(options.context) catch |err| switch (err) { error.CannotUnwindFromContext => { tty_config.setColor(writer, .dim) catch {}; try writer.print("Cannot print stack trace: context unwind unavailable for target\n", .{}); @@ -794,9 +729,9 @@ const StackIterator = union(enum) { fp: usize, /// It is important that this function is marked `inline` so that it can safely use - /// `@frameAddress` and `getContext` as the caller's stack frame and our own are one - /// and the same. - inline fn init(context_opt: ?ThreadContextPtr, context_buf: *ThreadContextBuf) error{ OutOfMemory, CannotUnwindFromContext }!StackIterator { + /// `@frameAddress` and `cpu_context.Native.current` as the caller's stack frame and + /// our own are one and the same. 
+ inline fn init(opt_context_ptr: ?CpuContextPtr) error{CannotUnwindFromContext}!StackIterator { if (builtin.cpu.arch.isSPARC()) { // Flush all the register windows on stack. if (builtin.cpu.has(.sparc, .v9)) { @@ -805,14 +740,12 @@ const StackIterator = union(enum) { asm volatile ("ta 3" ::: .{ .memory = true }); // ST_FLUSH_WINDOWS } } - if (context_opt) |context| { + if (opt_context_ptr) |context_ptr| { if (!SelfInfo.supports_unwinding) return error.CannotUnwindFromContext; - context_buf.* = context.*; - relocateContext(context_buf); - return .{ .di = try .init(context_buf, getDebugInfoAllocator()) }; + return .{ .di = .init(context_ptr) }; } - if (SelfInfo.supports_unwinding and getContext(context_buf)) { - return .{ .di = try .init(context_buf, getDebugInfoAllocator()) }; + if (SelfInfo.supports_unwinding and cpu_context.Native != noreturn) { + return .{ .di = .init(&.current()) }; } return .{ .fp = @frameAddress() }; } @@ -1212,7 +1145,7 @@ pub const have_segfault_handling_support = switch (native_os) { .windows, => true, - .freebsd, .openbsd => ThreadContext != noreturn, + .freebsd, .openbsd => cpu_context.Native != noreturn, else => false, }; @@ -1309,33 +1242,8 @@ fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopa }; break :info .{ addr, name }; }; - - if (ThreadContext == noreturn) return handleSegfault(addr, name, null); - - // Some kernels don't align `ctx_ptr` properly, so we'll copy it into a local buffer. - var copied_ctx: posix.ucontext_t = undefined; - const orig_ctx: *align(1) posix.ucontext_t = @ptrCast(ctx_ptr); - copied_ctx = orig_ctx.*; - if (builtin.os.tag.isDarwin() and builtin.cpu.arch == .aarch64) { - // The kernel incorrectly writes the contents of `__mcontext_data` right after `mcontext`, - // rather than after the 8 bytes of padding that are supposed to sit between the two. Copy the - // contents to the right place so that the `mcontext` pointer will be correct after the - // `relocateContext` call below. 
- const WrittenContext = extern struct { - onstack: c_int, - sigmask: std.c.sigset_t, - stack: std.c.stack_t, - link: ?*std.c.ucontext_t, - mcsize: u64, - mcontext: *std.c.mcontext_t, - __mcontext_data: std.c.mcontext_t align(@sizeOf(usize)), // Disable padding after `mcontext`. - }; - const written_ctx: *align(1) WrittenContext = @ptrCast(ctx_ptr); - copied_ctx.__mcontext_data = written_ctx.__mcontext_data; - } - relocateContext(&copied_ctx); - - handleSegfault(addr, name, &copied_ctx); + const opt_cpu_context: ?cpu_context.Native = cpu_context.fromPosixSignalContext(ctx_ptr); + handleSegfault(addr, name, if (opt_cpu_context) |*ctx| ctx else null); } fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(.winapi) c_long { @@ -1347,10 +1255,10 @@ fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) callconv(.winapi) c_ windows.EXCEPTION_STACK_OVERFLOW => .{ "Stack overflow", null }, else => return windows.EXCEPTION_CONTINUE_SEARCH, }; - handleSegfault(addr, name, info.ContextRecord); + handleSegfault(addr, name, &cpu_context.fromWindowsContext(info.ContextRecord)); } -fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?ThreadContextPtr) noreturn { +fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?CpuContextPtr) noreturn { // Allow overriding the target-agnostic segfault handler by exposing `root.debug.handleSegfault`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "handleSegfault")) { return root.debug.handleSegfault(addr, name, opt_ctx); @@ -1358,7 +1266,7 @@ fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?ThreadContextPtr) no return defaultHandleSegfault(addr, name, opt_ctx); } -pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?ThreadContextPtr) noreturn { +pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?CpuContextPtr) noreturn { // There is very similar logic to the following in `defaultPanic`. 
switch (panic_stage) { 0 => { diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 9a70746b0ad3..655a65b709fa 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -27,7 +27,6 @@ const Reader = std.Io.Reader; const Dwarf = @This(); pub const expression = @import("Dwarf/expression.zig"); -pub const abi = @import("Dwarf/abi.zig"); pub const call_frame = @import("Dwarf/call_frame.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); @@ -1415,7 +1414,7 @@ pub fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader { } /// Returns the DWARF register number for an x86_64 register number found in compact unwind info -pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { +pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u16 { return switch (unwind_reg_number) { 1 => 3, // RBX 2 => 12, // R12 @@ -1427,6 +1426,60 @@ pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { }; } +/// Returns `null` for CPU architectures without an instruction pointer register. +pub fn ipRegNum(arch: std.Target.Cpu.Arch) ?u16 { + return switch (arch) { + .x86 => 8, + .x86_64 => 16, + .arm, .armeb, .thumb, .thumbeb => 15, + .aarch64, .aarch64_be => 32, + else => null, + }; +} + +pub fn fpRegNum(arch: std.Target.Cpu.Arch) u16 { + return switch (arch) { + .x86 => 5, + .x86_64 => 6, + .arm, .armeb, .thumb, .thumbeb => 11, + .aarch64, .aarch64_be => 29, + else => unreachable, + }; +} + +pub fn spRegNum(arch: std.Target.Cpu.Arch) u16 { + return switch (arch) { + .x86 => 4, + .x86_64 => 7, + .arm, .armeb, .thumb, .thumbeb => 13, + .aarch64, .aarch64_be => 31, + else => unreachable, + }; +} + +/// Tells whether unwinding for this target is supported by the Dwarf standard. +/// +/// See also `std.debug.SelfInfo.supports_unwinding` which tells whether the Zig +/// standard library has a working implementation of unwinding for this target. 
+pub fn supportsUnwinding(target: *const std.Target) bool { + return switch (target.cpu.arch) { + .amdgcn, + .nvptx, + .nvptx64, + .spirv32, + .spirv64, + => false, + + // Enabling this causes relocation errors such as: + // error: invalid relocation type R_RISCV_SUB32 at offset 0x20 + .riscv64, .riscv64be, .riscv32, .riscv32be => false, + + // Conservative guess. Feel free to update this logic with any targets + // that are known to not support Dwarf unwinding. + else => true, + }; +} + /// This function is to make it handy to comment out the return and make it /// into a crash when working on this file. pub fn bad() error{InvalidDebugInfo} { diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig deleted file mode 100644 index 98a84392e641..000000000000 --- a/lib/std/debug/Dwarf/abi.zig +++ /dev/null @@ -1,351 +0,0 @@ -const builtin = @import("builtin"); - -const std = @import("../../std.zig"); -const mem = std.mem; -const posix = std.posix; -const Arch = std.Target.Cpu.Arch; - -/// Tells whether unwinding for this target is supported by the Dwarf standard. -/// -/// See also `std.debug.SelfInfo.supports_unwinding` which tells whether the Zig -/// standard library has a working implementation of unwinding for this target. -pub fn supportsUnwinding(target: *const std.Target) bool { - return switch (target.cpu.arch) { - .amdgcn, - .nvptx, - .nvptx64, - .spirv32, - .spirv64, - => false, - - // Enabling this causes relocation errors such as: - // error: invalid relocation type R_RISCV_SUB32 at offset 0x20 - .riscv64, .riscv64be, .riscv32, .riscv32be => false, - - // Conservative guess. Feel free to update this logic with any targets - // that are known to not support Dwarf unwinding. - else => true, - }; -} - -/// Returns `null` for CPU architectures without an instruction pointer register. 
-pub fn ipRegNum(arch: Arch) ?u8 { - return switch (arch) { - .x86 => 8, - .x86_64 => 16, - .arm, .armeb, .thumb, .thumbeb => 15, - .aarch64, .aarch64_be => 32, - else => null, - }; -} - -pub fn fpRegNum(arch: Arch, reg_context: RegisterContext) u8 { - return switch (arch) { - // GCC on OS X historically did the opposite of ELF for these registers - // (only in .eh_frame), and that is now the convention for MachO - .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, - .x86_64 => 6, - .arm, .armeb, .thumb, .thumbeb => 11, - .aarch64, .aarch64_be => 29, - else => unreachable, - }; -} - -pub fn spRegNum(arch: Arch, reg_context: RegisterContext) u8 { - return switch (arch) { - .x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4, - .x86_64 => 7, - .arm, .armeb, .thumb, .thumbeb => 13, - .aarch64, .aarch64_be => 31, - else => unreachable, - }; -} - -pub const RegisterContext = struct { - eh_frame: bool, - is_macho: bool, -}; - -pub const RegBytesError = error{ - InvalidRegister, - UnimplementedArch, - UnimplementedOs, - RegisterContextRequired, - ThreadContextNotSupported, -}; - -/// Returns a slice containing the backing storage for `reg_number`. -/// -/// This function assumes the Dwarf information corresponds not necessarily to -/// the current executable, but at least with a matching CPU architecture and -/// OS. It is planned to lift this limitation with a future enhancement. -/// -/// `reg_context` describes in what context the register number is used, as it can have different -/// meanings depending on the DWARF container. It is only required when getting the stack or -/// frame pointer register on some architectures. 
-pub fn regBytes( - thread_context_ptr: *std.debug.ThreadContext, - reg_number: u8, - reg_context: ?RegisterContext, -) RegBytesError![]u8 { - if (builtin.os.tag == .windows) { - return switch (builtin.cpu.arch) { - .x86 => switch (reg_number) { - 0 => mem.asBytes(&thread_context_ptr.Eax), - 1 => mem.asBytes(&thread_context_ptr.Ecx), - 2 => mem.asBytes(&thread_context_ptr.Edx), - 3 => mem.asBytes(&thread_context_ptr.Ebx), - 4 => mem.asBytes(&thread_context_ptr.Esp), - 5 => mem.asBytes(&thread_context_ptr.Ebp), - 6 => mem.asBytes(&thread_context_ptr.Esi), - 7 => mem.asBytes(&thread_context_ptr.Edi), - 8 => mem.asBytes(&thread_context_ptr.Eip), - 9 => mem.asBytes(&thread_context_ptr.EFlags), - 10 => mem.asBytes(&thread_context_ptr.SegCs), - 11 => mem.asBytes(&thread_context_ptr.SegSs), - 12 => mem.asBytes(&thread_context_ptr.SegDs), - 13 => mem.asBytes(&thread_context_ptr.SegEs), - 14 => mem.asBytes(&thread_context_ptr.SegFs), - 15 => mem.asBytes(&thread_context_ptr.SegGs), - else => error.InvalidRegister, - }, - .x86_64 => switch (reg_number) { - 0 => mem.asBytes(&thread_context_ptr.Rax), - 1 => mem.asBytes(&thread_context_ptr.Rdx), - 2 => mem.asBytes(&thread_context_ptr.Rcx), - 3 => mem.asBytes(&thread_context_ptr.Rbx), - 4 => mem.asBytes(&thread_context_ptr.Rsi), - 5 => mem.asBytes(&thread_context_ptr.Rdi), - 6 => mem.asBytes(&thread_context_ptr.Rbp), - 7 => mem.asBytes(&thread_context_ptr.Rsp), - 8 => mem.asBytes(&thread_context_ptr.R8), - 9 => mem.asBytes(&thread_context_ptr.R9), - 10 => mem.asBytes(&thread_context_ptr.R10), - 11 => mem.asBytes(&thread_context_ptr.R11), - 12 => mem.asBytes(&thread_context_ptr.R12), - 13 => mem.asBytes(&thread_context_ptr.R13), - 14 => mem.asBytes(&thread_context_ptr.R14), - 15 => mem.asBytes(&thread_context_ptr.R15), - 16 => mem.asBytes(&thread_context_ptr.Rip), - else => error.InvalidRegister, - }, - .aarch64, .aarch64_be => switch (reg_number) { - 0...30 => mem.asBytes(&thread_context_ptr.DUMMYUNIONNAME.X[reg_number]), - 31 => 
mem.asBytes(&thread_context_ptr.Sp), - 32 => mem.asBytes(&thread_context_ptr.Pc), - else => error.InvalidRegister, - }, - else => error.UnimplementedArch, - }; - } - - if (posix.ucontext_t == void) return error.ThreadContextNotSupported; - - const ucontext_ptr = thread_context_ptr; - return switch (builtin.cpu.arch) { - .x86 => switch (builtin.os.tag) { - .linux, .netbsd, .solaris, .illumos => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EAX]), - 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ECX]), - 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EDX]), - 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBX]), - 4...5 => if (reg_context) |r| bytes: { - if (reg_number == 4) { - break :bytes if (r.eh_frame and r.is_macho) - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBP]) - else - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESP]); - } else { - break :bytes if (r.eh_frame and r.is_macho) - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESP]) - else - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBP]); - } - } else error.RegisterContextRequired, - 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESI]), - 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EDI]), - 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EIP]), - 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EFL]), - 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.CS]), - 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.SS]), - 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.DS]), - 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ES]), - 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.FS]), - 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.GS]), - 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs - 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs - else => 
error.InvalidRegister, - }, - else => error.UnimplementedOs, - }, - .x86_64 => switch (builtin.os.tag) { - .linux, .solaris, .illumos => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RAX]), - 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDX]), - 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RCX]), - 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RBX]), - 4 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RSI]), - 5 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDI]), - 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RBP]), - 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RSP]), - 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R8]), - 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R9]), - 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R10]), - 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R11]), - 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R12]), - 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R13]), - 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R14]), - 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R15]), - 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RIP]), - 17...32 => |i| if (builtin.os.tag.isSolarish()) - mem.asBytes(&ucontext_ptr.mcontext.fpregs.chip_state.xmm[i - 17]) - else - mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]), - else => error.InvalidRegister, - }, - .freebsd => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.rax), - 1 => mem.asBytes(&ucontext_ptr.mcontext.rdx), - 2 => mem.asBytes(&ucontext_ptr.mcontext.rcx), - 3 => mem.asBytes(&ucontext_ptr.mcontext.rbx), - 4 => mem.asBytes(&ucontext_ptr.mcontext.rsi), - 5 => mem.asBytes(&ucontext_ptr.mcontext.rdi), - 6 => mem.asBytes(&ucontext_ptr.mcontext.rbp), - 7 => mem.asBytes(&ucontext_ptr.mcontext.rsp), - 8 => mem.asBytes(&ucontext_ptr.mcontext.r8), - 9 => 
mem.asBytes(&ucontext_ptr.mcontext.r9), - 10 => mem.asBytes(&ucontext_ptr.mcontext.r10), - 11 => mem.asBytes(&ucontext_ptr.mcontext.r11), - 12 => mem.asBytes(&ucontext_ptr.mcontext.r12), - 13 => mem.asBytes(&ucontext_ptr.mcontext.r13), - 14 => mem.asBytes(&ucontext_ptr.mcontext.r14), - 15 => mem.asBytes(&ucontext_ptr.mcontext.r15), - 16 => mem.asBytes(&ucontext_ptr.mcontext.rip), - // TODO: Extract xmm state from mcontext.fpstate? - else => error.InvalidRegister, - }, - .openbsd => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.sc_rax), - 1 => mem.asBytes(&ucontext_ptr.sc_rdx), - 2 => mem.asBytes(&ucontext_ptr.sc_rcx), - 3 => mem.asBytes(&ucontext_ptr.sc_rbx), - 4 => mem.asBytes(&ucontext_ptr.sc_rsi), - 5 => mem.asBytes(&ucontext_ptr.sc_rdi), - 6 => mem.asBytes(&ucontext_ptr.sc_rbp), - 7 => mem.asBytes(&ucontext_ptr.sc_rsp), - 8 => mem.asBytes(&ucontext_ptr.sc_r8), - 9 => mem.asBytes(&ucontext_ptr.sc_r9), - 10 => mem.asBytes(&ucontext_ptr.sc_r10), - 11 => mem.asBytes(&ucontext_ptr.sc_r11), - 12 => mem.asBytes(&ucontext_ptr.sc_r12), - 13 => mem.asBytes(&ucontext_ptr.sc_r13), - 14 => mem.asBytes(&ucontext_ptr.sc_r14), - 15 => mem.asBytes(&ucontext_ptr.sc_r15), - 16 => mem.asBytes(&ucontext_ptr.sc_rip), - // TODO: Extract xmm state from sc_fpstate? 
- else => error.InvalidRegister, - }, - .macos, .ios => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.ss.rax), - 1 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdx), - 2 => mem.asBytes(&ucontext_ptr.mcontext.ss.rcx), - 3 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbx), - 4 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsi), - 5 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdi), - 6 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbp), - 7 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsp), - 8 => mem.asBytes(&ucontext_ptr.mcontext.ss.r8), - 9 => mem.asBytes(&ucontext_ptr.mcontext.ss.r9), - 10 => mem.asBytes(&ucontext_ptr.mcontext.ss.r10), - 11 => mem.asBytes(&ucontext_ptr.mcontext.ss.r11), - 12 => mem.asBytes(&ucontext_ptr.mcontext.ss.r12), - 13 => mem.asBytes(&ucontext_ptr.mcontext.ss.r13), - 14 => mem.asBytes(&ucontext_ptr.mcontext.ss.r14), - 15 => mem.asBytes(&ucontext_ptr.mcontext.ss.r15), - 16 => mem.asBytes(&ucontext_ptr.mcontext.ss.rip), - else => error.InvalidRegister, - }, - else => error.UnimplementedOs, - }, - .arm, .armeb, .thumb, .thumbeb => switch (builtin.os.tag) { - .linux => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0), - 1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1), - 2 => mem.asBytes(&ucontext_ptr.mcontext.arm_r2), - 3 => mem.asBytes(&ucontext_ptr.mcontext.arm_r3), - 4 => mem.asBytes(&ucontext_ptr.mcontext.arm_r4), - 5 => mem.asBytes(&ucontext_ptr.mcontext.arm_r5), - 6 => mem.asBytes(&ucontext_ptr.mcontext.arm_r6), - 7 => mem.asBytes(&ucontext_ptr.mcontext.arm_r7), - 8 => mem.asBytes(&ucontext_ptr.mcontext.arm_r8), - 9 => mem.asBytes(&ucontext_ptr.mcontext.arm_r9), - 10 => mem.asBytes(&ucontext_ptr.mcontext.arm_r10), - 11 => mem.asBytes(&ucontext_ptr.mcontext.arm_fp), - 12 => mem.asBytes(&ucontext_ptr.mcontext.arm_ip), - 13 => mem.asBytes(&ucontext_ptr.mcontext.arm_sp), - 14 => mem.asBytes(&ucontext_ptr.mcontext.arm_lr), - 15 => mem.asBytes(&ucontext_ptr.mcontext.arm_pc), - // CPSR is not allocated a register number (See: 
https://github.com/ARM-software/abi-aa/blob/main/aadwarf32/aadwarf32.rst, Section 4.1) - else => error.InvalidRegister, - }, - else => error.UnimplementedOs, - }, - .aarch64, .aarch64_be => switch (builtin.os.tag) { - .macos, .ios, .watchos => switch (reg_number) { - 0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]), - 29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp), - 30 => mem.asBytes(&ucontext_ptr.mcontext.ss.lr), - 31 => mem.asBytes(&ucontext_ptr.mcontext.ss.sp), - 32 => mem.asBytes(&ucontext_ptr.mcontext.ss.pc), - - // TODO: Find storage for this state - //34 => mem.asBytes(&ucontext_ptr.ra_sign_state), - - // V0-V31 - 64...95 => mem.asBytes(&ucontext_ptr.mcontext.ns.q[reg_number - 64]), - else => error.InvalidRegister, - }, - .netbsd => switch (reg_number) { - 0...34 => mem.asBytes(&ucontext_ptr.mcontext.gregs[reg_number]), - else => error.InvalidRegister, - }, - .freebsd => switch (reg_number) { - 0...29 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.x[reg_number]), - 30 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.lr), - 31 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.sp), - - // TODO: This seems wrong, but it was in the previous debug.zig code for mapping PC, check this - 32 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.elr), - - else => error.InvalidRegister, - }, - .openbsd => switch (reg_number) { - 0...30 => mem.asBytes(&ucontext_ptr.sc_x[reg_number]), - 31 => mem.asBytes(&ucontext_ptr.sc_sp), - 32 => mem.asBytes(&ucontext_ptr.sc_lr), - 33 => mem.asBytes(&ucontext_ptr.sc_elr), - 34 => mem.asBytes(&ucontext_ptr.sc_spsr), - else => error.InvalidRegister, - }, - else => switch (reg_number) { - 0...30 => mem.asBytes(&ucontext_ptr.mcontext.regs[reg_number]), - 31 => mem.asBytes(&ucontext_ptr.mcontext.sp), - 32 => mem.asBytes(&ucontext_ptr.mcontext.pc), - else => error.InvalidRegister, - }, - }, - else => error.UnimplementedArch, - }; -} - -/// Returns a pointer to a register stored in a ThreadContext, preserving the -/// pointer attributes of 
the context. -pub fn regValueNative( - thread_context_ptr: *std.debug.ThreadContext, - reg_number: u8, - reg_context: ?RegisterContext, -) !*align(1) usize { - const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); - if (@sizeOf(usize) != reg_bytes.len) return error.IncompatibleRegisterSize; - return @ptrCast(reg_bytes); -} diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index 68b49587c20e..b1751af7498a 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -5,12 +5,17 @@ const native_endian = native_arch.endian(); const std = @import("std"); const leb = std.leb; const OP = std.dwarf.OP; -const abi = std.debug.Dwarf.abi; const mem = std.mem; const assert = std.debug.assert; const testing = std.testing; const Writer = std.Io.Writer; +const regNative = std.debug.SelfInfo.DwarfUnwindContext.regNative; + +const ip_reg_num = std.debug.Dwarf.ipRegNum(native_arch).?; +const fp_reg_num = std.debug.Dwarf.fpRegNum(native_arch); +const sp_reg_num = std.debug.Dwarf.spRegNum(native_arch); + /// Expressions can be evaluated in different contexts, each requiring its own set of inputs. /// Callers should specify all the fields relevant to their context. If a field is required /// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned. 
@@ -23,9 +28,7 @@ pub const Context = struct { object_address: ?*const anyopaque = null, /// .debug_addr section debug_addr: ?[]const u8 = null, - /// Thread context - thread_context: ?*std.debug.ThreadContext = null, - reg_context: ?abi.RegisterContext = null, + cpu_context: ?*std.debug.cpu_context.Native = null, /// Call frame address, if in a CFI context cfa: ?usize = null, /// This expression is a sub-expression from an OP.entry_value instruction @@ -62,7 +65,9 @@ pub const Error = error{ InvalidTypeLength, TruncatedIntegralType, -} || abi.RegBytesError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero, ReadFailed }; + + IncompatibleRegisterSize, +} || std.debug.cpu_context.DwarfRegisterError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero, ReadFailed }; /// A stack machine that can decode and run DWARF expressions. /// Expressions can be decoded for non-native address size and endianness, @@ -369,29 +374,20 @@ pub fn StackMachine(comptime options: Options) type { OP.breg0...OP.breg31, OP.bregx, => { - if (context.thread_context == null) return error.IncompleteExpressionContext; - - const base_register = operand.?.base_register; - var value: i64 = @intCast(mem.readInt(usize, (try abi.regBytes( - context.thread_context.?, - base_register.base_register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian)); - value += base_register.offset; - try self.stack.append(allocator, .{ .generic = @intCast(value) }); + const cpu_context = context.cpu_context orelse return error.IncompleteExpressionContext; + + const br = operand.?.base_register; + const value: i64 = @intCast((try regNative(cpu_context, br.base_register)).*); + try self.stack.append(allocator, .{ .generic = @intCast(value + br.offset) }); }, OP.regval_type => { - const register_type = operand.?.register_type; - const value = mem.readInt(usize, (try abi.regBytes( - context.thread_context.?, - register_type.register, - context.reg_context, - ))[0..@sizeOf(usize)], 
native_endian); + const cpu_context = context.cpu_context orelse return error.IncompleteExpressionContext; + const rt = operand.?.register_type; try self.stack.append(allocator, .{ .regval_type = .{ - .type_offset = register_type.type_offset, + .type_offset = rt.type_offset, .type_size = @sizeOf(addr_type), - .value = value, + .value = (try regNative(cpu_context, rt.register)).*, }, }); }, @@ -734,14 +730,14 @@ pub fn StackMachine(comptime options: Options) type { // TODO: The spec states that this sub-expression needs to observe the state (ie. registers) // as it was upon entering the current subprogram. If this isn't being called at the - // end of a frame unwind operation, an additional ThreadContext with this state will be needed. + // end of a frame unwind operation, an additional cpu_context.Native with this state will be needed. if (isOpcodeRegisterLocation(block[0])) { - if (context.thread_context == null) return error.IncompleteExpressionContext; + const cpu_context = context.cpu_context orelse return error.IncompleteExpressionContext; var block_stream: std.Io.Reader = .fixed(block); const register = (try readOperand(&block_stream, block[0], context)).?.register; - const value = mem.readInt(usize, (try abi.regBytes(context.thread_context.?, register, context.reg_context))[0..@sizeOf(usize)], native_endian); + const value = (try regNative(cpu_context, register)).*; try self.stack.append(allocator, .{ .generic = value }); } else { var stack_machine: Self = .{}; @@ -1149,55 +1145,39 @@ test "basics" { } // Register values - if (@sizeOf(std.debug.ThreadContext) != 0) { + if (std.debug.cpu_context.Native != noreturn) { stack_machine.reset(); program.clearRetainingCapacity(); - const reg_context = abi.RegisterContext{ - .eh_frame = true, - .is_macho = builtin.os.tag == .macos, - }; - var thread_context: std.debug.ThreadContext = undefined; - std.debug.relocateContext(&thread_context); + var cpu_context: std.debug.cpu_context.Native = undefined; const context = 
Context{ - .thread_context = &thread_context, - .reg_context = reg_context, + .cpu_context = &cpu_context, }; - // Only test register operations on arch / os that have them implemented - if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { - - // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it - - mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); - (try abi.regValueNative(&thread_context, abi.fpRegNum(native_arch, reg_context), reg_context)).* = 1; - (try abi.regValueNative(&thread_context, abi.spRegNum(native_arch, reg_context), reg_context)).* = 2; - (try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch).?, reg_context)).* = 3; - - try b.writeBreg(writer, abi.fpRegNum(native_arch, reg_context), @as(usize, 100)); - try b.writeBreg(writer, abi.spRegNum(native_arch, reg_context), @as(usize, 200)); - try b.writeBregx(writer, abi.ipRegNum(native_arch).?, @as(usize, 300)); - try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); - - _ = try stack_machine.run(program.written(), allocator, context, 0); - - const regval_type = stack_machine.stack.pop().?.regval_type; - try testing.expectEqual(@as(usize, 400), regval_type.type_offset); - try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); - try testing.expectEqual(@as(usize, 0xee), regval_type.value); - - try testing.expectEqual(@as(usize, 303), stack_machine.stack.pop().?.generic); - try testing.expectEqual(@as(usize, 202), stack_machine.stack.pop().?.generic); - try testing.expectEqual(@as(usize, 101), stack_machine.stack.pop().?.generic); - } else |err| { - switch (err) { - error.UnimplementedArch, - error.UnimplementedOs, - error.ThreadContextNotSupported, - => {}, - else => return err, - } - } + const reg_bytes = try cpu_context.dwarfRegisterBytes(0); + + // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it + + mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, 
native_endian); + (try regNative(&cpu_context, fp_reg_num)).* = 1; + (try regNative(&cpu_context, sp_reg_num)).* = 2; + (try regNative(&cpu_context, ip_reg_num)).* = 3; + + try b.writeBreg(writer, fp_reg_num, @as(usize, 100)); + try b.writeBreg(writer, sp_reg_num, @as(usize, 200)); + try b.writeBregx(writer, ip_reg_num, @as(usize, 300)); + try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); + + _ = try stack_machine.run(program.written(), allocator, context, 0); + + const regval_type = stack_machine.stack.pop().?.regval_type; + try testing.expectEqual(@as(usize, 400), regval_type.type_offset); + try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); + try testing.expectEqual(@as(usize, 0xee), regval_type.value); + + try testing.expectEqual(@as(usize, 303), stack_machine.stack.pop().?.generic); + try testing.expectEqual(@as(usize, 202), stack_machine.stack.pop().?.generic); + try testing.expectEqual(@as(usize, 101), stack_machine.stack.pop().?.generic); } // Stack operations @@ -1585,38 +1565,24 @@ test "basics" { } // Register location description - const reg_context = abi.RegisterContext{ - .eh_frame = true, - .is_macho = builtin.os.tag == .macos, - }; - var thread_context: std.debug.ThreadContext = undefined; - std.debug.relocateContext(&thread_context); + var cpu_context: std.debug.cpu_context.Native = undefined; + std.debug.relocateContext(&cpu_context); context = Context{ - .thread_context = &thread_context, - .reg_context = reg_context, + .cpu_context = &cpu_context, }; - if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { - mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); + const reg_bytes = try cpu_context.dwarfRegisterBytes(0); + mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); - var sub_program: std.Io.Writer.Allocating = .init(allocator); - defer sub_program.deinit(); - const sub_writer = &sub_program.writer; - try b.writeReg(sub_writer, 0); + var sub_program: 
std.Io.Writer.Allocating = .init(allocator); + defer sub_program.deinit(); + const sub_writer = &sub_program.writer; + try b.writeReg(sub_writer, 0); - stack_machine.reset(); - program.clearRetainingCapacity(); - try b.writeEntryValue(writer, sub_program.written()); - _ = try stack_machine.run(program.written(), allocator, context, null); - try testing.expectEqual(@as(usize, 0xee), stack_machine.stack.pop().?.generic); - } else |err| { - switch (err) { - error.UnimplementedArch, - error.UnimplementedOs, - error.ThreadContextNotSupported, - => {}, - else => return err, - } - } + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeEntryValue(writer, sub_program.written()); + _ = try stack_machine.run(program.written(), allocator, context, null); + try testing.expectEqual(@as(usize, 0xee), stack_machine.stack.pop().?.generic); } } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index c3243edeb92d..321e67bb7c19 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -11,8 +11,7 @@ const mem = std.mem; const Allocator = std.mem.Allocator; const assert = std.debug.assert; const Dwarf = std.debug.Dwarf; -const regBytes = Dwarf.abi.regBytes; -const regValueNative = Dwarf.abi.regValueNative; +const CpuContext = std.debug.cpu_context.Native; const root = @import("root"); @@ -38,8 +37,6 @@ pub const Error = error{ pub const target_supported: bool = Module != void; /// Indicates whether the `SelfInfo` implementation has support for unwinding on this target. -/// -/// For whether DWARF unwinding is *theoretically* possible, see `Dwarf.abi.supportsUnwinding`. pub const supports_unwinding: bool = target_supported and Module.supports_unwinding; pub const UnwindContext = if (supports_unwinding) Module.UnwindContext; @@ -120,7 +117,7 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// pub const UnwindContext = struct { /// /// A PC value inside the function of the last unwound frame. 
/// pc: usize, -/// pub fn init(tc: *std.debug.ThreadContext, gpa: Allocator) Allocator.Error!UnwindContext; +/// pub fn init(ctx: *std.debug.cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext; /// pub fn deinit(uc: *UnwindContext, gpa: Allocator) void; /// /// Returns the frame pointer associated with the last unwound stack frame. If the frame /// /// pointer is unknown, 0 may be returned instead. @@ -141,9 +138,26 @@ const Module: type = Module: { break :Module root.debug.Module; } break :Module switch (native_os) { - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => @import("SelfInfo/ElfModule.zig"), - .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"), - .uefi, .windows => @import("SelfInfo/WindowsModule.zig"), + .linux, + .netbsd, + .freebsd, + .dragonfly, + .openbsd, + .solaris, + .illumos, + => @import("SelfInfo/ElfModule.zig"), + + .macos, + .ios, + .watchos, + .tvos, + .visionos, + => @import("SelfInfo/DarwinModule.zig"), + + .uefi, + .windows, + => @import("SelfInfo/WindowsModule.zig"), + else => void, }; }; @@ -153,26 +167,25 @@ const Module: type = Module: { pub const DwarfUnwindContext = struct { cfa: ?usize, pc: usize, - thread_context: *std.debug.ThreadContext, - reg_context: Dwarf.abi.RegisterContext, + cpu_context: CpuContext, vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - pub fn init(thread_context: *std.debug.ThreadContext, gpa: Allocator) error{}!DwarfUnwindContext { + pub fn init(cpu_context: *const CpuContext) DwarfUnwindContext { comptime assert(supports_unwinding); - _ = gpa; - const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; - const raw_pc_ptr = regValueNative(thread_context, ip_reg_num, null) catch { - unreachable; // error means unsupported, in which case `supports_unwinding` should have been `false` + // `@constCast` is safe because we aren't going to store to the resulting pointer. 
+ const raw_pc_ptr = regNative(@constCast(cpu_context), ip_reg_num) catch |err| switch (err) { + error.InvalidRegister => unreachable, // `ip_reg_num` is definitely valid + error.UnsupportedRegister => unreachable, // the implementation needs to support ip + error.IncompatibleRegisterSize => unreachable, // ip is definitely `usize`-sized }; const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*); return .{ .cfa = null, .pc = pc, - .thread_context = thread_context, - .reg_context = undefined, + .cpu_context = cpu_context.*, .vm = .{}, .stack_machine = .{}, }; @@ -185,17 +198,25 @@ pub const DwarfUnwindContext = struct { } pub fn getFp(self: *const DwarfUnwindContext) usize { - return (regValueNative(self.thread_context, Dwarf.abi.fpRegNum(native_arch, self.reg_context), self.reg_context) catch return 0).*; + // `@constCast` is safe because we aren't going to store to the resulting pointer. + const ptr = regNative(@constCast(&self.cpu_context), fp_reg_num) catch |err| switch (err) { + error.InvalidRegister => unreachable, // `fp_reg_num` is definitely valid + error.UnsupportedRegister => unreachable, // the implementation needs to support fp + error.IncompatibleRegisterSize => unreachable, // fp is a pointer so is `usize`-sized + }; + return ptr.*; } - /// Resolves the register rule and places the result into `out` (see regBytes) + /// Resolves the register rule and places the result into `out` (see regBytes). Returns `true` + /// iff the rule was undefined. This is *not* the same as `col.rule == .undefined`, because the + /// default rule may be undefined. 
pub fn resolveRegisterRule( context: *DwarfUnwindContext, gpa: Allocator, col: Dwarf.Unwind.VirtualMachine.Column, expression_context: std.debug.Dwarf.expression.Context, out: []u8, - ) !void { + ) !bool { switch (col.rule) { .default => { const register = col.register orelse return error.InvalidRegister; @@ -203,58 +224,74 @@ pub const DwarfUnwindContext = struct { // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`. if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 18) { // Callee-saved registers are initialized as if they had the .same_value rule - const src = try regBytes(context.thread_context, register, context.reg_context); + const src = try context.cpu_context.dwarfRegisterBytes(register); if (src.len != out.len) return error.RegisterSizeMismatch; @memcpy(out, src); - return; + return false; } @memset(out, undefined); + return true; }, .undefined => { @memset(out, undefined); + return true; }, .same_value => { // TODO: This copy could be eliminated if callers always copy the state then call this function to update it const register = col.register orelse return error.InvalidRegister; - const src = try regBytes(context.thread_context, register, context.reg_context); + const src = try context.cpu_context.dwarfRegisterBytes(register); if (src.len != out.len) return error.RegisterSizeMismatch; @memcpy(out, src); + return false; }, .offset => |offset| { - if (context.cfa) |cfa| { - const addr = try applyOffset(cfa, offset); - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - } else return error.InvalidCFA; + const cfa = context.cfa orelse return error.InvalidCFA; + const addr = try applyOffset(cfa, offset); + const ptr: *const usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + return false; }, .val_offset => |offset| { - if (context.cfa) |cfa| { - mem.writeInt(usize, out[0..@sizeOf(usize)], try 
applyOffset(cfa, offset), native_endian); - } else return error.InvalidCFA; + const cfa = context.cfa orelse return error.InvalidCFA; + mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); + return false; }, .register => |register| { - const src = try regBytes(context.thread_context, register, context.reg_context); + const src = try context.cpu_context.dwarfRegisterBytes(register); if (src.len != out.len) return error.RegisterSizeMismatch; @memcpy(out, src); + return false; }, .expression => |expression| { context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, gpa, expression_context, context.cfa.?); - const addr = if (value) |v| blk: { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - + const value = try context.stack_machine.run( + expression, + gpa, + expression_context, + context.cfa.?, + ) orelse return error.NoExpressionValue; + const addr = switch (value) { + .generic => |addr| addr, + else => return error.InvalidExpressionValue, + }; const ptr: *usize = @ptrFromInt(addr); mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + return false; }, .val_expression => |expression| { context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, gpa, expression_context, context.cfa.?); - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); - } else return error.NoExpressionValue; + const value = try context.stack_machine.run( + expression, + gpa, + expression_context, + context.cfa.?, + ) orelse return error.NoExpressionValue; + const val_raw = switch (value) { + .generic => |raw| raw, + else => return error.InvalidExpressionValue, + }; + mem.writeInt(usize, out[0..@sizeOf(usize)], val_raw, native_endian); + return false; }, .architectural => return error.UnimplementedRegisterRule, } @@ 
-277,9 +314,6 @@ pub const DwarfUnwindContext = struct { return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, - error.UnimplementedArch, - error.UnimplementedOs, - error.ThreadContextNotSupported, error.UnimplementedRegisterRule, error.UnsupportedAddrSize, error.UnsupportedDwarfVersion, @@ -289,10 +323,10 @@ pub const DwarfUnwindContext = struct { error.UnimplementedTypedComparison, error.UnimplementedTypeConversion, error.UnknownExpressionOpcode, + error.UnsupportedRegister, => return error.UnsupportedDebugInfo, error.InvalidRegister, - error.RegisterContextRequired, error.ReadFailed, error.EndOfStream, error.IncompatibleRegisterSize, @@ -346,20 +380,17 @@ pub const DwarfUnwindContext = struct { // may not reference other debug sections anyway. var expression_context: Dwarf.expression.Context = .{ .format = format, - .thread_context = context.thread_context, - .reg_context = context.reg_context, + .cpu_context = &context.cpu_context, .cfa = context.cfa, }; context.vm.reset(); - context.reg_context.eh_frame = cie.version != 4; - context.reg_context.is_macho = native_os.isDarwin(); const row = try context.vm.runTo(gpa, pc_vaddr, cie, fde, @sizeOf(usize), native_endian); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; - const value = (try regValueNative(context.thread_context, register, context.reg_context)).*; + const value = (try regNative(&context.cpu_context, register)).*; break :blk try applyOffset(value, offset); }, .expression => |expr| blk: { @@ -381,73 +412,41 @@ pub const DwarfUnwindContext = struct { expression_context.cfa = context.cfa; - // Buffering the modifications is done because copying the thread context is not portable, - // some implementations (ie. darwin) use internal pointers to the mcontext. 
- var arena: std.heap.ArenaAllocator = .init(gpa); - defer arena.deinit(); - const update_arena = arena.allocator(); - - const RegisterUpdate = struct { - // Backed by thread_context - dest: []u8, - // Backed by arena - src: []const u8, - prev: ?*@This(), - }; - - var update_tail: ?*RegisterUpdate = null; var has_return_address = true; + + // Create a copy of the CPU context, to which we will apply the new rules. + var new_cpu_context = context.cpu_context; + + // On all implemented architectures, the CFA is defined as being the previous frame's SP + (try regNative(&new_cpu_context, sp_reg_num)).* = context.cfa.?; + for (context.vm.rowColumns(row)) |column| { if (column.register) |register| { + const dest = try new_cpu_context.dwarfRegisterBytes(register); + const rule_undef = try context.resolveRegisterRule(gpa, column, expression_context, dest); if (register == cie.return_address_register) { - has_return_address = column.rule != .undefined; + has_return_address = !rule_undef; } - - const dest = try regBytes(context.thread_context, register, context.reg_context); - const src = try update_arena.alloc(u8, dest.len); - try context.resolveRegisterRule(gpa, column, expression_context, src); - - const new_update = try update_arena.create(RegisterUpdate); - new_update.* = .{ - .dest = dest, - .src = src, - .prev = update_tail, - }; - update_tail = new_update; } } - // On all implemented architectures, the CFA is defined as being the previous frame's SP - (try regValueNative(context.thread_context, Dwarf.abi.spRegNum(native_arch, context.reg_context), context.reg_context)).* = context.cfa.?; - - while (update_tail) |tail| { - @memcpy(tail.dest, tail.src); - update_tail = tail.prev; - } + const return_address: u64 = if (has_return_address) pc: { + const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register); + break :pc stripInstructionPtrAuthCode(raw_ptr.*); + } else 0; - if (has_return_address) { - context.pc = stripInstructionPtrAuthCode((try 
regValueNative( - context.thread_context, - cie.return_address_register, - context.reg_context, - )).*); - } else { - context.pc = 0; - } + (try regNative(&new_cpu_context, ip_reg_num)).* = return_address; - const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; - (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc; + // The new CPU context is complete; flush changes. + context.cpu_context = new_cpu_context; - // The call instruction will have pushed the address of the instruction that follows the call as the return address. - // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in - // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up - // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, - // we subtract one so that the next lookup is guaranteed to land inside the - // - // The exception to this rule is signal frames, where we return execution would be returned to the instruction - // that triggered the handler. - const return_address = context.pc; - if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1; + // Also update the stored pc. However, because `return_address` points to the instruction + // *after* the call, it could (in the case of noreturn functions) actually point outside of + // the caller's address range, meaning an FDE lookup would fail. We can handle this by + // subtracting 1 from `return_address` so that the next lookup is guaranteed to land inside + // the `call` instruction. The exception to this rule is signal frames, where the return + // address is the same instruction that triggered the handler. 
+ context.pc = if (cie.is_signal_frame) return_address else return_address -| 1; return return_address; } @@ -479,4 +478,18 @@ pub const DwarfUnwindContext = struct { return ptr; } + + pub fn regNative(ctx: *CpuContext, num: u16) error{ + InvalidRegister, + UnsupportedRegister, + IncompatibleRegisterSize, + }!*align(1) usize { + const bytes = try ctx.dwarfRegisterBytes(num); + if (bytes.len != @sizeOf(usize)) return error.IncompatibleRegisterSize; + return @ptrCast(bytes); + } + + const ip_reg_num = Dwarf.ipRegNum(native_arch).?; + const fp_reg_num = Dwarf.fpRegNum(native_arch); + const sp_reg_num = Dwarf.spRegNum(native_arch); }; diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 5bce65b89f0d..fc2f1c89bb46 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -265,12 +265,9 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, error.OutOfMemory, error.Unexpected, => |e| return e, - error.UnimplementedArch, - error.UnimplementedOs, - error.ThreadContextNotSupported, + error.UnsupportedRegister, => return error.UnsupportedDebugInfo, error.InvalidRegister, - error.RegisterContextRequired, error.IncompatibleRegisterSize, => return error.InvalidDebugInfo, }; @@ -396,7 +393,6 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }; if (entry.raw_encoding == 0) return error.MissingDebugInfo; - const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); const new_ip = switch (builtin.cpu.arch) { @@ -405,16 +401,16 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, .RBP_FRAME => ip: { const frame = encoding.value.x86_64.frame; - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const fp = (try dwarfRegNative(&context.cpu_context, 
fp_reg_num)).*; const new_sp = fp + 2 * @sizeOf(usize); const ip_ptr = fp + @sizeOf(usize); const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp; + (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; + (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; const regs: [5]u3 = .{ frame.reg0, @@ -427,7 +423,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, if (reg == 0) continue; const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(addr)).*; } break :ip new_ip; @@ -437,7 +433,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, => ip: { const frameless = encoding.value.x86_64.frameless; - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*; const stack_size: usize = stack_size: { if (encoding.mode.x86_64 == .STACK_IMMD) { break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); @@ -487,7 +483,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); for (0..reg_count) |i| { const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try 
regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; reg_addr += @sizeOf(usize); } @@ -497,8 +493,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; const new_sp = ip_ptr + @sizeOf(usize); - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; + (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; break :ip new_ip; }, @@ -516,10 +512,10 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, .aarch64, .aarch64_be => switch (encoding.mode.arm64) { .OLD => return error.UnsupportedDebugInfo, .FRAMELESS => ip: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*; const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + const new_ip = (try dwarfRegNative(&context.cpu_context, 30)).*; + (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; break :ip new_ip; }, .DWARF => { @@ -535,15 +531,15 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, .FRAME => ip: { const frame = encoding.value.arm64.frame; - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*; const ip_ptr = fp + @sizeOf(usize); var reg_addr = fp - @sizeOf(usize); inline for 
(@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { if (@field(frame.x_reg_pairs, field.name) != 0) { - (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + (try dwarfRegNative(&context.cpu_context, 19 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; reg_addr += @sizeOf(usize); - (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + (try dwarfRegNative(&context.cpu_context, 20 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; reg_addr += @sizeOf(usize); } } @@ -552,12 +548,12 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, if (@field(frame.d_reg_pairs, field.name) != 0) { // Only the lower half of the 128-bit V registers are restored during unwinding { - const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); + const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 8 + i)); dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; } reg_addr += @sizeOf(usize); { - const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); + const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 9 + i)); dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; } reg_addr += @sizeOf(usize); @@ -567,8 +563,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp; + (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; break :ip new_ip; }, @@ 
-782,13 +778,9 @@ test { _ = MachoSymbol; } -fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { - return Dwarf.abi.fpRegNum(builtin.target.cpu.arch, reg_context); -} -fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { - return Dwarf.abi.spRegNum(builtin.target.cpu.arch, reg_context); -} -const ip_reg_num = Dwarf.abi.ipRegNum(builtin.target.cpu.arch).?; +const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?; +const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch); +const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch); /// Uses `mmap` to map the file at `path` into memory. fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { @@ -821,8 +813,7 @@ const mem = std.mem; const posix = std.posix; const testing = std.testing; const Error = std.debug.SelfInfo.Error; -const regBytes = Dwarf.abi.regBytes; -const regValueNative = Dwarf.abi.regValueNative; +const dwarfRegNative = std.debug.SelfInfo.DwarfUnwindContext.regNative; const builtin = @import("builtin"); const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 8a0acf8bb06f..fde61d8140d2 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -26,7 +26,6 @@ pub fn key(m: ElfModule) usize { pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!ElfModule { _ = cache; _ = gpa; - if (builtin.target.os.tag == .haiku) @panic("TODO implement lookup module for Haiku"); const DlIterContext = struct { /// input address: usize, @@ -261,7 +260,7 @@ pub const supports_unwinding: bool = s: { }; comptime { if (supports_unwinding) { - std.debug.assert(Dwarf.abi.supportsUnwinding(&builtin.target)); + std.debug.assert(Dwarf.supportsUnwinding(&builtin.target)); } } diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 4bbc220c5ba0..8c88bd8b2f34 100644 --- 
a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -295,11 +295,45 @@ pub const UnwindContext = struct { pc: usize, cur: windows.CONTEXT, history_table: windows.UNWIND_HISTORY_TABLE, - pub fn init(ctx: *const windows.CONTEXT, gpa: Allocator) Allocator.Error!UnwindContext { - _ = gpa; + pub fn init(ctx: *const std.debug.cpu_context.Native) UnwindContext { return .{ .pc = @returnAddress(), - .cur = ctx.*, + .cur = switch (builtin.cpu.arch) { + .x86_64 => std.mem.zeroInit(windows.CONTEXT, .{ + .Rax = ctx.gprs.get(.rax), + .Rcx = ctx.gprs.get(.rcx), + .Rdx = ctx.gprs.get(.rdx), + .Rbx = ctx.gprs.get(.rbx), + .Rsp = ctx.gprs.get(.rsp), + .Rbp = ctx.gprs.get(.rbp), + .Rsi = ctx.gprs.get(.rsi), + .Rdi = ctx.gprs.get(.rdi), + .R8 = ctx.gprs.get(.r8), + .R9 = ctx.gprs.get(.r9), + .R10 = ctx.gprs.get(.r10), + .R11 = ctx.gprs.get(.r11), + .R12 = ctx.gprs.get(.r12), + .R13 = ctx.gprs.get(.r13), + .R14 = ctx.gprs.get(.r14), + .R15 = ctx.gprs.get(.r15), + .Rip = ctx.gprs.get(.rip), + }), + .aarch64, .aarch64_be => .{ + .ContextFlags = 0, + .Cpsr = 0, + .DUMMYUNIONNAME = .{ .X = ctx.x }, + .Sp = ctx.sp, + .Pc = ctx.pc, + .V = @splat(.{ .B = @splat(0) }), + .Fpcr = 0, + .Fpsr = 0, + .Bcr = @splat(0), + .Bvr = @splat(0), + .Wcr = @splat(0), + .Wvr = @splat(0), + }, + else => comptime unreachable, + }, .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), }; } diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig new file mode 100644 index 000000000000..9859575fa367 --- /dev/null +++ b/lib/std/debug/cpu_context.zig @@ -0,0 +1,1019 @@ +/// Register state for the native architecture, used by `std.debug` for stack unwinding. +/// `noreturn` if there is no implementation for the native architecture. +/// This can be overriden by exposing a declaration `root.debug.CpuContext`. 
+pub const Native = if (@hasDecl(root, "debug") and @hasDecl(root.debug, "CpuContext")) + root.debug.CpuContext +else switch (native_arch) { + .x86 => X86, + .x86_64 => X86_64, + .arm, .armeb, .thumb, .thumbeb => Arm, + .aarch64, .aarch64_be => Aarch64, + else => noreturn, +}; + +pub const DwarfRegisterError = error{ + InvalidRegister, + UnsupportedRegister, +}; + +pub fn fromPosixSignalContext(ctx_ptr: ?*const anyopaque) ?Native { + if (signal_ucontext_t == void) return null; + const uc: *const signal_ucontext_t = @ptrCast(@alignCast(ctx_ptr)); + return switch (native_arch) { + .x86 => switch (native_os) { + .linux, .netbsd, .solaris, .illumos => .{ .gprs = .init(.{ + .eax = uc.mcontext.gregs[std.posix.REG.EAX], + .ecx = uc.mcontext.gregs[std.posix.REG.ECX], + .edx = uc.mcontext.gregs[std.posix.REG.EDX], + .ebx = uc.mcontext.gregs[std.posix.REG.EBX], + .esp = uc.mcontext.gregs[std.posix.REG.ESP], + .ebp = uc.mcontext.gregs[std.posix.REG.EBP], + .esi = uc.mcontext.gregs[std.posix.REG.ESI], + .edi = uc.mcontext.gregs[std.posix.REG.EDI], + .eip = uc.mcontext.gregs[std.posix.REG.EIP], + }) }, + else => null, + }, + .x86_64 => switch (native_os) { + .linux, .solaris, .illumos => .{ .gprs = .init(.{ + .rax = uc.mcontext.gregs[std.posix.REG.RAX], + .rdx = uc.mcontext.gregs[std.posix.REG.RDX], + .rcx = uc.mcontext.gregs[std.posix.REG.RCX], + .rbx = uc.mcontext.gregs[std.posix.REG.RBX], + .rsi = uc.mcontext.gregs[std.posix.REG.RSI], + .rdi = uc.mcontext.gregs[std.posix.REG.RDI], + .rbp = uc.mcontext.gregs[std.posix.REG.RBP], + .rsp = uc.mcontext.gregs[std.posix.REG.RSP], + .r8 = uc.mcontext.gregs[std.posix.REG.R8], + .r9 = uc.mcontext.gregs[std.posix.REG.R9], + .r10 = uc.mcontext.gregs[std.posix.REG.R10], + .r11 = uc.mcontext.gregs[std.posix.REG.R11], + .r12 = uc.mcontext.gregs[std.posix.REG.R12], + .r13 = uc.mcontext.gregs[std.posix.REG.R13], + .r14 = uc.mcontext.gregs[std.posix.REG.R14], + .r15 = uc.mcontext.gregs[std.posix.REG.R15], + .rip = 
uc.mcontext.gregs[std.posix.REG.RIP], + }) }, + .freebsd => .{ .gprs = .init(.{ + .rax = uc.mcontext.rax, + .rdx = uc.mcontext.rdx, + .rcx = uc.mcontext.rcx, + .rbx = uc.mcontext.rbx, + .rsi = uc.mcontext.rsi, + .rdi = uc.mcontext.rdi, + .rbp = uc.mcontext.rbp, + .rsp = uc.mcontext.rsp, + .r8 = uc.mcontext.r8, + .r9 = uc.mcontext.r9, + .r10 = uc.mcontext.r10, + .r11 = uc.mcontext.r11, + .r12 = uc.mcontext.r12, + .r13 = uc.mcontext.r13, + .r14 = uc.mcontext.r14, + .r15 = uc.mcontext.r15, + .rip = uc.mcontext.rip, + }) }, + .openbsd => .{ .gprs = .init(.{ + .rax = @bitCast(uc.sc_rax), + .rdx = @bitCast(uc.sc_rdx), + .rcx = @bitCast(uc.sc_rcx), + .rbx = @bitCast(uc.sc_rbx), + .rsi = @bitCast(uc.sc_rsi), + .rdi = @bitCast(uc.sc_rdi), + .rbp = @bitCast(uc.sc_rbp), + .rsp = @bitCast(uc.sc_rsp), + .r8 = @bitCast(uc.sc_r8), + .r9 = @bitCast(uc.sc_r9), + .r10 = @bitCast(uc.sc_r10), + .r11 = @bitCast(uc.sc_r11), + .r12 = @bitCast(uc.sc_r12), + .r13 = @bitCast(uc.sc_r13), + .r14 = @bitCast(uc.sc_r14), + .r15 = @bitCast(uc.sc_r15), + .rip = @bitCast(uc.sc_rip), + }) }, + .macos, .ios => .{ .gprs = .init(.{ + .rax = uc.mcontext.ss.rax, + .rdx = uc.mcontext.ss.rdx, + .rcx = uc.mcontext.ss.rcx, + .rbx = uc.mcontext.ss.rbx, + .rsi = uc.mcontext.ss.rsi, + .rdi = uc.mcontext.ss.rdi, + .rbp = uc.mcontext.ss.rbp, + .rsp = uc.mcontext.ss.rsp, + .r8 = uc.mcontext.ss.r8, + .r9 = uc.mcontext.ss.r9, + .r10 = uc.mcontext.ss.r10, + .r11 = uc.mcontext.ss.r11, + .r12 = uc.mcontext.ss.r12, + .r13 = uc.mcontext.ss.r13, + .r14 = uc.mcontext.ss.r14, + .r15 = uc.mcontext.ss.r15, + .rip = uc.mcontext.ss.rip, + }) }, + else => null, + }, + .arm, .armeb, .thumb, .thumbeb => switch (builtin.os.tag) { + .linux => .{ + .r = .{ + uc.mcontext.arm_r0, + uc.mcontext.arm_r1, + uc.mcontext.arm_r2, + uc.mcontext.arm_r3, + uc.mcontext.arm_r4, + uc.mcontext.arm_r5, + uc.mcontext.arm_r6, + uc.mcontext.arm_r7, + uc.mcontext.arm_r8, + uc.mcontext.arm_r9, + uc.mcontext.arm_r10, + uc.mcontext.arm_fp, // r11 = fp + 
uc.mcontext.arm_ip, // r12 = ip + uc.mcontext.arm_sp, // r13 = sp + uc.mcontext.arm_lr, // r14 = lr + uc.mcontext.arm_pc, // r15 = pc + }, + }, + else => null, + }, + .aarch64, .aarch64_be => switch (builtin.os.tag) { + .macos, .ios, .tvos, .watchos, .visionos => .{ + .x = uc.mcontext.ss.regs ++ @as([2]u64, .{ + uc.mcontext.ss.fp, // x29 = fp + uc.mcontext.ss.lr, // x30 = lr + }), + .sp = uc.mcontext.ss.sp, + .pc = uc.mcontext.ss.pc, + }, + .netbsd => .{ + .x = uc.mcontext.gregs[0..31], + .sp = uc.mcontext.gregs[31], + .pc = uc.mcontext.gregs[32], + }, + .freebsd => .{ + .x = uc.mcontext.gpregs.x ++ @as([1]u64, .{ + uc.mcontext.gpregs.lr, // x30 = lr + }), + .sp = uc.mcontext.gpregs.sp, + // On aarch64, the register ELR_LR1 defines the address to return to after handling + // a CPU exception (ELR is "Exception Link Register"). FreeBSD's ucontext_t uses + // this as the field name, but it's the same thing as the context's PC. + .pc = uc.mcontext.gpregs.elr, + }, + .openbsd => .{ + .x = uc.sc_x ++ .{uc.sc_lr}, + .sp = uc.sc_sp, + // Not a bug; see freebsd above for explanation. 
+ .pc = uc.sc_elr, + }, + .linux => .{ + .x = uc.mcontext.regs, + .sp = uc.mcontext.sp, + .pc = uc.mcontext.pc, + }, + else => null, + }, + else => null, + }; +} + +pub fn fromWindowsContext(ctx: *const std.os.windows.CONTEXT) Native { + return switch (native_arch) { + .x86 => .{ .gprs = .init(.{ + .eax = ctx.Eax, + .ecx = ctx.Ecx, + .edx = ctx.Edx, + .ebx = ctx.Ebx, + .esp = ctx.Esp, + .ebp = ctx.Ebp, + .esi = ctx.Esi, + .edi = ctx.Edi, + .eip = ctx.Eip, + }) }, + .x86_64 => .{ .gprs = .init(.{ + .rax = ctx.Rax, + .rdx = ctx.Rdx, + .rcx = ctx.Rcx, + .rbx = ctx.Rbx, + .rsi = ctx.Rsi, + .rdi = ctx.Rdi, + .rbp = ctx.Rbp, + .rsp = ctx.Rsp, + .r8 = ctx.R8, + .r9 = ctx.R9, + .r10 = ctx.R10, + .r11 = ctx.R11, + .r12 = ctx.R12, + .r13 = ctx.R13, + .r14 = ctx.R14, + .r15 = ctx.R15, + .rip = ctx.Rip, + }) }, + .aarch64, .aarch64_be => .{ + .x = ctx.DUMMYUNIONNAME.X[0..31].*, + .sp = ctx.Sp, + .pc = ctx.Pc, + }, + else => comptime unreachable, + }; +} + +pub const X86 = struct { + /// The first 8 registers here intentionally match the order of registers pushed + /// by PUSHA, which is also the order used by the DWARF register mappings. 
+ pub const Gpr = enum { + // zig fmt: off + eax, ecx, edx, ebx, + esp, ebp, esi, edi, + eip, + // zig fmt: on + }; + gprs: std.enums.EnumArray(Gpr, u32), + + pub inline fn current() X86 { + var ctx: X86 = undefined; + asm volatile ( + \\movl %%eax, 0x00(%%edi) + \\movl %%ecx, 0x04(%%edi) + \\movl %%edx, 0x08(%%edi) + \\movl %%ebx, 0x0c(%%edi) + \\movl %%esp, 0x10(%%edi) + \\movl %%ebp, 0x14(%%edi) + \\movl %%esi, 0x18(%%edi) + \\movl %%edi, 0x1c(%%edi) + \\call 1f + \\1: + \\popl 0x20(%%edi) + : + : [gprs] "{edi}" (&ctx.gprs.values), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *X86, register_num: u16) DwarfRegisterError![]u8 { + // System V Application Binary Interface Intel386 Architecture Processor Supplement Version 1.1 + // § 2.4.2 "DWARF Register Number Mapping" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. + // + // x86-macos sometimes uses different mappings (ebp and esp are reversed when the unwind + // information is from `__eh_frame`). This deviation is not considered here, because + // x86-macos is a deprecated target which is not supported by the Zig Standard Library. + 0...8 => return @ptrCast(&ctx.gprs.values[register_num]), + + 9 => return error.UnsupportedRegister, // rflags + 11...18 => return error.UnsupportedRegister, // st0 - st7 + 21...28 => return error.UnsupportedRegister, // xmm0 - xmm7 + 29...36 => return error.UnsupportedRegister, // mm0 - mm7 + 39 => return error.UnsupportedRegister, // mxcsr + 40...45 => return error.UnsupportedRegister, // es, cs, ss, ds, fs, gs + 48 => return error.UnsupportedRegister, // tr + 49 => return error.UnsupportedRegister, // ldtr + 93...94 => return error.UnsupportedRegister, // fs.base, gs.base + + else => return error.InvalidRegister, + } + } +}; + +pub const X86_64 = struct { + /// MLUGG TODO: explain this order. why does DWARF have this? 
+ pub const Gpr = enum { + // zig fmt: off + rax, rdx, rcx, rbx, + rsi, rdi, rbp, rsp, + r8, r9, r10, r11, + r12, r13, r14, r15, + rip, + // zig fmt: on + }; + gprs: std.enums.EnumArray(Gpr, u64), + + pub inline fn current() X86_64 { + var ctx: X86_64 = undefined; + asm volatile ( + \\movq %%rax, 0x00(%%rdi) + \\movq %%rdx, 0x08(%%rdi) + \\movq %%rcx, 0x10(%%rdi) + \\movq %%rbx, 0x18(%%rdi) + \\movq %%rsi, 0x20(%%rdi) + \\movq %%rdi, 0x28(%%rdi) + \\movq %%rbp, 0x30(%%rdi) + \\movq %%rsp, 0x38(%%rdi) + \\movq %%r8, 0x40(%%rdi) + \\movq %%r9, 0x48(%%rdi) + \\movq %%r10, 0x50(%%rdi) + \\movq %%r11, 0x58(%%rdi) + \\movq %%r12, 0x60(%%rdi) + \\movq %%r13, 0x68(%%rdi) + \\movq %%r14, 0x70(%%rdi) + \\movq %%r15, 0x78(%%rdi) + \\leaq (%%rip), %%rax + \\movq %%rax, 0x80(%%rdi) + \\movq 0x00(%%rdi), %%rax // restore saved rax + : + : [gprs] "{rdi}" (&ctx.gprs.values), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *X86_64, register_num: u16) DwarfRegisterError![]u8 { + // System V Application Binary Interface AMD64 Architecture Processor Supplement + // § 3.6.2 "DWARF Register Number Mapping" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. 
+ 0...16 => return @ptrCast(&ctx.gprs.values[register_num]), + + 17...32 => return error.UnsupportedRegister, // xmm0 - xmm15 + 33...40 => return error.UnsupportedRegister, // st0 - st7 + 41...48 => return error.UnsupportedRegister, // mm0 - mm7 + 49 => return error.UnsupportedRegister, // rflags + 50...55 => return error.UnsupportedRegister, // es, cs, ss, ds, fs, gs + 58...59 => return error.UnsupportedRegister, // fs.base, gs.base + 62 => return error.UnsupportedRegister, // tr + 63 => return error.UnsupportedRegister, // ldtr + 64 => return error.UnsupportedRegister, // mxcsr + 65 => return error.UnsupportedRegister, // fcw + 66 => return error.UnsupportedRegister, // fsw + + else => return error.InvalidRegister, + } + } +}; + +pub const Arm = struct { + /// The numbered general-purpose registers R0 - R15. + r: [16]u32, + + pub inline fn current() Arm { + var ctx: Arm = undefined; + asm volatile ( + \\// For compatibility with Thumb, we can't write r13 (sp) or r15 (pc) with stm. + \\stm r0, {r0-r12} + \\str r13, [r0, #0x34] + \\str r14, [r0, #0x38] + \\str r15, [r0, #0x3c] + : + : [r] "{r0}" (&ctx.r), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *Arm, register_num: u16) DwarfRegisterError![]u8 { + // DWARF for the Arm(r) Architecture § 4.1 "DWARF register names" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. 
+ 0...15 => return @ptrCast(&ctx.r[register_num]), + + 64...95 => return error.UnsupportedRegister, // S0 - S31 + 96...103 => return error.UnsupportedRegister, // F0 - F7 + 104...111 => return error.UnsupportedRegister, // wCGR0 - wCGR7, or ACC0 - ACC7 + 112...127 => return error.UnsupportedRegister, // wR0 - wR15 + 128 => return error.UnsupportedRegister, // SPSR + 129 => return error.UnsupportedRegister, // SPSR_FIQ + 130 => return error.UnsupportedRegister, // SPSR_IRQ + 131 => return error.UnsupportedRegister, // SPSR_ABT + 132 => return error.UnsupportedRegister, // SPSR_UND + 133 => return error.UnsupportedRegister, // SPSR_SVC + 143 => return error.UnsupportedRegister, // RA_AUTH_CODE + 144...150 => return error.UnsupportedRegister, // R8_USR - R14_USR + 151...157 => return error.UnsupportedRegister, // R8_FIQ - R14_FIQ + 158...159 => return error.UnsupportedRegister, // R13_IRQ - R14_IRQ + 160...161 => return error.UnsupportedRegister, // R13_ABT - R14_ABT + 162...163 => return error.UnsupportedRegister, // R13_UND - R14_UND + 164...165 => return error.UnsupportedRegister, // R13_SVC - R14_SVC + 192...199 => return error.UnsupportedRegister, // wC0 - wC7 + 256...287 => return error.UnsupportedRegister, // D0 - D31 + 320 => return error.UnsupportedRegister, // TPIDRURO + 321 => return error.UnsupportedRegister, // TPIDRURW + 322 => return error.UnsupportedRegister, // TPIDPR + 323 => return error.UnsupportedRegister, // HTPIDPR + 8192...16383 => return error.UnsupportedRegister, // Unspecified vendor co-processor register + + else => return error.InvalidRegister, + } + } +}; + +/// This is an `extern struct` so that inline assembly in `current` can use field offsets. +pub const Aarch64 = extern struct { + /// The numbered general-purpose registers X0 - X30. 
+ x: [31]u64, + sp: u64, + pc: u64, + + pub inline fn current() Aarch64 { + var ctx: Aarch64 = undefined; + asm volatile ( + \\stp x0, x1, [x0, #0x000] + \\stp x2, x3, [x0, #0x010] + \\stp x4, x5, [x0, #0x020] + \\stp x6, x7, [x0, #0x030] + \\stp x8, x9, [x0, #0x040] + \\stp x10, x11, [x0, #0x050] + \\stp x12, x13, [x0, #0x060] + \\stp x14, x15, [x0, #0x070] + \\stp x16, x17, [x0, #0x080] + \\stp x18, x19, [x0, #0x090] + \\stp x20, x21, [x0, #0x0a0] + \\stp x22, x23, [x0, #0x0b0] + \\stp x24, x25, [x0, #0x0c0] + \\stp x26, x27, [x0, #0x0d0] + \\stp x28, x29, [x0, #0x0e0] + \\str x30, [x0, #0x0f0] + \\mov x1, sp + \\str x1, [x0, #0x0f8] + \\adr x1, . + \\str x1, [x0, #0x100] + \\ldr x1, [x0, #0x008] // restore saved x1 + : + : [gprs] "{x0}" (&ctx), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *Aarch64, register_num: u16) DwarfRegisterError![]u8 { + // DWARF for the Arm(r) 64-bit Architecture (AArch64) § 4.1 "DWARF register names" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. 
+ 0...30 => return @ptrCast(&ctx.x[register_num]), + 31 => return @ptrCast(&ctx.sp), + 32 => return @ptrCast(&ctx.pc), + + 33 => return error.UnsupportedRegister, // ELF_mode + 34 => return error.UnsupportedRegister, // RA_SIGN_STATE + 35 => return error.UnsupportedRegister, // TPIDRRO_ELO + 36 => return error.UnsupportedRegister, // RPIDR_ELO + 37 => return error.UnsupportedRegister, // RPIDR_EL1 + 38 => return error.UnsupportedRegister, // RPIDR_EL2 + 39 => return error.UnsupportedRegister, // RPIDR_EL3 + 46 => return error.UnsupportedRegister, // VG + 47 => return error.UnsupportedRegister, // FFR + 48...63 => return error.UnsupportedRegister, // P0 - P15 + 64...95 => return error.UnsupportedRegister, // V0 - V31 + 96...127 => return error.UnsupportedRegister, // Z0 - Z31 + + else => return error.InvalidRegister, + } + } +}; + +const signal_ucontext_t = switch (native_os) { + .linux => std.os.linux.ucontext_t, + .emscripten => std.os.emscripten.ucontext_t, + .freebsd => std.os.freebsd.ucontext_t, + .macos, .ios, .tvos, .watchos, .visionos => extern struct { + onstack: c_int, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + link: ?*signal_ucontext_t, + mcsize: u64, + mcontext: *mcontext_t, + const mcontext_t = switch (native_arch) { + .aarch64 => extern struct { + es: extern struct { + far: u64, // Virtual Fault Address + esr: u32, // Exception syndrome + exception: u32, // Number of arm exception taken + }, + ss: extern struct { + /// General purpose registers + regs: [29]u64, + /// Frame pointer x29 + fp: u64, + /// Link register x30 + lr: u64, + /// Stack pointer x31 + sp: u64, + /// Program counter + pc: u64, + /// Current program status register + cpsr: u32, + __pad: u32, + }, + ns: extern struct { + q: [32]u128, + fpsr: u32, + fpcr: u32, + }, + }, + .x86_64 => extern struct { + es: extern struct { + trapno: u16, + cpu: u16, + err: u32, + faultvaddr: u64, + }, + ss: extern struct { + rax: u64, + rbx: u64, + rcx: u64, + rdx: u64, + rdi: u64, + rsi: u64, + 
rbp: u64, + rsp: u64, + r8: u64, + r9: u64, + r10: u64, + r11: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, + rip: u64, + rflags: u64, + cs: u64, + fs: u64, + gs: u64, + }, + fs: extern struct { + reserved: [2]c_int, + fcw: u16, + fsw: u16, + ftw: u8, + rsrv1: u8, + fop: u16, + ip: u32, + cs: u16, + rsrv2: u16, + dp: u32, + ds: u16, + rsrv3: u16, + mxcsr: u32, + mxcsrmask: u32, + stmm: [8]stmm_reg, + xmm: [16]xmm_reg, + rsrv4: [96]u8, + reserved1: c_int, + + const stmm_reg = [16]u8; + const xmm_reg = [16]u8; + }, + }, + else => void, + }; + }, + .solaris, .illumos => extern struct { + flags: u64, + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + brand_data: [3]?*anyopaque, + filler: [2]i64, + const mcontext_t = extern struct { + gregs: [28]u64, + fpregs: std.c.fpregset_t, + }; + }, + .openbsd => switch (builtin.cpu.arch) { + .x86_64 => extern struct { + sc_rdi: c_long, + sc_rsi: c_long, + sc_rdx: c_long, + sc_rcx: c_long, + sc_r8: c_long, + sc_r9: c_long, + sc_r10: c_long, + sc_r11: c_long, + sc_r12: c_long, + sc_r13: c_long, + sc_r14: c_long, + sc_r15: c_long, + sc_rbp: c_long, + sc_rbx: c_long, + sc_rax: c_long, + sc_gs: c_long, + sc_fs: c_long, + sc_es: c_long, + sc_ds: c_long, + sc_trapno: c_long, + sc_err: c_long, + sc_rip: c_long, + sc_cs: c_long, + sc_rflags: c_long, + sc_rsp: c_long, + sc_ss: c_long, + + sc_fpstate: *anyopaque, // struct fxsave64 * + __sc_unused: c_int, + sc_mask: c_int, + sc_cookie: c_long, + }, + .aarch64 => extern struct { + __sc_unused: c_int, + sc_mask: c_int, + sc_sp: c_ulong, + sc_lr: c_ulong, + sc_elr: c_ulong, + sc_spsr: c_ulong, + sc_x: [30]c_ulong, + sc_cookie: c_long, + }, + else => void, + }, + .netbsd => extern struct { + flags: u32, + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + __pad: [ + switch (builtin.cpu.arch) { + .x86 => 4, + .mips, .mipsel, .mips64, .mips64el => 14, + .arm, .armeb, .thumb, .thumbeb 
=> 1, + .sparc, .sparc64 => if (@sizeOf(usize) == 4) 43 else 8, + else => 0, + } + ]u32, + const mcontext_t = switch (builtin.cpu.arch) { + .aarch64, .aarch64_be => extern struct { + gregs: [35]u64, + fregs: [528]u8 align(16), + spare: [8]u64, + }, + .x86 => extern struct { + gregs: [19]u32, + fpregs: [161]u32, + mc_tlsbase: u32, + }, + .x86_64 => extern struct { + gregs: [26]u64, + mc_tlsbase: u64, + fpregs: [512]u8 align(8), + }, + else => void, + }; + }, + .dragonfly => extern struct { + sigmask: std.c.sigset_t, + mcontext: mcontext_t, + link: ?*signal_ucontext_t, + stack: std.c.stack_t, + cofunc: ?*fn (?*signal_ucontext_t, ?*anyopaque) void, + arg: ?*void, + _spare: [4]c_int, + const mcontext_t = extern struct { + const register_t = isize; + onstack: register_t, // XXX - sigcontext compat. + rdi: register_t, + rsi: register_t, + rdx: register_t, + rcx: register_t, + r8: register_t, + r9: register_t, + rax: register_t, + rbx: register_t, + rbp: register_t, + r10: register_t, + r11: register_t, + r12: register_t, + r13: register_t, + r14: register_t, + r15: register_t, + xflags: register_t, + trapno: register_t, + addr: register_t, + flags: register_t, + err: register_t, + rip: register_t, + cs: register_t, + rflags: register_t, + rsp: register_t, // machine state + ss: register_t, + + len: c_uint, // sizeof(mcontext_t) + fpformat: c_uint, + ownedfp: c_uint, + reserved: c_uint, + unused: [8]c_uint, + + // NOTE! 64-byte aligned as of here. Also must match savefpu structure. 
+ fpregs: [256]c_int align(64), + }; + }, + .serenity => extern struct { + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + const mcontext_t = switch (builtin.cpu.arch) { + // https://github.com/SerenityOS/serenity/blob/200e91cd7f1ec5453799a2720d4dc114a59cc289/Kernel/Arch/aarch64/mcontext.h#L15-L19 + .aarch64 => extern struct { + x: [31]u64, + sp: u64, + pc: u64, + }, + // https://github.com/SerenityOS/serenity/blob/66f8d0f031ef25c409dbb4fecaa454800fecae0f/Kernel/Arch/riscv64/mcontext.h#L15-L18 + .riscv64 => extern struct { + x: [31]u64, + pc: u64, + }, + // https://github.com/SerenityOS/serenity/blob/7b9ea3efdec9f86a1042893e8107d0b23aad8727/Kernel/Arch/x86_64/mcontext.h#L15-L40 + .x86_64 => extern struct { + rax: u64, + rcx: u64, + rdx: u64, + rbx: u64, + rsp: u64, + rbp: u64, + rsi: u64, + rdi: u64, + rip: u64, + r8: u64, + r9: u64, + r10: u64, + r11: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, + rflags: u64, + cs: u32, + ss: u32, + ds: u32, + es: u32, + fs: u32, + gs: u32, + }, + else => void, + }; + }, + .haiku => extern struct { + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + const mcontext_t = switch (builtin.cpu.arch) { + .arm, .thumb => extern struct { + r0: u32, + r1: u32, + r2: u32, + r3: u32, + r4: u32, + r5: u32, + r6: u32, + r7: u32, + r8: u32, + r9: u32, + r10: u32, + r11: u32, + r12: u32, + r13: u32, + r14: u32, + r15: u32, + cpsr: u32, + }, + .aarch64 => extern struct { + x: [10]u64, + lr: u64, + sp: u64, + elr: u64, + spsr: u64, + fp_q: [32]u128, + fpsr: u32, + fpcr: u32, + }, + .m68k => extern struct { + pc: u32, + d0: u32, + d1: u32, + d2: u32, + d3: u32, + d4: u32, + d5: u32, + d6: u32, + d7: u32, + a0: u32, + a1: u32, + a2: u32, + a3: u32, + a4: u32, + a5: u32, + a6: u32, + a7: u32, + ccr: u8, + f0: f64, + f1: f64, + f2: f64, + f3: f64, + f4: f64, + f5: f64, + f6: f64, + f7: f64, + f8: f64, + f9: f64, + f10: f64, + f11: f64, 
+ f12: f64, + f13: f64, + }, + .mipsel => extern struct { + r0: u32, + }, + .powerpc => extern struct { + pc: u32, + r0: u32, + r1: u32, + r2: u32, + r3: u32, + r4: u32, + r5: u32, + r6: u32, + r7: u32, + r8: u32, + r9: u32, + r10: u32, + r11: u32, + r12: u32, + f0: f64, + f1: f64, + f2: f64, + f3: f64, + f4: f64, + f5: f64, + f6: f64, + f7: f64, + f8: f64, + f9: f64, + f10: f64, + f11: f64, + f12: f64, + f13: f64, + reserved: u32, + fpscr: u32, + ctr: u32, + xer: u32, + cr: u32, + msr: u32, + lr: u32, + }, + .riscv64 => extern struct { + x: [31]u64, + pc: u64, + f: [32]f64, + fcsr: u64, + }, + .sparc64 => extern struct { + g1: u64, + g2: u64, + g3: u64, + g4: u64, + g5: u64, + g6: u64, + g7: u64, + o0: u64, + o1: u64, + o2: u64, + o3: u64, + o4: u64, + o5: u64, + sp: u64, + o7: u64, + l0: u64, + l1: u64, + l2: u64, + l3: u64, + l4: u64, + l5: u64, + l6: u64, + l7: u64, + i0: u64, + i1: u64, + i2: u64, + i3: u64, + i4: u64, + i5: u64, + fp: u64, + i7: u64, + }, + .x86 => extern struct { + pub const old_extended_regs = extern struct { + control: u16, + reserved1: u16, + status: u16, + reserved2: u16, + tag: u16, + reserved3: u16, + eip: u32, + cs: u16, + opcode: u16, + datap: u32, + ds: u16, + reserved4: u16, + fp_mmx: [8][10]u8, + }; + + pub const fp_register = extern struct { value: [10]u8, reserved: [6]u8 }; + + pub const xmm_register = extern struct { value: [16]u8 }; + + pub const new_extended_regs = extern struct { + control: u16, + status: u16, + tag: u16, + opcode: u16, + eip: u32, + cs: u16, + reserved1: u16, + datap: u32, + ds: u16, + reserved2: u16, + mxcsr: u32, + reserved3: u32, + fp_mmx: [8]fp_register, + xmmx: [8]xmm_register, + reserved4: [224]u8, + }; + + pub const extended_regs = extern struct { + state: extern union { + old_format: old_extended_regs, + new_format: new_extended_regs, + }, + format: u32, + }; + + eip: u32, + eflags: u32, + eax: u32, + ecx: u32, + edx: u32, + esp: u32, + ebp: u32, + reserved: u32, + xregs: extended_regs, + edi: u32, 
+ esi: u32, + ebx: u32, + }, + .x86_64 => extern struct { + pub const fp_register = extern struct { + value: [10]u8, + reserved: [6]u8, + }; + + pub const xmm_register = extern struct { + value: [16]u8, + }; + + pub const fpu_state = extern struct { + control: u16, + status: u16, + tag: u16, + opcode: u16, + rip: u64, + rdp: u64, + mxcsr: u32, + mscsr_mask: u32, + + fp_mmx: [8]fp_register, + xmm: [16]xmm_register, + reserved: [96]u8, + }; + + pub const xstate_hdr = extern struct { + bv: u64, + xcomp_bv: u64, + reserved: [48]u8, + }; + + pub const savefpu = extern struct { + fxsave: fpu_state, + xstate: xstate_hdr, + ymm: [16]xmm_register, + }; + + rax: u64, + rbx: u64, + rcx: u64, + rdx: u64, + rdi: u64, + rsi: u64, + rbp: u64, + r8: u64, + r9: u64, + r10: u64, + r11: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, + rsp: u64, + rip: u64, + rflags: u64, + fpu: savefpu, + }, + else => void, + }; + }, + else => void, +}; + +const std = @import("../std.zig"); +const root = @import("root"); +const builtin = @import("builtin"); +const native_arch = @import("builtin").target.cpu.arch; +const native_os = @import("builtin").target.os.tag; diff --git a/lib/std/heap/PageAllocator.zig b/lib/std/heap/PageAllocator.zig index 106460387abc..f3e3857b581a 100644 --- a/lib/std/heap/PageAllocator.zig +++ b/lib/std/heap/PageAllocator.zig @@ -183,7 +183,7 @@ pub fn realloc(uncasted_memory: []u8, new_len: usize, may_move: bool) ?[*]u8 { if (posix.MREMAP != void) { // TODO: if the next_mmap_addr_hint is within the remapped range, update it - const new_memory = posix.mremap(memory.ptr, memory.len, new_len, .{ .MAYMOVE = may_move }, null) catch return null; + const new_memory = posix.mremap(memory.ptr, page_aligned_len, new_size_aligned, .{ .MAYMOVE = may_move }, null) catch return null; return new_memory.ptr; } diff --git a/lib/std/os/freebsd.zig b/lib/std/os/freebsd.zig index 4c68405c22e0..aedb2d8ccef8 100644 --- a/lib/std/os/freebsd.zig +++ b/lib/std/os/freebsd.zig @@ -3,6 +3,7 @@ 
const fd_t = std.c.fd_t; const off_t = std.c.off_t; const unexpectedErrno = std.posix.unexpectedErrno; const errno = std.posix.errno; +const builtin = @import("builtin"); pub const CopyFileRangeError = std.posix.UnexpectedError || error{ /// If infd is not open for reading or outfd is not open for writing, or @@ -47,3 +48,75 @@ pub fn copy_file_range(fd_in: fd_t, off_in: ?*i64, fd_out: fd_t, off_out: ?*i64, else => |err| return unexpectedErrno(err), } } + +pub const ucontext_t = extern struct { + sigmask: std.c.sigset_t, + mcontext: mcontext_t, + link: ?*ucontext_t, + stack: std.c.stack_t, + flags: c_int, + __spare__: [4]c_int, + const mcontext_t = switch (builtin.cpu.arch) { + .x86_64 => extern struct { + onstack: u64, + rdi: u64, + rsi: u64, + rdx: u64, + rcx: u64, + r8: u64, + r9: u64, + rax: u64, + rbx: u64, + rbp: u64, + r10: u64, + r11: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, + trapno: u32, + fs: u16, + gs: u16, + addr: u64, + flags: u32, + es: u16, + ds: u16, + err: u64, + rip: u64, + cs: u64, + rflags: u64, + rsp: u64, + ss: u64, + len: u64, + fpformat: u64, + ownedfp: u64, + fpstate: [64]u64 align(16), + fsbase: u64, + gsbase: u64, + xfpustate: u64, + xfpustate_len: u64, + spare: [4]u64, + }, + .aarch64 => extern struct { + gpregs: extern struct { + x: [30]u64, + lr: u64, + sp: u64, + elr: u64, + spsr: u32, + _pad: u32, + }, + fpregs: extern struct { + q: [32]u128, + sr: u32, + cr: u32, + flags: u32, + _pad: u32, + }, + flags: u32, + _pad: u32, + _spare: [8]u64, + }, + else => void, + }; +}; diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 48e9f08d5d82..45cf597c7697 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -49,7 +49,6 @@ const arch_bits = switch (native_arch) { .s390x => @import("linux/s390x.zig"), else => struct { pub const ucontext_t = void; - pub const getcontext = {}; }, }; @@ -112,7 +111,6 @@ pub const timeval = arch_bits.timeval; pub const timezone = arch_bits.timezone; pub const ucontext_t = 
arch_bits.ucontext_t; pub const user_desc = arch_bits.user_desc; -pub const getcontext = arch_bits.getcontext; pub const tls = @import("linux/tls.zig"); pub const BPF = @import("linux/bpf.zig"); diff --git a/lib/std/os/linux/aarch64.zig b/lib/std/os/linux/aarch64.zig index 5fcf04c58b5b..6538e1f175e1 100644 --- a/lib/std/os/linux/aarch64.zig +++ b/lib/std/os/linux/aarch64.zig @@ -260,7 +260,4 @@ pub const ucontext_t = extern struct { mcontext: mcontext_t, }; -/// TODO -pub const getcontext = {}; - pub const Elf_Symndx = u32; diff --git a/lib/std/os/linux/arm.zig b/lib/std/os/linux/arm.zig index 5f41607efe3c..693fe5aafdab 100644 --- a/lib/std/os/linux/arm.zig +++ b/lib/std/os/linux/arm.zig @@ -310,7 +310,4 @@ pub const ucontext_t = extern struct { regspace: [64]u64, }; -/// TODO -pub const getcontext = {}; - pub const Elf_Symndx = u32; diff --git a/lib/std/os/linux/hexagon.zig b/lib/std/os/linux/hexagon.zig index 255faba20f39..ea54829e65d3 100644 --- a/lib/std/os/linux/hexagon.zig +++ b/lib/std/os/linux/hexagon.zig @@ -237,6 +237,3 @@ pub const VDSO = void; /// TODO pub const ucontext_t = void; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/loongarch64.zig b/lib/std/os/linux/loongarch64.zig index b6ff38fccda5..f57050ddb7e5 100644 --- a/lib/std/os/linux/loongarch64.zig +++ b/lib/std/os/linux/loongarch64.zig @@ -250,6 +250,3 @@ pub const ucontext_t = extern struct { }; pub const Elf_Symndx = u32; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/m68k.zig b/lib/std/os/linux/m68k.zig index 78851c3928ce..82d854e49838 100644 --- a/lib/std/os/linux/m68k.zig +++ b/lib/std/os/linux/m68k.zig @@ -258,6 +258,3 @@ pub const VDSO = void; /// TODO pub const ucontext_t = void; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/mips.zig b/lib/std/os/linux/mips.zig index c971bbd739e3..f0a235a59824 100644 --- a/lib/std/os/linux/mips.zig +++ b/lib/std/os/linux/mips.zig @@ -349,6 +349,3 @@ pub const Elf_Symndx = u32; /// TODO 
pub const ucontext_t = void; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/mips64.zig b/lib/std/os/linux/mips64.zig index 91fcacef38a1..11ad1029145f 100644 --- a/lib/std/os/linux/mips64.zig +++ b/lib/std/os/linux/mips64.zig @@ -328,6 +328,3 @@ pub const Elf_Symndx = u32; /// TODO pub const ucontext_t = void; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/powerpc.zig b/lib/std/os/linux/powerpc.zig index 3e876bb3f064..32e66d8c78db 100644 --- a/lib/std/os/linux/powerpc.zig +++ b/lib/std/os/linux/powerpc.zig @@ -381,6 +381,3 @@ pub const ucontext_t = extern struct { }; pub const Elf_Symndx = u32; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/powerpc64.zig b/lib/std/os/linux/powerpc64.zig index 70b11a86bb6b..fb4686c7b2b6 100644 --- a/lib/std/os/linux/powerpc64.zig +++ b/lib/std/os/linux/powerpc64.zig @@ -376,6 +376,3 @@ pub const ucontext_t = extern struct { }; pub const Elf_Symndx = u32; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/riscv32.zig b/lib/std/os/linux/riscv32.zig index 01b600213502..f70fcefd14b5 100644 --- a/lib/std/os/linux/riscv32.zig +++ b/lib/std/os/linux/riscv32.zig @@ -255,6 +255,3 @@ pub const ucontext_t = extern struct { sigmask: [1024 / @bitSizeOf(c_ulong)]c_ulong, // Currently a libc-compatible (1024-bit) sigmask mcontext: mcontext_t, }; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/riscv64.zig b/lib/std/os/linux/riscv64.zig index 577cf3ec48f1..d2eb4f9b4d94 100644 --- a/lib/std/os/linux/riscv64.zig +++ b/lib/std/os/linux/riscv64.zig @@ -255,6 +255,3 @@ pub const ucontext_t = extern struct { sigmask: [1024 / @bitSizeOf(c_ulong)]c_ulong, // Currently a libc-compatible (1024-bit) sigmask mcontext: mcontext_t, }; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/s390x.zig b/lib/std/os/linux/s390x.zig index 620aedfb698d..71594d4a6509 100644 --- a/lib/std/os/linux/s390x.zig +++ b/lib/std/os/linux/s390x.zig @@ -273,6 
+273,3 @@ pub const mcontext_t = extern struct { __regs2: [18]u32, __regs3: [16]f64, }; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/sparc64.zig b/lib/std/os/linux/sparc64.zig index 13773438880b..1859ba5c05ea 100644 --- a/lib/std/os/linux/sparc64.zig +++ b/lib/std/os/linux/sparc64.zig @@ -426,6 +426,3 @@ pub const ucontext_t = extern struct { stack: stack_t, sigset: [1024 / @bitSizeOf(c_ulong)]c_ulong, // Currently a libc-compatible (1024-bit) sigmask }; - -/// TODO -pub const getcontext = {}; diff --git a/lib/std/os/linux/x86.zig b/lib/std/os/linux/x86.zig index eed85a2ad0be..41b11a35e3a3 100644 --- a/lib/std/os/linux/x86.zig +++ b/lib/std/os/linux/x86.zig @@ -436,17 +436,3 @@ pub fn getContextInternal() callconv(.naked) usize { [sigset_size] "i" (linux.NSIG / 8), : .{ .cc = true, .memory = true, .eax = true, .ecx = true, .edx = true }); } - -pub inline fn getcontext(context: *ucontext_t) usize { - // This method is used so that getContextInternal can control - // its prologue in order to read ESP from a constant offset. - // An aligned stack is not needed for getContextInternal. - var clobber_edx: usize = undefined; - return asm volatile ( - \\ calll %[getContextInternal:P] - : [_] "={eax}" (-> usize), - [_] "={edx}" (clobber_edx), - : [_] "{edx}" (context), - [getContextInternal] "X" (&getContextInternal), - : .{ .cc = true, .memory = true, .ecx = true }); -} diff --git a/lib/std/os/linux/x86_64.zig b/lib/std/os/linux/x86_64.zig index 583fad872d17..72944c37aca6 100644 --- a/lib/std/os/linux/x86_64.zig +++ b/lib/std/os/linux/x86_64.zig @@ -352,98 +352,3 @@ pub const ucontext_t = extern struct { sigmask: [1024 / @bitSizeOf(c_ulong)]c_ulong, // Currently a glibc-compatible (1024-bit) sigmask. 
fpregs_mem: [64]usize, // Not part of kernel ABI, only part of glibc ucontext_t }; - -fn gpRegisterOffset(comptime reg_index: comptime_int) usize { - return @offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "gregs") + @sizeOf(usize) * reg_index; -} - -fn getContextInternal() callconv(.naked) usize { - // TODO: Read GS/FS registers? - asm volatile ( - \\ movq $0, %[flags_offset:c](%%rdi) - \\ movq $0, %[link_offset:c](%%rdi) - \\ movq %%r8, %[r8_offset:c](%%rdi) - \\ movq %%r9, %[r9_offset:c](%%rdi) - \\ movq %%r10, %[r10_offset:c](%%rdi) - \\ movq %%r11, %[r11_offset:c](%%rdi) - \\ movq %%r12, %[r12_offset:c](%%rdi) - \\ movq %%r13, %[r13_offset:c](%%rdi) - \\ movq %%r14, %[r14_offset:c](%%rdi) - \\ movq %%r15, %[r15_offset:c](%%rdi) - \\ movq %%rdi, %[rdi_offset:c](%%rdi) - \\ movq %%rsi, %[rsi_offset:c](%%rdi) - \\ movq %%rbp, %[rbp_offset:c](%%rdi) - \\ movq %%rbx, %[rbx_offset:c](%%rdi) - \\ movq %%rdx, %[rdx_offset:c](%%rdi) - \\ movq %%rax, %[rax_offset:c](%%rdi) - \\ movq %%rcx, %[rcx_offset:c](%%rdi) - \\ movq (%%rsp), %%rcx - \\ movq %%rcx, %[rip_offset:c](%%rdi) - \\ leaq 8(%%rsp), %%rcx - \\ movq %%rcx, %[rsp_offset:c](%%rdi) - \\ pushfq - \\ popq %[efl_offset:c](%%rdi) - \\ leaq %[fpmem_offset:c](%%rdi), %%rcx - \\ movq %%rcx, %[fpstate_offset:c](%%rdi) - \\ fnstenv (%%rcx) - \\ fldenv (%%rcx) - \\ stmxcsr %[mxcsr_offset:c](%%rdi) - \\ leaq %[stack_offset:c](%%rdi), %%rsi - \\ movq %%rdi, %%r8 - \\ xorl %%edi, %%edi - \\ movl %[sigaltstack], %%eax - \\ syscall - \\ testq %%rax, %%rax - \\ jnz 0f - \\ movl %[sigprocmask], %%eax - \\ xorl %%esi, %%esi - \\ leaq %[sigmask_offset:c](%%r8), %%rdx - \\ movl %[sigset_size], %%r10d - \\ syscall - \\0: - \\ retq - : - : [flags_offset] "i" (@offsetOf(ucontext_t, "flags")), - [link_offset] "i" (@offsetOf(ucontext_t, "link")), - [r8_offset] "i" (comptime gpRegisterOffset(REG.R8)), - [r9_offset] "i" (comptime gpRegisterOffset(REG.R9)), - [r10_offset] "i" (comptime gpRegisterOffset(REG.R10)), - [r11_offset] 
"i" (comptime gpRegisterOffset(REG.R11)), - [r12_offset] "i" (comptime gpRegisterOffset(REG.R12)), - [r13_offset] "i" (comptime gpRegisterOffset(REG.R13)), - [r14_offset] "i" (comptime gpRegisterOffset(REG.R14)), - [r15_offset] "i" (comptime gpRegisterOffset(REG.R15)), - [rdi_offset] "i" (comptime gpRegisterOffset(REG.RDI)), - [rsi_offset] "i" (comptime gpRegisterOffset(REG.RSI)), - [rbp_offset] "i" (comptime gpRegisterOffset(REG.RBP)), - [rbx_offset] "i" (comptime gpRegisterOffset(REG.RBX)), - [rdx_offset] "i" (comptime gpRegisterOffset(REG.RDX)), - [rax_offset] "i" (comptime gpRegisterOffset(REG.RAX)), - [rcx_offset] "i" (comptime gpRegisterOffset(REG.RCX)), - [rsp_offset] "i" (comptime gpRegisterOffset(REG.RSP)), - [rip_offset] "i" (comptime gpRegisterOffset(REG.RIP)), - [efl_offset] "i" (comptime gpRegisterOffset(REG.EFL)), - [fpstate_offset] "i" (@offsetOf(ucontext_t, "mcontext") + @offsetOf(mcontext_t, "fpregs")), - [fpmem_offset] "i" (@offsetOf(ucontext_t, "fpregs_mem")), - [mxcsr_offset] "i" (@offsetOf(ucontext_t, "fpregs_mem") + @offsetOf(fpstate, "mxcsr")), - [sigaltstack] "i" (@intFromEnum(linux.SYS.sigaltstack)), - [stack_offset] "i" (@offsetOf(ucontext_t, "stack")), - [sigprocmask] "i" (@intFromEnum(linux.SYS.rt_sigprocmask)), - [sigmask_offset] "i" (@offsetOf(ucontext_t, "sigmask")), - [sigset_size] "i" (@sizeOf(sigset_t)), - : .{ .cc = true, .memory = true, .rax = true, .rcx = true, .rdx = true, .rdi = true, .rsi = true, .r8 = true, .r10 = true, .r11 = true }); -} - -pub inline fn getcontext(context: *ucontext_t) usize { - // This method is used so that getContextInternal can control - // its prologue in order to read RSP from a constant offset - // An aligned stack is not needed for getContextInternal. 
- var clobber_rdi: usize = undefined; - return asm volatile ( - \\ callq %[getContextInternal:P] - : [_] "={rax}" (-> usize), - [_] "={rdi}" (clobber_rdi), - : [_] "{rdi}" (context), - [getContextInternal] "X" (&getContextInternal), - : .{ .cc = true, .memory = true, .rcx = true, .rdx = true, .rsi = true, .r8 = true, .r10 = true, .r11 = true }); -} diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 14f2935649f7..3b77e58df58e 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -47,8 +47,6 @@ else switch (native_os) { .linux => linux, .plan9 => std.os.plan9, else => struct { - pub const getcontext = {}; - pub const ucontext_t = void; pub const pid_t = void; pub const pollfd = void; pub const fd_t = void; @@ -142,7 +140,6 @@ pub const in_pktinfo = system.in_pktinfo; pub const in6_pktinfo = system.in6_pktinfo; pub const ino_t = system.ino_t; pub const linger = system.linger; -pub const mcontext_t = system.mcontext_t; pub const mode_t = system.mode_t; pub const msghdr = system.msghdr; pub const msghdr_const = system.msghdr_const; @@ -171,7 +168,6 @@ pub const timespec = system.timespec; pub const timestamp_t = system.timestamp_t; pub const timeval = system.timeval; pub const timezone = system.timezone; -pub const ucontext_t = system.ucontext_t; pub const uid_t = system.uid_t; pub const user_desc = system.user_desc; pub const utsname = system.utsname; From dd8d59686a069fdb72d9f0753e3482ff99cce98c Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 17 Sep 2025 23:03:45 +0100 Subject: [PATCH 64/85] std.debug: miscellaneous fixes Mostly on macOS, since Loris showed me a not-great stack trace, and I spent 8 hours trying to make it better. The dyld shared cache is designed in a way which makes this really hard to do right, and documentation is non-existent, but this *seems* to work pretty well. I'll leave the ruling on whether I did a good job to CI and our users. 
--- lib/std/c.zig | 3 + lib/std/c/darwin.zig | 8 + lib/std/debug.zig | 75 ++++++-- lib/std/debug/SelfInfo.zig | 54 ++---- lib/std/debug/SelfInfo/DarwinModule.zig | 224 ++++++++++++++++------- lib/std/debug/SelfInfo/ElfModule.zig | 2 +- lib/std/debug/SelfInfo/WindowsModule.zig | 36 +++- lib/std/debug/cpu_context.zig | 6 + 8 files changed, 276 insertions(+), 132 deletions(-) diff --git a/lib/std/c.zig b/lib/std/c.zig index d1affab2075b..4c139d502326 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -10994,6 +10994,9 @@ pub extern "c" fn dlclose(handle: *anyopaque) c_int; pub extern "c" fn dlsym(handle: ?*anyopaque, symbol: [*:0]const u8) ?*anyopaque; pub extern "c" fn dlerror() ?[*:0]u8; +pub const dladdr = if (native_os.isDarwin()) darwin.dladdr else {}; +pub const dl_info = if (native_os.isDarwin()) darwin.dl_info else {}; + pub extern "c" fn sync() void; pub extern "c" fn syncfs(fd: c_int) c_int; pub extern "c" fn fsync(fd: c_int) c_int; diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 2ad979ecf2dc..cf7d3127ebe9 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -354,6 +354,14 @@ pub extern "c" fn _dyld_image_count() u32; pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header; pub extern "c" fn _dyld_get_image_vmaddr_slide(image_index: u32) usize; pub extern "c" fn _dyld_get_image_name(image_index: u32) [*:0]const u8; +pub extern "c" fn dladdr(addr: *const anyopaque, info: *dl_info) c_int; + +pub const dl_info = extern struct { + fname: [*:0]const u8, + fbase: *anyopaque, + sname: ?[*:0]const u8, + saddr: ?*anyopaque, +}; pub const COPYFILE = packed struct(u32) { ACL: bool = false, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 088152d8733c..5b7a6bf71522 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -585,12 +585,14 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) while (true) switch (it.next()) { .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break, 
.end => break, - .frame => |return_address| { + .frame => |pc_addr| { if (wait_for) |target| { - if (return_address != target) continue; + // Possible off-by-one error: `pc_addr` might be one less than the return address (so + // that it falls *inside* the function call), while `target` *is* a return address. + if (pc_addr != target and pc_addr + 1 != target) continue; wait_for = null; } - if (frame_idx < addr_buf.len) addr_buf[frame_idx] = return_address; + if (frame_idx < addr_buf.len) addr_buf[frame_idx] = pc_addr; frame_idx += 1; }, }; @@ -631,6 +633,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ var printed_any_frame = false; while (true) switch (it.next()) { .switch_to_fp => |unwind_error| { + if (StackIterator.fp_unwind_is_safe) continue; // no need to even warn const module_name = di.getModuleNameForAddress(di_gpa, unwind_error.address) catch "???"; const caption: []const u8 = switch (unwind_error.err) { error.MissingDebugInfo => "unwind info unavailable", @@ -658,12 +661,14 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ } }, .end => break, - .frame => |return_address| { + .frame => |pc_addr| { if (wait_for) |target| { - if (return_address != target) continue; + // Possible off-by-one error: `pc_addr` might be one less than the return address (so + // that it falls *inside* the function call), while `target` *is* a return address. 
+ if (pc_addr != target and pc_addr + 1 != target) continue; wait_for = null; } - try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); + try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config); printed_any_frame = true; }, }; @@ -703,8 +708,8 @@ pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_c }, }; const captured_frames = @min(n_frames, st.instruction_addresses.len); - for (st.instruction_addresses[0..captured_frames]) |return_address| { - try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config); + for (st.instruction_addresses[0..captured_frames]) |pc_addr| { + try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config); } if (n_frames > captured_frames) { tty_config.setColor(writer, .bold) catch {}; @@ -725,6 +730,8 @@ pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void { const StackIterator = union(enum) { /// Unwinding using debug info (e.g. DWARF CFI). di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, + /// We will first report the *current* PC of this `UnwindContext`, then we will switch to `di`. + di_first: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable. fp: usize, @@ -742,9 +749,12 @@ const StackIterator = union(enum) { } if (opt_context_ptr) |context_ptr| { if (!SelfInfo.supports_unwinding) return error.CannotUnwindFromContext; - return .{ .di = .init(context_ptr) }; + // Use `di_first` here so we report the PC in the context before unwinding any further. + return .{ .di_first = .init(context_ptr) }; } if (SelfInfo.supports_unwinding and cpu_context.Native != noreturn) { + // We don't need `di_first` here, because our PC is in `std.debug`; we're only interested + // in our caller's frame and above. 
return .{ .di = .init(&.current()) }; } return .{ .fp = @frameAddress() }; @@ -752,7 +762,7 @@ const StackIterator = union(enum) { fn deinit(si: *StackIterator) void { switch (si.*) { .fp => {}, - .di => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()), + .di, .di_first => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()), } } @@ -763,7 +773,7 @@ const StackIterator = union(enum) { /// Whether the current unwind strategy is allowed given `allow_unsafe`. fn stratOk(it: *const StackIterator, allow_unsafe: bool) bool { return switch (it.*) { - .di => true, + .di, .di_first => true, // If we omitted frame pointers from *this* compilation, FP unwinding would crash // immediately regardless of anything. But FPs could also be omitted from a different // linked object, so it's not guaranteed to be safe, unless the target specifically @@ -773,11 +783,11 @@ const StackIterator = union(enum) { } const Result = union(enum) { - /// A stack frame has been found; this is the corresponding return address. + /// A stack frame has been found; this is the corresponding program counter address. frame: usize, /// The end of the stack has been reached. end, - /// We were using the `.di` strategy, but are now switching to `.fp` due to this error. + /// We were using `SelfInfo.UnwindInfo`, but are now switching to FP unwinding due to this error. 
switch_to_fp: struct { address: usize, err: SelfInfo.Error, @@ -785,20 +795,25 @@ const StackIterator = union(enum) { }; fn next(it: *StackIterator) Result { switch (it.*) { + .di_first => |unwind_context| { + const first_pc = unwind_context.pc; + if (first_pc == 0) return .end; + it.* = .{ .di = unwind_context }; + return .{ .frame = first_pc }; + }, .di => |*unwind_context| { const di = getSelfDebugInfo() catch unreachable; const di_gpa = getDebugInfoAllocator(); - if (di.unwindFrame(di_gpa, unwind_context)) |ra| { - if (ra <= 1) return .end; - return .{ .frame = ra }; - } else |err| { + di.unwindFrame(di_gpa, unwind_context) catch |err| { const pc = unwind_context.pc; it.* = .{ .fp = unwind_context.getFp() }; return .{ .switch_to_fp = .{ .address = pc, .err = err, } }; - } + }; + const pc = unwind_context.pc; + return if (pc == 0) .end else .{ .frame = pc }; }, .fp => |fp| { if (fp == 0) return .end; // we reached the "sentinel" base pointer @@ -824,9 +839,9 @@ const StackIterator = union(enum) { if (bp != 0 and bp <= fp) return .end; it.fp = bp; - const ra = ra_ptr.*; + const ra = stripInstructionPtrAuthCode(ra_ptr.*); if (ra <= 1) return .end; - return .{ .frame = ra }; + return .{ .frame = ra - 1 }; }, } } @@ -860,6 +875,26 @@ const StackIterator = union(enum) { } }; +/// Some platforms use pointer authentication: the upper bits of instruction pointers contain a +/// signature. This function clears those signature bits to make the pointer directly usable. 
+pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch.isAARCH64()) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : .{ .x16 = true }); + } + + return ptr; +} + fn printSourceAtAddress(gpa: Allocator, debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) Writer.Error!void { const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) { error.MissingDebugInfo, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 321e67bb7c19..efa9d782f64a 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -2,7 +2,6 @@ //! goal of minimal code bloat and compilation speed penalty. const builtin = @import("builtin"); -const native_os = builtin.os.tag; const native_endian = native_arch.endian(); const native_arch = builtin.cpu.arch; @@ -13,6 +12,8 @@ const assert = std.debug.assert; const Dwarf = std.debug.Dwarf; const CpuContext = std.debug.cpu_context.Native; +const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode; + const root = @import("root"); const SelfInfo = @This(); @@ -52,7 +53,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!void { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -115,7 +116,7 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// pub const supports_unwinding: 
bool; /// /// Only required if `supports_unwinding == true`. /// pub const UnwindContext = struct { -/// /// A PC value inside the function of the last unwound frame. +/// /// A PC value representing the location in the last frame. /// pc: usize, /// pub fn init(ctx: *std.debug.cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext; /// pub fn deinit(uc: *UnwindContext, gpa: Allocator) void; @@ -123,21 +124,22 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// /// pointer is unknown, 0 may be returned instead. /// pub fn getFp(uc: *UnwindContext) usize; /// }; -/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame and returns -/// /// the next return address (which may be 0 indicating end of stack). +/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame. +/// /// The caller will read the new instruction poiter from the `pc` field. +/// /// `pc = 0` indicates end of stack / no more frames. /// pub fn unwindFrame( /// mod: *const Module, /// gpa: Allocator, /// di: *DebugInfo, /// ctx: *UnwindContext, -/// ) SelfInfo.Error!usize; +/// ) SelfInfo.Error!void; /// ``` const Module: type = Module: { // Allow overriding the target-specific `SelfInfo` implementation by exposing `root.debug.Module`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "Module")) { break :Module root.debug.Module; } - break :Module switch (native_os) { + break :Module switch (builtin.os.tag) { .linux, .netbsd, .freebsd, @@ -222,7 +224,7 @@ pub const DwarfUnwindContext = struct { const register = col.register orelse return error.InvalidRegister; // The default type is usually undefined, but can be overriden by ABI authors. // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`. 
- if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 18) { + if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { // Callee-saved registers are initialized as if they had the .same_value rule const src = try context.cpu_context.dwarfRegisterBytes(register); if (src.len != out.len) return error.RegisterSizeMismatch; @@ -310,7 +312,7 @@ pub const DwarfUnwindContext = struct { unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, - ) Error!usize { + ) Error!void { return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, @@ -358,9 +360,10 @@ pub const DwarfUnwindContext = struct { unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, - ) !usize { - if (!supports_unwinding) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; + ) !void { + comptime assert(supports_unwinding); + + if (context.pc == 0) return; const pc_vaddr = context.pc - load_offset; @@ -430,12 +433,12 @@ pub const DwarfUnwindContext = struct { } } - const return_address: u64 = if (has_return_address) pc: { + const return_address: usize = if (has_return_address) pc: { const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register); break :pc stripInstructionPtrAuthCode(raw_ptr.*); } else 0; - (try regNative(new_cpu_context, ip_reg_num)).* = return_address; + (try regNative(&new_cpu_context, ip_reg_num)).* = return_address; // The new CPU context is complete; flush changes. context.cpu_context = new_cpu_context; @@ -444,11 +447,9 @@ pub const DwarfUnwindContext = struct { // *after* the call, it could (in the case of noreturn functions) actually point outside of // the caller's address range, meaning an FDE lookup would fail. 
We can handle this by // subtracting 1 from `return_address` so that the next lookup is guaranteed to land inside - // the `call` instruction`. The exception to this rule is signal frames, where the return + // the `call` instruction. The exception to this rule is signal frames, where the return // address is the same instruction that triggered the handler. context.pc = if (cie.is_signal_frame) return_address else return_address -| 1; - - return return_address; } /// Since register rules are applied (usually) during a panic, /// checked addition / subtraction is used so that we can return @@ -459,25 +460,6 @@ pub const DwarfUnwindContext = struct { else try std.math.sub(usize, base, @as(usize, @intCast(-offset))); } - /// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. - /// This function clears these signature bits to make the pointer usable. - pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (native_arch.isAARCH64()) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : .{ .x16 = true }); - } - - return ptr; - } pub fn regNative(ctx: *CpuContext, num: u16) error{ InvalidRegister, diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index fc2f1c89bb46..e3cbeb7eddbd 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -1,6 +1,5 @@ /// The runtime address where __TEXT is loaded. 
text_base: usize, -load_offset: usize, name: []const u8, pub fn key(m: *const DarwinModule) usize { @@ -12,38 +11,14 @@ pub const LookupCache = void; pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule { _ = cache; _ = gpa; - const image_count = std.c._dyld_image_count(); - for (0..image_count) |image_idx| { - const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; - const text_base = @intFromPtr(header); - if (address < text_base) continue; - const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); - - // Find the __TEXT segment - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const text_segment_cmd = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break segment_cmd; - } else continue; - - const seg_start = load_offset + text_segment_cmd.vmaddr; - assert(seg_start == text_base); - const seg_end = seg_start + text_segment_cmd.vmsize; - if (address < seg_start or address >= seg_end) continue; - - // We've found the matching __TEXT segment. This is the image we need. 
- return .{ - .text_base = text_base, - .load_offset = load_offset, - .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), - }; + var info: std.c.dl_info = undefined; + switch (std.c.dladdr(@ptrFromInt(address), &info)) { + 0 => return error.MissingDebugInfo, + else => return .{ + .name = std.mem.span(info.fname), + .text_base = @intFromPtr(info.fbase), + }, } - return error.MissingDebugInfo; } fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { const header: *std.macho.mach_header = @ptrFromInt(module.text_base); @@ -52,56 +27,115 @@ fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { .ncmds = header.ncmds, .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], }; - const sections = while (it.next()) |load_cmd| { + const sections, const text_vmaddr = while (it.next()) |load_cmd| { if (load_cmd.cmd() != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break load_cmd.getSections(); + break .{ load_cmd.getSections(), segment_cmd.vmaddr }; } else unreachable; + const vmaddr_slide = module.text_base - text_vmaddr; + var unwind_info: ?[]const u8 = null; var eh_frame: ?[]const u8 = null; for (sections) |sect| { if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); unwind_info = sect_ptr[0..@intCast(sect.size)]; } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr))); + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); eh_frame = sect_ptr[0..@intCast(sect.size)]; } } return .{ + .vmaddr_slide = vmaddr_slide, .unwind_info = unwind_info, .eh_frame = eh_frame, }; } fn loadMachO(module: *const 
DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO { - const mapped_mem = try mapDebugInfoFile(module.name); - errdefer posix.munmap(mapped_mem); + const all_mapped_memory = try mapDebugInfoFile(module.name); + errdefer posix.munmap(all_mapped_memory); + + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 Macs and x86_64 Macs. + if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; + const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. + const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing + // is big-endian, so we'll be swapping some bytes. + if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; + const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); + const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); + const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; + const native_cpu_type = switch (builtin.cpu.arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => comptime unreachable, + }; + for (archs) |*arch| { + if (@byteSwap(arch.cputype) != native_cpu_type) continue; + const offset = @byteSwap(arch.offset); + const size = @byteSwap(arch.size); + break :mapped_macho all_mapped_memory[offset..][0..size]; + } + // Our native architecture was not present in the fat binary. 
+ return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. + macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + else => return error.InvalidDebugInfo, + }; + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidDebugInfo; - const symtab: macho.symtab_command = symtab: { + const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { var it: macho.LoadCommandIterator = .{ .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, else => {}, }; - return error.MissingDebugInfo; + break :lc_iter .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; }; - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; + const strings = mapped_macho[symtab.stroff..][0 .. 
symtab.strsize - 1]; var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); defer symbols.deinit(gpa); + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. + var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, syms.len); + var ofile: u32 = undefined; var last_sym: MachoSymbol = undefined; var state: enum { @@ -115,7 +149,25 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO } = .init; for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) continue; + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = MachoSymbol.unknown_ofile, + }); + } + }, + } + continue; + } // TODO handle globals N_GSYM, and statics N_STSYM switch (sym.n_type.stab) { @@ -132,7 +184,6 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO last_sym = .{ .strx = 0, .addr = sym.n_value, - .size = 0, .ofile = ofile, }; }, @@ -145,14 +196,22 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO }, .fun_strx => { state = .fun_size; - last_sym.size = @intCast(sym.n_value); }, else => return error.InvalidDebugInfo, }, .ensym => switch (state) { .fun_size => { state = .ensym; - symbols.appendAssumeCapacity(last_sym); + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + 
const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } }, else => return error.InvalidDebugInfo, }, @@ -168,9 +227,12 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO } switch (state) { - .init => return error.MissingDebugInfo, + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, .oso_close => {}, - else => return error.InvalidDebugInfo, + else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab } const symbols_slice = try symbols.toOwnedSlice(gpa); @@ -182,10 +244,11 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); return .{ - .mapped_memory = mapped_mem, + .mapped_memory = all_mapped_memory, .symbols = symbols_slice, .strings = strings, .ofiles = .empty, + .vaddr_offset = module.text_base - text_vmaddr, }; } pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { @@ -195,7 +258,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu }; const loaded_macho = &di.loaded_macho.?; - const vaddr = address - module.load_offset; + const vaddr = address - loaded_macho.vaddr_offset; const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; // offset of `address` from start of `symbol` @@ -212,6 +275,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu .source_location = null, }; + if (symbol.ofile == MachoSymbol.unknown_ofile) { + // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. 
+ return sym_only_result; + } + const o_file: *DebugInfo.OFile = of: { const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile); if (!gop.found_existing) { @@ -233,7 +301,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, .compile_unit_name = compile_unit.die.getAttrString( &o_file.dwarf, native_endian, @@ -256,7 +324,7 @@ pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void { return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, @@ -272,7 +340,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, => return error.InvalidDebugInfo, }; } -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { +fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void { if (di.unwind == null) di.unwind = module.loadUnwindInfo(); const unwind = &di.unwind.?; @@ -500,11 +568,11 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }, .DWARF => { const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; - const eh_frame_vaddr 
= @intFromPtr(eh_frame.ptr) - module.load_offset; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide; return context.unwindFrame( gpa, &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - module.load_offset, + unwind.vmaddr_slide, @intCast(encoding.value.x86_64.dwarf), ); }, @@ -520,11 +588,11 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }, .DWARF => { const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset; + const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide; return context.unwindFrame( gpa, &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - module.load_offset, + unwind.vmaddr_slide, @intCast(encoding.value.x86_64.dwarf), ); }, @@ -572,9 +640,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, else => comptime unreachable, // unimplemented }; - context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; + context.pc = std.debug.stripInstructionPtrAuthCode(new_ip) -| 1; } pub const DebugInfo = struct { unwind: ?Unwind, @@ -590,6 +656,7 @@ pub const DebugInfo = struct { for (loaded_macho.ofiles.values()) |*ofile| { ofile.dwarf.deinit(gpa); ofile.symbols_by_name.deinit(gpa); + posix.munmap(ofile.mapped_memory); } loaded_macho.ofiles.deinit(gpa); gpa.free(loaded_macho.symbols); @@ -598,6 +665,9 @@ pub const DebugInfo = struct { } const Unwind = struct { + /// The slide applied to the following sections. So, `unwind_info.ptr` is this many bytes + /// higher than the vmaddr of `__unwind_info`, and likewise for `__eh_frame`. 
+ vmaddr_slide: u64, // Backed by the in-memory sections mapped by the loader unwind_info: ?[]const u8, eh_frame: ?[]const u8, @@ -606,21 +676,31 @@ pub const DebugInfo = struct { const LoadedMachO = struct { mapped_memory: []align(std.heap.page_size_min) const u8, symbols: []const MachoSymbol, - strings: [:0]const u8, + strings: []const u8, /// Key is index into `strings` of the file path. ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile), + /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is + /// because the segments in the file on disk might differ from the ones in memory. Normally + /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in + /// the dyld cache (dyld actually restart itself from cache after loading it), and the two + /// versions have (very) different segment base addresses. It's sort of like a large slide + /// has been applied to all addresses in memory. For an optimal experience, we consider the + /// on-disk vmaddr instead of the in-memory one. + vaddr_offset: usize, }; const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, dwarf: Dwarf, - strtab: [:0]const u8, + strtab: []const u8, symtab: []align(1) const macho.nlist_64, /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), const SymbolAdapter = struct { - strtab: [:0]const u8, + strtab: []const u8, symtab: []align(1) const macho.nlist_64, pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { _ = ctx; @@ -663,7 +743,7 @@ pub const DebugInfo = struct { if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1 :0]; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; @@ -717,6 +797,7 @@ pub const DebugInfo = struct { try dwarf.open(gpa, native_endian); return .{ + .mapped_memory = mapped_mem, .dwarf = dwarf, .strtab = strtab, .symtab = symtab, @@ -728,8 +809,9 @@ pub const DebugInfo = struct { const MachoSymbol = struct { strx: u32, addr: u64, - size: u32, + /// Value may be `unknown_ofile`. 
ofile: u32, + const unknown_ofile = std.math.maxInt(u32); fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { _ = context; return lhs.addr < rhs.addr; @@ -754,9 +836,9 @@ const MachoSymbol = struct { test find { const symbols: []const MachoSymbol = &.{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, }; try testing.expectEqual(null, find(symbols, 0)); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index fde61d8140d2..e080665497b2 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -230,7 +230,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro else => unreachable, } } -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void { if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); std.debug.assert(di.unwind[0] != null); for (&di.unwind) |*opt_unwind| { diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 8c88bd8b2f34..75abc39ff503 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -332,6 +332,34 @@ pub const UnwindContext = struct { .Wcr = @splat(0), .Wvr = @splat(0), }, + .thumb => .{ + .ContextFlags = 0, + .R0 = ctx.r[0], + .R1 = ctx.r[1], + .R2 = ctx.r[2], + .R3 = ctx.r[3], + .R4 = ctx.r[4], + .R5 = ctx.r[5], + .R6 = ctx.r[6], + .R7 = ctx.r[7], + .R8 = ctx.r[8], + .R9 = ctx.r[9], + .R10 
= ctx.r[10], + .R11 = ctx.r[11], + .R12 = ctx.r[12], + .Sp = ctx.r[13], + .Lr = ctx.r[14], + .Pc = ctx.r[15], + .Cpsr = 0, + .Fpcsr = 0, + .Padding = 0, + .DUMMYUNIONNAME = .{ .S = @splat(0) }, + .Bvr = @splat(0), + .Bcr = @splat(0), + .Wvr = @splat(0), + .Wcr = @splat(0), + .Padding2 = @splat(0), + }, else => comptime unreachable, }, .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), @@ -345,7 +373,7 @@ pub const UnwindContext = struct { return ctx.cur.getRegs().bp; } }; -pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { +pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void { _ = module; _ = gpa; _ = di; @@ -374,10 +402,10 @@ pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, const next_regs = context.cur.getRegs(); const tib = &windows.teb().NtTib; if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { - return 0; + context.pc = 0; + } else { + context.pc = next_regs.ip -| 1; } - context.pc = next_regs.ip -| 1; - return next_regs.ip; } const WindowsModule = @This(); diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig index 9859575fa367..b9dd49767fb0 100644 --- a/lib/std/debug/cpu_context.zig +++ b/lib/std/debug/cpu_context.zig @@ -214,6 +214,12 @@ pub fn fromWindowsContext(ctx: *const std.os.windows.CONTEXT) Native { .sp = ctx.Sp, .pc = ctx.Pc, }, + .thumb => .{ .r = .{ + ctx.R0, ctx.R1, ctx.R2, ctx.R3, + ctx.R4, ctx.R5, ctx.R6, ctx.R7, + ctx.R8, ctx.R9, ctx.R10, ctx.R11, + ctx.R12, ctx.Sp, ctx.Lr, ctx.Pc, + } }, else => comptime unreachable, }; } From abb2b1e2daffdadca7edc26634631215df901950 Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 17 Sep 2025 23:30:04 +0100 Subject: [PATCH 65/85] std.debug: update support checks --- lib/std/debug.zig | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git 
a/lib/std/debug.zig b/lib/std/debug.zig index 5b7a6bf71522..e22f2f734535 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -175,18 +175,12 @@ pub const runtime_safety = switch (builtin.mode) { .ReleaseFast, .ReleaseSmall => false, }; +/// Whether we can unwind the stack on this target, allowing capturing and/or printing the current +/// stack trace. It is still legal to call `captureCurrentStackTrace`, `writeCurrentStackTrace`, and +/// `dumpCurrentStackTrace` if this is `false`; it will just print an error / capture an empty +/// trace due to missing functionality. This value is just intended as a heuristic to avoid +/// pointless work e.g. capturing always-empty stack traces. pub const sys_can_stack_trace = switch (builtin.cpu.arch) { - // Observed to go into an infinite loop. - // TODO: Make this work. - .loongarch32, - .loongarch64, - .mips, - .mipsel, - .mips64, - .mips64el, - .s390x, - => false, - // `@returnAddress()` in LLVM 10 gives // "Non-Emscripten WebAssembly hasn't implemented __builtin_return_address". // On Emscripten, Zig only supports `@returnAddress()` in debug builds @@ -1178,9 +1172,10 @@ pub const have_segfault_handling_support = switch (native_os) { .solaris, .illumos, .windows, + .freebsd, + .openbsd, => true, - .freebsd, .openbsd => cpu_context.Native != noreturn, else => false, }; From 3a9c680ad7b88ed6d9684626be9138ffc44b73b7 Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 17 Sep 2025 23:52:52 +0100 Subject: [PATCH 66/85] std: allow disabling stack tracing This option disables both capturing and printing stack traces. The default is to disable if debug info is stripped. 
--- lib/std/debug.zig | 22 ++++++++++++++++------ lib/std/std.zig | 16 ++++++++++++++++ test/cases/disable_stack_tracing.zig | 28 ++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 test/cases/disable_stack_tracing.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index e22f2f734535..b10f98ee7b91 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -567,13 +567,11 @@ pub const StackUnwindOptions = struct { /// /// See `writeCurrentStackTrace` to immediately print the trace instead of capturing it. pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) std.builtin.StackTrace { - var it = StackIterator.init(options.context) catch { - return .{ .index = 0, .instruction_addresses = &.{} }; - }; + const empty_trace: std.builtin.StackTrace = .{ .index = 0, .instruction_addresses = &.{} }; + if (!std.options.allow_stack_tracing) return empty_trace; + var it = StackIterator.init(options.context) catch return empty_trace; defer it.deinit(); - if (!it.stratOk(options.allow_unsafe_unwind)) { - return .{ .index = 0, .instruction_addresses = &.{} }; - } + if (!it.stratOk(options.allow_unsafe_unwind)) return empty_trace; var frame_idx: usize = 0; var wait_for = options.first_address; while (true) switch (it.next()) { @@ -599,6 +597,12 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) /// /// See `captureCurrentStackTrace` to capture the trace addresses into a buffer instead of printing. 
pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_config: tty.Config) Writer.Error!void { + if (!std.options.allow_stack_tracing) { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: stack tracing is disabled\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + } const di_gpa = getDebugInfoAllocator(); const di = getSelfDebugInfo() catch |err| switch (err) { error.UnsupportedTarget => { @@ -688,6 +692,12 @@ pub fn dumpCurrentStackTrace(options: StackUnwindOptions) void { /// Write a previously captured stack trace to `writer`, annotated with source locations. pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_config: tty.Config) Writer.Error!void { + if (!std.options.allow_stack_tracing) { + tty_config.setColor(writer, .dim) catch {}; + try writer.print("Cannot print stack trace: stack tracing is disabled\n", .{}); + tty_config.setColor(writer, .reset) catch {}; + return; + } // Fetch `st.index` straight away. Aside from avoiding redundant loads, this prevents issues if // `st` is `@errorReturnTrace()` and errors are encountered while writing the stack trace. const n_frames = st.index; diff --git a/lib/std/std.zig b/lib/std/std.zig index 4a7f9bd8666a..4e68d1d61157 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -171,6 +171,22 @@ pub const Options = struct { http_enable_ssl_key_log_file: bool = @import("builtin").mode == .Debug, side_channels_mitigations: crypto.SideChannelsMitigations = crypto.default_side_channels_mitigations, + + /// Whether to allow capturing and writing stack traces. 
This affects the following functions: + /// * `debug.captureCurrentStackTrace` + /// * `debug.writeCurrentStackTrace` + /// * `debug.dumpCurrentStackTrace` + /// * `debug.writeStackTrace` + /// * `debug.dumpStackTrace` + /// + /// Stack traces can generally be collected and printed when debug info is stripped, but are + /// often less useful since they usually cannot be mapped to source locations and/or have bad + /// source locations. The stack tracing logic can also be quite large, which may be undesirable, + /// particularly in ReleaseSmall. + /// + /// If this is `false`, then captured stack traces will always be empty, and attempts to write + /// stack traces will just print an error to the relevant `Io.Writer` and return. + allow_stack_tracing: bool = !@import("builtin").strip_debug_info, }; // This forces the start.zig file to be imported, and the comptime logic inside that diff --git a/test/cases/disable_stack_tracing.zig b/test/cases/disable_stack_tracing.zig new file mode 100644 index 000000000000..044eaf701234 --- /dev/null +++ b/test/cases/disable_stack_tracing.zig @@ -0,0 +1,28 @@ +pub const std_options: std.Options = .{ + .allow_stack_tracing = false, +}; + +pub fn main() !void { + var st_buf: [8]usize = undefined; + var buf: [1024]u8 = undefined; + var stdout = std.fs.File.stdout().writer(&buf); + + const captured_st = try foo(&stdout.interface, &st_buf); + try std.debug.writeStackTrace(&captured_st, &stdout.interface, .no_color); + try stdout.interface.print("stack trace index: {d}\n", .{captured_st.index}); + + try stdout.interface.flush(); +} +fn foo(w: *std.Io.Writer, st_buf: []usize) !std.builtin.StackTrace { + try std.debug.writeCurrentStackTrace(.{}, w, .no_color); + return std.debug.captureCurrentStackTrace(.{}, st_buf); +} + +const std = @import("std"); + +// run +// +// Cannot print stack trace: stack tracing is disabled +// Cannot print stack trace: stack tracing is disabled +// stack trace index: 0 +// From 
0c24b8ec66e7f8ade2b1014fd5c06110354bcf43 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 18 Sep 2025 00:03:03 +0100 Subject: [PATCH 67/85] update to new std.debug changes --- lib/std/debug/Dwarf/expression.zig | 5 +---- src/crash_report.zig | 2 +- src/target.zig | 2 +- test/standalone/stack_iterator/unwind.zig | 10 +++++----- test/standalone/stack_iterator/unwind_freestanding.zig | 8 ++++---- 5 files changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index b1751af7498a..3291de350662 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -1566,10 +1566,7 @@ test "basics" { // Register location description var cpu_context: std.debug.cpu_context.Native = undefined; - std.debug.relocateContext(&cpu_context); - context = Context{ - .cpu_context = &cpu_context, - }; + context = .{ .cpu_context = &cpu_context }; const reg_bytes = try cpu_context.dwarfRegisterBytes(0); mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); diff --git a/src/crash_report.zig b/src/crash_report.zig index 8c686459fee1..b051752c7a02 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -17,7 +17,7 @@ pub const debug = struct { /// crash earlier than that. 
pub var zig_argv0: []const u8 = "zig"; -fn handleSegfaultImpl(addr: ?usize, name: []const u8, opt_ctx: ?std.debug.ThreadContextPtr) noreturn { +fn handleSegfaultImpl(addr: ?usize, name: []const u8, opt_ctx: ?std.debug.CpuContextPtr) noreturn { @branchHint(.cold); dumpCrashContext() catch {}; std.debug.defaultHandleSegfault(addr, name, opt_ctx); diff --git a/src/target.zig b/src/target.zig index 53ef68bfc5d4..6f139e785d7a 100644 --- a/src/target.zig +++ b/src/target.zig @@ -512,7 +512,7 @@ pub fn defaultUnwindTables(target: *const std.Target, libunwind: bool, libtsan: if (target.os.tag.isDarwin()) return .async; if (libunwind) return .async; if (libtsan) return .async; - if (std.debug.Dwarf.abi.supportsUnwinding(target)) return .async; + if (std.debug.Dwarf.supportsUnwinding(target)) return .async; return .none; } diff --git a/test/standalone/stack_iterator/unwind.zig b/test/standalone/stack_iterator/unwind.zig index d7df31830056..1775ff1b00ad 100644 --- a/test/standalone/stack_iterator/unwind.zig +++ b/test/standalone/stack_iterator/unwind.zig @@ -3,7 +3,7 @@ const builtin = @import("builtin"); const fatal = std.process.fatal; noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[0] = @returnAddress(); + expected[0] = @returnAddress() - 1; return std.debug.captureCurrentStackTrace(.{ .first_address = @returnAddress(), .allow_unsafe_unwind = true, @@ -58,12 +58,12 @@ noinline fn frame2(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } } - expected[1] = @returnAddress(); + expected[1] = @returnAddress() - 1; return frame3(expected, addr_buf); } noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[2] = @returnAddress(); + expected[2] = @returnAddress() - 1; // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding // to exercise the stack-indirect encoding path @@ -74,12 +74,12 @@ noinline fn frame1(expected: *[4]usize, addr_buf: 
*[4]usize) std.builtin.StackTr } noinline fn frame0(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[3] = @returnAddress(); + expected[3] = @returnAddress() - 1; return frame1(expected, addr_buf); } pub fn main() void { - if (std.posix.ucontext_t == void and builtin.omit_frame_pointer) { + if (std.debug.cpu_context.Native == noreturn and builtin.omit_frame_pointer) { // Stack unwinding is impossible. return; } diff --git a/test/standalone/stack_iterator/unwind_freestanding.zig b/test/standalone/stack_iterator/unwind_freestanding.zig index 866f73d9bdf3..f686bfbe1236 100644 --- a/test/standalone/stack_iterator/unwind_freestanding.zig +++ b/test/standalone/stack_iterator/unwind_freestanding.zig @@ -3,7 +3,7 @@ const std = @import("std"); noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[0] = @returnAddress(); + expected[0] = @returnAddress() - 1; return std.debug.captureCurrentStackTrace(.{ .first_address = @returnAddress(), .allow_unsafe_unwind = true, @@ -11,12 +11,12 @@ noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } noinline fn frame2(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[1] = @returnAddress(); + expected[1] = @returnAddress() - 1; return frame3(expected, addr_buf); } noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[2] = @returnAddress(); + expected[2] = @returnAddress() - 1; // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding // to exercise the stack-indirect encoding path @@ -27,7 +27,7 @@ noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } noinline fn frame0(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[3] = @returnAddress(); + expected[3] = @returnAddress() - 1; return frame1(expected, addr_buf); } From 23d6381e8b5fb63bc9cba5cc5c78b7946ca4746a Mon Sep 17 00:00:00 
2001 From: mlugg Date: Thu, 18 Sep 2025 10:35:14 +0100 Subject: [PATCH 68/85] std.debug: fix typo --- lib/std/debug/SelfInfo/DarwinModule.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index e3cbeb7eddbd..ed77bc4f5a6f 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -203,7 +203,7 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO .fun_size => { state = .ensym; if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); const gop = symbol_names.getOrPutAssumeCapacity(name); if (!gop.found_existing) { assert(gop.index == symbols.items.len); From 9434bab3134edadae7ae7e575f6b025cafc6a59a Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 18 Sep 2025 12:54:33 +0100 Subject: [PATCH 69/85] std: work around crash parsing LLVM PDB This crash exists on master, and seems to have existed since 2019; I think it's just very rare and depends on the exact binary generated. In theory, a stream block should always be a "data" block rather than a FPM block; the FPMs use blocks `1, 4097, 8193, ...` and `2, 4098, 8194, ...` respectively. However, I have observed LLVM emitting an otherwise valid PDB which maps FPM blocks into streams. This is not a bug in `std.debug.Pdb`, because `llvm-pdbutil` agrees with our stream indices. I think this is arguably an LLVM bug; however, we don't really lose anything from just weakening this check. To be fair, MSF doesn't have an explicit specification, and LLVM's documentation (which is the closest thing we have) does not explicitly state that FPM blocks cannot be mapped into streams, so perhaps this is actually valid. In the rare case that LLVM emits this, previously, stack traces would have been completely useless; now, stack traces will work okay. 
--- lib/std/debug/Pdb.zig | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig index 66010ba3770f..c10b361f7293 100644 --- a/lib/std/debug/Pdb.zig +++ b/lib/std/debug/Pdb.zig @@ -413,8 +413,7 @@ const Msf = struct { return error.InvalidDebugInfo; if (superblock.free_block_map_block != 1 and superblock.free_block_map_block != 2) return error.InvalidDebugInfo; - const file_len = try file_reader.getSize(); - if (superblock.num_blocks * superblock.block_size != file_len) + if (superblock.num_blocks * superblock.block_size != try file_reader.getSize()) return error.InvalidDebugInfo; switch (superblock.block_size) { // llvm only supports 4096 but we can handle any of these values @@ -428,6 +427,7 @@ const Msf = struct { try file_reader.seekTo(superblock.block_size * superblock.block_map_addr); const dir_blocks = try gpa.alloc(u32, dir_block_count); + errdefer gpa.free(dir_blocks); for (dir_blocks) |*b| { b.* = try file_reader.interface.takeInt(u32, .little); } @@ -451,25 +451,25 @@ const Msf = struct { const streams = try gpa.alloc(MsfStream, stream_count); errdefer gpa.free(streams); - for (streams, 0..) |*stream, i| { - const size = stream_sizes[i]; + for (streams, stream_sizes) |*stream, size| { if (size == 0) { stream.* = .empty; - } else { - const blocks = try gpa.alloc(u32, size); - errdefer gpa.free(blocks); - for (blocks) |*block| { - const block_id = try directory.interface.takeInt(u32, .little); - const n = (block_id % superblock.block_size); - // 0 is for pdb.SuperBlock, 1 and 2 for FPMs. 
- if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.block_size > file_len) - return error.InvalidBlockIndex; - block.* = block_id; - } - const buffer = try gpa.alloc(u8, 64); - errdefer gpa.free(buffer); - stream.* = .init(superblock.block_size, file_reader, blocks, buffer); + continue; + } + const blocks = try gpa.alloc(u32, size); + errdefer gpa.free(blocks); + for (blocks) |*block| { + const block_id = try directory.interface.takeInt(u32, .little); + // Index 0 is reserved for the superblock. + // In theory, every page which is `n * block_size + 1` or `n * block_size + 2` + // is also reserved, for one of the FPMs. However, LLVM has been observed to map + // these into actual streams, so allow it for compatibility. + if (block_id == 0 or block_id >= superblock.num_blocks) return error.InvalidBlockIndex; + block.* = block_id; } + const buffer = try gpa.alloc(u8, 64); + errdefer gpa.free(buffer); + stream.* = .init(superblock.block_size, file_reader, blocks, buffer); } const end = directory.logicalPos(); From 2ab650b4817cbb22244c17de828e82cbb0ccf15e Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 18 Sep 2025 13:32:47 +0100 Subject: [PATCH 70/85] std.debug: go back to storing return addresses instead of call addresses ...and just deal with signal handlers by adding 1 to create a fake "return address". The system I tried out where the addresses returned by `StackIterator` were pre-subtracted didn't play nicely with error traces, which in hindsight, makes perfect sense. This definition also removes some ugly off-by-one issues in matching `first_address`, so I do think this is a better approach. 
--- lib/std/debug.zig | 39 ++++++++++--------- lib/std/debug/SelfInfo.zig | 35 ++++++++++------- lib/std/debug/SelfInfo/DarwinModule.zig | 12 ++++-- lib/std/debug/SelfInfo/ElfModule.zig | 2 +- lib/std/debug/SelfInfo/WindowsModule.zig | 9 +++-- test/standalone/stack_iterator/unwind.zig | 8 ++-- .../stack_iterator/unwind_freestanding.zig | 8 ++-- 7 files changed, 65 insertions(+), 48 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index b10f98ee7b91..23d134f84cdf 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -577,14 +577,12 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) while (true) switch (it.next()) { .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break, .end => break, - .frame => |pc_addr| { + .frame => |ret_addr| { if (wait_for) |target| { - // Possible off-by-one error: `pc_addr` might be one less than the return address (so - // that it falls *inside* the function call), while `target` *is* a return address. - if (pc_addr != target and pc_addr + 1 != target) continue; + if (ret_addr != target) continue; wait_for = null; } - if (frame_idx < addr_buf.len) addr_buf[frame_idx] = pc_addr; + if (frame_idx < addr_buf.len) addr_buf[frame_idx] = ret_addr; frame_idx += 1; }, }; @@ -659,14 +657,14 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ } }, .end => break, - .frame => |pc_addr| { + .frame => |ret_addr| { if (wait_for) |target| { - // Possible off-by-one error: `pc_addr` might be one less than the return address (so - // that it falls *inside* the function call), while `target` *is* a return address. - if (pc_addr != target and pc_addr + 1 != target) continue; + if (ret_addr != target) continue; wait_for = null; } - try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config); + // `ret_addr` is the return address, which is *after* the function call. + // Subtract 1 to get an address *in* the function call for a better source location. 
+ try printSourceAtAddress(di_gpa, di, writer, ret_addr -| 1, tty_config); printed_any_frame = true; }, }; @@ -712,8 +710,10 @@ pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_c }, }; const captured_frames = @min(n_frames, st.instruction_addresses.len); - for (st.instruction_addresses[0..captured_frames]) |pc_addr| { - try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config); + for (st.instruction_addresses[0..captured_frames]) |ret_addr| { + // `ret_addr` is the return address, which is *after* the function call. + // Subtract 1 to get an address *in* the function call for a better source location. + try printSourceAtAddress(di_gpa, di, writer, ret_addr -| 1, tty_config); } if (n_frames > captured_frames) { tty_config.setColor(writer, .bold) catch {}; @@ -787,7 +787,7 @@ const StackIterator = union(enum) { } const Result = union(enum) { - /// A stack frame has been found; this is the corresponding program counter address. + /// A stack frame has been found; this is the corresponding return address. frame: usize, /// The end of the stack has been reached. end, @@ -797,18 +797,21 @@ const StackIterator = union(enum) { err: SelfInfo.Error, }, }; + fn next(it: *StackIterator) Result { switch (it.*) { .di_first => |unwind_context| { const first_pc = unwind_context.pc; if (first_pc == 0) return .end; it.* = .{ .di = unwind_context }; - return .{ .frame = first_pc }; + // The caller expects *return* addresses, where they will subtract 1 to find the address of the call. + // However, we have the actual current PC, which should not be adjusted. Compensate by adding 1. 
+ return .{ .frame = first_pc +| 1 }; }, .di => |*unwind_context| { const di = getSelfDebugInfo() catch unreachable; const di_gpa = getDebugInfoAllocator(); - di.unwindFrame(di_gpa, unwind_context) catch |err| { + const ret_addr = di.unwindFrame(di_gpa, unwind_context) catch |err| { const pc = unwind_context.pc; it.* = .{ .fp = unwind_context.getFp() }; return .{ .switch_to_fp = .{ @@ -816,8 +819,8 @@ const StackIterator = union(enum) { .err = err, } }; }; - const pc = unwind_context.pc; - return if (pc == 0) .end else .{ .frame = pc }; + if (ret_addr <= 1) return .end; + return .{ .frame = ret_addr }; }, .fp => |fp| { if (fp == 0) return .end; // we reached the "sentinel" base pointer @@ -845,7 +848,7 @@ const StackIterator = union(enum) { it.fp = bp; const ra = stripInstructionPtrAuthCode(ra_ptr.*); if (ra <= 1) return .end; - return .{ .frame = ra - 1 }; + return .{ .frame = ra }; }, } } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index efa9d782f64a..6934b3d396b3 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -53,7 +53,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!void { +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); const gop = try self.modules.getOrPut(gpa, module.key()); @@ -124,15 +124,14 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// /// pointer is unknown, 0 may be returned instead. /// pub fn getFp(uc: *UnwindContext) usize; /// }; -/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame. -/// /// The caller will read the new instruction poiter from the `pc` field. -/// /// `pc = 0` indicates end of stack / no more frames. 
+/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame, and returns +/// /// the frame's return address. /// pub fn unwindFrame( /// mod: *const Module, /// gpa: Allocator, /// di: *DebugInfo, /// ctx: *UnwindContext, -/// ) SelfInfo.Error!void; +/// ) SelfInfo.Error!usize; /// ``` const Module: type = Module: { // Allow overriding the target-specific `SelfInfo` implementation by exposing `root.debug.Module`. @@ -312,7 +311,7 @@ pub const DwarfUnwindContext = struct { unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, - ) Error!void { + ) Error!usize { return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, @@ -360,10 +359,10 @@ pub const DwarfUnwindContext = struct { unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, - ) !void { + ) !usize { comptime assert(supports_unwinding); - if (context.pc == 0) return; + if (context.pc == 0) return 0; const pc_vaddr = context.pc - load_offset; @@ -443,13 +442,19 @@ pub const DwarfUnwindContext = struct { // The new CPU context is complete; flush changes. context.cpu_context = new_cpu_context; - // Also update the stored pc. However, because `return_address` points to the instruction - // *after* the call, it could (in the case of noreturn functions) actually point outside of - // the caller's address range, meaning an FDE lookup would fail. We can handle this by - // subtracting 1 from `return_address` so that the next lookup is guaranteed to land inside - // the `call` instruction. The exception to this rule is signal frames, where the return - // address is the same instruction that triggered the handler. - context.pc = if (cie.is_signal_frame) return_address else return_address -| 1; + // The caller will subtract 1 from the return address to get an address corresponding to the + // function call. 
However, if this is a signal frame, that's actually incorrect, because the + // "return address" we have is the instruction which triggered the signal (if the signal + // handler returned, the instruction would be re-run). Compensate for this by incrementing + // the address in that case. + const adjusted_ret_addr = if (cie.is_signal_frame) return_address +| 1 else return_address; + + // We also want to do that same subtraction here to get the PC for the next frame's FDE. + // This is because if the callee was noreturn, then the function call might be the caller's + // last instruction, so `return_address` might actually point outside of it! + context.pc = adjusted_ret_addr -| 1; + + return adjusted_ret_addr; } /// Since register rules are applied (usually) during a panic, /// checked addition / subtraction is used so that we can return diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index ed77bc4f5a6f..fa2872035713 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -324,7 +324,7 @@ pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void { +pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, @@ -340,7 +340,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, => return error.InvalidDebugInfo, }; } -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void { +fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { if (di.unwind == null) di.unwind = module.loadUnwindInfo(); const unwind = &di.unwind.?; @@ -640,7 +640,13 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, else => comptime unreachable, // unimplemented }; - context.pc = std.debug.stripInstructionPtrAuthCode(new_ip) -| 1; + const ret_addr = std.debug.stripInstructionPtrAuthCode(new_ip); + + // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this + // function's last instruction making `ret_addr` one byte past its end. 
+ context.pc = ret_addr -| 1; + + return ret_addr; } pub const DebugInfo = struct { unwind: ?Unwind, diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index e080665497b2..fde61d8140d2 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -230,7 +230,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro else => unreachable, } } -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void { +pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); std.debug.assert(di.unwind[0] != null); for (&di.unwind) |*opt_unwind| { diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 75abc39ff503..1fdf69b2a0a2 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -373,7 +373,7 @@ pub const UnwindContext = struct { return ctx.cur.getRegs().bp; } }; -pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void { +pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { _ = module; _ = gpa; _ = di; @@ -403,9 +403,12 @@ pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, const tib = &windows.teb().NtTib; if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { context.pc = 0; - } else { - context.pc = next_regs.ip -| 1; + return 0; } + // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this + // function's last instruction making `next_regs.ip` one byte past its end. 
+ context.pc = next_regs.ip -| 1; + return next_regs.ip; } const WindowsModule = @This(); diff --git a/test/standalone/stack_iterator/unwind.zig b/test/standalone/stack_iterator/unwind.zig index 1775ff1b00ad..69a766f58b10 100644 --- a/test/standalone/stack_iterator/unwind.zig +++ b/test/standalone/stack_iterator/unwind.zig @@ -3,7 +3,7 @@ const builtin = @import("builtin"); const fatal = std.process.fatal; noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[0] = @returnAddress() - 1; + expected[0] = @returnAddress(); return std.debug.captureCurrentStackTrace(.{ .first_address = @returnAddress(), .allow_unsafe_unwind = true, @@ -58,12 +58,12 @@ noinline fn frame2(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } } - expected[1] = @returnAddress() - 1; + expected[1] = @returnAddress(); return frame3(expected, addr_buf); } noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[2] = @returnAddress() - 1; + expected[2] = @returnAddress(); // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding // to exercise the stack-indirect encoding path @@ -74,7 +74,7 @@ noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } noinline fn frame0(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[3] = @returnAddress() - 1; + expected[3] = @returnAddress(); return frame1(expected, addr_buf); } diff --git a/test/standalone/stack_iterator/unwind_freestanding.zig b/test/standalone/stack_iterator/unwind_freestanding.zig index f686bfbe1236..866f73d9bdf3 100644 --- a/test/standalone/stack_iterator/unwind_freestanding.zig +++ b/test/standalone/stack_iterator/unwind_freestanding.zig @@ -3,7 +3,7 @@ const std = @import("std"); noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[0] = @returnAddress() - 1; + expected[0] = @returnAddress(); return 
std.debug.captureCurrentStackTrace(.{ .first_address = @returnAddress(), .allow_unsafe_unwind = true, @@ -11,12 +11,12 @@ noinline fn frame3(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } noinline fn frame2(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[1] = @returnAddress() - 1; + expected[1] = @returnAddress(); return frame3(expected, addr_buf); } noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[2] = @returnAddress() - 1; + expected[2] = @returnAddress(); // Use a stack frame that is too big to encode in __unwind_info's stack-immediate encoding // to exercise the stack-indirect encoding path @@ -27,7 +27,7 @@ noinline fn frame1(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTr } noinline fn frame0(expected: *[4]usize, addr_buf: *[4]usize) std.builtin.StackTrace { - expected[3] = @returnAddress() - 1; + expected[3] = @returnAddress(); return frame1(expected, addr_buf); } From dae703d3c028eab3bf98d89d2bca1abc75f864fa Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 18 Sep 2025 15:03:45 +0100 Subject: [PATCH 71/85] std.posix.abort: only trigger breakpoint on Windows if being debugged Processes should reasonably be able to expect their children to abort with typical exit codes, rather than a debugger breakpoint signal. This flag in the PEB is what would be checked by `IsDebuggerPresent` in kernel32, which is the function you would typically use for this purpose. This fixes `test-stack-trace` failures on Windows, as these tests were expecting exit code 3 to indicate abort. --- lib/std/posix.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 3b77e58df58e..3e4c7f05ed00 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -689,7 +689,7 @@ pub fn abort() noreturn { // even when linking libc on Windows we use our own abort implementation. 
// See https://github.com/ziglang/zig/issues/2071 for more details. if (native_os == .windows) { - if (builtin.mode == .Debug) { + if (builtin.mode == .Debug and windows.peb().BeingDebugged != 0) { @breakpoint(); } windows.kernel32.ExitProcess(3); From 084e92879a1732374fac5f1a05a27f42b9306d22 Mon Sep 17 00:00:00 2001 From: mlugg Date: Thu, 18 Sep 2025 15:52:02 +0100 Subject: [PATCH 72/85] std: don't get CPU context when using CBE targeting MSVC Calling `current` here causes compilation failures as the C backend currently does not emit valid MSVC inline assembly. This change means that when building for MSVC with the self-hosted C backend, only FP unwinding can be used. --- lib/std/debug.zig | 8 +++++++- lib/std/debug/cpu_context.zig | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 23d134f84cdf..3d8bc565e709 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -756,7 +756,13 @@ const StackIterator = union(enum) { // Use `di_first` here so we report the PC in the context before unwinding any further. return .{ .di_first = .init(context_ptr) }; } - if (SelfInfo.supports_unwinding and cpu_context.Native != noreturn) { + // Workaround the C backend being unable to use inline assembly on MSVC by disabling the + // call to `current`. This effectively constrains stack trace collection and dumping to FP + // unwinding when building with CBE for MSVC. + if (!(builtin.zig_backend == .stage2_c and builtin.target.abi == .msvc) and + SelfInfo.supports_unwinding and + cpu_context.Native != noreturn) + { // We don't need `di_first` here, because our PC is in `std.debug`; we're only interested // in our caller's frame and above. 
return .{ .di = .init(&.current()) }; diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig index b9dd49767fb0..fdf6ebd243b2 100644 --- a/lib/std/debug/cpu_context.zig +++ b/lib/std/debug/cpu_context.zig @@ -316,7 +316,7 @@ pub const X86_64 = struct { \\movq %%r15, 0x78(%%rdi) \\leaq (%%rip), %%rax \\movq %%rax, 0x80(%%rdi) - \\movq 0x00(%%rdi), %%rax // restore saved rax + \\movq 0x00(%%rdi), %%rax : : [gprs] "{rdi}" (&ctx.gprs.values), : .{ .memory = true }); @@ -431,7 +431,7 @@ pub const Aarch64 = extern struct { \\str x1, [x0, #0x0f8] \\adr x1, . \\str x1, [x0, #0x100] - \\ldr x1, [x0, #0x008] // restore saved x1 + \\ldr x1, [x0, #0x008] : : [gprs] "{x0}" (&ctx), : .{ .memory = true }); From 9c1821d3bfadc5eddd4dff271a4920c03ee0ffea Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 19 Sep 2025 11:55:50 +0100 Subject: [PATCH 73/85] ElfModule: fix assertion failure --- lib/std/debug/SelfInfo/ElfModule.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index fde61d8140d2..0812beaf7a8e 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -204,7 +204,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro } else unwinds: { // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` // section, but we'll have to load the binary to get at it. 
- try module.loadElf(gpa, di); + if (di.loaded_elf == null) try module.loadElf(gpa, di); const opt_debug_frame = &di.loaded_elf.?.debug_frame; const opt_eh_frame = &di.loaded_elf.?.eh_frame; var i: usize = 0; From 099a95041054e456ebefbd75f6a4f9f6961002be Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 19 Sep 2025 13:35:12 +0100 Subject: [PATCH 74/85] std.debug.SelfInfo: thread safety This has been a TODO for ages, but in the past it didn't really matter because stack traces are typically printed to stderr for which a mutex is held so in practice there was a mutex guarding usage of `SelfInfo`. However, now that `SelfInfo` is also used for simply capturing traces, thread safety is needed. Instead of just a single mutex, though, there are a couple of different mutexes involved; this helps make critical sections smaller, particularly when unwinding the stack as `unwindFrame` doesn't typically need to hold any lock at all. --- lib/std/debug.zig | 10 ++-- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/SelfInfo.zig | 59 +++++++++++++++++----- lib/std/debug/SelfInfo/DarwinModule.zig | 24 ++++++++- lib/std/debug/SelfInfo/ElfModule.zig | 62 ++++++++++++++++-------- lib/std/debug/SelfInfo/WindowsModule.zig | 23 +++++++-- 6 files changed, 136 insertions(+), 44 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 3d8bc565e709..e587ddd5c9d1 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -238,7 +238,6 @@ pub fn print(comptime fmt: []const u8, args: anytype) void { nosuspend bw.print(fmt, args) catch return; } -/// TODO multithreaded awareness /// Marked `inline` to propagate a comptime-known error to callers. 
pub inline fn getSelfDebugInfo() !*SelfInfo { if (!SelfInfo.target_supported) return error.UnsupportedTarget; @@ -1169,7 +1168,8 @@ test printLineFromFile { } } -/// TODO multithreaded awareness +/// The returned allocator should be thread-safe if the compilation is multi-threaded, because +/// multiple threads could capture and/or print stack traces simultaneously. fn getDebugInfoAllocator() Allocator { // Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) { @@ -1177,10 +1177,10 @@ fn getDebugInfoAllocator() Allocator { } // Otherwise, use a global arena backed by the page allocator const S = struct { - var arena: ?std.heap.ArenaAllocator = null; + var arena: std.heap.ArenaAllocator = .init(std.heap.page_allocator); + var ts_arena: std.heap.ThreadSafeAllocator = .{ .child_allocator = arena.allocator() }; }; - if (S.arena == null) S.arena = .init(std.heap.page_allocator); - return S.arena.?.allocator(); + return S.ts_arena.allocator(); } /// Whether or not the current target can print useful debug information when a segfault occurs. diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 655a65b709fa..3934777ea5c5 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -346,7 +346,7 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { di.* = undefined; } -pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { +pub fn getSymbolName(di: *const Dwarf, address: u64) ?[]const u8 { // Iterate the function list backwards so that we see child DIEs before their parents. This is // important because `DW_TAG_inlined_subroutine` DIEs will have a range which is a sub-range of // their caller, and we want to return the callee's name, not the caller's. 
diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 6934b3d396b3..675d1f0a8e09 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -18,7 +18,21 @@ const root = @import("root"); const SelfInfo = @This(); -modules: if (target_supported) std.AutoArrayHashMapUnmanaged(usize, Module.DebugInfo) else void, +/// Locks access to `modules`. However, does *not* lock the `Module.DebugInfo`, nor `lookup_cache` +/// the implementation is responsible for locking as needed in its exposed methods. +/// +/// TODO: to allow `SelfInfo` to work on freestanding, we currently just don't use this mutex there. +/// That's a bad solution, but a better one depends on the standard library's general support for +/// "bring your own OS" being improved. +modules_mutex: switch (builtin.os.tag) { + else => std.Thread.Mutex, + .freestanding, .other => struct { + fn lock(_: @This()) void {} + fn unlock(_: @This()) void {} + }, +}, +/// Value is allocated into gpa to give it a stable pointer. 
+modules: if (target_supported) std.AutoArrayHashMapUnmanaged(usize, *Module.DebugInfo) else void, lookup_cache: if (target_supported) Module.LookupCache else void, pub const Error = error{ @@ -43,12 +57,16 @@ pub const supports_unwinding: bool = target_supported and Module.supports_unwind pub const UnwindContext = if (supports_unwinding) Module.UnwindContext; pub const init: SelfInfo = .{ + .modules_mutex = .{}, .modules = .empty, .lookup_cache = if (Module.LookupCache != void) .init, }; pub fn deinit(self: *SelfInfo, gpa: Allocator) void { - for (self.modules.values()) |*di| di.deinit(gpa); + for (self.modules.values()) |di| { + di.deinit(gpa); + gpa.destroy(di); + } self.modules.deinit(gpa); if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); } @@ -56,21 +74,35 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void { pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { comptime assert(supports_unwinding); const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); - const gop = try self.modules.getOrPut(gpa, module.key()); - self.modules.lockPointers(); - defer self.modules.unlockPointers(); - if (!gop.found_existing) gop.value_ptr.* = .init; - return module.unwindFrame(gpa, gop.value_ptr, context); + const di: *Module.DebugInfo = di: { + self.modules_mutex.lock(); + defer self.modules_mutex.unlock(); + const gop = try self.modules.getOrPut(gpa, module.key()); + if (gop.found_existing) break :di gop.value_ptr.*; + errdefer _ = self.modules.pop().?; + const di = try gpa.create(Module.DebugInfo); + di.* = .init; + gop.value_ptr.* = di; + break :di di; + }; + return module.unwindFrame(gpa, di, context); } pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); - const gop = try self.modules.getOrPut(gpa, module.key()); - self.modules.lockPointers(); - defer 
self.modules.unlockPointers(); - if (!gop.found_existing) gop.value_ptr.* = .init; - return module.getSymbolAtAddress(gpa, gop.value_ptr, address); + const di: *Module.DebugInfo = di: { + self.modules_mutex.lock(); + defer self.modules_mutex.unlock(); + const gop = try self.modules.getOrPut(gpa, module.key()); + if (gop.found_existing) break :di gop.value_ptr.*; + errdefer _ = self.modules.pop().?; + const di = try gpa.create(Module.DebugInfo); + di.* = .init; + gop.value_ptr.* = di; + break :di di; + }; + return module.getSymbolAtAddress(gpa, di, address); } pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { @@ -88,6 +120,9 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) /// be valid to consider the entire application one module, or on the other hand to consider each /// object file a module. /// +/// Because different threads can collect stack traces concurrently, the implementation must be able +/// to tolerate concurrent calls to any method it implements. +/// /// This type must must expose the following declarations: /// /// ``` diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index fa2872035713..29178b5068b9 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -252,6 +252,15 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO }; } pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { + // We need the lock for a few things: + // * loading the Mach-O module + // * loading the referenced object file + // * scanning the DWARF of that object file + // * building the line number table of that object file + // That's enough that it doesn't really seem worth scoping the lock more tightly than the whole function.. 
+ di.mutex.lock(); + defer di.mutex.unlock(); + if (di.loaded_macho == null) di.loaded_macho = module.loadMachO(gpa) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| return e, else => return error.ReadFailed, @@ -341,8 +350,12 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }; } fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - if (di.unwind == null) di.unwind = module.loadUnwindInfo(); - const unwind = &di.unwind.?; + const unwind: *const DebugInfo.Unwind = u: { + di.mutex.lock(); + defer di.mutex.unlock(); + if (di.unwind == null) di.unwind = module.loadUnwindInfo(); + break :u &di.unwind.?; + }; const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo; if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo; @@ -649,10 +662,17 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, return ret_addr; } pub const DebugInfo = struct { + /// Held while checking and/or populating `unwind` or `loaded_macho`. + /// Once a field is populated and the pointer `&di.loaded_macho.?` or `&di.unwind.?` has been + /// gotten, the lock is released; i.e. it is not held while *using* the loaded info. + mutex: std.Thread.Mutex, + unwind: ?Unwind, loaded_macho: ?LoadedMachO, pub const init: DebugInfo = .{ + .mutex = .{}, + .unwind = null, .loaded_macho = null, }; diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 0812beaf7a8e..f80e33a7a017 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -7,16 +7,26 @@ gnu_eh_frame: ?[]const u8, pub const LookupCache = void; pub const DebugInfo = struct { + /// Held while checking and/or populating `loaded_elf`/`scanned_dwarf`/`unwind`. 
+ /// Once data is populated and a pointer to the field has been gotten, the lock + /// is released; i.e. it is not held while *using* the loaded debug info. + mutex: std.Thread.Mutex, + loaded_elf: ?ElfFile, scanned_dwarf: bool, unwind: [2]?Dwarf.Unwind, pub const init: DebugInfo = .{ + .mutex = .{}, .loaded_elf = null, .scanned_dwarf = false, .unwind = @splat(null), }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa); + for (di.unwind) |*opt_unwind| { + const unwind = &(opt_unwind orelse continue); + unwind.deinit(gpa); + } } }; @@ -145,34 +155,41 @@ fn loadElf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void } } pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { - if (di.loaded_elf == null) try module.loadElf(gpa, di); const vaddr = address - module.load_offset; - if (di.loaded_elf.?.dwarf) |*dwarf| { - if (!di.scanned_dwarf) { - dwarf.open(gpa, native_endian) catch |err| switch (err) { + { + di.mutex.lock(); + defer di.mutex.unlock(); + if (di.loaded_elf == null) try module.loadElf(gpa, di); + const loaded_elf = &di.loaded_elf.?; + // We need the lock if using DWARF, as we might scan the DWARF or build a line number table. 
+ if (loaded_elf.dwarf) |*dwarf| { + if (!di.scanned_dwarf) { + dwarf.open(gpa, native_endian) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.ReadFailed, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + di.scanned_dwarf = true; + } + return dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, => |e| return e, + error.ReadFailed, error.EndOfStream, error.Overflow, - error.ReadFailed, error.StreamTooLong, => return error.InvalidDebugInfo, }; - di.scanned_dwarf = true; } - return dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.OutOfMemory, - => |e| return e, - error.ReadFailed, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - => return error.InvalidDebugInfo, - }; + // Otherwise, we're just going to scan the symtab, which we don't need the lock for; fall out of this block. } // When there's no DWARF available, fall back to searching the symtab. 
return di.loaded_elf.?.searchSymtab(gpa, vaddr) catch |err| switch (err) { @@ -231,9 +248,14 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro } } pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { - if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); - std.debug.assert(di.unwind[0] != null); - for (&di.unwind) |*opt_unwind| { + const unwinds: *const [2]?Dwarf.Unwind = u: { + di.mutex.lock(); + defer di.mutex.unlock(); + if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); + std.debug.assert(di.unwind[0] != null); + break :u &di.unwind; + }; + for (unwinds) |*opt_unwind| { const unwind = &(opt_unwind.* orelse break); return context.unwindFrame(gpa, unwind, module.load_offset, null) catch |err| switch (err) { error.MissingDebugInfo => continue, // try the next one diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig index 1fdf69b2a0a2..1f4139583e99 100644 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ b/lib/std/debug/SelfInfo/WindowsModule.zig @@ -9,14 +9,14 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) std.debug.Sel if (lookupInCache(cache, address)) |m| return m; { // Check a new module hasn't been loaded + cache.rwlock.lock(); + defer cache.rwlock.unlock(); cache.modules.clearRetainingCapacity(); - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); if (handle == windows.INVALID_HANDLE_VALUE) { return windows.unexpectedError(windows.GetLastError()); } defer windows.CloseHandle(handle); - var entry: windows.MODULEENTRY32 = undefined; entry.dwSize = @sizeOf(windows.MODULEENTRY32); if (windows.kernel32.Module32First(handle, &entry) != 0) { @@ -30,12 +30,18 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) std.debug.Sel return error.MissingDebugInfo; } pub fn getSymbolAtAddress(module: *const 
WindowsModule, gpa: Allocator, di: *DebugInfo, address: usize) std.debug.SelfInfo.Error!std.debug.Symbol { + // The `Pdb` API doesn't really allow us *any* thread-safe access, and the `Dwarf` API isn't + // great for it either; just lock the whole thing. + di.mutex.lock(); + defer di.mutex.unlock(); + if (!di.loaded) module.loadDebugInfo(gpa, di) catch |err| switch (err) { error.OutOfMemory, error.InvalidDebugInfo, error.MissingDebugInfo, error.Unexpected => |e| return e, error.FileNotFound => return error.MissingDebugInfo, error.UnknownPDBVersion => return error.UnsupportedDebugInfo, else => return error.ReadFailed, }; + // Translate the runtime address into a virtual address into the module const vaddr = address - module.base_address; @@ -50,7 +56,9 @@ pub fn getSymbolAtAddress(module: *const WindowsModule, gpa: Allocator, di: *Deb return error.MissingDebugInfo; } -fn lookupInCache(cache: *const LookupCache, address: usize) ?WindowsModule { +fn lookupInCache(cache: *LookupCache, address: usize) ?WindowsModule { + cache.rwlock.lockShared(); + defer cache.rwlock.unlockShared(); for (cache.modules.items) |*entry| { const base_address = @intFromPtr(entry.modBaseAddr); if (address >= base_address and address < base_address + entry.modBaseSize) { @@ -182,13 +190,19 @@ fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) ! 
di.loaded = true; } pub const LookupCache = struct { + rwlock: std.Thread.RwLock, modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), - pub const init: LookupCache = .{ .modules = .empty }; + pub const init: LookupCache = .{ + .rwlock = .{}, + .modules = .empty, + }; pub fn deinit(lc: *LookupCache, gpa: Allocator) void { lc.modules.deinit(gpa); } }; pub const DebugInfo = struct { + mutex: std.Thread.Mutex, + loaded: bool, coff_image_base: u64, @@ -205,6 +219,7 @@ pub const DebugInfo = struct { coff_section_headers: []coff.SectionHeader, pub const init: DebugInfo = .{ + .mutex = .{}, .loaded = false, .coff_image_base = undefined, .mapped_file = null, From c41bf996848a32c60e6f1dac89769d33a1b83178 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 20 Sep 2025 14:23:39 +0100 Subject: [PATCH 75/85] std.debug: don't assume return address register is defined if not specified This logic was causing some occasional infinite looping on ARM, where the `.debug_frame` section is often incomplete since the `.exidx` section is used for unwind information. But the information we're getting from the compiler is totally *valid*: it's leaving the rule as the default, which is (as with most architectures) equivalent to `.undefined`! --- lib/std/debug/SelfInfo.zig | 43 +++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 675d1f0a8e09..2da5834ba6e8 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -243,6 +243,15 @@ pub const DwarfUnwindContext = struct { return ptr.*; } + /// The default rule is typically equivalent to `.undefined`, but ABIs may define it differently. 
+ fn defaultRuleBehavior(register: u8) enum { undefined, same_value } { + if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + // The default rule for callee-saved registers on AArch64 acts like the `.same_value` rule + return .same_value; + } + return .undefined; + } + /// Resolves the register rule and places the result into `out` (see regBytes). Returns `true` /// iff the rule was undefined. This is *not* the same as `col.rule == .undefined`, because the /// default rule may be undefined. @@ -256,17 +265,18 @@ pub const DwarfUnwindContext = struct { switch (col.rule) { .default => { const register = col.register orelse return error.InvalidRegister; - // The default type is usually undefined, but can be overriden by ABI authors. - // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`. - if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { - // Callee-saved registers are initialized as if they had the .same_value rule - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; + switch (defaultRuleBehavior(register)) { + .undefined => { + @memset(out, undefined); + return true; + }, + .same_value => { + const src = try context.cpu_context.dwarfRegisterBytes(register); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return false; + }, } - @memset(out, undefined); - return true; }, .undefined => { @memset(out, undefined); @@ -449,7 +459,9 @@ pub const DwarfUnwindContext = struct { expression_context.cfa = context.cfa; - var has_return_address = true; + // If the rule for the return address register is 'undefined', that indicates there is no + // return address, i.e. this is the end of the stack. + var explicit_has_return_address: ?bool = null; // Create a copy of the CPU context, to which we will apply the new rules. 
var new_cpu_context = context.cpu_context; @@ -462,11 +474,18 @@ pub const DwarfUnwindContext = struct { const dest = try new_cpu_context.dwarfRegisterBytes(register); const rule_undef = try context.resolveRegisterRule(gpa, column, expression_context, dest); if (register == cie.return_address_register) { - has_return_address = !rule_undef; + explicit_has_return_address = !rule_undef; } } } + // If the return address register did not have an explicitly specified rule, then it uses + // the default rule, which is usually equivalent to '.undefined', i.e. end-of-stack. + const has_return_address = explicit_has_return_address orelse switch (defaultRuleBehavior(cie.return_address_register)) { + .undefined => false, + .same_value => return error.InvalidDebugInfo, // this doesn't make sense, we would get stuck in an infinite loop + }; + const return_address: usize = if (has_return_address) pc: { const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register); break :pc stripInstructionPtrAuthCode(raw_ptr.*); From b0f222777c38088d90041ba1f28bfb1341cc76c6 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 20 Sep 2025 14:44:10 +0100 Subject: [PATCH 76/85] std.debug: cap total stack trace frames ...just in case there is broken debug info and/or bad values on the stack, either of which could cause stack unwinding to potentially loop forever. 
--- lib/std/debug.zig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index e587ddd5c9d1..4dc178b02de7 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -571,12 +571,19 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) var it = StackIterator.init(options.context) catch return empty_trace; defer it.deinit(); if (!it.stratOk(options.allow_unsafe_unwind)) return empty_trace; + var total_frames: usize = 0; var frame_idx: usize = 0; var wait_for = options.first_address; while (true) switch (it.next()) { .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break, .end => break, .frame => |ret_addr| { + if (total_frames > 10_000) { + // Limit the number of frames in case of (e.g.) broken debug information which is + // getting unwinding stuck in a loop. + break; + } + total_frames += 1; if (wait_for) |target| { if (ret_addr != target) continue; wait_for = null; @@ -624,6 +631,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ tty_config.setColor(writer, .reset) catch {}; return; } + var total_frames: usize = 0; var wait_for = options.first_address; var printed_any_frame = false; while (true) switch (it.next()) { @@ -657,6 +665,16 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ }, .end => break, .frame => |ret_addr| { + if (total_frames > 10_000) { + tty_config.setColor(writer, .dim) catch {}; + try writer.print( + "Stopping trace after {d} frames (large frame count may indicate broken debug info)\n", + .{total_frames}, + ); + tty_config.setColor(writer, .reset) catch {}; + return; + } + total_frames += 1; if (wait_for) |target| { if (ret_addr != target) continue; wait_for = null; From f7e0ff8a5fb3e8426e8f64bba271eae358d123b4 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 20 Sep 2025 15:16:42 +0100 Subject: [PATCH 77/85] std: clarify cpu_context register order rationale --- 
lib/std/debug/cpu_context.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig index fdf6ebd243b2..6499de21b155 100644 --- a/lib/std/debug/cpu_context.zig +++ b/lib/std/debug/cpu_context.zig @@ -225,8 +225,9 @@ pub fn fromWindowsContext(ctx: *const std.os.windows.CONTEXT) Native { } pub const X86 = struct { - /// The first 8 registers here intentionally match the order of registers pushed - /// by PUSHA, which is also the order used by the DWARF register mappings. + /// The first 8 registers here intentionally match the order of registers in the x86 instruction + /// encoding. This order is inherited by the PUSHA instruction and the DWARF register mappings, + /// among other things. pub const Gpr = enum { // zig fmt: off eax, ecx, edx, ebx, @@ -283,7 +284,9 @@ pub const X86 = struct { }; pub const X86_64 = struct { - /// MLUGG TODO: explain this order. why does DWARF have this? + /// The order here intentionally matches the order of the DWARF register mappings. It's unclear + /// where those mappings actually originated from---the ordering of the first 4 registers seems + /// quite unusual---but it is currently convenient for us to match DWARF. 
pub const Gpr = enum { // zig fmt: off rax, rdx, rcx, rbx, From 950a9d2a10d4a2430e1c26eb49f60bfc92adff77 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 22 Sep 2025 15:27:16 +0100 Subject: [PATCH 78/85] typo --- lib/std/debug/SelfInfo/ElfModule.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index f80e33a7a017..d18e295848e7 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -23,8 +23,8 @@ pub const DebugInfo = struct { }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa); - for (di.unwind) |*opt_unwind| { - const unwind = &(opt_unwind orelse continue); + for (&di.unwind) |*opt_unwind| { + const unwind = &(opt_unwind.* orelse continue); unwind.deinit(gpa); } } From dbda011ae67982949d59258a6ebd8f36080334fa Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 22 Sep 2025 15:39:16 +0100 Subject: [PATCH 79/85] std.debug.SelfInfo: mark ARM unwinding as unsupported We need to parse the `.ARM.exidx` section to be able to reliably unwind the stack on ARM. --- lib/std/debug/SelfInfo/ElfModule.zig | 4 +++- test/src/StackTrace.zig | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index d18e295848e7..32d767a44f2c 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -266,8 +266,10 @@ pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, con } pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; pub const supports_unwinding: bool = s: { + // Notably, we are yet to support unwinding on ARM. There, unwinding is not done through + // `.eh_frame`, but instead with the `.ARM.exidx` section, which has a different format. 
const archs: []const std.Target.Cpu.Arch = switch (builtin.target.os.tag) { - .linux => &.{ .x86, .x86_64, .arm, .armeb, .thumb, .thumbeb, .aarch64, .aarch64_be }, + .linux => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, .netbsd => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, .freebsd => &.{ .x86_64, .aarch64, .aarch64_be }, .openbsd => &.{.x86_64}, diff --git a/test/src/StackTrace.zig b/test/src/StackTrace.zig index 5735d89adee2..3d35a4f935da 100644 --- a/test/src/StackTrace.zig +++ b/test/src/StackTrace.zig @@ -62,8 +62,12 @@ fn addCaseTarget( // On aarch64-macos, FP unwinding is blessed by Apple to always be reliable, and std.debug knows this. const fp_unwind_is_safe = target.result.cpu.arch == .aarch64 and target.result.os.tag.isDarwin(); - // On x86-windows, only FP unwinding is available. - const supports_unwind_tables = target.result.os.tag != .windows or target.result.cpu.arch != .x86; + const supports_unwind_tables = switch (target.result.os.tag) { + // x86-windows just has no way to do stack unwinding other than using frame pointers. + .windows => target.result.cpu.arch != .x86, + // We do not yet implement support for the AArch32 exception table section `.ARM.exidx`. + else => !target.result.cpu.arch.isArm(), + }; const use_llvm_vals: []const bool = if (both_backends) &.{ true, false } else &.{true}; const pie_vals: []const ?bool = if (both_pie) &.{ true, false } else &.{null}; From 3f84b6c80ed3306f040dd98b8ccba561a052167a Mon Sep 17 00:00:00 2001 From: mlugg Date: Wed, 24 Sep 2025 00:36:17 +0100 Subject: [PATCH 80/85] cbe: work around GCC miscompilation This was causing a zig2 miscomp, which emitted slightly broken debug information, which caused extremely slow stack unwinding. We're working on fixing or reporting this upstream, but we can use this workaround for now, because GCC guarantees arithmetic signed shift. 
--- lib/zig.h | 8 ++++++++ stage1/zig.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/lib/zig.h b/lib/zig.h index 5c96b4bea0d3..5253912490ce 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -1510,8 +1510,16 @@ static inline zig_u128 zig_shl_u128(zig_u128 lhs, uint8_t rhs) { } static inline zig_i128 zig_shr_i128(zig_i128 lhs, uint8_t rhs) { + // This works around a GCC miscompilation, but it has the side benefit of + // emitting better code. It is behind the `#if` because it depends on + // arithmetic right shift, which is implementation-defined in C, but should + // be guaranteed on any GCC-compatible compiler. +#if defined(zig_gnuc) + return lhs >> rhs; +#else zig_i128 sign_mask = lhs < zig_make_i128(0, 0) ? -zig_make_i128(0, 1) : zig_make_i128(0, 0); return ((lhs ^ sign_mask) >> rhs) ^ sign_mask; +#endif } static inline zig_i128 zig_shl_i128(zig_i128 lhs, uint8_t rhs) { diff --git a/stage1/zig.h b/stage1/zig.h index 5c96b4bea0d3..5253912490ce 100644 --- a/stage1/zig.h +++ b/stage1/zig.h @@ -1510,8 +1510,16 @@ static inline zig_u128 zig_shl_u128(zig_u128 lhs, uint8_t rhs) { } static inline zig_i128 zig_shr_i128(zig_i128 lhs, uint8_t rhs) { + // This works around a GCC miscompilation, but it has the side benefit of + // emitting better code. It is behind the `#if` because it depends on + // arithmetic right shift, which is implementation-defined in C, but should + // be guaranteed on any GCC-compatible compiler. +#if defined(zig_gnuc) + return lhs >> rhs; +#else zig_i128 sign_mask = lhs < zig_make_i128(0, 0) ? 
-zig_make_i128(0, 1) : zig_make_i128(0, 0); return ((lhs ^ sign_mask) >> rhs) ^ sign_mask; +#endif } static inline zig_i128 zig_shl_i128(zig_i128 lhs, uint8_t rhs) { From 156cd8f678ebdcccc48382d093a3ef7e45c85a45 Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 26 Sep 2025 10:52:09 +0100 Subject: [PATCH 81/85] std.debug: significantly speed up capturing stack traces By my estimation, these changes speed up DWARF unwinding when using the self-hosted x86_64 backend by around 7x. There are two very significant enhancements: we no longer iterate frames which don't fit in the stack trace buffer, and we cache register rules (in a fixed buffer) to avoid re-parsing and evaluating CFI instructions in most cases. Alongside this are a bunch of smaller enhancements, such as pre-caching the result of evaluating the CIE's initial instructions, avoiding re-parsing of CIEs, and big simplifications to the `Dwarf.Unwind.VirtualMachine` logic. --- lib/std/debug.zig | 15 +- lib/std/debug/Dwarf.zig | 1 - lib/std/debug/Dwarf/Unwind.zig | 145 +++-- lib/std/debug/Dwarf/Unwind/VirtualMachine.zig | 554 +++++++++++------- lib/std/debug/Dwarf/call_frame.zig | 288 --------- lib/std/debug/SelfInfo.zig | 341 +++++------ lib/std/debug/SelfInfo/DarwinModule.zig | 87 ++- lib/std/debug/SelfInfo/ElfModule.zig | 189 +++--- 8 files changed, 811 insertions(+), 809 deletions(-) delete mode 100644 lib/std/debug/Dwarf/call_frame.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4dc178b02de7..abd3e8102f55 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -572,9 +572,12 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) defer it.deinit(); if (!it.stratOk(options.allow_unsafe_unwind)) return empty_trace; var total_frames: usize = 0; - var frame_idx: usize = 0; + var index: usize = 0; var wait_for = options.first_address; - while (true) switch (it.next()) { + // Ideally, we would iterate the whole stack so that the `index` in the returned trace was + // 
indicative of how many frames were skipped. However, this has a significant runtime cost + // in some cases, so at least for now, we don't do that. + while (index < addr_buf.len) switch (it.next()) { .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break, .end => break, .frame => |ret_addr| { @@ -588,13 +591,13 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) if (ret_addr != target) continue; wait_for = null; } - if (frame_idx < addr_buf.len) addr_buf[frame_idx] = ret_addr; - frame_idx += 1; + addr_buf[index] = ret_addr; + index += 1; }, }; return .{ - .index = frame_idx, - .instruction_addresses = addr_buf[0..@min(frame_idx, addr_buf.len)], + .index = index, + .instruction_addresses = addr_buf[0..index], }; } /// Write the current stack trace to `writer`, annotated with source locations. diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 3934777ea5c5..cfba366162ff 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -27,7 +27,6 @@ const Reader = std.Io.Reader; const Dwarf = @This(); pub const expression = @import("Dwarf/expression.zig"); -pub const call_frame = @import("Dwarf/call_frame.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); /// Useful to temporarily enable while working on this file. diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 2eaa89c40425..e251a9175df6 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -10,7 +10,7 @@ //! The typical usage of `Unwind` is as follows: //! //! * Initialize with `initEhFrameHdr` or `initSection`, depending on the available data -//! * Call `prepareLookup` to construct a search table if necessary +//! * Call `prepare` to scan CIEs and, if necessary, construct a search table //! * Call `lookupPc` to find the section offset of the FDE corresponding to a PC //! * Call `getFde` to load the corresponding FDE and CIE //! 
* Check that the PC does indeed fall in that range (`lookupPc` may return a false positive) @@ -18,7 +18,7 @@ //! //! In some cases, such as when using the "compact unwind" data in Mach-O binaries, the FDE offsets //! may already be known. In that case, no call to `lookupPc` is necessary, which means the call to -//! `prepareLookup` can also be omitted. +//! `prepare` can be optimized to only scan CIEs. pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); @@ -45,7 +45,7 @@ frame_section: struct { /// A structure allowing fast lookups of the FDE corresponding to a particular PC. We use a binary /// search table for the lookup; essentially, a list of all FDEs ordered by PC range. `null` means -/// the lookup data is not yet populated, so `prepareLookup` must be called before `lookupPc`. +/// the lookup data is not yet populated, so `prepare` must be called before `lookupPc`. lookup: ?union(enum) { /// The `.eh_frame_hdr` section contains a pre-computed search table which we can use. eh_frame_hdr: struct { @@ -58,6 +58,12 @@ lookup: ?union(enum) { sorted_fdes: []SortedFdeEntry, }, +/// Initially empty; populated by `prepare`. +cie_list: std.MultiArrayList(struct { + offset: u64, + cie: CommonInformationEntry, +}), + const SortedFdeEntry = struct { /// This FDE's value of `pc_begin`. pc_begin: u64, @@ -83,6 +89,7 @@ pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_p .vaddr = section_vaddr, .table = table, } } else null, + .cie_list = .empty, }; } @@ -98,16 +105,21 @@ pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const .vaddr = section_vaddr, }, .lookup = null, + .cie_list = .empty, }; } -/// Technically, it is only necessary to call this if `prepareLookup` has previously been called, -/// since no other function here allocates resources. 
pub fn deinit(unwind: *Unwind, gpa: Allocator) void { if (unwind.lookup) |lookup| switch (lookup) { .eh_frame_hdr => {}, .sorted_fdes => |fdes| gpa.free(fdes), }; + for (unwind.cie_list.items(.cie)) |*cie| { + if (cie.last_row) |*lr| { + gpa.free(lr.cols); + } + } + unwind.cie_list.deinit(gpa); } /// Decoded version of the `.eh_frame_hdr` section. @@ -236,7 +248,6 @@ const EntryHeader = union(enum) { bytes_len: u64, }, fde: struct { - format: Format, /// Offset into the section of the corresponding CIE, *including* its entry header. cie_offset: u64, /// Remaining bytes in the FDE. These are parseable by `FrameDescriptionEntry.parse`. @@ -290,7 +301,6 @@ const EntryHeader = union(enum) { .debug_frame => cie_ptr_or_id, }; return .{ .fde = .{ - .format = unit_header.format, .cie_offset = cie_offset, .bytes_len = remaining_bytes, } }; @@ -299,6 +309,7 @@ const EntryHeader = union(enum) { pub const CommonInformationEntry = struct { version: u8, + format: Format, /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. @@ -318,6 +329,12 @@ pub const CommonInformationEntry = struct { initial_instructions: []const u8, + last_row: ?struct { + offset: u64, + cfa: VirtualMachine.CfaRule, + cols: []VirtualMachine.Column, + }, + pub const AugmentationKind = enum { none, gcc_eh, lsb_z }; /// This function expects to read the CIE starting with the version field. @@ -326,6 +343,7 @@ pub const CommonInformationEntry = struct { /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. 
fn parse( + format: Format, cie_bytes: []const u8, section: Section, default_addr_size_bytes: u8, @@ -384,6 +402,7 @@ pub const CommonInformationEntry = struct { }; return .{ + .format = format, .version = version, .addr_size_bytes = addr_size_bytes, .segment_selector_size = segment_selector_size, @@ -394,6 +413,7 @@ pub const CommonInformationEntry = struct { .is_signal_frame = is_signal_frame, .augmentation_kind = aug_kind, .initial_instructions = r.buffered(), + .last_row = null, }; } }; @@ -411,7 +431,7 @@ pub const FrameDescriptionEntry = struct { /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. fde_vaddr: u64, fde_bytes: []const u8, - cie: CommonInformationEntry, + cie: *const CommonInformationEntry, endian: Endian, ) !FrameDescriptionEntry { if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize; @@ -446,11 +466,18 @@ pub const FrameDescriptionEntry = struct { } }; -/// Builds the PC FDE lookup table if it is not already built. It is required to call this function -/// at least once before calling `lookupPc`. Once this function is called, memory has been allocated -/// and so `deinit` (matching this `gpa`) is required to free it. -pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void { - if (unwind.lookup != null) return; +/// Builds the CIE list and FDE lookup table if they are not already built. It is required to call +/// this function at least once before calling `lookupPc` or `getFde`. If only `getFde` is needed, +/// then `need_lookup` can be set to `false` to make this function more efficient. 
+pub fn prepare( + unwind: *Unwind, + gpa: Allocator, + addr_size_bytes: u8, + endian: Endian, + need_lookup: bool, +) !void { + if (unwind.cie_list.len > 0 and (!need_lookup or unwind.lookup != null)) return; + unwind.cie_list.clearRetainingCapacity(); const section = unwind.frame_section; @@ -462,21 +489,28 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia const entry_offset = r.seek; switch (try EntryHeader.read(&r, entry_offset, section.id, endian)) { .cie => |cie_info| { - // Ignore CIEs for now; we'll parse them when we read a corresponding FDE - try r.discardAll(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream); + // We will pre-populate a list of CIEs for efficiency: this avoids work re-parsing + // them every time we look up an FDE. It also lets us cache the result of evaluating + // the CIE's initial CFI instructions, which is useful because in the vast majority + // of cases those instructions will be needed to reach the PC we are unwinding to. 
+ const bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; + const idx = unwind.cie_list.len; + try unwind.cie_list.append(gpa, .{ + .offset = entry_offset, + .cie = try .parse(cie_info.format, try r.take(bytes_len), section.id, addr_size_bytes), + }); + errdefer _ = unwind.cie_list.pop().?; + try VirtualMachine.populateCieLastRow(gpa, &unwind.cie_list.items(.cie)[idx], addr_size_bytes, endian); continue; }, .fde => |fde_info| { - if (fde_info.cie_offset > section.bytes.len) return error.EndOfStream; - var cie_r: Reader = .fixed(section.bytes[@intCast(fde_info.cie_offset)..]); - const cie_info = switch (try EntryHeader.read(&cie_r, fde_info.cie_offset, section.id, endian)) { - .cie => |cie_info| cie_info, - .fde, .terminator => return bad(), // this is meant to be a CIE - }; - const cie_bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; - const fde_bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; - const cie: CommonInformationEntry = try .parse(try cie_r.take(cie_bytes_len), section.id, addr_size_bytes); - const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(fde_bytes_len), cie, endian); + const bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; + if (!need_lookup) { + try r.discardAll(bytes_len); + continue; + } + const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo; + const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(bytes_len), cie, endian); try fde_list.append(gpa, .{ .pc_begin = fde.pc_begin, .fde_offset = entry_offset, @@ -502,12 +536,30 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia unwind.lookup = .{ .sorted_fdes = final_fdes }; } +fn findCie(unwind: *const Unwind, offset: u64) ?*const CommonInformationEntry { + const offsets = unwind.cie_list.items(.offset); + if (offsets.len == 0) return null; + var start: usize = 0; + var 
len: usize = offsets.len; + while (len > 1) { + const mid = len / 2; + if (offset < offsets[start + mid]) { + len = mid; + } else { + start += mid; + len -= mid; + } + } + if (offsets[start] != offset) return null; + return &unwind.cie_list.items(.cie)[start]; +} + /// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no /// matching FDE was found. The returned offset can be passed to `getFde` to load the data /// associated with the FDE. /// -/// Before calling this function, `prepareLookup` must return successfully at least once, to ensure -/// that `unwind.lookup` is populated. +/// Before calling this function, `prepare` must return successfully at least once, to ensure that +/// `unwind.lookup` is populated. /// /// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must /// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. @@ -524,20 +576,25 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End }, .sorted_fdes => |sorted_fdes| sorted_fdes, }; - const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { - fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { - return target_pc >= entry.pc_begin; // i.e. does 'entry_pc..' include 'target_pc' + if (sorted_fdes.len == 0) return null; + var start: usize = 0; + var len: usize = sorted_fdes.len; + while (len > 1) { + const half = len / 2; + if (pc < sorted_fdes[start + half].pc_begin) { + len = half; + } else { + start += half; + len -= half; } - }.canIncludePc); - // `first_bad_idx` is the index of the first FDE whose `pc_begin` is too high to include `pc`. - // So if any FDE matches, it'll be the one at `first_bad_idx - 1` (maybe false positive). - if (first_bad_idx == 0) return null; - return sorted_fdes[first_bad_idx - 1].fde_offset; + } + // If any FDE matches, it'll be the one at `start` (maybe false positive). 
+ return sorted_fdes[start].fde_offset; } /// Get the FDE at a given offset, as well as its associated CIE. This offset typically comes from /// `lookupPc`. The CFI instructions within can be evaluated with `VirtualMachine`. -pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { +pub fn getFde(unwind: *const Unwind, fde_offset: u64, endian: Endian) !struct { *const CommonInformationEntry, FrameDescriptionEntry } { const section = unwind.frame_section; if (fde_offset > section.bytes.len) return error.EndOfStream; @@ -547,19 +604,7 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia .cie, .terminator => return bad(), // This is meant to be an FDE }; - const cie_offset = fde_info.cie_offset; - if (cie_offset > section.bytes.len) return error.EndOfStream; - var cie_reader: Reader = .fixed(section.bytes[@intCast(cie_offset)..]); - const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section.id, endian)) { - .cie => |info| info, - .fde, .terminator => return bad(), // This is meant to be a CIE - }; - - const cie: CommonInformationEntry = try .parse( - try cie_reader.take(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream), - section.id, - addr_size_bytes, - ); + const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo; const fde: FrameDescriptionEntry = try .parse( section.vaddr + fde_offset + fde_reader.seek, try fde_reader.take(cast(usize, fde_info.bytes_len) orelse return error.EndOfStream), @@ -567,7 +612,7 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia endian, ); - return .{ cie_info.format, cie, fde }; + return .{ cie, fde }; } const EhPointerContext = struct { diff --git a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig index 997af95cbd31..319841ea7f18 100644 --- 
a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig +++ b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig @@ -5,9 +5,9 @@ pub const RegisterRule = union(enum) { /// The spec says that the default rule for each column is the undefined rule. /// However, it also allows ABI / compiler authors to specify alternate defaults, so /// there is a distinction made here. - default: void, - undefined: void, - same_value: void, + default, + undefined, + same_value, /// offset(N) offset: i64, /// val_offset(N) @@ -18,38 +18,39 @@ pub const RegisterRule = union(enum) { expression: []const u8, /// val_expression(E) val_expression: []const u8, - /// Augmenter-defined rule - architectural: void, +}; + +pub const CfaRule = union(enum) { + none, + reg_off: struct { + register: u8, + offset: i64, + }, + expression: []const u8, }; /// Each row contains unwinding rules for a set of registers. pub const Row = struct { /// Offset from `FrameDescriptionEntry.pc_begin` offset: u64 = 0, - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, + cfa: CfaRule = .none, /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. - copy_on_write: bool = false, + columns: ColumnRange = .{ .start = undefined, .len = 0 }, }; pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, + register: u8, + rule: RegisterRule, }; const ColumnRange = struct { - /// Index into `columns` of the first column in this row. 
- start: usize = undefined, - len: u8 = 0, + start: usize, + len: u8, }; columns: std.ArrayList(Column) = .empty, stack: std.ArrayList(struct { - cfa: Column, + cfa: CfaRule, columns: ColumnRange, }) = .empty, current_row: Row = .{}, @@ -71,235 +72,388 @@ pub fn reset(self: *VirtualMachine) void { } /// Return a slice backed by the row's non-CFA columns -pub fn rowColumns(self: VirtualMachine, row: Row) []Column { +pub fn rowColumns(self: *const VirtualMachine, row: *const Row) []Column { if (row.columns.len == 0) return &.{}; return self.columns.items[row.columns.start..][0..row.columns.len]; } /// Either retrieves or adds a column for `register` (non-CFA) in the current row. fn getOrAddColumn(self: *VirtualMachine, gpa: Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { + for (self.rowColumns(&self.current_row)) |*c| { if (c.register == register) return c; } if (self.current_row.columns.len == 0) { self.current_row.columns.start = self.columns.items.len; + } else { + assert(self.current_row.columns.start + self.current_row.columns.len == self.columns.items.len); } self.current_row.columns.len += 1; const column = try self.columns.addOne(gpa); column.* = .{ .register = register, + .rule = .default, }; return column; } +pub fn populateCieLastRow( + gpa: Allocator, + cie: *Unwind.CommonInformationEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + assert(cie.last_row == null); + + var vm: VirtualMachine = .{}; + defer vm.deinit(gpa); + + try vm.evalInstructions( + gpa, + cie, + std.math.maxInt(u64), + cie.initial_instructions, + addr_size_bytes, + endian, + ); + + cie.last_row = .{ + .offset = vm.current_row.offset, + .cfa = vm.current_row.cfa, + .cols = try gpa.dupe(Column, vm.rowColumns(&vm.current_row)), + }; +} + /// Runs the CIE instructions, then the FDE instructions. Execution halts /// once the row that corresponds to `pc` is known, and the row is returned. 
pub fn runTo( - self: *VirtualMachine, + vm: *VirtualMachine, gpa: Allocator, pc: u64, - cie: Dwarf.Unwind.CommonInformationEntry, - fde: Dwarf.Unwind.FrameDescriptionEntry, + cie: *const Unwind.CommonInformationEntry, + fde: *const Unwind.FrameDescriptionEntry, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Row { - assert(self.cie_row == null); - assert(pc >= fde.pc_begin); - assert(pc < fde.pc_begin + fde.pc_range); + assert(vm.cie_row == null); - var prev_row: Row = self.current_row; + const target_offset = pc - fde.pc_begin; + assert(target_offset < fde.pc_range); - const instruction_slices: [2][]const u8 = .{ - cie.initial_instructions, - fde.instructions, - }; - for (instruction_slices, [2]bool{ true, false }) |slice, is_cie_stream| { - var stream: std.Io.Reader = .fixed(slice); - while (stream.seek < slice.len) { - const instruction: Dwarf.call_frame.Instruction = try .read(&stream, addr_size_bytes, endian); - prev_row = try self.step(gpa, cie, is_cie_stream, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + const instruction_bytes: []const u8 = insts: { + if (target_offset < cie.last_row.?.offset) { + break :insts cie.initial_instructions; } - } + // This is the more common case: start from the CIE's last row. 
+ assert(vm.columns.items.len == 0); + vm.current_row = .{ + .offset = cie.last_row.?.offset, + .cfa = cie.last_row.?.cfa, + .columns = .{ + .start = 0, + .len = @intCast(cie.last_row.?.cols.len), + }, + }; + try vm.columns.appendSlice(gpa, cie.last_row.?.cols); + vm.cie_row = vm.current_row; + break :insts fde.instructions; + }; - return self.current_row; + try vm.evalInstructions( + gpa, + cie, + target_offset, + instruction_bytes, + addr_size_bytes, + endian, + ); + return vm.current_row; } -fn resolveCopyOnWrite(self: *VirtualMachine, gpa: Allocator) !void { - if (!self.current_row.copy_on_write) return; +/// Evaluates instructions from `instruction_bytes` until `target_addr` is reached or all +/// instructions have been evaluated. +fn evalInstructions( + vm: *VirtualMachine, + gpa: Allocator, + cie: *const Unwind.CommonInformationEntry, + target_addr: u64, + instruction_bytes: []const u8, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + var fr: std.Io.Reader = .fixed(instruction_bytes); + while (fr.seek < fr.buffer.len) { + switch (try Instruction.read(&fr, addr_size_bytes, endian)) { + .nop => { + // If there was one nop, there's a good chance we've reached the padding and so + // everything left is a nop, which is represented by a 0 byte. 
+ if (std.mem.allEqual(u8, fr.buffered(), 0)) return; + }, + + .remember_state => { + try vm.stack.append(gpa, .{ + .cfa = vm.current_row.cfa, + .columns = vm.current_row.columns, + }); + const cols_len = vm.current_row.columns.len; + const copy_start = vm.columns.items.len; + assert(vm.current_row.columns.start == copy_start - cols_len); + try vm.columns.ensureUnusedCapacity(gpa, cols_len); // to prevent aliasing issues + vm.columns.appendSliceAssumeCapacity(vm.columns.items[copy_start - cols_len ..]); + vm.current_row.columns.start = copy_start; + }, + .restore_state => { + const restored = vm.stack.pop() orelse return error.InvalidOperation; + vm.columns.shrinkRetainingCapacity(restored.columns.start + restored.columns.len); + + vm.current_row.cfa = restored.cfa; + vm.current_row.columns = restored.columns; + }, - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(gpa, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; + .advance_loc => |delta| { + const new_addr = vm.current_row.offset + delta * cie.code_alignment_factor; + if (new_addr > target_addr) return; + vm.current_row.offset = new_addr; + }, + .set_loc => |new_addr| { + if (new_addr <= vm.current_row.offset) return error.InvalidOperation; + if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + + if (new_addr > target_addr) return; + vm.current_row.offset = new_addr; + }, + + .register => |reg| { + const column = try vm.getOrAddColumn(gpa, reg.index); + column.rule = switch (reg.rule) { + .restore => rule: { + const cie_row = &(vm.cie_row orelse return error.InvalidOperation); + for (vm.rowColumns(cie_row)) |cie_col| { + if (cie_col.register == reg.index) break :rule cie_col.rule; + } + break :rule .default; + }, + .undefined => .undefined, + 
.same_value => .same_value, + .offset_uf => |off| .{ .offset = @as(i64, @intCast(off)) * cie.data_alignment_factor }, + .offset_sf => |off| .{ .offset = off * cie.data_alignment_factor }, + .val_offset_uf => |off| .{ .val_offset = @as(i64, @intCast(off)) * cie.data_alignment_factor }, + .val_offset_sf => |off| .{ .val_offset = off * cie.data_alignment_factor }, + .register => |callee_reg| .{ .register = callee_reg }, + .expr => |len| .{ .expression = try takeExprBlock(&fr, len) }, + .val_expr => |len| .{ .val_expression = try takeExprBlock(&fr, len) }, + }; + }, + .def_cfa => |cfa| vm.current_row.cfa = .{ .reg_off = .{ + .register = cfa.register, + .offset = @intCast(cfa.offset), + } }, + .def_cfa_sf => |cfa| vm.current_row.cfa = .{ .reg_off = .{ + .register = cfa.register, + .offset = cfa.offset_sf * cie.data_alignment_factor, + } }, + .def_cfa_reg => |register| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.register = register, + }, + .def_cfa_offset => |offset| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.offset = @intCast(offset), + }, + .def_cfa_offset_sf => |offset_sf| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.offset = offset_sf * cie.data_alignment_factor, + }, + .def_cfa_expr => |len| { + vm.current_row.cfa = .{ .expression = try takeExprBlock(&fr, len) }; + }, + } } } -/// Executes a single instruction. -/// If this instruction is from the CIE, `is_initial` should be set. -/// Returns the value of `current_row` before executing this instruction. 
-pub fn step( - self: *VirtualMachine, - gpa: Allocator, - cie: Dwarf.Unwind.CommonInformationEntry, - is_initial: bool, - instruction: Dwarf.call_frame.Instruction, -) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } +fn takeExprBlock(r: *std.Io.Reader, len: usize) error{ ReadFailed, InvalidOperand }![]const u8 { + return r.take(len) catch |err| switch (err) { + error.ReadFailed => |e| return e, + error.EndOfStream => return error.InvalidOperand, + }; +} - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .undefined = {} }; 
- }, - .same_value => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(gpa, .{ - .cfa = self.current_row.cfa, - .columns = self.current_row.columns, - }); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored = self.stack.pop() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(gpa, restored.columns.len); - - self.current_row.cfa = restored.cfa; - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored.columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored.columns.start..][0..restored.columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(gpa); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(gpa); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - .def_cfa_register => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(gpa); - if 
(self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(gpa); - self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; +const OpcodeByte = packed struct(u8) { + low: packed union { + operand: u6, + extended: enum(u6) { + nop = 0, + set_loc = 1, + advance_loc1 = 2, + advance_loc2 = 3, + advance_loc4 = 4, + offset_extended = 5, + restore_extended = 6, + undefined = 7, + same_value = 8, + register = 9, + remember_state = 10, + restore_state = 11, + def_cfa = 12, + def_cfa_register = 13, + def_cfa_offset = 14, + def_cfa_expression = 15, + expression = 16, + offset_extended_sf = 17, + def_cfa_sf = 18, + def_cfa_offset_sf = 19, + val_offset = 20, + val_offset_sf = 21, + val_expression = 22, + _, }, - .expression => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .expression = i.block, - }; - }, - .val_offset => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; - }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .val_expression => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .val_expression = i.block, - }; + }, + opcode: enum(u2) { + extended = 0, + advance_loc = 1, + offset = 2, + restore = 3, + }, +}; + +pub const Instruction = union(enum) { + nop, + remember_state, + restore_state, + advance_loc: u32, + set_loc: u64, + + register: struct { + index: u8, + 
rule: union(enum) { + restore, // restore from cie + undefined, + same_value, + offset_uf: u64, + offset_sf: i64, + val_offset_uf: u64, + val_offset_sf: i64, + register: u8, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + expr: usize, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + val_expr: usize, }, - } + }, - return prev_row; -} + def_cfa: struct { + register: u8, + offset: u64, + }, + def_cfa_sf: struct { + register: u8, + offset_sf: i64, + }, + def_cfa_reg: u8, + def_cfa_offset: u64, + def_cfa_offset_sf: i64, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + def_cfa_expr: usize, + + pub fn read( + reader: *std.Io.Reader, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Instruction { + const inst: OpcodeByte = @bitCast(try reader.takeByte()); + return switch (inst.opcode) { + .advance_loc => .{ .advance_loc = inst.low.operand }, + .offset => .{ .register = .{ + .index = inst.low.operand, + .rule = .{ .offset_uf = try reader.takeLeb128(u64) }, + } }, + .restore => .{ .register = .{ + .index = inst.low.operand, + .rule = .restore, + } }, + .extended => switch (inst.low.extended) { + .nop => .nop, + .remember_state => .remember_state, + .restore_state => .restore_state, + .advance_loc1 => .{ .advance_loc = try reader.takeByte() }, + .advance_loc2 => .{ .advance_loc = try reader.takeInt(u16, endian) }, + .advance_loc4 => .{ .advance_loc = try reader.takeInt(u32, endian) }, + .set_loc => .{ .set_loc = switch (addr_size_bytes) { + 2 => try reader.takeInt(u16, endian), + 4 => try reader.takeInt(u32, endian), + 8 => try reader.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, + } }, + + .offset_extended => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .offset_uf = try reader.takeLeb128(u64) }, + } }, + .offset_extended_sf => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ 
.offset_sf = try reader.takeLeb128(i64) }, + } }, + .restore_extended => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .restore, + } }, + .undefined => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .undefined, + } }, + .same_value => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .same_value, + } }, + .register => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .register = try reader.takeLeb128(u8) }, + } }, + .val_offset => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_offset_uf = try reader.takeLeb128(u64) }, + } }, + .val_offset_sf => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_offset_sf = try reader.takeLeb128(i64) }, + } }, + .expression => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .expr = try reader.takeLeb128(usize) }, + } }, + .val_expression => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_expr = try reader.takeLeb128(usize) }, + } }, + + .def_cfa => .{ .def_cfa = .{ + .register = try reader.takeLeb128(u8), + .offset = try reader.takeLeb128(u64), + } }, + .def_cfa_sf => .{ .def_cfa_sf = .{ + .register = try reader.takeLeb128(u8), + .offset_sf = try reader.takeLeb128(i64), + } }, + .def_cfa_register => .{ .def_cfa_reg = try reader.takeLeb128(u8) }, + .def_cfa_offset => .{ .def_cfa_offset = try reader.takeLeb128(u64) }, + .def_cfa_offset_sf => .{ .def_cfa_offset_sf = try reader.takeLeb128(i64) }, + .def_cfa_expression => .{ .def_cfa_expr = try reader.takeLeb128(usize) }, + + _ => switch (@intFromEnum(inst.low.extended)) { + 0x1C...0x3F => return error.UnimplementedUserOpcode, + else => return error.InvalidOpcode, + }, + }, + }; + } +}; const std = @import("../../../std.zig"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; -const Dwarf = std.debug.Dwarf; +const Unwind = std.debug.Dwarf.Unwind; const VirtualMachine = @This(); diff --git 
a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig deleted file mode 100644 index 8f1758f4eb92..000000000000 --- a/lib/std/debug/Dwarf/call_frame.zig +++ /dev/null @@ -1,288 +0,0 @@ -const std = @import("../../std.zig"); -const Reader = std.Io.Reader; - -/// TODO merge with std.dwarf.CFA -const Opcode = enum(u8) { - advance_loc = 0x1 << 6, - offset = 0x2 << 6, - restore = 0x3 << 6, - - nop = 0x00, - set_loc = 0x01, - advance_loc1 = 0x02, - advance_loc2 = 0x03, - advance_loc4 = 0x04, - offset_extended = 0x05, - restore_extended = 0x06, - undefined = 0x07, - same_value = 0x08, - register = 0x09, - remember_state = 0x0a, - restore_state = 0x0b, - def_cfa = 0x0c, - def_cfa_register = 0x0d, - def_cfa_offset = 0x0e, - def_cfa_expression = 0x0f, - expression = 0x10, - offset_extended_sf = 0x11, - def_cfa_sf = 0x12, - def_cfa_offset_sf = 0x13, - val_offset = 0x14, - val_offset_sf = 0x15, - val_expression = 0x16, - - // These opcodes encode an operand in the lower 6 bits of the opcode itself - pub const lo_inline = @intFromEnum(Opcode.advance_loc); - pub const hi_inline = @intFromEnum(Opcode.restore) | 0b111111; - - // These opcodes are trailed by zero or more operands - pub const lo_reserved = @intFromEnum(Opcode.nop); - pub const hi_reserved = @intFromEnum(Opcode.val_expression); - - // Vendor-specific opcodes - pub const lo_user = 0x1c; - pub const hi_user = 0x3f; -}; - -/// The returned slice points into `reader.buffer`. 
-fn readBlock(reader: *Reader) ![]const u8 { - const block_len = try reader.takeLeb128(usize); - return reader.take(block_len) catch |err| switch (err) { - error.EndOfStream => return error.InvalidOperand, - error.ReadFailed => |e| return e, - }; -} - -pub const Instruction = union(Opcode) { - advance_loc: struct { - delta: u8, - }, - offset: struct { - register: u8, - offset: u64, - }, - restore: struct { - register: u8, - }, - nop: void, - set_loc: struct { - address: u64, - }, - advance_loc1: struct { - delta: u8, - }, - advance_loc2: struct { - delta: u16, - }, - advance_loc4: struct { - delta: u32, - }, - offset_extended: struct { - register: u8, - offset: u64, - }, - restore_extended: struct { - register: u8, - }, - undefined: struct { - register: u8, - }, - same_value: struct { - register: u8, - }, - register: struct { - register: u8, - target_register: u8, - }, - remember_state: void, - restore_state: void, - def_cfa: struct { - register: u8, - offset: u64, - }, - def_cfa_register: struct { - register: u8, - }, - def_cfa_offset: struct { - offset: u64, - }, - def_cfa_expression: struct { - block: []const u8, - }, - expression: struct { - register: u8, - block: []const u8, - }, - offset_extended_sf: struct { - register: u8, - offset: i64, - }, - def_cfa_sf: struct { - register: u8, - offset: i64, - }, - def_cfa_offset_sf: struct { - offset: i64, - }, - val_offset: struct { - register: u8, - offset: u64, - }, - val_offset_sf: struct { - register: u8, - offset: i64, - }, - val_expression: struct { - register: u8, - block: []const u8, - }, - - /// `reader` must be a `Reader.fixed` so that regions of its buffer are never invalidated. 
- pub fn read( - reader: *Reader, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Instruction { - switch (try reader.takeByte()) { - Opcode.lo_inline...Opcode.hi_inline => |opcode| { - const e: Opcode = @enumFromInt(opcode & 0b11000000); - const value: u6 = @intCast(opcode & 0b111111); - return switch (e) { - .advance_loc => .{ - .advance_loc = .{ .delta = value }, - }, - .offset => .{ - .offset = .{ - .register = value, - .offset = try reader.takeLeb128(u64), - }, - }, - .restore => .{ - .restore = .{ .register = value }, - }, - else => unreachable, - }; - }, - Opcode.lo_reserved...Opcode.hi_reserved => |opcode| { - const e: Opcode = @enumFromInt(opcode); - return switch (e) { - .advance_loc, - .offset, - .restore, - => unreachable, - .nop => .{ .nop = {} }, - .set_loc => .{ .set_loc = .{ - .address = switch (addr_size_bytes) { - 2 => try reader.takeInt(u16, endian), - 4 => try reader.takeInt(u32, endian), - 8 => try reader.takeInt(u64, endian), - else => return error.UnsupportedAddrSize, - }, - } }, - .advance_loc1 => .{ - .advance_loc1 = .{ .delta = try reader.takeByte() }, - }, - .advance_loc2 => .{ - .advance_loc2 = .{ .delta = try reader.takeInt(u16, endian) }, - }, - .advance_loc4 => .{ - .advance_loc4 = .{ .delta = try reader.takeInt(u32, endian) }, - }, - .offset_extended => .{ - .offset_extended = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .restore_extended => .{ - .restore_extended = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .undefined => .{ - .undefined = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .same_value => .{ - .same_value = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .register => .{ - .register = .{ - .register = try reader.takeLeb128(u8), - .target_register = try reader.takeLeb128(u8), - }, - }, - .remember_state => .{ .remember_state = {} }, - .restore_state => .{ .restore_state = {} }, - .def_cfa => .{ - .def_cfa = .{ - .register = try 
reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .def_cfa_register => .{ - .def_cfa_register = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .def_cfa_offset => .{ - .def_cfa_offset = .{ - .offset = try reader.takeLeb128(u64), - }, - }, - .def_cfa_expression => .{ - .def_cfa_expression = .{ - .block = try readBlock(reader), - }, - }, - .expression => .{ - .expression = .{ - .register = try reader.takeLeb128(u8), - .block = try readBlock(reader), - }, - }, - .offset_extended_sf => .{ - .offset_extended_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .def_cfa_sf => .{ - .def_cfa_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .def_cfa_offset_sf => .{ - .def_cfa_offset_sf = .{ - .offset = try reader.takeLeb128(i64), - }, - }, - .val_offset => .{ - .val_offset = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .val_offset_sf => .{ - .val_offset_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .val_expression => .{ - .val_expression = .{ - .register = try reader.takeLeb128(u8), - .block = try readBlock(reader), - }, - }, - }; - }, - Opcode.lo_user...Opcode.hi_user => return error.UnimplementedUserOpcode, - else => return error.InvalidOpcode, - } - } -}; diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 2da5834ba6e8..bb05ce521683 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -207,6 +207,36 @@ pub const DwarfUnwindContext = struct { vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + pub const Cache = struct { + /// TODO: to allow `DwarfUnwindContext` to work on freestanding, we currently just don't use + /// this mutex there. 
That's a bad solution, but a better one depends on the standard + /// library's general support for "bring your own OS" being improved. + mutex: switch (builtin.os.tag) { + else => std.Thread.Mutex, + .freestanding, .other => struct { + fn lock(_: @This()) void {} + fn unlock(_: @This()) void {} + }, + }, + buf: [num_slots]Slot, + const num_slots = 2048; + const Slot = struct { + const max_regs = 32; + pc: usize, + cie: *const Dwarf.Unwind.CommonInformationEntry, + cfa_rule: Dwarf.Unwind.VirtualMachine.CfaRule, + rules_regs: [max_regs]u16, + rules: [max_regs]Dwarf.Unwind.VirtualMachine.RegisterRule, + num_rules: u8, + }; + /// This is a function rather than a declaration to avoid lowering a very large struct value + /// into the binary when most of it is `undefined`. + pub fn init(c: *Cache) void { + c.mutex = .{}; + for (&c.buf) |*slot| slot.pc = 0; + } + }; + pub fn init(cpu_context: *const CpuContext) DwarfUnwindContext { comptime assert(supports_unwinding); @@ -243,126 +273,30 @@ pub const DwarfUnwindContext = struct { return ptr.*; } - /// The default rule is typically equivalent to `.undefined`, but ABIs may define it differently. - fn defaultRuleBehavior(register: u8) enum { undefined, same_value } { - if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { - // The default rule for callee-saved registers on AArch64 acts like the `.same_value` rule - return .same_value; - } - return .undefined; - } - - /// Resolves the register rule and places the result into `out` (see regBytes). Returns `true` - /// iff the rule was undefined. This is *not* the same as `col.rule == .undefined`, because the - /// default rule may be undefined. 
- pub fn resolveRegisterRule( - context: *DwarfUnwindContext, - gpa: Allocator, - col: Dwarf.Unwind.VirtualMachine.Column, - expression_context: std.debug.Dwarf.expression.Context, - out: []u8, - ) !bool { - switch (col.rule) { - .default => { - const register = col.register orelse return error.InvalidRegister; - switch (defaultRuleBehavior(register)) { - .undefined => { - @memset(out, undefined); - return true; - }, - .same_value => { - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; - }, - } - }, - .undefined => { - @memset(out, undefined); - return true; - }, - .same_value => { - // TODO: This copy could be eliminated if callers always copy the state then call this function to update it - const register = col.register orelse return error.InvalidRegister; - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; - }, - .offset => |offset| { - const cfa = context.cfa orelse return error.InvalidCFA; - const addr = try applyOffset(cfa, offset); - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - return false; - }, - .val_offset => |offset| { - const cfa = context.cfa orelse return error.InvalidCFA; - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - return false; - }, - .register => |register| { - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expression, - gpa, - expression_context, - context.cfa.?, - ) orelse return error.NoExpressionValue; - const addr = switch (value) { - .generic => |addr| addr, - else 
=> return error.InvalidExpressionValue, - }; - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - return false; - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expression, - gpa, - expression_context, - context.cfa.?, - ) orelse return error.NoExpressionValue; - const val_raw = switch (value) { - .generic => |raw| raw, - else => return error.InvalidExpressionValue, - }; - mem.writeInt(usize, out[0..@sizeOf(usize)], val_raw, native_endian); - return false; - }, - .architectural => return error.UnimplementedRegisterRule, - } - } - /// Unwind a stack frame using DWARF unwinding info, updating the register context. /// /// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. /// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter /// may require lazily loading the data in those sections. /// - /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info + /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when using macOS' + /// `__unwind_info` section. 
pub fn unwindFrame( context: *DwarfUnwindContext, + cache: *Cache, gpa: Allocator, unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, ) Error!usize { - return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, + return unwindFrameInner(context, cache, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.UnsupportedDebugInfo, + error.OutOfMemory, + => |e| return e, - error.UnimplementedRegisterRule, error.UnsupportedAddrSize, - error.UnsupportedDwarfVersion, error.UnimplementedUserOpcode, error.UnimplementedExpressionCall, error.UnimplementedOpcode, @@ -394,12 +328,12 @@ pub const DwarfUnwindContext = struct { error.InvalidExpressionValue, error.NoExpressionValue, error.RegisterSizeMismatch, - error.InvalidCFA, => return error.InvalidDebugInfo, }; } fn unwindFrameInner( context: *DwarfUnwindContext, + cache: *Cache, gpa: Allocator, unwind: *const Dwarf.Unwind, load_offset: usize, @@ -411,57 +345,85 @@ pub const DwarfUnwindContext = struct { const pc_vaddr = context.pc - load_offset; - const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( - pc_vaddr, - @sizeOf(usize), - native_endian, - ) orelse return error.MissingDebugInfo; - const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); + const cache_slot: Cache.Slot = slot: { + const slot_idx = std.hash.int(pc_vaddr) % Cache.num_slots; - // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives). 
- if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { - return error.MissingDebugInfo; - } + { + cache.mutex.lock(); + defer cache.mutex.unlock(); + if (cache.buf[slot_idx].pc == pc_vaddr) break :slot cache.buf[slot_idx]; + } + + const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const cie, const fde = try unwind.getFde(fde_offset, native_endian); - // Do not set `compile_unit` because the spec states that CFIs - // may not reference other debug sections anyway. - var expression_context: Dwarf.expression.Context = .{ - .format = format, - .cpu_context = &context.cpu_context, - .cfa = context.cfa, + // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives). + if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { + return error.MissingDebugInfo; + } + + context.vm.reset(); + + const row = try context.vm.runTo(gpa, pc_vaddr, cie, &fde, @sizeOf(usize), native_endian); + + if (row.columns.len > Cache.Slot.max_regs) return error.UnsupportedDebugInfo; + + var slot: Cache.Slot = .{ + .pc = pc_vaddr, + .cie = cie, + .cfa_rule = row.cfa, + .rules_regs = undefined, + .rules = undefined, + .num_rules = 0, + }; + for (context.vm.rowColumns(&row)) |col| { + const i = slot.num_rules; + slot.rules_regs[i] = col.register; + slot.rules[i] = col.rule; + slot.num_rules += 1; + } + + { + cache.mutex.lock(); + defer cache.mutex.unlock(); + cache.buf[slot_idx] = slot; + } + + break :slot slot; }; - context.vm.reset(); + const format = cache_slot.cie.format; + const return_address_register = cache_slot.cie.return_address_register; - const row = try context.vm.runTo(gpa, pc_vaddr, cie, fde, @sizeOf(usize), native_endian); - context.cfa = switch (row.cfa.rule) { - .val_offset => |offset| blk: { - const register = row.cfa.register orelse return error.InvalidCFARule; - const value = (try 
regNative(&context.cpu_context, register)).*; - break :blk try applyOffset(value, offset); + context.cfa = switch (cache_slot.cfa_rule) { + .none => return error.InvalidCFARule, + .reg_off => |ro| cfa: { + const ptr = try regNative(&context.cpu_context, ro.register); + break :cfa try applyOffset(ptr.*, ro.offset); }, - .expression => |expr| blk: { + .expression => |expr| cfa: { context.stack_machine.reset(); - const value = try context.stack_machine.run( - expr, - gpa, - expression_context, - context.cfa, - ); - - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; + const value = try context.stack_machine.run(expr, gpa, .{ + .format = format, + .cpu_context = &context.cpu_context, + }, context.cfa) orelse return error.NoExpressionValue; + switch (value) { + .generic => |g| break :cfa g, + else => return error.InvalidExpressionValue, + } }, - else => return error.InvalidCFARule, }; - expression_context.cfa = context.cfa; - - // If the rule for the return address register is 'undefined', that indicates there is no - // return address, i.e. this is the end of the stack. - var explicit_has_return_address: ?bool = null; + // If unspecified, we'll use the default rule for the return address register, which is + // typically equivalent to `.undefined` (meaning there is no return address), but may be + // overriden by ABIs. + var has_return_address: bool = builtin.cpu.arch.isAARCH64() and + return_address_register >= 19 and + return_address_register <= 28; // Create a copy of the CPU context, to which we will apply the new rules. 
var new_cpu_context = context.cpu_context; @@ -469,25 +431,78 @@ pub const DwarfUnwindContext = struct { // On all implemented architectures, the CFA is defined as being the previous frame's SP (try regNative(&new_cpu_context, sp_reg_num)).* = context.cfa.?; - for (context.vm.rowColumns(row)) |column| { - if (column.register) |register| { - const dest = try new_cpu_context.dwarfRegisterBytes(register); - const rule_undef = try context.resolveRegisterRule(gpa, column, expression_context, dest); - if (register == cie.return_address_register) { - explicit_has_return_address = !rule_undef; - } + const rules_len = cache_slot.num_rules; + for (cache_slot.rules_regs[0..rules_len], cache_slot.rules[0..rules_len]) |register, rule| { + const new_val: union(enum) { + same, + undefined, + val: usize, + bytes: []const u8, + } = switch (rule) { + .default => val: { + // The default rule is typically equivalent to `.undefined`, but ABIs may override it. + if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + break :val .same; + } + break :val .undefined; + }, + .undefined => .undefined, + .same_value => .same, + .offset => |offset| val: { + const ptr: *const usize = @ptrFromInt(try applyOffset(context.cfa.?, offset)); + break :val .{ .val = ptr.* }; + }, + .val_offset => |offset| .{ .val = try applyOffset(context.cfa.?, offset) }, + .register => |r| .{ .bytes = try context.cpu_context.dwarfRegisterBytes(r) }, + .expression => |expr| val: { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expr, gpa, .{ + .format = format, + .cpu_context = &context.cpu_context, + }, context.cfa.?) 
orelse return error.NoExpressionValue; + const ptr: *const usize = switch (value) { + .generic => |addr| @ptrFromInt(addr), + else => return error.InvalidExpressionValue, + }; + break :val .{ .val = ptr.* }; + }, + .val_expression => |expr| val: { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expr, gpa, .{ + .format = format, + .cpu_context = &context.cpu_context, + }, context.cfa.?) orelse return error.NoExpressionValue; + switch (value) { + .generic => |val| break :val .{ .val = val }, + else => return error.InvalidExpressionValue, + } + }, + }; + switch (new_val) { + .same => {}, + .undefined => { + const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); + @memset(dest, undefined); + }, + .val => |val| { + const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); + if (dest.len != @sizeOf(usize)) return error.RegisterSizeMismatch; + const dest_ptr: *align(1) usize = @ptrCast(dest); + dest_ptr.* = val; + }, + .bytes => |src| { + const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); + if (dest.len != src.len) return error.RegisterSizeMismatch; + @memcpy(dest, src); + }, + } + if (register == return_address_register) { + has_return_address = new_val != .undefined; } } - // If the return address register did not have an explicitly specified rules then it uses - // the default rule, which is usually equivalent to '.undefined', i.e. end-of-stack. 
- const has_return_address = explicit_has_return_address orelse switch (defaultRuleBehavior(cie.return_address_register)) { - .undefined => false, - .same_value => return error.InvalidDebugInfo, // this doesn't make sense, we would get stuck in an infinite loop - }; - const return_address: usize = if (has_return_address) pc: { - const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register); + const raw_ptr = try regNative(&new_cpu_context, return_address_register); break :pc stripInstructionPtrAuthCode(raw_ptr.*); } else 0; @@ -501,7 +516,7 @@ pub const DwarfUnwindContext = struct { // "return address" we have is the instruction which triggered the signal (if the signal // handler returned, the instruction would be re-run). Compensate for this by incrementing // the address in that case. - const adjusted_ret_addr = if (cie.is_signal_frame) return_address +| 1 else return_address; + const adjusted_ret_addr = if (cache_slot.cie.is_signal_frame) return_address +| 1 else return_address; // We also want to do that same subtraction here to get the PC for the next frame's FDE. 
// This is because if the callee was noreturn, then the function call might be the caller's diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 29178b5068b9..caf2176f7527 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -20,7 +20,7 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinM }, } } -fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { +fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, out: *DebugInfo) !void { const header: *std.macho.mach_header = @ptrFromInt(module.text_base); var it: macho.LoadCommandIterator = .{ @@ -36,21 +36,57 @@ fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { const vmaddr_slide = module.text_base - text_vmaddr; - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; + var opt_unwind_info: ?[]const u8 = null; + var opt_eh_frame: ?[]const u8 = null; for (sections) |sect| { if (mem.eql(u8, sect.sectName(), "__unwind_info")) { const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - unwind_info = sect_ptr[0..@intCast(sect.size)]; + opt_unwind_info = sect_ptr[0..@intCast(sect.size)]; } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - eh_frame = sect_ptr[0..@intCast(sect.size)]; + opt_eh_frame = sect_ptr[0..@intCast(sect.size)]; } } - return .{ + const eh_frame = opt_eh_frame orelse { + out.unwind = .{ + .vmaddr_slide = vmaddr_slide, + .unwind_info = opt_unwind_info, + .dwarf = null, + .dwarf_cache = undefined, + }; + return; + }; + var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); + errdefer dwarf.deinit(gpa); + // We don't need lookups, so this call is just for scanning CIEs. 
+ dwarf.prepare(gpa, @sizeOf(usize), native_endian, false) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + }; + + const dwarf_cache = try gpa.create(UnwindContext.Cache); + errdefer gpa.destroy(dwarf_cache); + dwarf_cache.init(); + + out.unwind = .{ .vmaddr_slide = vmaddr_slide, - .unwind_info = unwind_info, - .eh_frame = eh_frame, + .unwind_info = opt_unwind_info, + .dwarf = dwarf, + .dwarf_cache = dwarf_cache, }; } fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO { @@ -350,10 +386,10 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, }; } fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - const unwind: *const DebugInfo.Unwind = u: { + const unwind: *DebugInfo.Unwind = u: { di.mutex.lock(); defer di.mutex.unlock(); - if (di.unwind == null) di.unwind = module.loadUnwindInfo(); + if (di.unwind == null) try module.loadUnwindInfo(gpa, di); break :u &di.unwind.?; }; @@ -580,14 +616,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, break :ip new_ip; }, .DWARF => { - const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide; - return context.unwindFrame( - gpa, - &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - unwind.vmaddr_slide, - @intCast(encoding.value.x86_64.dwarf), - ); + const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); + return 
context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf); }, }, .aarch64, .aarch64_be => switch (encoding.mode.arm64) { @@ -600,14 +630,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, break :ip new_ip; }, .DWARF => { - const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo; - const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide; - return context.unwindFrame( - gpa, - &.initSection(.eh_frame, eh_frame_vaddr, eh_frame), - unwind.vmaddr_slide, - @intCast(encoding.value.x86_64.dwarf), - ); + const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); + return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf); }, .FRAME => ip: { const frame = encoding.value.arm64.frame; @@ -691,12 +715,15 @@ pub const DebugInfo = struct { } const Unwind = struct { - /// The slide applied to the following sections. So, `unwind_info.ptr` is this many bytes - /// higher than the vmaddr of `__unwind_info`, and likewise for `__eh_frame`. + /// The slide applied to the `__unwind_info` and `__eh_frame` sections. + /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr. vmaddr_slide: u64, - // Backed by the in-memory sections mapped by the loader + /// Backed by the in-memory section mapped by the loader. unwind_info: ?[]const u8, - eh_frame: ?[]const u8, + /// Backed by the in-memory `__eh_frame` section mapped by the loader. + dwarf: ?Dwarf.Unwind, + /// This is `undefined` if `dwarf == null`. 
+ dwarf_cache: *UnwindContext.Cache, }; const LoadedMachO = struct { diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 32d767a44f2c..eead810a86d2 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -3,8 +3,22 @@ name: []const u8, build_id: ?[]const u8, gnu_eh_frame: ?[]const u8, -/// No cache needed, because `dl_iterate_phdr` is already fast. -pub const LookupCache = void; +pub const LookupCache = struct { + rwlock: std.Thread.RwLock, + ranges: std.ArrayList(Range), + const Range = struct { + start: usize, + len: usize, + mod: ElfModule, + }; + pub const init: LookupCache = .{ + .rwlock = .{}, + .ranges = .empty, + }; + pub fn deinit(lc: *LookupCache, gpa: Allocator) void { + lc.ranges.deinit(gpa); + } +}; pub const DebugInfo = struct { /// Held while checking and/or populating `loaded_elf`/`scanned_dwarf`/`unwind`. @@ -14,18 +28,24 @@ pub const DebugInfo = struct { loaded_elf: ?ElfFile, scanned_dwarf: bool, - unwind: [2]?Dwarf.Unwind, + unwind: if (supports_unwinding) [2]?Dwarf.Unwind else void, + unwind_cache: if (supports_unwinding) *UnwindContext.Cache else void, + pub const init: DebugInfo = .{ .mutex = .{}, .loaded_elf = null, .scanned_dwarf = false, - .unwind = @splat(null), + .unwind = if (supports_unwinding) @splat(null), + .unwind_cache = undefined, }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa); - for (&di.unwind) |*opt_unwind| { - const unwind = &(opt_unwind.* orelse continue); - unwind.deinit(gpa); + if (supports_unwinding) { + if (di.unwind[0] != null) gpa.destroy(di.unwind_cache); + for (&di.unwind) |*opt_unwind| { + const unwind = &(opt_unwind.* orelse continue); + unwind.deinit(gpa); + } } } }; @@ -34,75 +54,84 @@ pub fn key(m: ElfModule) usize { return m.load_offset; } pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!ElfModule { - _ = cache; - _ = gpa; - const 
DlIterContext = struct { - /// input - address: usize, - /// output - module: ElfModule, + if (lookupInCache(cache, address)) |m| return m; - fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void { - _ = size; - // The base address is too high - if (context.address < info.addr) - return; + { + // Check a new module hasn't been loaded + cache.rwlock.lock(); + defer cache.rwlock.unlock(); + const DlIterContext = struct { + ranges: *std.ArrayList(LookupCache.Range), + gpa: Allocator, - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; + fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void { + _ = size; - // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.module = .{ - .load_offset = info.addr, - // Android libc uses NULL instead of "" to mark the main program - .name = mem.sliceTo(info.name, 0) orelse "", - .build_id = null, - .gnu_eh_frame = null, - }; - break; + var mod: ElfModule = .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = mem.sliceTo(info.name, 0) orelse "", + .build_id = null, + .gnu_eh_frame = null, + }; + + // Populate `build_id` and `gnu_eh_frame` + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; + if (note_type != 
elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, name, "GNU\x00")) continue; + const desc = r.take(desc_size) catch continue; + mod.build_id = desc; + }, + elf.PT_GNU_EH_FRAME => { + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + mod.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; + }, + else => {}, + } } - } else return; - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); - const name_size = r.takeInt(u32, native_endian) catch continue; - const desc_size = r.takeInt(u32, native_endian) catch continue; - const note_type = r.takeInt(u32, native_endian) catch continue; - const name = r.take(name_size) catch continue; - if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, name, "GNU\x00")) continue; - const desc = r.take(desc_size) catch continue; - context.module.build_id = desc; - }, - elf.PT_GNU_EH_FRAME => { - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; - }, - else => {}, + // Now that `mod` is populated, create the ranges + for (info.phdr[0..info.phnum]) |phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + try context.ranges.append(context.gpa, .{ + // Overflowing addition handles VSDOs having p_vaddr = 0xffffffffff700000 + .start = info.addr +% phdr.p_vaddr, + .len = phdr.p_memsz, + .mod = mod, + }); } } + }; + cache.ranges.clearRetainingCapacity(); + var ctx: DlIterContext = .{ + .ranges = &cache.ranges, + .gpa = gpa, + }; + try std.posix.dl_iterate_phdr(&ctx, error{OutOfMemory}, DlIterContext.callback); + } - // Stop the iteration - return error.Found; - } - }; - var ctx: DlIterContext = .{ - .address = address, - .module = undefined, - }; - std.posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch 
(err) { - error.Found => return ctx.module, - }; + if (lookupInCache(cache, address)) |m| return m; return error.MissingDebugInfo; } +fn lookupInCache(cache: *LookupCache, address: usize) ?ElfModule { + cache.rwlock.lockShared(); + defer cache.rwlock.unlockShared(); + for (cache.ranges.items) |*range| { + if (address >= range.start and address < range.start + range.len) { + return range.mod; + } + } + return null; +} fn loadElf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { std.debug.assert(di.loaded_elf == null); std.debug.assert(!di.scanned_dwarf); @@ -199,11 +228,23 @@ pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugIn }; } fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void { - unwind.prepareLookup(gpa, @sizeOf(usize), native_endian) catch |err| switch (err) { + unwind.prepare(gpa, @sizeOf(usize), native_endian, true) catch |err| switch (err) { error.ReadFailed => unreachable, // it's all fixed buffers - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, - error.EndOfStream, error.Overflow, error.StreamTooLong => return error.InvalidDebugInfo, - error.UnsupportedAddrSize, error.UnsupportedDwarfVersion => return error.UnsupportedDebugInfo, + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, }; } fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { @@ -240,12 +281,18 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro }; errdefer for (unwinds) |*u| u.deinit(gpa); for (unwinds) |*u| try prepareUnwindLookup(u, gpa); + + const unwind_cache = try 
gpa.create(UnwindContext.Cache); + errdefer gpa.destroy(unwind_cache); + unwind_cache.init(); + switch (unwinds.len) { 0 => unreachable, 1 => di.unwind = .{ unwinds[0], null }, 2 => di.unwind = .{ unwinds[0], unwinds[1] }, else => unreachable, } + di.unwind_cache = unwind_cache; } pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { const unwinds: *const [2]?Dwarf.Unwind = u: { @@ -257,7 +304,7 @@ pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, con }; for (unwinds) |*opt_unwind| { const unwind = &(opt_unwind.* orelse break); - return context.unwindFrame(gpa, unwind, module.load_offset, null) catch |err| switch (err) { + return context.unwindFrame(di.unwind_cache, gpa, unwind, module.load_offset, null) catch |err| switch (err) { error.MissingDebugInfo => continue, // try the next one else => |e| return e, }; From 8950831d3c4af4dd169e0a404e25e8aa9b045caa Mon Sep 17 00:00:00 2001 From: mlugg Date: Fri, 26 Sep 2025 12:00:41 +0100 Subject: [PATCH 82/85] Dwarf.Unwind: handle macOS deviation from standard Apparently the `__eh_frame` in Mach-O binaries doesn't include the terminator entry, but in all other respects it acts like `.eh_frame` rather than `.debug_frame`. I have no idea. --- lib/std/debug/Dwarf/Unwind.zig | 14 ++++++++++---- lib/std/debug/SelfInfo/DarwinModule.zig | 2 +- lib/std/debug/SelfInfo/ElfModule.zig | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index e251a9175df6..90e531b34912 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -475,10 +475,15 @@ pub fn prepare( addr_size_bytes: u8, endian: Endian, need_lookup: bool, + /// The `__eh_frame` section in Mach-O binaries deviates from the standard `.eh_frame` section + /// in one way which this function needs to be aware of. 
+ is_macho: bool, ) !void { if (unwind.cie_list.len > 0 and (!need_lookup or unwind.lookup != null)) return; unwind.cie_list.clearRetainingCapacity(); + if (is_macho) assert(unwind.lookup == null or unwind.lookup.? != .eh_frame_hdr); + const section = unwind.frame_section; var r: Reader = .fixed(section.bytes); @@ -519,10 +524,11 @@ pub fn prepare( .terminator => break true, } } else false; - switch (section.id) { - .eh_frame => if (!saw_terminator) return bad(), // `.eh_frame` indicates the end of the CIE/FDE list with a sentinel entry - .debug_frame => if (saw_terminator) return bad(), // `.debug_frame` uses the section bounds and does not specify a sentinel entry - } + const expect_terminator = switch (section.id) { + .eh_frame => !is_macho, // `.eh_frame` indicates the end of the CIE/FDE list with a sentinel entry, though macOS omits this + .debug_frame => false, // `.debug_frame` uses the section bounds and does not specify a sentinel entry + }; + if (saw_terminator != expect_terminator) return bad(); std.mem.sortUnstable(SortedFdeEntry, fde_list.items, {}, struct { fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index caf2176f7527..71e43a9a7481 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -59,7 +59,7 @@ fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, out: *DebugInfo) var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); errdefer dwarf.deinit(gpa); // We don't need lookups, so this call is just for scanning CIEs. 
- dwarf.prepare(gpa, @sizeOf(usize), native_endian, false) catch |err| switch (err) { + dwarf.prepare(gpa, @sizeOf(usize), native_endian, false, true) catch |err| switch (err) { error.ReadFailed => unreachable, // it's all fixed buffers error.InvalidDebugInfo, error.MissingDebugInfo, diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index eead810a86d2..7ce24e2e2a60 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -228,7 +228,7 @@ pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugIn }; } fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void { - unwind.prepare(gpa, @sizeOf(usize), native_endian, true) catch |err| switch (err) { + unwind.prepare(gpa, @sizeOf(usize), native_endian, true, false) catch |err| switch (err) { error.ReadFailed => unreachable, // it's all fixed buffers error.InvalidDebugInfo, error.MissingDebugInfo, From a90eb50c8034134d9f22aeffb4ea11e83b056cc7 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sat, 27 Sep 2025 11:30:35 +0100 Subject: [PATCH 83/85] typo --- lib/std/debug/cpu_context.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig index 6499de21b155..1089e74aa639 100644 --- a/lib/std/debug/cpu_context.zig +++ b/lib/std/debug/cpu_context.zig @@ -146,7 +146,7 @@ pub fn fromPosixSignalContext(ctx_ptr: ?*const anyopaque) ?Native { .pc = uc.mcontext.ss.pc, }, .netbsd => .{ - .x = uc.mcontext.gregs[0..31], + .x = uc.mcontext.gregs[0..31].*, .sp = uc.mcontext.gregs[31], .pc = uc.mcontext.gregs[32], }, From 12ceb896faebf25195d8b360e4972dd2bf23ede1 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 29 Sep 2025 13:56:56 +0100 Subject: [PATCH 84/85] Dwarf.Unwind: fix typo --- lib/std/debug/Dwarf/Unwind.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 
90e531b34912..8c4c1a19e6c4 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -391,7 +391,7 @@ pub const CommonInformationEntry = struct { 'P' => { const enc: EH.PE = @bitCast(try aug_data.takeByte()); const endian: Endian = .little; // irrelevant because we're discarding the value anyway - _ = try readEhPointerAbs(&r, enc.type, addr_size_bytes, endian); // we ignore the personality routine; endianness is irrelevant since we're discarding + _ = try readEhPointerAbs(&aug_data, enc.type, addr_size_bytes, endian); // we ignore the personality routine; endianness is irrelevant since we're discarding }, 'R' => fde_pointer_enc = @bitCast(try aug_data.takeByte()), 'S' => is_signal_frame = true, From 1120546f72405ac263dce7414eb71ca4e6c96fc8 Mon Sep 17 00:00:00 2001 From: mlugg Date: Tue, 30 Sep 2025 11:06:21 +0100 Subject: [PATCH 85/85] std.debug.SelfInfo: remove shared logic There were only a few dozen lines of common logic, and they frankly introduced more complexity than they eliminated. Instead, let's accept that the implementations of `SelfInfo` are all pretty different and want to track different state. This probably fixes some synchronization and memory bugs by simplifying a bunch of stuff. It also improves the DWARF unwind cache, making it around twice as fast in a debug build with the self-hosted x86_64 backend, because we no longer have to redundantly go through the hashmap lookup logic to find the module. Unwinding on Windows will also see a slight performance boost from this change, because `RtlVirtualUnwind` does not need to know the module whatsoever, so the old `SelfInfo` implementation was doing redundant work. Lastly, this makes it even easier to implement `SelfInfo` on freestanding targets; there is no longer a need to emulate a real module system, since the user controls the whole implementation! There are various other small refactors here in the `SelfInfo` implementations as well as in the DWARF unwinding logic. 
This change turned out to make a lot of stuff simpler! --- lib/std/debug.zig | 97 ++- lib/std/debug/Dwarf.zig | 5 +- lib/std/debug/Dwarf/SelfUnwinder.zig | 334 ++++++++ lib/std/debug/Dwarf/Unwind.zig | 20 +- lib/std/debug/Dwarf/expression.zig | 2 +- lib/std/debug/SelfInfo.zig | 551 ------------- lib/std/debug/SelfInfo/Darwin.zig | 993 +++++++++++++++++++++++ lib/std/debug/SelfInfo/DarwinModule.zig | 954 ---------------------- lib/std/debug/SelfInfo/Elf.zig | 427 ++++++++++ lib/std/debug/SelfInfo/ElfModule.zig | 349 -------- lib/std/debug/SelfInfo/Windows.zig | 559 +++++++++++++ lib/std/debug/SelfInfo/WindowsModule.zig | 442 ---------- test/standalone/coff_dwarf/main.zig | 2 +- 13 files changed, 2415 insertions(+), 2320 deletions(-) create mode 100644 lib/std/debug/Dwarf/SelfUnwinder.zig delete mode 100644 lib/std/debug/SelfInfo.zig create mode 100644 lib/std/debug/SelfInfo/Darwin.zig delete mode 100644 lib/std/debug/SelfInfo/DarwinModule.zig create mode 100644 lib/std/debug/SelfInfo/Elf.zig delete mode 100644 lib/std/debug/SelfInfo/ElfModule.zig create mode 100644 lib/std/debug/SelfInfo/Windows.zig delete mode 100644 lib/std/debug/SelfInfo/WindowsModule.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index abd3e8102f55..7e2ec0509291 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -19,11 +19,85 @@ const root = @import("root"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const ElfFile = @import("debug/ElfFile.zig"); -pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); pub const cpu_context = @import("debug/cpu_context.zig"); +/// This type abstracts the target-specific implementation of accessing this process' own debug +/// information behind a generic interface which supports looking up source locations associated +/// with addresses, as well as unwinding the stack where a safe mechanism to do so exists. 
+/// +/// The Zig Standard Library provides default implementations of `SelfInfo` for common targets, but +/// the implementation can be overridden by exposing `root.debug.SelfInfo`. Setting `SelfInfo` to +/// `void` indicates that the `SelfInfo` API is not supported. +/// +/// This type must expose the following declarations: +/// +/// ``` +/// pub const init: SelfInfo; +/// pub fn deinit(si: *SelfInfo, gpa: Allocator) void; +/// +/// /// Returns the symbol and source location of the instruction at `address`. +/// pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) SelfInfoError!Symbol; +/// /// Returns a name for the "module" (e.g. shared library or executable image) containing `address`. +/// pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) SelfInfoError![]const u8; +/// +/// /// Whether a reliable stack unwinding strategy, such as DWARF unwinding, is available. +/// pub const can_unwind: bool; +/// /// Only required if `can_unwind == true`. +/// pub const UnwindContext = struct { +/// /// An address representing the instruction pointer in the last frame. +/// pc: usize, +/// +/// pub fn init(ctx: *cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext; +/// pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void; +/// /// Returns the frame pointer associated with the last unwound stack frame. +/// /// If the frame pointer is unknown, 0 may be returned instead. +/// pub fn getFp(uc: *UnwindContext) usize; +/// }; +/// /// Only required if `can_unwind == true`. Unwinds a single stack frame, returning the frame's +/// /// return address, or 0 if the end of the stack has been reached.
+/// pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) SelfInfoError!usize; +/// ``` +pub const SelfInfo = if (@hasDecl(root, "debug") and @hasDecl(root.debug, "SelfInfo")) + root.debug.SelfInfo +else switch (native_os) { + .linux, + .netbsd, + .freebsd, + .dragonfly, + .openbsd, + .solaris, + .illumos, + => @import("debug/SelfInfo/Elf.zig"), + + .macos, + .ios, + .watchos, + .tvos, + .visionos, + => @import("debug/SelfInfo/Darwin.zig"), + + .uefi, + .windows, + => @import("debug/SelfInfo/Windows.zig"), + + else => void, +}; + +pub const SelfInfoError = error{ + /// The required debug info is invalid or corrupted. + InvalidDebugInfo, + /// The required debug info could not be found. + MissingDebugInfo, + /// The required debug info was found, and may be valid, but is not supported by this implementation. + UnsupportedDebugInfo, + /// The required debug info could not be read from disk due to some IO error. + ReadFailed, + OutOfMemory, + Unexpected, +}; + pub const simple_panic = @import("debug/simple_panic.zig"); pub const no_panic = @import("debug/no_panic.zig"); @@ -240,7 +314,7 @@ pub fn print(comptime fmt: []const u8, args: anytype) void { /// Marked `inline` to propagate a comptime-known error to callers. 
pub inline fn getSelfDebugInfo() !*SelfInfo { - if (!SelfInfo.target_supported) return error.UnsupportedTarget; + if (SelfInfo == void) return error.UnsupportedTarget; const S = struct { var self_info: SelfInfo = .init; }; @@ -640,7 +714,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_ while (true) switch (it.next()) { .switch_to_fp => |unwind_error| { if (StackIterator.fp_unwind_is_safe) continue; // no need to even warn - const module_name = di.getModuleNameForAddress(di_gpa, unwind_error.address) catch "???"; + const module_name = di.getModuleName(di_gpa, unwind_error.address) catch "???"; const caption: []const u8 = switch (unwind_error.err) { error.MissingDebugInfo => "unwind info unavailable", error.InvalidDebugInfo => "unwind info invalid", @@ -753,9 +827,9 @@ pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void { const StackIterator = union(enum) { /// Unwinding using debug info (e.g. DWARF CFI). - di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, + di: if (SelfInfo != void and SelfInfo.can_unwind) SelfInfo.UnwindContext else noreturn, /// We will first report the *current* PC of this `UnwindContext`, then we will switch to `di`. - di_first: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn, + di_first: if (SelfInfo != void and SelfInfo.can_unwind) SelfInfo.UnwindContext else noreturn, /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable. fp: usize, @@ -772,7 +846,7 @@ const StackIterator = union(enum) { } } if (opt_context_ptr) |context_ptr| { - if (!SelfInfo.supports_unwinding) return error.CannotUnwindFromContext; + if (SelfInfo == void or !SelfInfo.can_unwind) return error.CannotUnwindFromContext; // Use `di_first` here so we report the PC in the context before unwinding any further. return .{ .di_first = .init(context_ptr) }; } @@ -780,7 +854,8 @@ const StackIterator = union(enum) { // call to `current`. 
This effectively constrains stack trace collection and dumping to FP // unwinding when building with CBE for MSVC. if (!(builtin.zig_backend == .stage2_c and builtin.target.abi == .msvc) and - SelfInfo.supports_unwinding and + SelfInfo != void and + SelfInfo.can_unwind and cpu_context.Native != noreturn) { // We don't need `di_first` here, because our PC is in `std.debug`; we're only interested @@ -820,7 +895,7 @@ const StackIterator = union(enum) { /// We were using `SelfInfo.UnwindInfo`, but are now switching to FP unwinding due to this error. switch_to_fp: struct { address: usize, - err: SelfInfo.Error, + err: SelfInfoError, }, }; @@ -929,7 +1004,7 @@ pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { } fn printSourceAtAddress(gpa: Allocator, debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) Writer.Error!void { - const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) { + const symbol: Symbol = debug_info.getSymbol(gpa, address) catch |err| switch (err) { error.MissingDebugInfo, error.UnsupportedDebugInfo, error.InvalidDebugInfo, @@ -953,7 +1028,7 @@ fn printSourceAtAddress(gpa: Allocator, debug_info: *SelfInfo, writer: *Writer, symbol.source_location, address, symbol.name orelse "???", - symbol.compile_unit_name orelse debug_info.getModuleNameForAddress(gpa, address) catch "???", + symbol.compile_unit_name orelse debug_info.getModuleName(gpa, address) catch "???", tty_config, ); } @@ -1386,7 +1461,7 @@ pub fn dumpStackPointerAddr(prefix: []const u8) void { } test "manage resources correctly" { - if (!SelfInfo.target_supported) return error.SkipZigTest; + if (SelfInfo == void) return error.SkipZigTest; const S = struct { noinline fn showMyTrace() usize { return @returnAddress(); diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index cfba366162ff..7af76d02a1db 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -28,6 +28,7 @@ const Dwarf = @This(); pub 
const expression = @import("Dwarf/expression.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); +pub const SelfUnwinder = @import("Dwarf/SelfUnwinder.zig"); /// Useful to temporarily enable while working on this file. const debug_debug_mode = false; @@ -1458,8 +1459,8 @@ pub fn spRegNum(arch: std.Target.Cpu.Arch) u16 { /// Tells whether unwinding for this target is supported by the Dwarf standard. /// -/// See also `std.debug.SelfInfo.supports_unwinding` which tells whether the Zig -/// standard library has a working implementation of unwinding for this target. +/// See also `std.debug.SelfInfo.can_unwind` which tells whether the Zig standard +/// library has a working implementation of unwinding for the current target. pub fn supportsUnwinding(target: *const std.Target) bool { return switch (target.cpu.arch) { .amdgcn, diff --git a/lib/std/debug/Dwarf/SelfUnwinder.zig b/lib/std/debug/Dwarf/SelfUnwinder.zig new file mode 100644 index 000000000000..8ee08180ddc7 --- /dev/null +++ b/lib/std/debug/Dwarf/SelfUnwinder.zig @@ -0,0 +1,334 @@ +//! Implements stack unwinding based on `Dwarf.Unwind`. The caller is responsible for providing the +//! initialized `Dwarf.Unwind` from the `.debug_frame` (or equivalent) section; this type handles +//! computing and applying the CFI register rules to evolve a `std.debug.cpu_context.Native` through +//! stack frames, hence performing the virtual unwind. +//! +//! Notably, this type is a valid implementation of `std.debug.SelfInfo.UnwindContext`. + +/// The state of the CPU in the current stack frame. +cpu_state: std.debug.cpu_context.Native, +/// The value of the Program Counter in this frame. This is almost the same as the value of the IP +/// register in `cpu_state`, but may be off by one because the IP is typically a *return* address. 
+pc: usize, + +cfi_vm: Dwarf.Unwind.VirtualMachine, +expr_vm: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + +pub const CacheEntry = struct { + const max_regs = 32; + + pc: usize, + cie: *const Dwarf.Unwind.CommonInformationEntry, + cfa_rule: Dwarf.Unwind.VirtualMachine.CfaRule, + num_rules: u8, + rules_regs: [max_regs]u16, + rules: [max_regs]Dwarf.Unwind.VirtualMachine.RegisterRule, + + pub fn find(entries: []const CacheEntry, pc: usize) ?*const CacheEntry { + assert(pc != 0); + const idx = std.hash.int(pc) % entries.len; + const entry = &entries[idx]; + return if (entry.pc == pc) entry else null; + } + + pub fn populate(entry: *const CacheEntry, entries: []CacheEntry) void { + const idx = std.hash.int(entry.pc) % entries.len; + entries[idx] = entry.*; + } + + pub const empty: CacheEntry = .{ + .pc = 0, + .cie = undefined, + .cfa_rule = undefined, + .num_rules = undefined, + .rules_regs = undefined, + .rules = undefined, + }; +}; + +pub fn init(cpu_context: *const std.debug.cpu_context.Native) SelfUnwinder { + // `@constCast` is safe because we aren't going to store to the resulting pointer. + const raw_pc_ptr = regNative(@constCast(cpu_context), ip_reg_num) catch |err| switch (err) { + error.InvalidRegister => unreachable, // `ip_reg_num` is definitely valid + error.UnsupportedRegister => unreachable, // the implementation needs to support ip + error.IncompatibleRegisterSize => unreachable, // ip is definitely `usize`-sized + }; + const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*); + return .{ + .cpu_state = cpu_context.*, + .pc = pc, + .cfi_vm = .{}, + .expr_vm = .{}, + }; +} + +pub fn deinit(unwinder: *SelfUnwinder, gpa: Allocator) void { + unwinder.cfi_vm.deinit(gpa); + unwinder.expr_vm.deinit(gpa); + unwinder.* = undefined; +} + +pub fn getFp(unwinder: *const SelfUnwinder) usize { + // `@constCast` is safe because we aren't going to store to the resulting pointer. 
+ const ptr = regNative(@constCast(&unwinder.cpu_state), fp_reg_num) catch |err| switch (err) { + error.InvalidRegister => unreachable, // `fp_reg_num` is definitely valid + error.UnsupportedRegister => unreachable, // the implementation needs to support fp + error.IncompatibleRegisterSize => unreachable, // fp is a pointer so is `usize`-sized + }; + return ptr.*; +} + +/// Compute the rule set for the address `unwinder.pc` from the information in `unwind`. The caller +/// may store the returned rule set in a simple fixed-size cache keyed on the `pc` field to avoid +/// frequently recomputing register rules when unwinding many times. +/// +/// To actually apply the computed rules, see `next`. +pub fn computeRules( + unwinder: *SelfUnwinder, + gpa: Allocator, + unwind: *const Dwarf.Unwind, + load_offset: usize, + explicit_fde_offset: ?usize, +) !CacheEntry { + assert(unwinder.pc != 0); + + const pc_vaddr = unwinder.pc - load_offset; + + const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const cie, const fde = try unwind.getFde(fde_offset, native_endian); + + // `lookupPc` can return false positives, so check if the FDE *actually* includes the pc + if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { + return error.MissingDebugInfo; + } + + unwinder.cfi_vm.reset(); + const row = try unwinder.cfi_vm.runTo(gpa, pc_vaddr, cie, &fde, @sizeOf(usize), native_endian); + const cols = unwinder.cfi_vm.rowColumns(&row); + + if (cols.len > CacheEntry.max_regs) return error.UnsupportedDebugInfo; + + var entry: CacheEntry = .{ + .pc = unwinder.pc, + .cie = cie, + .cfa_rule = row.cfa, + .num_rules = @intCast(cols.len), + .rules_regs = undefined, + .rules = undefined, + }; + for (cols, 0..) 
|col, i| { + entry.rules_regs[i] = col.register; + entry.rules[i] = col.rule; + } + return entry; +} + +/// Applies the register rules given in `cache_entry` to the current state of `unwinder`. The caller +/// is responsible for ensuring that `cache_entry` contains the correct rule set for `unwinder.pc`. +/// +/// `unwinder.cpu_state` and `unwinder.pc` are updated to refer to the next frame, and this frame's +/// return address is returned as a `usize`. +pub fn next(unwinder: *SelfUnwinder, gpa: Allocator, cache_entry: *const CacheEntry) std.debug.SelfInfoError!usize { + return unwinder.nextInner(gpa, cache_entry) catch |err| switch (err) { + error.OutOfMemory, + error.InvalidDebugInfo, + => |e| return e, + + error.UnsupportedRegister, + error.UnimplementedExpressionCall, + error.UnimplementedOpcode, + error.UnimplementedUserOpcode, + error.UnimplementedTypedComparison, + error.UnimplementedTypeConversion, + error.UnknownExpressionOpcode, + => return error.UnsupportedDebugInfo, + + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.IncompatibleRegisterSize, + error.InvalidRegister, + error.IncompleteExpressionContext, + error.InvalidCFAOpcode, + error.InvalidExpression, + error.InvalidFrameBase, + error.InvalidIntegralTypeSize, + error.InvalidSubExpression, + error.InvalidTypeLength, + error.TruncatedIntegralType, + error.DivisionByZero, + => return error.InvalidDebugInfo, + }; +} + +fn nextInner(unwinder: *SelfUnwinder, gpa: Allocator, cache_entry: *const CacheEntry) !usize { + const format = cache_entry.cie.format; + const return_address_register = cache_entry.cie.return_address_register; + + const cfa = switch (cache_entry.cfa_rule) { + .none => return error.InvalidDebugInfo, + .reg_off => |ro| cfa: { + const ptr = try regNative(&unwinder.cpu_state, ro.register); + break :cfa try applyOffset(ptr.*, ro.offset); + }, + .expression => |expr| cfa: { + // On all implemented architectures, the CFA is defined to be the previous frame's SP + const 
prev_cfa_val = (try regNative(&unwinder.cpu_state, sp_reg_num)).*; + unwinder.expr_vm.reset(); + const value = try unwinder.expr_vm.run(expr, gpa, .{ + .format = format, + .cpu_context = &unwinder.cpu_state, + }, prev_cfa_val) orelse return error.InvalidDebugInfo; + switch (value) { + .generic => |g| break :cfa g, + else => return error.InvalidDebugInfo, + } + }, + }; + + // If unspecified, we'll use the default rule for the return address register, which is + // typically equivalent to `.undefined` (meaning there is no return address), but may be + // overriden by ABIs. + var has_return_address: bool = builtin.cpu.arch.isAARCH64() and + return_address_register >= 19 and + return_address_register <= 28; + + // Create a copy of the CPU state, to which we will apply the new rules. + var new_cpu_state = unwinder.cpu_state; + + // On all implemented architectures, the CFA is defined to be the previous frame's SP + (try regNative(&new_cpu_state, sp_reg_num)).* = cfa; + + const rules_len = cache_entry.num_rules; + for (cache_entry.rules_regs[0..rules_len], cache_entry.rules[0..rules_len]) |register, rule| { + const new_val: union(enum) { + same, + undefined, + val: usize, + bytes: []const u8, + } = switch (rule) { + .default => val: { + // The default rule is typically equivalent to `.undefined`, but ABIs may override it. 
+ if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + break :val .same; + } + break :val .undefined; + }, + .undefined => .undefined, + .same_value => .same, + .offset => |offset| val: { + const ptr: *const usize = @ptrFromInt(try applyOffset(cfa, offset)); + break :val .{ .val = ptr.* }; + }, + .val_offset => |offset| .{ .val = try applyOffset(cfa, offset) }, + .register => |r| .{ .bytes = try unwinder.cpu_state.dwarfRegisterBytes(r) }, + .expression => |expr| val: { + unwinder.expr_vm.reset(); + const value = try unwinder.expr_vm.run(expr, gpa, .{ + .format = format, + .cpu_context = &unwinder.cpu_state, + }, cfa) orelse return error.InvalidDebugInfo; + const ptr: *const usize = switch (value) { + .generic => |addr| @ptrFromInt(addr), + else => return error.InvalidDebugInfo, + }; + break :val .{ .val = ptr.* }; + }, + .val_expression => |expr| val: { + unwinder.expr_vm.reset(); + const value = try unwinder.expr_vm.run(expr, gpa, .{ + .format = format, + .cpu_context = &unwinder.cpu_state, + }, cfa) orelse return error.InvalidDebugInfo; + switch (value) { + .generic => |val| break :val .{ .val = val }, + else => return error.InvalidDebugInfo, + } + }, + }; + switch (new_val) { + .same => {}, + .undefined => { + const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register)); + @memset(dest, undefined); + }, + .val => |val| { + const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register)); + if (dest.len != @sizeOf(usize)) return error.InvalidDebugInfo; + const dest_ptr: *align(1) usize = @ptrCast(dest); + dest_ptr.* = val; + }, + .bytes => |src| { + const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register)); + if (dest.len != src.len) return error.InvalidDebugInfo; + @memcpy(dest, src); + }, + } + if (register == return_address_register) { + has_return_address = new_val != .undefined; + } + } + + const return_address: usize = if (has_return_address) pc: { + const raw_ptr = try regNative(&new_cpu_state, 
return_address_register); + break :pc stripInstructionPtrAuthCode(raw_ptr.*); + } else 0; + + (try regNative(&new_cpu_state, ip_reg_num)).* = return_address; + + // The new CPU state is complete; flush changes. + unwinder.cpu_state = new_cpu_state; + + // The caller will subtract 1 from the return address to get an address corresponding to the + // function call. However, if this is a signal frame, that's actually incorrect, because the + // "return address" we have is the instruction which triggered the signal (if the signal + // handler returned, the instruction would be re-run). Compensate for this by incrementing + // the address in that case. + const adjusted_ret_addr = if (cache_entry.cie.is_signal_frame) return_address +| 1 else return_address; + + // We also want to do that same subtraction here to get the PC for the next frame's FDE. + // This is because if the callee was noreturn, then the function call might be the caller's + // last instruction, so `return_address` might actually point outside of it! + unwinder.pc = adjusted_ret_addr -| 1; + + return adjusted_ret_addr; +} + +pub fn regNative(ctx: *std.debug.cpu_context.Native, num: u16) error{ + InvalidRegister, + UnsupportedRegister, + IncompatibleRegisterSize, +}!*align(1) usize { + const bytes = try ctx.dwarfRegisterBytes(num); + if (bytes.len != @sizeOf(usize)) return error.IncompatibleRegisterSize; + return @ptrCast(bytes); +} + +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. 
+fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} + +const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?; +const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch); +const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch); + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; +const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); + +const SelfUnwinder = @This(); diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 8c4c1a19e6c4..d351c0421e5c 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -530,16 +530,18 @@ pub fn prepare( }; if (saw_terminator != expect_terminator) return bad(); - std.mem.sortUnstable(SortedFdeEntry, fde_list.items, {}, struct { - fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { - ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); + if (need_lookup) { + std.mem.sortUnstable(SortedFdeEntry, fde_list.items, {}, struct { + fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { + ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); - // This temporary is necessary to avoid an RLS footgun where `lookup` ends up non-null `undefined` on OOM. - const final_fdes = try fde_list.toOwnedSlice(gpa); - unwind.lookup = .{ .sorted_fdes = final_fdes }; + // This temporary is necessary to avoid an RLS footgun where `lookup` ends up non-null `undefined` on OOM. 
+ const final_fdes = try fde_list.toOwnedSlice(gpa); + unwind.lookup = .{ .sorted_fdes = final_fdes }; + } } fn findCie(unwind: *const Unwind, offset: u64) ?*const CommonInformationEntry { diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index 3291de350662..4460bd2bc21a 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -10,7 +10,7 @@ const assert = std.debug.assert; const testing = std.testing; const Writer = std.Io.Writer; -const regNative = std.debug.SelfInfo.DwarfUnwindContext.regNative; +const regNative = std.debug.Dwarf.SelfUnwinder.regNative; const ip_reg_num = std.debug.Dwarf.ipRegNum(native_arch).?; const fp_reg_num = std.debug.Dwarf.fpRegNum(native_arch); diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig deleted file mode 100644 index bb05ce521683..000000000000 --- a/lib/std/debug/SelfInfo.zig +++ /dev/null @@ -1,551 +0,0 @@ -//! Cross-platform abstraction for this binary's own debug information, with a -//! goal of minimal code bloat and compilation speed penalty. - -const builtin = @import("builtin"); -const native_endian = native_arch.endian(); -const native_arch = builtin.cpu.arch; - -const std = @import("../std.zig"); -const mem = std.mem; -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const Dwarf = std.debug.Dwarf; -const CpuContext = std.debug.cpu_context.Native; - -const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode; - -const root = @import("root"); - -const SelfInfo = @This(); - -/// Locks access to `modules`. However, does *not* lock the `Module.DebugInfo`, nor `lookup_cache` -/// the implementation is responsible for locking as needed in its exposed methods. -/// -/// TODO: to allow `SelfInfo` to work on freestanding, we currently just don't use this mutex there. -/// That's a bad solution, but a better one depends on the standard library's general support for -/// "bring your own OS" being improved. 
-modules_mutex: switch (builtin.os.tag) { - else => std.Thread.Mutex, - .freestanding, .other => struct { - fn lock(_: @This()) void {} - fn unlock(_: @This()) void {} - }, -}, -/// Value is allocated into gpa to give it a stable pointer. -modules: if (target_supported) std.AutoArrayHashMapUnmanaged(usize, *Module.DebugInfo) else void, -lookup_cache: if (target_supported) Module.LookupCache else void, - -pub const Error = error{ - /// The required debug info is invalid or corrupted. - InvalidDebugInfo, - /// The required debug info could not be found. - MissingDebugInfo, - /// The required debug info was found, and may be valid, but is not supported by this implementation. - UnsupportedDebugInfo, - /// The required debug info could not be read from disk due to some IO error. - ReadFailed, - OutOfMemory, - Unexpected, -}; - -/// Indicates whether the `SelfInfo` implementation has support for this target. -pub const target_supported: bool = Module != void; - -/// Indicates whether the `SelfInfo` implementation has support for unwinding on this target. 
-pub const supports_unwinding: bool = target_supported and Module.supports_unwinding; - -pub const UnwindContext = if (supports_unwinding) Module.UnwindContext; - -pub const init: SelfInfo = .{ - .modules_mutex = .{}, - .modules = .empty, - .lookup_cache = if (Module.LookupCache != void) .init, -}; - -pub fn deinit(self: *SelfInfo, gpa: Allocator) void { - for (self.modules.values()) |di| { - di.deinit(gpa); - gpa.destroy(di); - } - self.modules.deinit(gpa); - if (Module.LookupCache != void) self.lookup_cache.deinit(gpa); -} - -pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { - comptime assert(supports_unwinding); - const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc); - const di: *Module.DebugInfo = di: { - self.modules_mutex.lock(); - defer self.modules_mutex.unlock(); - const gop = try self.modules.getOrPut(gpa, module.key()); - if (gop.found_existing) break :di gop.value_ptr.*; - errdefer _ = self.modules.pop().?; - const di = try gpa.create(Module.DebugInfo); - di.* = .init; - gop.value_ptr.* = di; - break :di di; - }; - return module.unwindFrame(gpa, di, context); -} - -pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol { - comptime assert(target_supported); - const module: Module = try .lookup(&self.lookup_cache, gpa, address); - const di: *Module.DebugInfo = di: { - self.modules_mutex.lock(); - defer self.modules_mutex.unlock(); - const gop = try self.modules.getOrPut(gpa, module.key()); - if (gop.found_existing) break :di gop.value_ptr.*; - errdefer _ = self.modules.pop().?; - const di = try gpa.create(Module.DebugInfo); - di.* = .init; - gop.value_ptr.* = di; - break :di di; - }; - return module.getSymbolAtAddress(gpa, di, address); -} - -pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { - comptime assert(target_supported); - const module: Module = try .lookup(&self.lookup_cache, gpa, address); - if 
(module.name.len == 0) return error.MissingDebugInfo; - return module.name; -} - -/// `void` indicates that `SelfInfo` is not supported for this target. -/// -/// This type contains the target-specific implementation. Logically, a `Module` represents a subset -/// of the executable with its own debug information. This typically corresponds to what ELF calls a -/// module, i.e. a shared library or executable image, but could be anything. For instance, it would -/// be valid to consider the entire application one module, or on the other hand to consider each -/// object file a module. -/// -/// Because different threads can collect stack traces concurrently, the implementation must be able -/// to tolerate concurrent calls to any method it implements. -/// -/// This type must must expose the following declarations: -/// -/// ``` -/// /// Holds state cached by the implementation between calls to `lookup`. -/// /// This may be `void`, in which case the inner declarations can be omitted. -/// pub const LookupCache = struct { -/// pub const init: LookupCache; -/// pub fn deinit(lc: *LookupCache, gpa: Allocator) void; -/// }; -/// /// Holds debug information associated with a particular `Module`. -/// pub const DebugInfo = struct { -/// pub const init: DebugInfo; -/// }; -/// /// Finds the `Module` corresponding to `address`. -/// pub fn lookup(lc: *LookupCache, gpa: Allocator, address: usize) SelfInfo.Error!Module; -/// /// Returns a unique identifier for this `Module`, such as a load address. -/// pub fn key(mod: *const Module) usize; -/// /// Locates and loads location information for the symbol corresponding to `address`. -/// pub fn getSymbolAtAddress( -/// mod: *const Module, -/// gpa: Allocator, -/// di: *DebugInfo, -/// address: usize, -/// ) SelfInfo.Error!std.debug.Symbol; -/// /// Whether a reliable stack unwinding strategy, such as DWARF unwinding, is available. -/// pub const supports_unwinding: bool; -/// /// Only required if `supports_unwinding == true`. 
-/// pub const UnwindContext = struct { -/// /// A PC value representing the location in the last frame. -/// pc: usize, -/// pub fn init(ctx: *std.debug.cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext; -/// pub fn deinit(uc: *UnwindContext, gpa: Allocator) void; -/// /// Returns the frame pointer associated with the last unwound stack frame. If the frame -/// /// pointer is unknown, 0 may be returned instead. -/// pub fn getFp(uc: *UnwindContext) usize; -/// }; -/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame, and returns -/// /// the frame's return address. -/// pub fn unwindFrame( -/// mod: *const Module, -/// gpa: Allocator, -/// di: *DebugInfo, -/// ctx: *UnwindContext, -/// ) SelfInfo.Error!usize; -/// ``` -const Module: type = Module: { - // Allow overriding the target-specific `SelfInfo` implementation by exposing `root.debug.Module`. - if (@hasDecl(root, "debug") and @hasDecl(root.debug, "Module")) { - break :Module root.debug.Module; - } - break :Module switch (builtin.os.tag) { - .linux, - .netbsd, - .freebsd, - .dragonfly, - .openbsd, - .solaris, - .illumos, - => @import("SelfInfo/ElfModule.zig"), - - .macos, - .ios, - .watchos, - .tvos, - .visionos, - => @import("SelfInfo/DarwinModule.zig"), - - .uefi, - .windows, - => @import("SelfInfo/WindowsModule.zig"), - - else => void, - }; -}; - -/// An implementation of `UnwindContext` useful for DWARF-based unwinders. The `Module.unwindFrame` -/// implementation should wrap `DwarfUnwindContext.unwindFrame`. -pub const DwarfUnwindContext = struct { - cfa: ?usize, - pc: usize, - cpu_context: CpuContext, - vm: Dwarf.Unwind.VirtualMachine, - stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - - pub const Cache = struct { - /// TODO: to allow `DwarfUnwindContext` to work on freestanding, we currently just don't use - /// this mutex there. 
That's a bad solution, but a better one depends on the standard - /// library's general support for "bring your own OS" being improved. - mutex: switch (builtin.os.tag) { - else => std.Thread.Mutex, - .freestanding, .other => struct { - fn lock(_: @This()) void {} - fn unlock(_: @This()) void {} - }, - }, - buf: [num_slots]Slot, - const num_slots = 2048; - const Slot = struct { - const max_regs = 32; - pc: usize, - cie: *const Dwarf.Unwind.CommonInformationEntry, - cfa_rule: Dwarf.Unwind.VirtualMachine.CfaRule, - rules_regs: [max_regs]u16, - rules: [max_regs]Dwarf.Unwind.VirtualMachine.RegisterRule, - num_rules: u8, - }; - /// This is a function rather than a declaration to avoid lowering a very large struct value - /// into the binary when most of it is `undefined`. - pub fn init(c: *Cache) void { - c.mutex = .{}; - for (&c.buf) |*slot| slot.pc = 0; - } - }; - - pub fn init(cpu_context: *const CpuContext) DwarfUnwindContext { - comptime assert(supports_unwinding); - - // `@constCast` is safe because we aren't going to store to the resulting pointer. - const raw_pc_ptr = regNative(@constCast(cpu_context), ip_reg_num) catch |err| switch (err) { - error.InvalidRegister => unreachable, // `ip_reg_num` is definitely valid - error.UnsupportedRegister => unreachable, // the implementation needs to support ip - error.IncompatibleRegisterSize => unreachable, // ip is definitely `usize`-sized - }; - const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*); - - return .{ - .cfa = null, - .pc = pc, - .cpu_context = cpu_context.*, - .vm = .{}, - .stack_machine = .{}, - }; - } - - pub fn deinit(self: *DwarfUnwindContext, gpa: Allocator) void { - self.vm.deinit(gpa); - self.stack_machine.deinit(gpa); - self.* = undefined; - } - - pub fn getFp(self: *const DwarfUnwindContext) usize { - // `@constCast` is safe because we aren't going to store to the resulting pointer. 
- const ptr = regNative(@constCast(&self.cpu_context), fp_reg_num) catch |err| switch (err) { - error.InvalidRegister => unreachable, // `fp_reg_num` is definitely valid - error.UnsupportedRegister => unreachable, // the implementation needs to support fp - error.IncompatibleRegisterSize => unreachable, // fp is a pointer so is `usize`-sized - }; - return ptr.*; - } - - /// Unwind a stack frame using DWARF unwinding info, updating the register context. - /// - /// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. - /// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter - /// may require lazily loading the data in those sections. - /// - /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when using macOS' - /// `__unwind_info` section. - pub fn unwindFrame( - context: *DwarfUnwindContext, - cache: *Cache, - gpa: Allocator, - unwind: *const Dwarf.Unwind, - load_offset: usize, - explicit_fde_offset: ?usize, - ) Error!usize { - return unwindFrameInner(context, cache, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.UnsupportedDebugInfo, - error.OutOfMemory, - => |e| return e, - - error.UnsupportedAddrSize, - error.UnimplementedUserOpcode, - error.UnimplementedExpressionCall, - error.UnimplementedOpcode, - error.UnimplementedTypedComparison, - error.UnimplementedTypeConversion, - error.UnknownExpressionOpcode, - error.UnsupportedRegister, - => return error.UnsupportedDebugInfo, - - error.InvalidRegister, - error.ReadFailed, - error.EndOfStream, - error.IncompatibleRegisterSize, - error.Overflow, - error.StreamTooLong, - error.InvalidOperand, - error.InvalidOpcode, - error.InvalidOperation, - error.InvalidCFARule, - error.IncompleteExpressionContext, - error.InvalidCFAOpcode, - error.InvalidExpression, - error.InvalidFrameBase, - error.InvalidIntegralTypeSize, - 
error.InvalidSubExpression, - error.InvalidTypeLength, - error.TruncatedIntegralType, - error.DivisionByZero, - error.InvalidExpressionValue, - error.NoExpressionValue, - error.RegisterSizeMismatch, - => return error.InvalidDebugInfo, - }; - } - fn unwindFrameInner( - context: *DwarfUnwindContext, - cache: *Cache, - gpa: Allocator, - unwind: *const Dwarf.Unwind, - load_offset: usize, - explicit_fde_offset: ?usize, - ) !usize { - comptime assert(supports_unwinding); - - if (context.pc == 0) return 0; - - const pc_vaddr = context.pc - load_offset; - - const cache_slot: Cache.Slot = slot: { - const slot_idx = std.hash.int(pc_vaddr) % Cache.num_slots; - - { - cache.mutex.lock(); - defer cache.mutex.unlock(); - if (cache.buf[slot_idx].pc == pc_vaddr) break :slot cache.buf[slot_idx]; - } - - const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( - pc_vaddr, - @sizeOf(usize), - native_endian, - ) orelse return error.MissingDebugInfo; - const cie, const fde = try unwind.getFde(fde_offset, native_endian); - - // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives). 
- if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { - return error.MissingDebugInfo; - } - - context.vm.reset(); - - const row = try context.vm.runTo(gpa, pc_vaddr, cie, &fde, @sizeOf(usize), native_endian); - - if (row.columns.len > Cache.Slot.max_regs) return error.UnsupportedDebugInfo; - - var slot: Cache.Slot = .{ - .pc = pc_vaddr, - .cie = cie, - .cfa_rule = row.cfa, - .rules_regs = undefined, - .rules = undefined, - .num_rules = 0, - }; - for (context.vm.rowColumns(&row)) |col| { - const i = slot.num_rules; - slot.rules_regs[i] = col.register; - slot.rules[i] = col.rule; - slot.num_rules += 1; - } - - { - cache.mutex.lock(); - defer cache.mutex.unlock(); - cache.buf[slot_idx] = slot; - } - - break :slot slot; - }; - - const format = cache_slot.cie.format; - const return_address_register = cache_slot.cie.return_address_register; - - context.cfa = switch (cache_slot.cfa_rule) { - .none => return error.InvalidCFARule, - .reg_off => |ro| cfa: { - const ptr = try regNative(&context.cpu_context, ro.register); - break :cfa try applyOffset(ptr.*, ro.offset); - }, - .expression => |expr| cfa: { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expr, gpa, .{ - .format = format, - .cpu_context = &context.cpu_context, - }, context.cfa) orelse return error.NoExpressionValue; - switch (value) { - .generic => |g| break :cfa g, - else => return error.InvalidExpressionValue, - } - }, - }; - - // If unspecified, we'll use the default rule for the return address register, which is - // typically equivalent to `.undefined` (meaning there is no return address), but may be - // overriden by ABIs. - var has_return_address: bool = builtin.cpu.arch.isAARCH64() and - return_address_register >= 19 and - return_address_register <= 28; - - // Create a copy of the CPU context, to which we will apply the new rules. 
- var new_cpu_context = context.cpu_context; - - // On all implemented architectures, the CFA is defined as being the previous frame's SP - (try regNative(&new_cpu_context, sp_reg_num)).* = context.cfa.?; - - const rules_len = cache_slot.num_rules; - for (cache_slot.rules_regs[0..rules_len], cache_slot.rules[0..rules_len]) |register, rule| { - const new_val: union(enum) { - same, - undefined, - val: usize, - bytes: []const u8, - } = switch (rule) { - .default => val: { - // The default rule is typically equivalent to `.undefined`, but ABIs may override it. - if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { - break :val .same; - } - break :val .undefined; - }, - .undefined => .undefined, - .same_value => .same, - .offset => |offset| val: { - const ptr: *const usize = @ptrFromInt(try applyOffset(context.cfa.?, offset)); - break :val .{ .val = ptr.* }; - }, - .val_offset => |offset| .{ .val = try applyOffset(context.cfa.?, offset) }, - .register => |r| .{ .bytes = try context.cpu_context.dwarfRegisterBytes(r) }, - .expression => |expr| val: { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expr, gpa, .{ - .format = format, - .cpu_context = &context.cpu_context, - }, context.cfa.?) orelse return error.NoExpressionValue; - const ptr: *const usize = switch (value) { - .generic => |addr| @ptrFromInt(addr), - else => return error.InvalidExpressionValue, - }; - break :val .{ .val = ptr.* }; - }, - .val_expression => |expr| val: { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expr, gpa, .{ - .format = format, - .cpu_context = &context.cpu_context, - }, context.cfa.?) 
orelse return error.NoExpressionValue; - switch (value) { - .generic => |val| break :val .{ .val = val }, - else => return error.InvalidExpressionValue, - } - }, - }; - switch (new_val) { - .same => {}, - .undefined => { - const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); - @memset(dest, undefined); - }, - .val => |val| { - const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); - if (dest.len != @sizeOf(usize)) return error.RegisterSizeMismatch; - const dest_ptr: *align(1) usize = @ptrCast(dest); - dest_ptr.* = val; - }, - .bytes => |src| { - const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); - if (dest.len != src.len) return error.RegisterSizeMismatch; - @memcpy(dest, src); - }, - } - if (register == return_address_register) { - has_return_address = new_val != .undefined; - } - } - - const return_address: usize = if (has_return_address) pc: { - const raw_ptr = try regNative(&new_cpu_context, return_address_register); - break :pc stripInstructionPtrAuthCode(raw_ptr.*); - } else 0; - - (try regNative(&new_cpu_context, ip_reg_num)).* = return_address; - - // The new CPU context is complete; flush changes. - context.cpu_context = new_cpu_context; - - // The caller will subtract 1 from the return address to get an address corresponding to the - // function call. However, if this is a signal frame, that's actually incorrect, because the - // "return address" we have is the instruction which triggered the signal (if the signal - // handler returned, the instruction would be re-run). Compensate for this by incrementing - // the address in that case. - const adjusted_ret_addr = if (cache_slot.cie.is_signal_frame) return_address +| 1 else return_address; - - // We also want to do that same subtraction here to get the PC for the next frame's FDE. 
- // This is because if the callee was noreturn, then the function call might be the caller's - // last instruction, so `return_address` might actually point outside of it! - context.pc = adjusted_ret_addr -| 1; - - return adjusted_ret_addr; - } - /// Since register rules are applied (usually) during a panic, - /// checked addition / subtraction is used so that we can return - /// an error and fall back to FP-based unwinding. - fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); - } - - pub fn regNative(ctx: *CpuContext, num: u16) error{ - InvalidRegister, - UnsupportedRegister, - IncompatibleRegisterSize, - }!*align(1) usize { - const bytes = try ctx.dwarfRegisterBytes(num); - if (bytes.len != @sizeOf(usize)) return error.IncompatibleRegisterSize; - return @ptrCast(bytes); - } - - const ip_reg_num = Dwarf.ipRegNum(native_arch).?; - const fp_reg_num = Dwarf.fpRegNum(native_arch); - const sp_reg_num = Dwarf.spRegNum(native_arch); -}; diff --git a/lib/std/debug/SelfInfo/Darwin.zig b/lib/std/debug/SelfInfo/Darwin.zig new file mode 100644 index 000000000000..a43f279f39b7 --- /dev/null +++ b/lib/std/debug/SelfInfo/Darwin.zig @@ -0,0 +1,993 @@ +mutex: std.Thread.Mutex, +/// Accessed through `Module.Adapter`. 
+modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false), +ofiles: std.StringArrayHashMapUnmanaged(?OFile), + +pub const init: SelfInfo = .{ + .mutex = .{}, + .modules = .empty, + .ofiles = .empty, +}; +pub fn deinit(si: *SelfInfo, gpa: Allocator) void { + for (si.modules.keys()) |*module| { + unwind: { + const u = &(module.unwind orelse break :unwind catch break :unwind); + if (u.dwarf) |*dwarf| dwarf.deinit(gpa); + } + loaded: { + const l = &(module.loaded_macho orelse break :loaded catch break :loaded); + gpa.free(l.symbols); + posix.munmap(l.mapped_memory); + } + } + for (si.ofiles.values()) |*opt_ofile| { + const ofile = &(opt_ofile.* orelse continue); + ofile.dwarf.deinit(gpa); + ofile.symbols_by_name.deinit(gpa); + posix.munmap(ofile.mapped_memory); + } + si.modules.deinit(gpa); + si.ofiles.deinit(gpa); +} + +pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol { + const module = try si.findModule(gpa, address); + defer si.mutex.unlock(); + + const loaded_macho = try module.getLoadedMachO(gpa); + + const vaddr = address - loaded_macho.vaddr_offset; + const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; + + // offset of `address` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if we fail to find the DWARF infos + const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); + + // If any information is missing, we can at least return this from now on. + const sym_only_result: std.debug.Symbol = .{ + .name = stab_symbol, + .compile_unit_name = null, + .source_location = null, + }; + + if (symbol.ofile == MachoSymbol.unknown_ofile) { + // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. 
+ return sym_only_result; + } + + const o_file: *OFile = of: { + const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); + const gop = try si.ofiles.getOrPut(gpa, path); + if (!gop.found_existing) { + gop.value_ptr.* = loadOFile(gpa, path) catch null; + } + if (gop.value_ptr.*) |*o_file| { + break :of o_file; + } else { + return sym_only_result; + } + }; + + const symbol_index = o_file.symbols_by_name.getKeyAdapted( + @as([]const u8, stab_symbol), + @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), + ) orelse return sym_only_result; + const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; + + const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; + + return .{ + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, + .compile_unit_name = compile_unit.die.getAttrString( + &o_file.dwarf, + native_endian, + std.dwarf.AT.name, + o_file.dwarf.section(.debug_str), + compile_unit, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + }, + .source_location = o_file.dwarf.getLineNumberInfo( + gpa, + native_endian, + compile_unit, + symbol_ofile_vaddr + address_symbol_offset, + ) catch null, + }; +} +pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { + const module = try si.findModule(gpa, address); + defer si.mutex.unlock(); + return module.name; +} + +pub const can_unwind: bool = true; +pub const UnwindContext = std.debug.Dwarf.SelfUnwinder; +/// Unwind a frame using MachO compact unwind info (from `__unwind_info`). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `__eh_frame` will be used if available. 
+pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { + return unwindFrameInner(si, gpa, context) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.UnsupportedDebugInfo, + error.ReadFailed, + error.OutOfMemory, + error.Unexpected, + => |e| return e, + error.UnsupportedRegister, + error.UnsupportedAddrSize, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + error.Overflow, + error.EndOfStream, + error.StreamTooLong, + error.InvalidOpcode, + error.InvalidOperation, + error.InvalidOperand, + error.InvalidRegister, + error.IncompatibleRegisterSize, + => return error.InvalidDebugInfo, + }; +} +fn unwindFrameInner(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { + const module = try si.findModule(gpa, context.pc); + defer si.mutex.unlock(); + + const unwind: *Module.Unwind = try module.getUnwindInfo(gpa); + + const ip_reg_num = comptime Dwarf.ipRegNum(builtin.target.cpu.arch).?; + const fp_reg_num = comptime Dwarf.fpRegNum(builtin.target.cpu.arch); + const sp_reg_num = comptime Dwarf.spRegNum(builtin.target.cpu.arch); + + const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo; + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo; + const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); + + const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidDebugInfo; + const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); + if (indices.len == 0) return error.MissingDebugInfo; + + // offset of the PC into the `__TEXT` segment + const pc_text_offset = context.pc - module.text_base; + + const start_offset: u32, const first_level_offset: u32 = index: { 
+ var left: usize = 0; + var len: usize = indices.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < indices[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; + }; + // An offset of 0 is a sentinel indicating a range does not have unwind info. + if (start_offset == 0) return error.MissingDebugInfo; + + const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidDebugInfo; + const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], + ); + + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidDebugInfo; + const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidDebugInfo; + const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; + const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidDebugInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = 
left + len / 2; + if (pc_text_offset < entries[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :entry .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidDebugInfo; + const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; + const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidDebugInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + const entry = entries[left]; + + const function_offset = first_level_offset + entry.funcOffset; + if (entry.encodingIndex < common_encodings.len) { + break :entry .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } + + const local_index = entry.encodingIndex - common_encodings.len; + const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidDebugInfo; + const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], + ); + if (local_index >= 
local_encodings.len) return error.InvalidDebugInfo; + break :entry .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + }, + else => return error.InvalidDebugInfo, + }; + + if (entry.raw_encoding == 0) return error.MissingDebugInfo; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnsupportedDebugInfo, + .RBP_FRAME => ip: { + const frame = encoding.value.x86_64.frame; + + const fp = (try dwarfRegNative(&context.cpu_state, fp_reg_num)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try dwarfRegNative(&context.cpu_state, fp_reg_num)).* = new_fp; + (try dwarfRegNative(&context.cpu_state, sp_reg_num)).* = new_sp; + (try dwarfRegNative(&context.cpu_state, ip_reg_num)).* = new_ip; + + const regs: [5]u3 = .{ + frame.reg0, + frame.reg1, + frame.reg2, + frame.reg3, + frame.reg4, + }; + for (regs, 0..) |reg, i| { + if (reg == 0) continue; + const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try dwarfRegNative(&context.cpu_state, reg_number)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :ip new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => ip: { + const frameless = encoding.value.x86_64.frameless; + + const sp = (try dwarfRegNative(&context.cpu_state, sp_reg_num)).*; + const stack_size: usize = stack_size: { + if (encoding.mode.x86_64 == .STACK_IMMD) { + break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); + } + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
+ const sub_offset_addr = + module.text_base + + entry.function_offset + + frameless.stack.indirect.sub_offset; + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = frameless.stack_reg_count; + const ip_ptr = ip_ptr: { + var digits: [6]u3 = undefined; + var accumulator: usize = frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + var registers: [6]u3 = undefined; + var used_indices: [6]bool = @splat(false); + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + registers[i] = @intCast(unused_index + 1); + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try dwarfRegNative(&context.cpu_state, reg_number)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :ip_ptr reg_addr; + }; + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + + (try dwarfRegNative(&context.cpu_state, sp_reg_num)).* = new_sp; + (try dwarfRegNative(&context.cpu_state, ip_reg_num)).* = new_ip; + + break :ip new_ip; + }, + .DWARF => { + const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); + const rules = try context.computeRules(gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf); + return context.next(gpa, &rules); + }, + }, + .aarch64, .aarch64_be => switch (encoding.mode.arm64) { + .OLD => return error.UnsupportedDebugInfo, + .FRAMELESS => ip: { + const sp = (try dwarfRegNative(&context.cpu_state, sp_reg_num)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try dwarfRegNative(&context.cpu_state, 30)).*; + (try dwarfRegNative(&context.cpu_state, sp_reg_num)).* = new_sp; + break :ip new_ip; + }, + .DWARF => { + const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); + const rules = try context.computeRules(gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf); + return context.next(gpa, &rules); + }, + .FRAME => ip: { + const frame = encoding.value.arm64.frame; + + const fp = (try dwarfRegNative(&context.cpu_state, fp_reg_num)).*; + const ip_ptr = fp + @sizeOf(usize); + + var reg_addr = fp - @sizeOf(usize); + inline for 
(@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.x_reg_pairs, field.name) != 0) { + (try dwarfRegNative(&context.cpu_state, 19 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try dwarfRegNative(&context.cpu_state, 20 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + { + const dest: *align(1) usize = @ptrCast(try context.cpu_state.dwarfRegisterBytes(64 + 8 + i)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + { + const dest: *align(1) usize = @ptrCast(try context.cpu_state.dwarfRegisterBytes(64 + 9 + i)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try dwarfRegNative(&context.cpu_state, fp_reg_num)).* = new_fp; + (try dwarfRegNative(&context.cpu_state, ip_reg_num)).* = new_ip; + + break :ip new_ip; + }, + }, + else => comptime unreachable, // unimplemented + }; + + const ret_addr = std.debug.stripInstructionPtrAuthCode(new_ip); + + // Like `Dwarf.SelfUnwinder.next`, adjust our next lookup pc in case the `call` was this + // function's last instruction making `ret_addr` one byte past its end. + context.pc = ret_addr -| 1; + + return ret_addr; +} + +/// Acquires the mutex on success. 
+fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module { + var info: std.c.dl_info = undefined; + if (std.c.dladdr(@ptrFromInt(address), &info) == 0) { + return error.MissingDebugInfo; + } + si.mutex.lock(); + errdefer si.mutex.unlock(); + const gop = try si.modules.getOrPutAdapted(gpa, @intFromPtr(info.fbase), Module.Adapter{}); + errdefer comptime unreachable; + if (!gop.found_existing) { + gop.key_ptr.* = .{ + .text_base = @intFromPtr(info.fbase), + .name = std.mem.span(info.fname), + .unwind = null, + .loaded_macho = null, + }; + } + return gop.key_ptr; +} + +const Module = struct { + text_base: usize, + name: []const u8, + unwind: ?(Error!Unwind), + loaded_macho: ?(Error!LoadedMachO), + + const Adapter = struct { + pub fn hash(_: Adapter, text_base: usize) u32 { + return @truncate(std.hash.int(text_base)); + } + pub fn eql(_: Adapter, a_text_base: usize, b_module: Module, b_index: usize) bool { + _ = b_index; + return a_text_base == b_module.text_base; + } + }; + const Context = struct { + pub fn hash(_: Context, module: Module) u32 { + return @truncate(std.hash.int(module.text_base)); + } + pub fn eql(_: Context, a_module: Module, b_module: Module, b_index: usize) bool { + _ = b_index; + return a_module.text_base == b_module.text_base; + } + }; + + const Unwind = struct { + /// The slide applied to the `__unwind_info` and `__eh_frame` sections. + /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr. + vmaddr_slide: u64, + /// Backed by the in-memory section mapped by the loader. + unwind_info: ?[]const u8, + /// Backed by the in-memory `__eh_frame` section mapped by the loader. + dwarf: ?Dwarf.Unwind, + }; + + const LoadedMachO = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: []const u8, + /// This is not necessarily the same as the vmaddr_slide that dyld would report. 
This is + /// because the segments in the file on disk might differ from the ones in memory. Normally + /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in + /// the dyld cache (dyld actually restart itself from cache after loading it), and the two + /// versions have (very) different segment base addresses. It's sort of like a large slide + /// has been applied to all addresses in memory. For an optimal experience, we consider the + /// on-disk vmaddr instead of the in-memory one. + vaddr_offset: usize, + }; + + fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind { + if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa); + return if (module.unwind.?) |*unwind| unwind else |err| err; + } + fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { + const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const sections, const text_vmaddr = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break .{ load_cmd.getSections(), segment_cmd.vmaddr }; + } else unreachable; + + const vmaddr_slide = module.text_base - text_vmaddr; + + var opt_unwind_info: ?[]const u8 = null; + var opt_eh_frame: ?[]const u8 = null; + for (sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); + opt_unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + 
sect.addr))); + opt_eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + const eh_frame = opt_eh_frame orelse return .{ + .vmaddr_slide = vmaddr_slide, + .unwind_info = opt_unwind_info, + .dwarf = null, + }; + var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); + errdefer dwarf.deinit(gpa); + // We don't need lookups, so this call is just for scanning CIEs. + dwarf.prepare(gpa, @sizeOf(usize), native_endian, false, true) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + }; + + return .{ + .vmaddr_slide = vmaddr_slide, + .unwind_info = opt_unwind_info, + .dwarf = dwarf, + }; + } + + fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO { + if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { + error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, + else => error.ReadFailed, + }; + return if (module.loaded_macho.?) |*lm| lm else |err| err; + } + fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO { + const all_mapped_memory = try mapDebugInfoFile(module.name); + errdefer posix.munmap(all_mapped_memory); + + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 macOS and x86_64 macOS. 
+ if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; + const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. + const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing + // is big-endian, so we'll be swapping some bytes. + if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; + const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); + const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); + const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; + const native_cpu_type = switch (builtin.cpu.arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => comptime unreachable, + }; + for (archs) |*arch| { + if (@byteSwap(arch.cputype) != native_cpu_type) continue; + const offset = @byteSwap(arch.offset); + const size = @byteSwap(arch.size); + break :mapped_macho all_mapped_memory[offset..][0..size]; + } + // Our native architecture was not present in the fat binary. + return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. 
+ macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, + + else => return error.InvalidDebugInfo, + }; + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, + else => {}, + }; + break :lc_iter .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; + }; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; + const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; + + var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); + + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. 
+ var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, syms.len); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = MachoSymbol.unknown_ofile, + }); + } + }, + } + continue; + } + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + }, + else => return error.InvalidDebugInfo, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } + }, + else => return error.InvalidDebugInfo, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = 
.oso_close; + }, + else => return error.InvalidDebugInfo, + }, + else => {}, + } + } + + switch (state) { + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, + .oso_close => {}, + else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); + + return .{ + .mapped_memory = all_mapped_memory, + .symbols = symbols_slice, + .strings = strings, + .vaddr_offset = module.text_base - text_vmaddr, + }; + } +}; + +const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + dwarf: Dwarf, + strtab: []const u8, + symtab: []align(1) const macho.nlist_64, + /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed + /// through `SymbolAdapter`, so that the symbol name is used as the logical key. + symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), + + const SymbolAdapter = struct { + strtab: []const u8, + symtab: []align(1) const macho.nlist_64, + pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, sym_name)); + } + pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { + _ = b_index; + const b_sym = ctx.symtab[b_sym_index]; + const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); + return mem.eql(u8, a_sym_name, b_sym_name); + } + }; +}; + +const MachoSymbol = struct { + strx: u32, + addr: u64, + /// Value may be `unknown_ofile`. 
+ ofile: u32, + const unknown_ofile = std.math.maxInt(u32); + fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const MachoSymbol = &.{ + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = MachoSymbol; +} + +/// Uses `mmap` to map the file at `path` into memory. 
+fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return error.ReadFailed, + }; + defer file.close(); + + const file_end_pos = file.getEndPos() catch |err| switch (err) { + error.Unexpected => |e| return e, + else => return error.ReadFailed, + }; + const file_len = std.math.cast(usize, file_end_pos) orelse return error.InvalidDebugInfo; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ) catch |err| switch (err) { + error.Unexpected => |e| return e, + else => return error.ReadFailed, + }; +} + +fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapDebugInfoFile(o_file_path); + errdefer posix.munmap(mapped_mem); + + if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => seg_cmd = cmd, + .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return 
error.InvalidDebugInfo; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; + const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; + defer symbols_by_name.deinit(gpa); + try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab, 0..) |sym, sym_index| { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( + @as([]const u8, sym_name), + @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), + ); + if (gop.found_existing) return error.InvalidDebugInfo; + gop.key_ptr.* = @intCast(sym_index); + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + try dwarf.open(gpa, native_endian); + + return .{ + .mapped_memory = mapped_mem, + .dwarf = dwarf, + .strtab = strtab, + .symtab = symtab, + .symbols_by_name = symbols_by_name.move(), + }; +} + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Error = std.debug.SelfInfoError; +const assert = std.debug.assert; +const posix = std.posix; +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const dwarfRegNative = std.debug.Dwarf.SelfUnwinder.regNative; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); + +const SelfInfo = @This(); diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig deleted file mode 100644 index 71e43a9a7481..000000000000 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ /dev/null @@ -1,954 +0,0 @@ -/// The runtime address where __TEXT is loaded. -text_base: usize, -name: []const u8, - -pub fn key(m: *const DarwinModule) usize { - return m.text_base; -} - -/// No cache needed, because `_dyld_get_image_header` etc are already fast. 
-pub const LookupCache = void; -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule { - _ = cache; - _ = gpa; - var info: std.c.dl_info = undefined; - switch (std.c.dladdr(@ptrFromInt(address), &info)) { - 0 => return error.MissingDebugInfo, - else => return .{ - .name = std.mem.span(info.fname), - .text_base = @intFromPtr(info.fbase), - }, - } -} -fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, out: *DebugInfo) !void { - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); - - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections, const text_vmaddr = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break .{ load_cmd.getSections(), segment_cmd.vmaddr }; - } else unreachable; - - const vmaddr_slide = module.text_base - text_vmaddr; - - var opt_unwind_info: ?[]const u8 = null; - var opt_eh_frame: ?[]const u8 = null; - for (sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - opt_unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - opt_eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - const eh_frame = opt_eh_frame orelse { - out.unwind = .{ - .vmaddr_slide = vmaddr_slide, - .unwind_info = opt_unwind_info, - .dwarf = null, - .dwarf_cache = undefined, - }; - return; - }; - var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); - errdefer dwarf.deinit(gpa); - // We don't need lookups, so this call is just for scanning CIEs. 
- dwarf.prepare(gpa, @sizeOf(usize), native_endian, false, true) catch |err| switch (err) { - error.ReadFailed => unreachable, // it's all fixed buffers - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.OutOfMemory, - => |e| return e, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - error.InvalidOperand, - error.InvalidOpcode, - error.InvalidOperation, - => return error.InvalidDebugInfo, - error.UnsupportedAddrSize, - error.UnsupportedDwarfVersion, - error.UnimplementedUserOpcode, - => return error.UnsupportedDebugInfo, - }; - - const dwarf_cache = try gpa.create(UnwindContext.Cache); - errdefer gpa.destroy(dwarf_cache); - dwarf_cache.init(); - - out.unwind = .{ - .vmaddr_slide = vmaddr_slide, - .unwind_info = opt_unwind_info, - .dwarf = dwarf, - .dwarf_cache = dwarf_cache, - }; -} -fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO { - const all_mapped_memory = try mapDebugInfoFile(module.name); - errdefer posix.munmap(all_mapped_memory); - - // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal - // binary": a simple file format which contains Mach-O binaries for multiple targets. For - // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images - // for both ARM64 Macs and x86_64 Macs. - if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; - const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; - // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. - const mapped_macho = switch (magic) { - macho.MH_MAGIC_64 => all_mapped_memory, - - macho.FAT_CIGAM => mapped_macho: { - // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing - // is big-endian, so we'll be swapping some bytes. 
- if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; - const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); - const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); - const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; - const native_cpu_type = switch (builtin.cpu.arch) { - .x86_64 => macho.CPU_TYPE_X86_64, - .aarch64 => macho.CPU_TYPE_ARM64, - else => comptime unreachable, - }; - for (archs) |*arch| { - if (@byteSwap(arch.cputype) != native_cpu_type) continue; - const offset = @byteSwap(arch.offset); - const size = @byteSwap(arch.size); - break :mapped_macho all_mapped_memory[offset..][0..size]; - } - // Our native architecture was not present in the fat binary. - return error.MissingDebugInfo; - }, - - // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It - // will be fairly easy to add support here if necessary; it's very similar to above. 
- macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - - else => return error.InvalidDebugInfo, - }; - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var symtab: ?macho.symtab_command = null; - var text_vmaddr: ?u64 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { - if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; - text_vmaddr = seg_cmd.vmaddr; - }, - else => {}, - }; - break :lc_iter .{ - symtab orelse return error.MissingDebugInfo, - text_vmaddr orelse return error.MissingDebugInfo, - }; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - // This map is temporary; it is used only to detect duplicates here. This is - // necessary because we prefer to use STAB ("symbolic debugging table") symbols, - // but they might not be present, so we track normal symbols too. - // Indices match 1-1 with those of `symbols`. 
- var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; - defer symbol_names.deinit(gpa); - try symbol_names.ensureUnusedCapacity(gpa, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf, .pbud, .indr, .abs, _ => continue, - .sect => { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(.{ - .strx = sym.n_strx, - .addr = sym.n_value, - .ofile = MachoSymbol.unknown_ofile, - }); - } - }, - } - continue; - } - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[last_sym.strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(last_sym); - } else { - symbols.items[gop.index] = last_sym; - } - } - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = 
.oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => { - // Missing STAB symtab entries is still okay, unless there were also no normal symbols. - if (symbols.items.len == 0) return error.MissingDebugInfo; - }, - .oso_close => {}, - else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - return .{ - .mapped_memory = all_mapped_memory, - .symbols = symbols_slice, - .strings = strings, - .ofiles = .empty, - .vaddr_offset = module.text_base - text_vmaddr, - }; -} -pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { - // We need the lock for a few things: - // * loading the Mach-O module - // * loading the referenced object file - // * scanning the DWARF of that object file - // * building the line number table of that object file - // That's enough that it doesn't really seem worth scoping the lock more tightly than the whole function.. 
- di.mutex.lock(); - defer di.mutex.unlock(); - - if (di.loaded_macho == null) di.loaded_macho = module.loadMachO(gpa) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| return e, - else => return error.ReadFailed, - }; - const loaded_macho = &di.loaded_macho.?; - - const vaddr = address - loaded_macho.vaddr_offset; - const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; - - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); - - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, - }; - - if (symbol.ofile == MachoSymbol.unknown_ofile) { - // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. 
- return sym_only_result; - } - - const o_file: *DebugInfo.OFile = of: { - const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile); - if (!gop.found_existing) { - const o_file_path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); - gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch { - _ = loaded_macho.ofiles.pop().?; - return sym_only_result; - }; - } - break :of gop.value_ptr; - }; - - const symbol_index = o_file.symbols_by_name.getKeyAdapted( - @as([]const u8, stab_symbol), - @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), - ) orelse return sym_only_result; - const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; - - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, - .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, - native_endian, - std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), - compile_unit, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - }, - .source_location = o_file.dwarf.getLineNumberInfo( - gpa, - native_endian, - compile_unit, - symbol_ofile_vaddr + address_symbol_offset, - ) catch null, - }; -} -pub const supports_unwinding: bool = true; -pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { - return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.UnsupportedDebugInfo, - error.ReadFailed, - error.OutOfMemory, - error.Unexpected, - => |e| return e, - error.UnsupportedRegister, - => return error.UnsupportedDebugInfo, - error.InvalidRegister, - error.IncompatibleRegisterSize, - => return error.InvalidDebugInfo, - }; -} -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - const unwind: *DebugInfo.Unwind = u: { - di.mutex.lock(); - defer di.mutex.unlock(); - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - break :u &di.unwind.?; - }; - - const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo; - if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo; - const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); - - const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); - if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidDebugInfo; - const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); - if (indices.len == 0) return error.MissingDebugInfo; - - // offset of the PC into the `__TEXT` segment - const pc_text_offset = context.pc - module.text_base; - - const start_offset: u32, const first_level_offset: u32 = index: { - var left: usize = 0; - var len: usize = indices.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < indices[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset 
}; - }; - // An offset of 0 is a sentinel indicating a range does not have unwind info. - if (start_offset == 0) return error.MissingDebugInfo; - - const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidDebugInfo; - const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], - ); - - if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidDebugInfo; - const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidDebugInfo; - const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; - const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidDebugInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < entries[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :entry .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => entry: { - if (unwind_info.len < 
start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidDebugInfo; - const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; - const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidDebugInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - const entry = entries[left]; - - const function_offset = first_level_offset + entry.funcOffset; - if (entry.encodingIndex < common_encodings.len) { - break :entry .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } - - const local_index = entry.encodingIndex - common_encodings.len; - const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidDebugInfo; - const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], - ); - if (local_index >= local_encodings.len) return error.InvalidDebugInfo; - break :entry .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - }, - else => return error.InvalidDebugInfo, - }; - - if (entry.raw_encoding == 0) return error.MissingDebugInfo; - - const encoding: 
macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnsupportedDebugInfo, - .RBP_FRAME => ip: { - const frame = encoding.value.x86_64.frame; - - const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp; - (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; - (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; - - const regs: [5]u3 = .{ - frame.reg0, - frame.reg1, - frame.reg2, - frame.reg3, - frame.reg4, - }; - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :ip new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => ip: { - const frameless = encoding.value.x86_64.frameless; - - const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*; - const stack_size: usize = stack_size: { - if (encoding.mode.x86_64 == .STACK_IMMD) { - break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); - } - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
- const sub_offset_addr = - module.text_base + - entry.function_offset + - frameless.stack.indirect.sub_offset; - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = frameless.stack_reg_count; - const ip_ptr = ip_ptr: { - var digits: [6]u3 = undefined; - var accumulator: usize = frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - var registers: [6]u3 = undefined; - var used_indices: [6]bool = @splat(false); - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) 
|used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - registers[i] = @intCast(unused_index + 1); - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :ip_ptr reg_addr; - }; - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; - (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; - - break :ip new_ip; - }, - .DWARF => { - const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); - return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf); - }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnsupportedDebugInfo, - .FRAMELESS => ip: { - const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try dwarfRegNative(&context.cpu_context, 30)).*; - (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; - break :ip new_ip; - }, - .DWARF => { - const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); - return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf); - }, - .FRAME => ip: { - const frame = encoding.value.arm64.frame; - - const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*; - const ip_ptr = fp + @sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) 
|field, i| { - if (@field(frame.x_reg_pairs, field.name) != 0) { - (try dwarfRegNative(&context.cpu_context, 19 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try dwarfRegNative(&context.cpu_context, 20 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - { - const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 8 + i)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - { - const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 9 + i)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp; - (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; - - break :ip new_ip; - }, - }, - else => comptime unreachable, // unimplemented - }; - - const ret_addr = std.debug.stripInstructionPtrAuthCode(new_ip); - - // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this - // function's last instruction making `ret_addr` one byte past its end. - context.pc = ret_addr -| 1; - - return ret_addr; -} -pub const DebugInfo = struct { - /// Held while checking and/or populating `unwind` or `loaded_macho`. - /// Once a field is populated and the pointer `&di.loaded_macho.?` or `&di.unwind.?` has been - /// gotten, the lock is released; i.e. it is not held while *using* the loaded info. 
- mutex: std.Thread.Mutex, - - unwind: ?Unwind, - loaded_macho: ?LoadedMachO, - - pub const init: DebugInfo = .{ - .mutex = .{}, - - .unwind = null, - .loaded_macho = null, - }; - - pub fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (di.loaded_macho) |*loaded_macho| { - for (loaded_macho.ofiles.values()) |*ofile| { - ofile.dwarf.deinit(gpa); - ofile.symbols_by_name.deinit(gpa); - posix.munmap(ofile.mapped_memory); - } - loaded_macho.ofiles.deinit(gpa); - gpa.free(loaded_macho.symbols); - posix.munmap(loaded_macho.mapped_memory); - } - } - - const Unwind = struct { - /// The slide applied to the `__unwind_info` and `__eh_frame` sections. - /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr. - vmaddr_slide: u64, - /// Backed by the in-memory section mapped by the loader. - unwind_info: ?[]const u8, - /// Backed by the in-memory `__eh_frame` section mapped by the loader. - dwarf: ?Dwarf.Unwind, - /// This is `undefined` if `dwarf == null`. - dwarf_cache: *UnwindContext.Cache, - }; - - const LoadedMachO = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: []const u8, - /// Key is index into `strings` of the file path. - ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile), - /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is - /// because the segments in the file on disk might differ from the ones in memory. Normally - /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: - /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in - /// the dyld cache (dyld actually restart itself from cache after loading it), and the two - /// versions have (very) different segment base addresses. It's sort of like a large slide - /// has been applied to all addresses in memory. For an optimal experience, we consider the - /// on-disk vmaddr instead of the in-memory one. 
- vaddr_offset: usize, - }; - - const OFile = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - dwarf: Dwarf, - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed - /// through `SymbolAdapter`, so that the symbol name is used as the logical key. - symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), - - const SymbolAdapter = struct { - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { - _ = ctx; - return @truncate(std.hash.Wyhash.hash(0, sym_name)); - } - pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { - _ = b_index; - const b_sym = ctx.symtab[b_sym_index]; - const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); - return mem.eql(u8, a_sym_name, b_sym_name); - } - }; - }; - - fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return 
error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; - defer symbols_by_name.deinit(gpa); - try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab, 0..) |sym, sym_index| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( - @as([]const u8, sym_name), - @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), - ); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.key_ptr.* = @intCast(sym_index); - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .mapped_memory = mapped_mem, - .dwarf = dwarf, - .strtab = strtab, - .symtab = symtab, - .symbols_by_name = symbols_by_name.move(), - }; - } -}; - -const MachoSymbol = struct { - strx: u32, - addr: u64, - /// Value may be `unknown_ofile`. - ofile: u32, - const unknown_ofile = std.math.maxInt(u32); - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } - /// Assumes that `symbols` is sorted in order of ascending `addr`. 
- fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - if (symbols.len == 0) return null; // no potential match - if (address < symbols[0].addr) return null; // address is before the lowest-address symbol - var left: usize = 0; - var len: usize = symbols.len; - while (len > 1) { - const mid = left + len / 2; - if (address < symbols[mid].addr) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - return &symbols[left]; - } - - test find { - const symbols: []const MachoSymbol = &.{ - .{ .addr = 100, .strx = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, find(symbols, 0)); - try testing.expectEqual(null, find(symbols, 99)); - try testing.expectEqual(&symbols[0], find(symbols, 100).?); - try testing.expectEqual(&symbols[0], find(symbols, 150).?); - try testing.expectEqual(&symbols[0], find(symbols, 199).?); - - try testing.expectEqual(&symbols[1], find(symbols, 200).?); - try testing.expectEqual(&symbols[1], find(symbols, 250).?); - try testing.expectEqual(&symbols[1], find(symbols, 299).?); - - try testing.expectEqual(&symbols[2], find(symbols, 300).?); - try testing.expectEqual(&symbols[2], find(symbols, 301).?); - try testing.expectEqual(&symbols[2], find(symbols, 5000).?); - } -}; -test { - _ = MachoSymbol; -} - -const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?; -const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch); -const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch); - -/// Uses `mmap` to map the file at `path` into memory. 
-fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { - const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return error.ReadFailed, - }; - defer file.close(); - - const file_len = std.math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; - - return posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); -} - -const DarwinModule = @This(); - -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; -const Dwarf = std.debug.Dwarf; -const assert = std.debug.assert; -const macho = std.macho; -const mem = std.mem; -const posix = std.posix; -const testing = std.testing; -const Error = std.debug.SelfInfo.Error; -const dwarfRegNative = std.debug.SelfInfo.DwarfUnwindContext.regNative; - -const builtin = @import("builtin"); -const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig new file mode 100644 index 000000000000..4f9389f2d543 --- /dev/null +++ b/lib/std/debug/SelfInfo/Elf.zig @@ -0,0 +1,427 @@ +rwlock: std.Thread.RwLock, + +modules: std.ArrayList(Module), +ranges: std.ArrayList(Module.Range), + +unwind_cache: if (can_unwind) ?[]Dwarf.SelfUnwinder.CacheEntry else ?noreturn, + +pub const init: SelfInfo = .{ + .rwlock = .{}, + .modules = .empty, + .ranges = .empty, + .unwind_cache = null, +}; +pub fn deinit(si: *SelfInfo, gpa: Allocator) void { + for (si.modules.items) |*mod| { + unwind: { + const u = &(mod.unwind orelse break :unwind catch break :unwind); + for (u.buf[0..u.len]) |*unwind| unwind.deinit(gpa); + } + loaded: { + const l = &(mod.loaded_elf orelse break :loaded catch break :loaded); + l.file.deinit(gpa); + } + } + + si.modules.deinit(gpa); + si.ranges.deinit(gpa); + if (si.unwind_cache) |cache| gpa.free(cache); +} + +pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) 
Error!std.debug.Symbol { + const module = try si.findModule(gpa, address, .exclusive); + defer si.rwlock.unlock(); + + const vaddr = address - module.load_offset; + + const loaded_elf = try module.getLoadedElf(gpa); + if (loaded_elf.file.dwarf) |*dwarf| { + if (!loaded_elf.scanned_dwarf) { + dwarf.open(gpa, native_endian) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.ReadFailed, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + loaded_elf.scanned_dwarf = true; + } + if (dwarf.getSymbol(gpa, native_endian, vaddr)) |sym| { + return sym; + } else |err| switch (err) { + error.MissingDebugInfo => {}, + + error.InvalidDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + } + } + // When DWARF is unavailable, fall back to searching the symtab. + return loaded_elf.file.searchSymtab(gpa, vaddr) catch |err| switch (err) { + error.NoSymtab, error.NoStrtab => return error.MissingDebugInfo, + error.BadSymtab => return error.InvalidDebugInfo, + error.OutOfMemory => |e| return e, + }; +} +pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { + const module = try si.findModule(gpa, address, .shared); + defer si.rwlock.unlockShared(); + if (module.name.len == 0) return error.MissingDebugInfo; + return module.name; +} + +pub const can_unwind: bool = s: { + // Notably, we are yet to support unwinding on ARM. There, unwinding is not done through + // `.eh_frame`, but instead with the `.ARM.exidx` section, which has a different format. 
+ const archs: []const std.Target.Cpu.Arch = switch (builtin.target.os.tag) { + .linux => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, + .netbsd => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, + .freebsd => &.{ .x86_64, .aarch64, .aarch64_be }, + .openbsd => &.{.x86_64}, + .solaris => &.{ .x86, .x86_64 }, + .illumos => &.{ .x86, .x86_64 }, + else => unreachable, + }; + for (archs) |a| { + if (builtin.target.cpu.arch == a) break :s true; + } + break :s false; +}; +comptime { + if (can_unwind) { + std.debug.assert(Dwarf.supportsUnwinding(&builtin.target)); + } +} +pub const UnwindContext = Dwarf.SelfUnwinder; +pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { + comptime assert(can_unwind); + + { + si.rwlock.lockShared(); + defer si.rwlock.unlockShared(); + if (si.unwind_cache) |cache| { + if (Dwarf.SelfUnwinder.CacheEntry.find(cache, context.pc)) |entry| { + return context.next(gpa, entry); + } + } + } + + const module = try si.findModule(gpa, context.pc, .exclusive); + defer si.rwlock.unlock(); + + if (si.unwind_cache == null) { + si.unwind_cache = try gpa.alloc(Dwarf.SelfUnwinder.CacheEntry, 2048); + @memset(si.unwind_cache.?, .empty); + } + + const unwind_sections = try module.getUnwindSections(gpa); + for (unwind_sections) |*unwind| { + if (context.computeRules(gpa, unwind, module.load_offset, null)) |entry| { + entry.populate(si.unwind_cache.?); + return context.next(gpa, &entry); + } else |err| switch (err) { + error.MissingDebugInfo => continue, + + error.InvalidDebugInfo, + error.UnsupportedDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.EndOfStream, + error.StreamTooLong, + error.ReadFailed, + error.Overflow, + error.InvalidOpcode, + error.InvalidOperation, + error.InvalidOperand, + => return error.InvalidDebugInfo, + + error.UnimplementedUserOpcode, + error.UnsupportedAddrSize, + => return error.UnsupportedDebugInfo, + } + } + return error.MissingDebugInfo; +} + +const Module = struct { + load_offset: usize, 
+ name: []const u8, + build_id: ?[]const u8, + gnu_eh_frame: ?[]const u8, + + /// `null` means unwind information has not yet been loaded. + unwind: ?(Error!UnwindSections), + + /// `null` means the ELF file has not yet been loaded. + loaded_elf: ?(Error!LoadedElf), + + const LoadedElf = struct { + file: std.debug.ElfFile, + scanned_dwarf: bool, + }; + + const UnwindSections = struct { + buf: [2]Dwarf.Unwind, + len: usize, + }; + + const Range = struct { + start: usize, + len: usize, + /// Index into `modules` + module_index: usize, + }; + + /// Assumes we already hold an exclusive lock. + fn getUnwindSections(mod: *Module, gpa: Allocator) Error![]Dwarf.Unwind { + if (mod.unwind == null) mod.unwind = loadUnwindSections(mod, gpa); + const us = &(mod.unwind.? catch |err| return err); + return us.buf[0..us.len]; + } + fn loadUnwindSections(mod: *Module, gpa: Allocator) Error!UnwindSections { + var us: UnwindSections = .{ + .buf = undefined, + .len = 0, + }; + if (mod.gnu_eh_frame) |section_bytes| { + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - mod.load_offset; + const header = Dwarf.Unwind.EhFrameHeader.parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo => |e| return e, + error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, + error.UnsupportedAddrSize => return error.UnsupportedDebugInfo, + }; + us.buf[us.len] = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(@as(usize, @intCast(mod.load_offset + header.eh_frame_vaddr)))); + us.len += 1; + } else { + // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` + // section, but we'll have to load the binary to get at it. + const loaded = try mod.getLoadedElf(gpa); + // If both are present, we can't just pick one -- the info could be split between them. 
+ // `.debug_frame` is likely to be the more complete section, so we'll prioritize that one. + if (loaded.file.debug_frame) |*debug_frame| { + us.buf[us.len] = .initSection(.debug_frame, debug_frame.vaddr, debug_frame.bytes); + us.len += 1; + } + if (loaded.file.eh_frame) |*eh_frame| { + us.buf[us.len] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); + us.len += 1; + } + } + errdefer for (us.buf[0..us.len]) |*u| u.deinit(gpa); + for (us.buf[0..us.len]) |*u| u.prepare(gpa, @sizeOf(usize), native_endian, true, false) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + }; + return us; + } + + /// Assumes we already hold an exclusive lock. + fn getLoadedElf(mod: *Module, gpa: Allocator) Error!*LoadedElf { + if (mod.loaded_elf == null) mod.loaded_elf = loadElf(mod, gpa); + return if (mod.loaded_elf.?) 
|*elf| elf else |err| err; + } + fn loadElf(mod: *Module, gpa: Allocator) Error!LoadedElf { + const load_result = if (mod.name.len > 0) res: { + var file = std.fs.cwd().openFile(mod.name, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res std.debug.ElfFile.load(gpa, file, mod.build_id, &.native(mod.name)); + } else res: { + const path = std.fs.selfExePathAlloc(gpa) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => return error.ReadFailed, + }; + defer gpa.free(path); + var file = std.fs.cwd().openFile(path, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res std.debug.ElfFile.load(gpa, file, mod.build_id, &.native(path)); + }; + + var elf_file = load_result catch |err| switch (err) { + error.OutOfMemory, + error.Unexpected, + => |e| return e, + + error.Overflow, + error.TruncatedElfFile, + error.InvalidCompressedSection, + error.InvalidElfMagic, + error.InvalidElfVersion, + error.InvalidElfClass, + error.InvalidElfEndian, + => return error.InvalidDebugInfo, + + error.SystemResources, + error.MemoryMappingNotSupported, + error.AccessDenied, + error.LockedMemoryLimitExceeded, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + => return error.ReadFailed, + }; + errdefer elf_file.deinit(gpa); + + if (elf_file.endian != native_endian) return error.InvalidDebugInfo; + if (elf_file.is_64 != (@sizeOf(usize) == 8)) return error.InvalidDebugInfo; + + return .{ + .file = elf_file, + .scanned_dwarf = false, + }; + } +}; + +fn findModule(si: *SelfInfo, gpa: Allocator, address: usize, lock: enum { shared, exclusive }) Error!*Module { + // With the requested lock, scan the module ranges looking for `address`. 
+ switch (lock) { + .shared => si.rwlock.lockShared(), + .exclusive => si.rwlock.lock(), + } + for (si.ranges.items) |*range| { + if (address >= range.start and address < range.start + range.len) { + return &si.modules.items[range.module_index]; + } + } + // The address wasn't in a known range. We will rebuild the module/range lists, since it's possible + // a new module was loaded. Upgrade to an exclusive lock if necessary. + switch (lock) { + .shared => { + si.rwlock.unlockShared(); + si.rwlock.lock(); + }, + .exclusive => {}, + } + // Rebuild module list with the exclusive lock. + { + errdefer si.rwlock.unlock(); + for (si.modules.items) |*mod| { + unwind: { + const u = &(mod.unwind orelse break :unwind catch break :unwind); + for (u.buf[0..u.len]) |*unwind| unwind.deinit(gpa); + } + loaded: { + const l = &(mod.loaded_elf orelse break :loaded catch break :loaded); + l.file.deinit(gpa); + } + } + si.modules.clearRetainingCapacity(); + si.ranges.clearRetainingCapacity(); + var ctx: DlIterContext = .{ .si = si, .gpa = gpa }; + try std.posix.dl_iterate_phdr(&ctx, error{OutOfMemory}, DlIterContext.callback); + } + // Downgrade the lock back to shared if necessary. + switch (lock) { + .shared => { + si.rwlock.unlock(); + si.rwlock.lockShared(); + }, + .exclusive => {}, + } + // Scan the newly rebuilt module ranges. + for (si.ranges.items) |*range| { + if (address >= range.start and address < range.start + range.len) { + return &si.modules.items[range.module_index]; + } + } + // Still nothing; unlock and error. 
+ switch (lock) { + .shared => si.rwlock.unlockShared(), + .exclusive => si.rwlock.unlock(), + } + return error.MissingDebugInfo; +} +const DlIterContext = struct { + si: *SelfInfo, + gpa: Allocator, + + fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void { + _ = size; + + var build_id: ?[]const u8 = null; + var gnu_eh_frame: ?[]const u8 = null; + + // Populate `build_id` and `gnu_eh_frame` + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + std.elf.PT_NOTE => { + // Look for .note.gnu.build-id + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; + if (note_type != std.elf.NT_GNU_BUILD_ID) continue; + if (!std.mem.eql(u8, name, "GNU\x00")) continue; + const desc = r.take(desc_size) catch continue; + build_id = desc; + }, + std.elf.PT_GNU_EH_FRAME => { + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; + }, + else => {}, + } + } + + const gpa = context.gpa; + const si = context.si; + + const module_index = si.modules.items.len; + try si.modules.append(gpa, .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = std.mem.sliceTo(info.name, 0) orelse "", + .build_id = build_id, + .gnu_eh_frame = gnu_eh_frame, + .unwind = null, + .loaded_elf = null, + }); + + for (info.phdr[0..info.phnum]) |phdr| { + if (phdr.p_type != std.elf.PT_LOAD) continue; + try context.si.ranges.append(gpa, .{ + // Overflowing addition handles VSDOs having p_vaddr = 0xffffffffff700000 + .start = info.addr +% phdr.p_vaddr, + .len = phdr.p_memsz, + .module_index = module_index, + }); + } + } +}; + +const std 
= @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Error = std.debug.SelfInfoError; +const assert = std.debug.assert; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); + +const SelfInfo = @This(); diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig deleted file mode 100644 index 7ce24e2e2a60..000000000000 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ /dev/null @@ -1,349 +0,0 @@ -load_offset: usize, -name: []const u8, -build_id: ?[]const u8, -gnu_eh_frame: ?[]const u8, - -pub const LookupCache = struct { - rwlock: std.Thread.RwLock, - ranges: std.ArrayList(Range), - const Range = struct { - start: usize, - len: usize, - mod: ElfModule, - }; - pub const init: LookupCache = .{ - .rwlock = .{}, - .ranges = .empty, - }; - pub fn deinit(lc: *LookupCache, gpa: Allocator) void { - lc.ranges.deinit(gpa); - } -}; - -pub const DebugInfo = struct { - /// Held while checking and/or populating `loaded_elf`/`scanned_dwarf`/`unwind`. - /// Once data is populated and a pointer to the field has been gotten, the lock - /// is released; i.e. it is not held while *using* the loaded debug info. 
- mutex: std.Thread.Mutex, - - loaded_elf: ?ElfFile, - scanned_dwarf: bool, - unwind: if (supports_unwinding) [2]?Dwarf.Unwind else void, - unwind_cache: if (supports_unwinding) *UnwindContext.Cache else void, - - pub const init: DebugInfo = .{ - .mutex = .{}, - .loaded_elf = null, - .scanned_dwarf = false, - .unwind = if (supports_unwinding) @splat(null), - .unwind_cache = undefined, - }; - pub fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa); - if (supports_unwinding) { - if (di.unwind[0] != null) gpa.destroy(di.unwind_cache); - for (&di.unwind) |*opt_unwind| { - const unwind = &(opt_unwind.* orelse continue); - unwind.deinit(gpa); - } - } - } -}; - -pub fn key(m: ElfModule) usize { - return m.load_offset; -} -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!ElfModule { - if (lookupInCache(cache, address)) |m| return m; - - { - // Check a new module hasn't been loaded - cache.rwlock.lock(); - defer cache.rwlock.unlock(); - const DlIterContext = struct { - ranges: *std.ArrayList(LookupCache.Range), - gpa: Allocator, - - fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void { - _ = size; - - var mod: ElfModule = .{ - .load_offset = info.addr, - // Android libc uses NULL instead of "" to mark the main program - .name = mem.sliceTo(info.name, 0) orelse "", - .build_id = null, - .gnu_eh_frame = null, - }; - - // Populate `build_id` and `gnu_eh_frame` - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); - const name_size = r.takeInt(u32, native_endian) catch continue; - const desc_size = r.takeInt(u32, native_endian) catch continue; - const note_type = r.takeInt(u32, native_endian) catch continue; - const name = r.take(name_size) catch continue; - if (note_type 
!= elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, name, "GNU\x00")) continue; - const desc = r.take(desc_size) catch continue; - mod.build_id = desc; - }, - elf.PT_GNU_EH_FRAME => { - const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); - mod.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; - }, - else => {}, - } - } - - // Now that `mod` is populated, create the ranges - for (info.phdr[0..info.phnum]) |phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - try context.ranges.append(context.gpa, .{ - // Overflowing addition handles VSDOs having p_vaddr = 0xffffffffff700000 - .start = info.addr +% phdr.p_vaddr, - .len = phdr.p_memsz, - .mod = mod, - }); - } - } - }; - cache.ranges.clearRetainingCapacity(); - var ctx: DlIterContext = .{ - .ranges = &cache.ranges, - .gpa = gpa, - }; - try std.posix.dl_iterate_phdr(&ctx, error{OutOfMemory}, DlIterContext.callback); - } - - if (lookupInCache(cache, address)) |m| return m; - return error.MissingDebugInfo; -} -fn lookupInCache(cache: *LookupCache, address: usize) ?ElfModule { - cache.rwlock.lockShared(); - defer cache.rwlock.unlockShared(); - for (cache.ranges.items) |*range| { - if (address >= range.start and address < range.start + range.len) { - return range.mod; - } - } - return null; -} -fn loadElf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { - std.debug.assert(di.loaded_elf == null); - std.debug.assert(!di.scanned_dwarf); - - const load_result = if (module.name.len > 0) res: { - var file = std.fs.cwd().openFile(module.name, .{}) catch return error.MissingDebugInfo; - defer file.close(); - break :res ElfFile.load(gpa, file, module.build_id, &.native(module.name)); - } else res: { - const path = std.fs.selfExePathAlloc(gpa) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - else => return error.ReadFailed, - }; - defer gpa.free(path); - var file = std.fs.cwd().openFile(path, .{}) catch return error.MissingDebugInfo; - defer file.close(); - break :res 
ElfFile.load(gpa, file, module.build_id, &.native(path)); - }; - di.loaded_elf = load_result catch |err| switch (err) { - error.OutOfMemory, - error.Unexpected, - => |e| return e, - - error.Overflow, - error.TruncatedElfFile, - error.InvalidCompressedSection, - error.InvalidElfMagic, - error.InvalidElfVersion, - error.InvalidElfClass, - error.InvalidElfEndian, - => return error.InvalidDebugInfo, - - error.SystemResources, - error.MemoryMappingNotSupported, - error.AccessDenied, - error.LockedMemoryLimitExceeded, - error.ProcessFdQuotaExceeded, - error.SystemFdQuotaExceeded, - => return error.ReadFailed, - }; - - const matches_native = - di.loaded_elf.?.endian == native_endian and - di.loaded_elf.?.is_64 == (@sizeOf(usize) == 8); - - if (!matches_native) { - di.loaded_elf.?.deinit(gpa); - di.loaded_elf = null; - return error.InvalidDebugInfo; - } -} -pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { - const vaddr = address - module.load_offset; - { - di.mutex.lock(); - defer di.mutex.unlock(); - if (di.loaded_elf == null) try module.loadElf(gpa, di); - const loaded_elf = &di.loaded_elf.?; - // We need the lock if using DWARF, as we might scan the DWARF or build a line number table. 
- if (loaded_elf.dwarf) |*dwarf| { - if (!di.scanned_dwarf) { - dwarf.open(gpa, native_endian) catch |err| switch (err) { - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.OutOfMemory, - => |e| return e, - error.EndOfStream, - error.Overflow, - error.ReadFailed, - error.StreamTooLong, - => return error.InvalidDebugInfo, - }; - di.scanned_dwarf = true; - } - return dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.OutOfMemory, - => |e| return e, - error.ReadFailed, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - => return error.InvalidDebugInfo, - }; - } - // Otherwise, we're just going to scan the symtab, which we don't need the lock for; fall out of this block. - } - // When there's no DWARF available, fall back to searching the symtab. - return di.loaded_elf.?.searchSymtab(gpa, vaddr) catch |err| switch (err) { - error.NoSymtab, error.NoStrtab => return error.MissingDebugInfo, - error.BadSymtab => return error.InvalidDebugInfo, - error.OutOfMemory => |e| return e, - }; -} -fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void { - unwind.prepare(gpa, @sizeOf(usize), native_endian, true, false) catch |err| switch (err) { - error.ReadFailed => unreachable, // it's all fixed buffers - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.OutOfMemory, - => |e| return e, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - error.InvalidOperand, - error.InvalidOpcode, - error.InvalidOperation, - => return error.InvalidDebugInfo, - error.UnsupportedAddrSize, - error.UnsupportedDwarfVersion, - error.UnimplementedUserOpcode, - => return error.UnsupportedDebugInfo, - }; -} -fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { - var buf: [2]Dwarf.Unwind = undefined; - const unwinds: []Dwarf.Unwind = if (module.gnu_eh_frame) |section_bytes| unwinds: { - const section_vaddr: u64 = 
@intFromPtr(section_bytes.ptr) - module.load_offset; - const header = Dwarf.Unwind.EhFrameHeader.parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian) catch |err| switch (err) { - error.ReadFailed => unreachable, // it's all fixed buffers - error.InvalidDebugInfo => |e| return e, - error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, - error.UnsupportedAddrSize => return error.UnsupportedDebugInfo, - }; - buf[0] = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(@as(usize, @intCast(module.load_offset + header.eh_frame_vaddr)))); - break :unwinds buf[0..1]; - } else unwinds: { - // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` - // section, but we'll have to load the binary to get at it. - if (di.loaded_elf == null) try module.loadElf(gpa, di); - const opt_debug_frame = &di.loaded_elf.?.debug_frame; - const opt_eh_frame = &di.loaded_elf.?.eh_frame; - var i: usize = 0; - // If both are present, we can't just pick one -- the info could be split between them. - // `.debug_frame` is likely to be the more complete section, so we'll prioritize that one. 
- if (opt_debug_frame.*) |*debug_frame| { - buf[i] = .initSection(.debug_frame, debug_frame.vaddr, debug_frame.bytes); - i += 1; - } - if (opt_eh_frame.*) |*eh_frame| { - buf[i] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); - i += 1; - } - if (i == 0) return error.MissingDebugInfo; - break :unwinds buf[0..i]; - }; - errdefer for (unwinds) |*u| u.deinit(gpa); - for (unwinds) |*u| try prepareUnwindLookup(u, gpa); - - const unwind_cache = try gpa.create(UnwindContext.Cache); - errdefer gpa.destroy(unwind_cache); - unwind_cache.init(); - - switch (unwinds.len) { - 0 => unreachable, - 1 => di.unwind = .{ unwinds[0], null }, - 2 => di.unwind = .{ unwinds[0], unwinds[1] }, - else => unreachable, - } - di.unwind_cache = unwind_cache; -} -pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { - const unwinds: *const [2]?Dwarf.Unwind = u: { - di.mutex.lock(); - defer di.mutex.unlock(); - if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di); - std.debug.assert(di.unwind[0] != null); - break :u &di.unwind; - }; - for (unwinds) |*opt_unwind| { - const unwind = &(opt_unwind.* orelse break); - return context.unwindFrame(di.unwind_cache, gpa, unwind, module.load_offset, null) catch |err| switch (err) { - error.MissingDebugInfo => continue, // try the next one - else => |e| return e, - }; - } - return error.MissingDebugInfo; -} -pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; -pub const supports_unwinding: bool = s: { - // Notably, we are yet to support unwinding on ARM. There, unwinding is not done through - // `.eh_frame`, but instead with the `.ARM.exidx` section, which has a different format. 
- const archs: []const std.Target.Cpu.Arch = switch (builtin.target.os.tag) { - .linux => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, - .netbsd => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, - .freebsd => &.{ .x86_64, .aarch64, .aarch64_be }, - .openbsd => &.{.x86_64}, - .solaris => &.{ .x86, .x86_64 }, - .illumos => &.{ .x86, .x86_64 }, - else => unreachable, - }; - for (archs) |a| { - if (builtin.target.cpu.arch == a) break :s true; - } - break :s false; -}; -comptime { - if (supports_unwinding) { - std.debug.assert(Dwarf.supportsUnwinding(&builtin.target)); - } -} - -const ElfModule = @This(); - -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; -const Dwarf = std.debug.Dwarf; -const ElfFile = std.debug.ElfFile; -const elf = std.elf; -const mem = std.mem; -const Error = std.debug.SelfInfo.Error; - -const builtin = @import("builtin"); -const native_endian = builtin.target.cpu.arch.endian(); diff --git a/lib/std/debug/SelfInfo/Windows.zig b/lib/std/debug/SelfInfo/Windows.zig new file mode 100644 index 000000000000..ffa99a27f24f --- /dev/null +++ b/lib/std/debug/SelfInfo/Windows.zig @@ -0,0 +1,559 @@ +mutex: std.Thread.Mutex, +modules: std.ArrayListUnmanaged(Module), +module_name_arena: std.heap.ArenaAllocator.State, + +pub const init: SelfInfo = .{ + .mutex = .{}, + .modules = .empty, + .module_name_arena = .{}, +}; +pub fn deinit(si: *SelfInfo, gpa: Allocator) void { + for (si.modules.items) |*module| { + di: { + const di = &(module.di orelse break :di catch break :di); + di.deinit(gpa); + } + } + si.modules.deinit(gpa); + + var module_name_arena = si.module_name_arena.promote(gpa); + module_name_arena.deinit(); +} + +pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol { + si.mutex.lock(); + defer si.mutex.unlock(); + const module = try si.findModule(gpa, address); + const di = try module.getDebugInfo(gpa); + return di.getSymbol(gpa, address - module.base_address); +} +pub fn getModuleName(si: 
*SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { + si.mutex.lock(); + defer si.mutex.unlock(); + const module = try si.findModule(gpa, address); + return module.name; +} + +pub const can_unwind: bool = switch (builtin.cpu.arch) { + else => true, + // On x86, `RtlVirtualUnwind` does not exist. We could in theory use `RtlCaptureStackBackTrace` + // instead, but on x86, it turns out that function is just... doing FP unwinding with esp! It's + // hard to find implementation details to confirm that, but the most authoritative source I have + // is an entry in the LLVM mailing list from 2020/08/16 which contains this quote: + // + // > x86 doesn't have what most architectures would consider an "unwinder" in the sense of + // > restoring registers; there is simply a linked list of frames that participate in SEH and + // > that desire to be called for a dynamic unwind operation, so RtlCaptureStackBackTrace + // > assumes that EBP-based frames are in use and walks an EBP-based frame chain on x86 - not + // > all x86 code is written with EBP-based frames so while even though we generally build the + // > OS that way, you might always run the risk of encountering external code that uses EBP as a + // > general purpose register for which such an unwind attempt for a stack trace would fail. + // + // Regardless, it's easy to effectively confirm this hypothesis just by compiling some code with + // `-fomit-frame-pointer -OReleaseFast` and observing that `RtlCaptureStackBackTrace` returns an + // empty trace when it's called in such an application. Note that without `-OReleaseFast` or + // similar, LLVM seems reluctant to ever clobber ebp, so you'll get a trace returned which just + // contains all of the kernel32/ntdll frames but none of your own. Don't be deceived---this is + // just coincidental! + // + // Anyway, the point is, the only stack walking primitive on x86-windows is FP unwinding. 
We + // *could* ask Microsoft to do that for us with `RtlCaptureStackBackTrace`... but better to just + // use our existing FP unwinder in `std.debug`! + .x86 => false, +}; +pub const UnwindContext = struct { + pc: usize, + cur: windows.CONTEXT, + history_table: windows.UNWIND_HISTORY_TABLE, + pub fn init(ctx: *const std.debug.cpu_context.Native) UnwindContext { + return .{ + .pc = @returnAddress(), + .cur = switch (builtin.cpu.arch) { + .x86_64 => std.mem.zeroInit(windows.CONTEXT, .{ + .Rax = ctx.gprs.get(.rax), + .Rcx = ctx.gprs.get(.rcx), + .Rdx = ctx.gprs.get(.rdx), + .Rbx = ctx.gprs.get(.rbx), + .Rsp = ctx.gprs.get(.rsp), + .Rbp = ctx.gprs.get(.rbp), + .Rsi = ctx.gprs.get(.rsi), + .Rdi = ctx.gprs.get(.rdi), + .R8 = ctx.gprs.get(.r8), + .R9 = ctx.gprs.get(.r9), + .R10 = ctx.gprs.get(.r10), + .R11 = ctx.gprs.get(.r11), + .R12 = ctx.gprs.get(.r12), + .R13 = ctx.gprs.get(.r13), + .R14 = ctx.gprs.get(.r14), + .R15 = ctx.gprs.get(.r15), + .Rip = ctx.gprs.get(.rip), + }), + .aarch64, .aarch64_be => .{ + .ContextFlags = 0, + .Cpsr = 0, + .DUMMYUNIONNAME = .{ .X = ctx.x }, + .Sp = ctx.sp, + .Pc = ctx.pc, + .V = @splat(.{ .B = @splat(0) }), + .Fpcr = 0, + .Fpsr = 0, + .Bcr = @splat(0), + .Bvr = @splat(0), + .Wcr = @splat(0), + .Wvr = @splat(0), + }, + .thumb => .{ + .ContextFlags = 0, + .R0 = ctx.r[0], + .R1 = ctx.r[1], + .R2 = ctx.r[2], + .R3 = ctx.r[3], + .R4 = ctx.r[4], + .R5 = ctx.r[5], + .R6 = ctx.r[6], + .R7 = ctx.r[7], + .R8 = ctx.r[8], + .R9 = ctx.r[9], + .R10 = ctx.r[10], + .R11 = ctx.r[11], + .R12 = ctx.r[12], + .Sp = ctx.r[13], + .Lr = ctx.r[14], + .Pc = ctx.r[15], + .Cpsr = 0, + .Fpcsr = 0, + .Padding = 0, + .DUMMYUNIONNAME = .{ .S = @splat(0) }, + .Bvr = @splat(0), + .Bcr = @splat(0), + .Wvr = @splat(0), + .Wcr = @splat(0), + .Padding2 = @splat(0), + }, + else => comptime unreachable, + }, + .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), + }; + } + pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void { + _ = ctx; + _ = gpa; + } + pub fn 
getFp(ctx: *UnwindContext) usize { + return ctx.cur.getRegs().bp; + } +}; +pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { + _ = si; + _ = gpa; + + const current_regs = context.cur.getRegs(); + var image_base: windows.DWORD64 = undefined; + if (windows.ntdll.RtlLookupFunctionEntry(current_regs.ip, &image_base, &context.history_table)) |runtime_function| { + var handler_data: ?*anyopaque = null; + var establisher_frame: u64 = undefined; + _ = windows.ntdll.RtlVirtualUnwind( + windows.UNW_FLAG_NHANDLER, + image_base, + current_regs.ip, + runtime_function, + &context.cur, + &handler_data, + &establisher_frame, + null, + ); + } else { + // leaf function + context.cur.setIp(@as(*const usize, @ptrFromInt(current_regs.sp)).*); + context.cur.setSp(current_regs.sp + @sizeOf(usize)); + } + + const next_regs = context.cur.getRegs(); + const tib = &windows.teb().NtTib; + if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { + context.pc = 0; + return 0; + } + // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this + // function's last instruction making `next_regs.ip` one byte past its end. 
+ context.pc = next_regs.ip -| 1; + return next_regs.ip; +} + +const Module = struct { + base_address: usize, + size: u32, + name: []const u8, + handle: windows.HMODULE, + + di: ?(Error!DebugInfo), + + const DebugInfo = struct { + arena: std.heap.ArenaAllocator.State, + coff_image_base: u64, + mapped_file: ?MappedFile, + dwarf: ?Dwarf, + pdb: ?Pdb, + coff_section_headers: []coff.SectionHeader, + + const MappedFile = struct { + file: fs.File, + section_handle: windows.HANDLE, + section_view: []const u8, + fn deinit(mf: *const MappedFile) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mf.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mf.section_handle); + mf.file.close(); + } + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.dwarf) |*dwarf| dwarf.deinit(gpa); + if (di.pdb) |*pdb| { + pdb.file_reader.file.close(); + pdb.deinit(); + } + if (di.mapped_file) |*mf| mf.deinit(); + + var arena = di.arena.promote(gpa); + arena.deinit(); + } + + fn getSymbol(di: *DebugInfo, gpa: Allocator, vaddr: usize) Error!std.debug.Symbol { + pdb: { + const pdb = &(di.pdb orelse break :pdb); + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (pdb.sect_contribs) |sect_contrib| { + if (sect_contrib.section > di.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. 
+ coff_section = &di.coff_section_headers[sect_contrib.section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.offset; + const vaddr_end = vaddr_start + sect_contrib.size; + if (vaddr >= vaddr_start and vaddr < vaddr_end) { + break sect_contrib.module_index; + } + } else { + // we have no information to add to the address + break :pdb; + }; + const module = pdb.getModule(mod_index) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.ReadFailed, + error.EndOfStream, + => return error.InvalidDebugInfo, + } orelse { + return error.InvalidDebugInfo; // bad module index + }; + return .{ + .name = pdb.getSymbolName(module, vaddr - coff_section.virtual_address), + .compile_unit_name = fs.path.basename(module.obj_file_name), + .source_location = pdb.getLineNumberInfo(module, vaddr - coff_section.virtual_address) catch null, + }; + } + dwarf: { + const dwarf = &(di.dwarf orelse break :dwarf); + const dwarf_address = vaddr + di.coff_image_base; + return dwarf.getSymbol(gpa, native_endian, dwarf_address) catch |err| switch (err) { + error.MissingDebugInfo => break :dwarf, + + error.InvalidDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + } + return error.MissingDebugInfo; + } + }; + + fn getDebugInfo(module: *Module, gpa: Allocator) Error!*DebugInfo { + if (module.di == null) module.di = loadDebugInfo(module, gpa); + return if (module.di.?) 
|*di| di else |err| err; + } + fn loadDebugInfo(module: *const Module, gpa: Allocator) Error!DebugInfo { + const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); + const mapped = mapped_ptr[0..module.size]; + var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; + + var arena_instance: std.heap.ArenaAllocator = .init(gpa); + errdefer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. + const mapped_file: ?DebugInfo.MappedFile = mapped: { + if (!coff_obj.strtabRequired()) break :mapped null; + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + name_buffer[4..], + windows.PATH_MAX_WIDE, + ); + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. 
len + 4 :0], .{}) catch |err| switch (err) { + error.Unexpected => |e| return e, + error.FileNotFound => return error.MissingDebugInfo, + + error.FileTooBig, + error.IsDir, + error.NotDir, + error.SymLinkLoop, + error.NameTooLong, + error.InvalidUtf8, + error.InvalidWtf8, + error.BadPathName, + => return error.InvalidDebugInfo, + + error.SystemResources, + error.WouldBlock, + error.AccessDenied, + error.ProcessNotFound, + error.PermissionDenied, + error.NoSpaceLeft, + error.DeviceBusy, + error.NoDevice, + error.SharingViolation, + error.PathAlreadyExists, + error.PipeBusy, + error.NetworkNotFound, + error.AntivirusInterference, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.FileLocksNotSupported, + error.FileBusy, + => return error.ReadFailed, + }; + errdefer coff_file.close(); + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + §ion_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + var coff_len: usize = 0; + var section_view_ptr: ?[*]const u8 = null; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(§ion_view_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr.?)) == .SUCCESS); + const section_view = section_view_ptr.?[0..coff_len]; + coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; + break :mapped .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + }; + errdefer if (mapped_file) |*mf| mf.deinit(); + + const coff_image_base = coff_obj.getImageBase(); + + var opt_dwarf: ?Dwarf = dwarf: { + if (coff_obj.getSectionByName(".debug_info") == null) break :dwarf null; + + var sections: Dwarf.SectionArray = undefined; + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| .{ + .data = try coff_obj.getSectionDataAlloc(section_header, arena), + .owned = false, + } else null; + } + break :dwarf .{ .sections = sections }; + }; + errdefer if (opt_dwarf) |*dwarf| dwarf.deinit(gpa); + + if (opt_dwarf) |*dwarf| { + dwarf.open(gpa, native_endian) catch |err| switch (err) { + error.Overflow, + error.EndOfStream, + error.StreamTooLong, + error.ReadFailed, + => return error.InvalidDebugInfo, + + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + }; + } + + var opt_pdb: ?Pdb = pdb: { + const path = coff_obj.getPdbPath() catch { + return error.InvalidDebugInfo; + } orelse { + break :pdb null; + }; + const pdb_file_open_result = if (fs.path.isAbsolute(path)) res: { + break :res std.fs.cwd().openFile(path, .{}); + } else res: { + const self_dir = fs.selfExeDirPathAlloc(gpa) catch |err| switch (err) { + error.OutOfMemory, error.Unexpected => |e| return e, + else => return error.ReadFailed, + }; + defer gpa.free(self_dir); + const abs_path = try fs.path.join(gpa, &.{ self_dir, path }); + defer gpa.free(abs_path); + break :res std.fs.cwd().openFile(abs_path, .{}); + }; + const pdb_file = pdb_file_open_result catch |err| switch (err) { + error.FileNotFound, error.IsDir => break :pdb null, + else => return error.ReadFailed, + }; + errdefer pdb_file.close(); + + const pdb_reader = try arena.create(std.fs.File.Reader); + pdb_reader.* = pdb_file.reader(try arena.alloc(u8, 4096)); + + var pdb = Pdb.init(gpa, pdb_reader) catch |err| switch (err) { + error.OutOfMemory, error.ReadFailed, error.Unexpected => |e| return e, + else => return error.InvalidDebugInfo, + }; + errdefer pdb.deinit(); + pdb.parseInfoStream() catch |err| switch (err) { + error.UnknownPDBVersion => return error.UnsupportedDebugInfo, + error.EndOfStream => return error.InvalidDebugInfo, + + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + error.ReadFailed, + => |e| return e, + }; + 
pdb.parseDbiStream() catch |err| switch (err) { + error.UnknownPDBVersion => return error.UnsupportedDebugInfo, + + error.EndOfStream, + error.EOF, + error.StreamTooLong, + error.WriteFailed, + => return error.InvalidDebugInfo, + + error.InvalidDebugInfo, + error.OutOfMemory, + error.ReadFailed, + => |e| return e, + }; + + if (!std.mem.eql(u8, &coff_obj.guid, &pdb.guid) or coff_obj.age != pdb.age) + return error.InvalidDebugInfo; + + break :pdb pdb; + }; + errdefer if (opt_pdb) |*pdb| { + pdb.file_reader.file.close(); + pdb.deinit(); + }; + + const coff_section_headers: []coff.SectionHeader = if (opt_pdb != null) csh: { + break :csh try coff_obj.getSectionHeadersAlloc(arena); + } else &.{}; + + return .{ + .arena = arena_instance.state, + .coff_image_base = coff_image_base, + .mapped_file = mapped_file, + .dwarf = opt_dwarf, + .pdb = opt_pdb, + .coff_section_headers = coff_section_headers, + }; + } +}; + +/// Assumes we already hold `si.mutex`. +fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) error{ MissingDebugInfo, OutOfMemory, Unexpected }!*Module { + for (si.modules.items) |*mod| { + if (address >= mod.base_address and address < mod.base_address + mod.size) { + return mod; + } + } + + // A new module might have been loaded; rebuild the list. 
+ { + for (si.modules.items) |*mod| { + const di = &(mod.di orelse continue catch continue); + di.deinit(gpa); + } + si.modules.clearRetainingCapacity(); + + var module_name_arena = si.module_name_arena.promote(gpa); + defer si.module_name_arena = module_name_arena.state; + _ = module_name_arena.reset(.retain_capacity); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + return windows.unexpectedError(windows.GetLastError()); + } + defer windows.CloseHandle(handle); + var entry: windows.MODULEENTRY32 = undefined; + entry.dwSize = @sizeOf(windows.MODULEENTRY32); + var result = windows.kernel32.Module32First(handle, &entry); + while (result != 0) : (result = windows.kernel32.Module32Next(handle, &entry)) { + try si.modules.append(gpa, .{ + .base_address = @intFromPtr(entry.modBaseAddr), + .size = entry.modBaseSize, + .name = try module_name_arena.allocator().dupe( + u8, + std.mem.sliceTo(&entry.szModule, 0), + ), + .handle = entry.hModule, + .di = null, + }); + } + } + + for (si.modules.items) |*mod| { + if (address >= mod.base_address and address < mod.base_address + mod.size) { + return mod; + } + } + + return error.MissingDebugInfo; +} + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; +const Error = std.debug.SelfInfoError; +const assert = std.debug.assert; +const coff = std.coff; +const fs = std.fs; +const windows = std.os.windows; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); + +const SelfInfo = @This(); diff --git a/lib/std/debug/SelfInfo/WindowsModule.zig b/lib/std/debug/SelfInfo/WindowsModule.zig deleted file mode 100644 index 1f4139583e99..000000000000 --- a/lib/std/debug/SelfInfo/WindowsModule.zig +++ /dev/null @@ -1,442 +0,0 @@ -base_address: usize, -size: usize, -name: []const u8, -handle: windows.HMODULE, -pub 
fn key(m: WindowsModule) usize { - return m.base_address; -} -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) std.debug.SelfInfo.Error!WindowsModule { - if (lookupInCache(cache, address)) |m| return m; - { - // Check a new module hasn't been loaded - cache.rwlock.lock(); - defer cache.rwlock.unlock(); - cache.modules.clearRetainingCapacity(); - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - return windows.unexpectedError(windows.GetLastError()); - } - defer windows.CloseHandle(handle); - var entry: windows.MODULEENTRY32 = undefined; - entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &entry) != 0) { - try cache.modules.append(gpa, entry); - while (windows.kernel32.Module32Next(handle, &entry) != 0) { - try cache.modules.append(gpa, entry); - } - } - } - if (lookupInCache(cache, address)) |m| return m; - return error.MissingDebugInfo; -} -pub fn getSymbolAtAddress(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, address: usize) std.debug.SelfInfo.Error!std.debug.Symbol { - // The `Pdb` API doesn't really allow us *any* thread-safe access, and the `Dwarf` API isn't - // great for it either; just lock the whole thing. 
- di.mutex.lock(); - defer di.mutex.unlock(); - - if (!di.loaded) module.loadDebugInfo(gpa, di) catch |err| switch (err) { - error.OutOfMemory, error.InvalidDebugInfo, error.MissingDebugInfo, error.Unexpected => |e| return e, - error.FileNotFound => return error.MissingDebugInfo, - error.UnknownPDBVersion => return error.UnsupportedDebugInfo, - else => return error.ReadFailed, - }; - - // Translate the runtime address into a virtual address into the module - const vaddr = address - module.base_address; - - if (di.pdb != null) { - if (di.getSymbolFromPdb(vaddr) catch return error.InvalidDebugInfo) |symbol| return symbol; - } - - if (di.dwarf) |*dwarf| { - const dwarf_address = vaddr + di.coff_image_base; - return dwarf.getSymbol(gpa, native_endian, dwarf_address) catch return error.InvalidDebugInfo; - } - - return error.MissingDebugInfo; -} -fn lookupInCache(cache: *LookupCache, address: usize) ?WindowsModule { - cache.rwlock.lockShared(); - defer cache.rwlock.unlockShared(); - for (cache.modules.items) |*entry| { - const base_address = @intFromPtr(entry.modBaseAddr); - if (address >= base_address and address < base_address + entry.modBaseSize) { - return .{ - .base_address = base_address, - .size = entry.modBaseSize, - .name = std.mem.sliceTo(&entry.szModule, 0), - .handle = entry.hModule, - }; - } - } - return null; -} -fn loadDebugInfo(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo) !void { - const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); - const mapped = mapped_ptr[0..module.size]; - var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. 
- if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - name_buffer[4..], - windows.PATH_MAX_WIDE, - ); - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => |e| return e, - }; - errdefer coff_file.close(); - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - §ion_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
- windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - var coff_len: usize = 0; - var section_view_ptr: ?[*]const u8 = null; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(§ion_view_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr.?)) == .SUCCESS); - const section_view = section_view_ptr.?[0..coff_len]; - coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; - di.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - di.coff_image_base = coff_obj.getImageBase(); - - if (coff_obj.getSectionByName(".debug_info")) |_| { - di.dwarf = .{}; - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, gpa), - .owned = true, - }; - } else null; - } - - try di.dwarf.?.open(gpa, native_endian); - } - - if (coff_obj.getPdbPath() catch return error.InvalidDebugInfo) |raw_path| pdb: { - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(gpa); - defer gpa.free(self_dir); - break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) gpa.free(path); - - const pdb_file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound, error.IsDir => break :pdb, - else => |e| return e, - }; - errdefer pdb_file.close(); - - const pdb_reader = try gpa.create(std.fs.File.Reader); - errdefer gpa.destroy(pdb_reader); - - pdb_reader.* = pdb_file.reader(try gpa.alloc(u8, 4096)); - errdefer gpa.free(pdb_reader.interface.buffer); - - var pdb: Pdb = try .init(gpa, pdb_reader); - errdefer pdb.deinit(); - try pdb.parseInfoStream(); - try pdb.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &pdb.guid) or coff_obj.age != pdb.age) - return error.InvalidDebugInfo; - - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); - - di.pdb = pdb; - } - - di.loaded = true; -} -pub const LookupCache = struct { - rwlock: std.Thread.RwLock, - modules: std.ArrayListUnmanaged(windows.MODULEENTRY32), - pub const init: LookupCache = .{ - .rwlock = .{}, - .modules = .empty, - }; - pub fn deinit(lc: *LookupCache, gpa: Allocator) void { - lc.modules.deinit(gpa); - } -}; -pub const DebugInfo = struct { - mutex: std.Thread.Mutex, - - loaded: bool, - - coff_image_base: u64, - mapped_file: ?struct { - file: fs.File, - section_handle: windows.HANDLE, - section_view: []const u8, - }, - - dwarf: ?Dwarf, - - pdb: ?Pdb, - /// Populated iff `pdb != null`; otherwise `&.{}`. 
- coff_section_headers: []coff.SectionHeader, - - pub const init: DebugInfo = .{ - .mutex = .{}, - .loaded = false, - .coff_image_base = undefined, - .mapped_file = null, - .dwarf = null, - .pdb = null, - .coff_section_headers = &.{}, - }; - - pub fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (!di.loaded) return; - if (di.dwarf) |*dwarf| dwarf.deinit(gpa); - if (di.pdb) |*pdb| { - pdb.file_reader.file.close(); - gpa.free(pdb.file_reader.interface.buffer); - gpa.destroy(pdb.file_reader); - pdb.deinit(); - } - gpa.free(di.coff_section_headers); - if (di.mapped_file) |mapped| { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); - windows.CloseHandle(mapped.section_handle); - mapped.file.close(); - } - } - - fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.section > di.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. 
- coff_section = &di.coff_section_headers[sect_contrib.section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.offset; - const vaddr_end = vaddr_start + sect_contrib.size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.module_index; - } - } else { - // we have no information to add to the address - return null; - }; - - const module = try di.pdb.?.getModule(mod_index) orelse return error.InvalidDebugInfo; - - return .{ - .name = di.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ), - .compile_unit_name = fs.path.basename(module.obj_file_name), - .source_location = try di.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ), - }; - } -}; - -pub const supports_unwinding: bool = switch (builtin.cpu.arch) { - else => true, - // On x86, `RtlVirtualUnwind` does not exist. We could in theory use `RtlCaptureStackBackTrace` - // instead, but on x86, it turns out that function is just... doing FP unwinding with esp! It's - // hard to find implementation details to confirm that, but the most authoritative source I have - // is an entry in the LLVM mailing list from 2020/08/16 which contains this quote: - // - // > x86 doesn't have what most architectures would consider an "unwinder" in the sense of - // > restoring registers; there is simply a linked list of frames that participate in SEH and - // > that desire to be called for a dynamic unwind operation, so RtlCaptureStackBackTrace - // > assumes that EBP-based frames are in use and walks an EBP-based frame chain on x86 - not - // > all x86 code is written with EBP-based frames so while even though we generally build the - // > OS that way, you might always run the risk of encountering external code that uses EBP as a - // > general purpose register for which such an unwind attempt for a stack trace would fail. 
- // - // Regardless, it's easy to effectively confirm this hypothesis just by compiling some code with - // `-fomit-frame-pointer -OReleaseFast` and observing that `RtlCaptureStackBackTrace` returns an - // empty trace when it's called in such an application. Note that without `-OReleaseFast` or - // similar, LLVM seems reluctant to ever clobber ebp, so you'll get a trace returned which just - // contains all of the kernel32/ntdll frames but none of your own. Don't be deceived---this is - // just coincidental! - // - // Anyway, the point is, the only stack walking primitive on x86-windows is FP unwinding. We - // *could* ask Microsoft to do that for us with `RtlCaptureStackBackTrace`... but better to just - // use our existing FP unwinder in `std.debug`! - .x86 => false, -}; -pub const UnwindContext = struct { - pc: usize, - cur: windows.CONTEXT, - history_table: windows.UNWIND_HISTORY_TABLE, - pub fn init(ctx: *const std.debug.cpu_context.Native) UnwindContext { - return .{ - .pc = @returnAddress(), - .cur = switch (builtin.cpu.arch) { - .x86_64 => std.mem.zeroInit(windows.CONTEXT, .{ - .Rax = ctx.gprs.get(.rax), - .Rcx = ctx.gprs.get(.rcx), - .Rdx = ctx.gprs.get(.rdx), - .Rbx = ctx.gprs.get(.rbx), - .Rsp = ctx.gprs.get(.rsp), - .Rbp = ctx.gprs.get(.rbp), - .Rsi = ctx.gprs.get(.rsi), - .Rdi = ctx.gprs.get(.rdi), - .R8 = ctx.gprs.get(.r8), - .R9 = ctx.gprs.get(.r9), - .R10 = ctx.gprs.get(.r10), - .R11 = ctx.gprs.get(.r11), - .R12 = ctx.gprs.get(.r12), - .R13 = ctx.gprs.get(.r13), - .R14 = ctx.gprs.get(.r14), - .R15 = ctx.gprs.get(.r15), - .Rip = ctx.gprs.get(.rip), - }), - .aarch64, .aarch64_be => .{ - .ContextFlags = 0, - .Cpsr = 0, - .DUMMYUNIONNAME = .{ .X = ctx.x }, - .Sp = ctx.sp, - .Pc = ctx.pc, - .V = @splat(.{ .B = @splat(0) }), - .Fpcr = 0, - .Fpsr = 0, - .Bcr = @splat(0), - .Bvr = @splat(0), - .Wcr = @splat(0), - .Wvr = @splat(0), - }, - .thumb => .{ - .ContextFlags = 0, - .R0 = ctx.r[0], - .R1 = ctx.r[1], - .R2 = ctx.r[2], - .R3 = ctx.r[3], - .R4 = 
ctx.r[4], - .R5 = ctx.r[5], - .R6 = ctx.r[6], - .R7 = ctx.r[7], - .R8 = ctx.r[8], - .R9 = ctx.r[9], - .R10 = ctx.r[10], - .R11 = ctx.r[11], - .R12 = ctx.r[12], - .Sp = ctx.r[13], - .Lr = ctx.r[14], - .Pc = ctx.r[15], - .Cpsr = 0, - .Fpcsr = 0, - .Padding = 0, - .DUMMYUNIONNAME = .{ .S = @splat(0) }, - .Bvr = @splat(0), - .Bcr = @splat(0), - .Wvr = @splat(0), - .Wcr = @splat(0), - .Padding2 = @splat(0), - }, - else => comptime unreachable, - }, - .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), - }; - } - pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void { - _ = ctx; - _ = gpa; - } - pub fn getFp(ctx: *UnwindContext) usize { - return ctx.cur.getRegs().bp; - } -}; -pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - _ = module; - _ = gpa; - _ = di; - - const current_regs = context.cur.getRegs(); - var image_base: windows.DWORD64 = undefined; - if (windows.ntdll.RtlLookupFunctionEntry(current_regs.ip, &image_base, &context.history_table)) |runtime_function| { - var handler_data: ?*anyopaque = null; - var establisher_frame: u64 = undefined; - _ = windows.ntdll.RtlVirtualUnwind( - windows.UNW_FLAG_NHANDLER, - image_base, - current_regs.ip, - runtime_function, - &context.cur, - &handler_data, - &establisher_frame, - null, - ); - } else { - // leaf function - context.cur.setIp(@as(*const usize, @ptrFromInt(current_regs.sp)).*); - context.cur.setSp(current_regs.sp + @sizeOf(usize)); - } - - const next_regs = context.cur.getRegs(); - const tib = &windows.teb().NtTib; - if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { - context.pc = 0; - return 0; - } - // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this - // function's last instruction making `next_regs.ip` one byte past its end. 
- context.pc = next_regs.ip -| 1; - return next_regs.ip; -} - -const WindowsModule = @This(); - -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; -const Dwarf = std.debug.Dwarf; -const Pdb = std.debug.Pdb; -const assert = std.debug.assert; -const coff = std.coff; -const fs = std.fs; -const mem = std.mem; -const windows = std.os.windows; - -const builtin = @import("builtin"); -const native_endian = builtin.target.cpu.arch.endian(); diff --git a/test/standalone/coff_dwarf/main.zig b/test/standalone/coff_dwarf/main.zig index 411dbd913b6f..6707bab4dc7f 100644 --- a/test/standalone/coff_dwarf/main.zig +++ b/test/standalone/coff_dwarf/main.zig @@ -14,7 +14,7 @@ pub fn main() void { var add_addr: usize = undefined; _ = add(1, 2, &add_addr); - const symbol = di.getSymbolAtAddress(gpa, add_addr) catch |err| fatal("failed to get symbol: {t}", .{err}); + const symbol = di.getSymbol(gpa, add_addr) catch |err| fatal("failed to get symbol: {t}", .{err}); defer if (symbol.source_location) |sl| gpa.free(sl.file_name); if (symbol.name == null) fatal("failed to resolve symbol name", .{});