From 4dc4e478f014273cac02bf3e50e8734fc1a47cc0 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Tue, 3 Sep 2024 12:27:53 +1000 Subject: [PATCH] read: improve handling of 0 for tombstones Where possible, ignore these instead of returning them or returning an error. This often isn't possible because 0 is a valid address, but we can handle it for `DW_LNE_set_address` in the middle of a line sequence, and for address pairs. --- src/read/aranges.rs | 2 ++ src/read/line.rs | 23 +++++++++++-------- src/read/loclists.rs | 53 ++++++++++++++++++++++++++++---------------- src/read/rnglists.rs | 42 +++++++++++++++++++++++------------ 4 files changed, 78 insertions(+), 42 deletions(-) diff --git a/src/read/aranges.rs b/src/read/aranges.rs index c2aac914..2829f261 100644 --- a/src/read/aranges.rs +++ b/src/read/aranges.rs @@ -290,6 +290,8 @@ impl ArangeEntryIter { #[doc(hidden)] pub fn convert_raw(&self, mut entry: ArangeEntry) -> Result> { // Skip tombstone entries. + // DWARF specifies a tombstone value of -1, but many linkers use 0. + // However, 0 may be a valid address, so the caller must handle that case. let address_size = self.encoding.address_size; let tombstone_address = !0 >> (64 - self.encoding.address_size * 8); if entry.range.begin == tombstone_address { diff --git a/src/read/line.rs b/src/read/line.rs index ceaf3b3f..1e91b020 100644 --- a/src/read/line.rs +++ b/src/read/line.rs @@ -828,12 +828,16 @@ impl LineRow { } LineInstruction::SetAddress(address) => { + // If the address is a tombstone, then skip instructions until the next address. + // DWARF specifies a tombstone value of -1, but many linkers use 0. + // However, 0 may be a valid address, so we only skip that if we have previously + // seen a higher address. Additionally, gold may keep the relocation addend, + // so we treat all lower addresses as tombstones instead of just 0. + // This works because DWARF specifies that addresses are monotonically increasing + // within a sequence; the alternative is to return an error. let tombstone_address = !0 >> (64 - program.header().encoding.address_size * 8); - self.tombstone = address == tombstone_address; + self.tombstone = address < self.address || address == tombstone_address; if !self.tombstone { - if address < self.address { - return Err(Error::InvalidAddressRange); - } self.address = address; self.op_index.0 = 0; } @@ -2877,13 +2881,14 @@ mod tests { #[test] fn test_exec_set_address_backwards() { let header = make_test_header(EndianSlice::new(&[], LittleEndian)); - let mut registers = LineRow::new(&header); - registers.address = 1; + let mut initial_registers = LineRow::new(&header); + initial_registers.address = 1; let opcode = LineInstruction::SetAddress(0); - let mut program = IncompleteLineProgram { header }; - let result = registers.execute(opcode, &mut program); - assert_eq!(result, Err(Error::InvalidAddressRange)); + let mut expected_registers = initial_registers; + expected_registers.tombstone = true; + + assert_exec_opcode(header, initial_registers, opcode, expected_registers, false); } #[test] diff --git a/src/read/loclists.rs b/src/read/loclists.rs index 84e4d636..26450186 100644 --- a/src/read/loclists.rs +++ b/src/read/loclists.rs @@ -633,6 +633,7 @@ impl LocListIter { ), RawLocListEntry::AddressOrOffsetPair { begin, end, data } | RawLocListEntry::OffsetPair { begin, end, data } => { + // Skip tombstone entries (see below). if self.base_address == tombstone { return Ok(None); } @@ -651,7 +652,17 @@ impl LocListIter { } }; - if range.begin == tombstone || range.begin > range.end { + // Skip tombstone entries. + // + // DWARF specifies a tombstone value of -1 or -2, but many linkers use 0 or 1. + // However, 0/1 may be a valid address, so we can't always reliably skip them. + // One case where we can skip them is for address pairs, where both values are + // replaced by tombstones and thus `begin` equals `end`. Since these entries + // are empty, it's safe to skip them even if they aren't tombstones. + // + // In addition to skipping tombstone entries, we also skip invalid entries + // where `begin` is greater than `end`. This can occur due to compiler bugs. + if range.begin == tombstone || range.begin >= range.end { return Ok(None); } @@ -695,6 +706,7 @@ mod tests { let format = Format::Dwarf32; for size in [4, 8] { let tombstone = u64::ones_sized(size); + let tombstone_0 = 0; let encoding = Encoding { format, version: 5, @@ -707,7 +719,8 @@ mod tests { .word(size, 0x0301_0400) .word(size, 0x0301_0500) .word(size, tombstone) - .word(size, 0x0301_0600); + .word(size, 0x0301_0600) + .word(size, tombstone_0); let buf = section.get_contents().unwrap(); let debug_addr = &DebugAddr::from(EndianSlice::new(&buf, LittleEndian)); let debug_addr_base = DebugAddrBase(0); @@ -742,14 +755,6 @@ mod tests { section = section.uleb(4).L32(3); expect_location(0x0201_0400, 0x0201_0500, &[3, 0, 0, 0]); - // An empty offset pair followed by a normal offset pair. - section = section.L8(DW_LLE_offset_pair.0).uleb(0x10600).uleb(0x10600); - section = section.uleb(4).L32(4); - expect_location(0x0201_0600, 0x0201_0600, &[4, 0, 0, 0]); - section = section.L8(DW_LLE_offset_pair.0).uleb(0x10800).uleb(0x10900); - section = section.uleb(4).L32(5); - expect_location(0x0201_0800, 0x0201_0900, &[5, 0, 0, 0]); - section = section .L8(DW_LLE_start_end.0) .word(size, 0x201_0a00) @@ -770,12 +775,6 @@ mod tests { section = section.uleb(4).L32(8); expect_location(0, 1, &[8, 0, 0, 0]); - // An offset pair that starts and ends at 0. - section = section.L8(DW_LLE_base_address.0).word(size, 0); - section = section.L8(DW_LLE_offset_pair.0).uleb(0).uleb(0); - section = section.uleb(4).L32(8); - expect_location(0, 0, &[8, 0, 0, 0]); - // An offset pair that ends at -1. section = section.L8(DW_LLE_base_address.0).word(size, 0); section = section.L8(DW_LLE_offset_pair.0).uleb(0).uleb(tombstone); @@ -822,13 +821,30 @@ mod tests { .uleb(0x100); section = section.uleb(4).L32(25); + // Ignore some instances of 0 for tombstone. + section = section.L8(DW_LLE_startx_endx.0).uleb(6).uleb(6); + section = section.uleb(4).L32(30); + section = section + .L8(DW_LLE_start_end.0) + .word(size, tombstone_0) + .word(size, tombstone_0); + section = section.uleb(4).L32(31); + + // Ignore empty ranges. + section = section.L8(DW_LLE_base_address.0).word(size, 0); + section = section.L8(DW_LLE_offset_pair.0).uleb(0).uleb(0); + section = section.uleb(4).L32(41); + section = section.L8(DW_LLE_base_address.0).word(size, 0x10000); + section = section.L8(DW_LLE_offset_pair.0).uleb(0x1234).uleb(0x1234); + section = section.uleb(4).L32(42); + // A valid range after the tombstones. section = section .L8(DW_LLE_start_end.0) .word(size, 0x201_1600) .word(size, 0x201_1700); - section = section.uleb(4).L32(26); - expect_location(0x0201_1600, 0x0201_1700, &[26, 0, 0, 0]); + section = section.uleb(4).L32(100); + expect_location(0x0201_1600, 0x0201_1700, &[100, 0, 0, 0]); section = section.L8(DW_LLE_end_of_list.0); section = section.mark(&end); @@ -895,7 +911,6 @@ mod tests { // An empty location range followed by a normal location. section = section.word(size, 0x10600).word(size, 0x10600); section = section.L16(4).L32(4); - expect_location(0x0201_0600, 0x0201_0600, &[4, 0, 0, 0]); section = section.word(size, 0x10800).word(size, 0x10900); section = section.L16(4).L32(5); expect_location(0x0201_0800, 0x0201_0900, &[5, 0, 0, 0]); diff --git a/src/read/rnglists.rs b/src/read/rnglists.rs index df3f5b4a..4b979555 100644 --- a/src/read/rnglists.rs +++ b/src/read/rnglists.rs @@ -556,6 +556,7 @@ impl RngListIter { } RawRngListEntry::AddressOrOffsetPair { begin, end } | RawRngListEntry::OffsetPair { begin, end } => { + // Skip tombstone entries (see below). if self.base_address == tombstone { return Ok(None); } @@ -570,7 +571,17 @@ impl RngListIter { } }; - if range.begin == tombstone || range.begin > range.end { + // Skip tombstone entries. + // + // DWARF specifies a tombstone value of -1 or -2, but many linkers use 0 or 1. + // However, 0/1 may be a valid address, so we can't always reliably skip them. + // One case where we can skip them is for address pairs, where both values are + // replaced by tombstones and thus `begin` equals `end`. Since these entries + // are empty, it's safe to skip them even if they aren't tombstones. + // + // In addition to skipping tombstone entries, we also skip invalid entries + // where `begin` is greater than `end`. This can occur due to compiler bugs. + if range.begin == tombstone || range.begin >= range.end { return Ok(None); } @@ -658,6 +669,7 @@ mod tests { let format = Format::Dwarf32; for size in [4, 8] { let tombstone = u64::ones_sized(size); + let tombstone_0 = 0; let encoding = Encoding { format, version: 5, @@ -669,7 +681,8 @@ mod tests { .word(size, 0x0301_0400) .word(size, 0x0301_0500) .word(size, tombstone) - .word(size, 0x0301_0600); + .word(size, 0x0301_0600) + .word(size, tombstone_0); let buf = section.get_contents().unwrap(); let debug_addr = &DebugAddr::from(EndianSlice::new(&buf, LittleEndian)); let debug_addr_base = DebugAddrBase(0); @@ -699,12 +712,6 @@ mod tests { section = section.L8(DW_RLE_offset_pair.0).uleb(0x10400).uleb(0x10500); expect_range(0x0201_0400, 0x0201_0500); - // An empty offset pair followed by a normal offset pair. - section = section.L8(DW_RLE_offset_pair.0).uleb(0x10600).uleb(0x10600); - expect_range(0x0201_0600, 0x0201_0600); - section = section.L8(DW_RLE_offset_pair.0).uleb(0x10800).uleb(0x10900); - expect_range(0x0201_0800, 0x0201_0900); - section = section .L8(DW_RLE_start_end.0) .word(size, 0x201_0a00) @@ -722,11 +729,6 @@ mod tests { section = section.L8(DW_RLE_offset_pair.0).uleb(0).uleb(1); expect_range(0, 1); - // An offset pair that starts and ends at 0. - section = section.L8(DW_RLE_base_address.0).word(size, 0); - section = section.L8(DW_RLE_offset_pair.0).uleb(0).uleb(0); - expect_range(0, 0); - // An offset pair that ends at -1. section = section.L8(DW_RLE_base_address.0).word(size, 0); section = section.L8(DW_RLE_offset_pair.0).uleb(0).uleb(tombstone); @@ -760,6 +762,19 @@ mod tests { .word(size, tombstone) .uleb(0x100); + // Ignore some instances of 0 for tombstone. + section = section.L8(DW_RLE_startx_endx.0).uleb(6).uleb(6); + section = section + .L8(DW_RLE_start_end.0) + .word(size, tombstone_0) + .word(size, tombstone_0); + + // Ignore empty ranges. + section = section.L8(DW_RLE_base_address.0).word(size, 0); + section = section.L8(DW_RLE_offset_pair.0).uleb(0).uleb(0); + section = section.L8(DW_RLE_base_address.0).word(size, 0x10000); + section = section.L8(DW_RLE_offset_pair.0).uleb(0x1234).uleb(0x1234); + // A valid range after the tombstones. section = section .L8(DW_RLE_start_end.0) @@ -868,7 +883,6 @@ mod tests { expect_range(0x0201_0400, 0x0201_0500); // An empty range followed by a normal range. section = section.word(size, 0x10600).word(size, 0x10600); - expect_range(0x0201_0600, 0x0201_0600); section = section.word(size, 0x10800).word(size, 0x10900); expect_range(0x0201_0800, 0x0201_0900); // A range that starts at 0.