Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug-1907983: fix sym file header parsing #2991

Merged
merged 1 commit into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions tecken/libsym.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,22 @@ def extract_sym_header_data(file_path):
data["debug_filename"] = debug_filename
data["debug_id"] = debug_id.upper()

elif line.startswith("INFO CODE_ID"):
elif line.startswith("INFO"):
parts = line.strip().split()
# NOTE(willkg): Non-Windows module sym files don't have a code_file
if len(parts) == 3:
_, _, code_id = parts
code_file = ""
elif len(parts) == 4:
_, _, code_id, code_file = parts

data["code_file"] = code_file
data["code_id"] = code_id.upper()

elif line.startswith("INFO GENERATOR"):
_, _, generator = line.strip().split(maxsplit=2)
data["generator"] = generator
if parts[1] == "CODE_ID":
# NOTE(willkg): Non-Windows module sym files don't have a code_file
if len(parts) == 3:
_, _, code_id = parts
code_file = ""
elif len(parts) == 4:
_, _, code_id, code_file = parts
Comment on lines +41 to +45
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this should have an `else` branch that raises `ValueError`, in case the file is corrupted or otherwise malformed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I initially wrote the extraction code, I thought about validation a bit, but decided it was out of scope. This code parses the bits it understands so it can fill in some metadata. If the header is malformed or invalid in some way, we don't have any mechanism for doing anything with that knowledge. Given that, I wrote it so that it just ignores those cases.

Bug 1814430 covers writing a better information view for symbols files which would cover whether they're valid. Adding header validation to that would be helpful.

A while back, I wrote bin/debug-sym-file.py that downloads a file and tells you things about it including whether it parses in symbolic. I used that to debug malformed symbols files for bug 1791785. We could add header validation to that.


data["code_file"] = code_file
data["code_id"] = code_id.upper()

elif parts[1] == "GENERATOR":
_, _, generator = line.strip().split(maxsplit=2)
data["generator"] = generator

else:
break
Expand Down
78 changes: 56 additions & 22 deletions tecken/tests/test_libsym.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,53 @@

class Test_extract_sym_header_data:
def test_windows_module_header(self, tmp_path):
sym_path = tmp_path / "basic-opt64.sym"
sym_path = tmp_path / "js.sym"
sym_path.write_bytes(
b"""\
MODULE windows x86_64 2B02EEDFFB7C497B9F3A107E5193B3652 basic-opt64.pdb
INFO CODE_ID 5DDC1E9E8B000 basic-opt64.dll
INFO GENERATOR mozilla/dump_syms XYZ
FILE 0 C:\\Users\\Calixte\\dump_syms\\test_data\\basic.cpp
FILE 1 d:\\agent\\_work\\2\\s\\src\\vctools\\crt\\vcstartup\\src\\heap\\delete_scalar_size.cpp
MODULE windows x86_64 A7B74D36BC7FECE04C4C44205044422E1 js.pdb
INFO CODE_ID 66BCC3E020DC000 js.exe
INFO RELEASECHANNEL beta
INFO VERSION 130.0
INFO VENDOR Mozilla
INFO PRODUCTNAME Firefox
INFO BUILDID 20240814121850
INFO GENERATOR mozilla/dump_syms 2.3.3
FILE 0 hg:hg.mozilla.org/releases/mozilla-beta:build/pure_virtual/pure_virtual.c:2107f27bbb2a2d2adc4cd4a4ae9bed8234b88d5d
FILE 1 hg:hg.mozilla.org/releases/mozilla-beta:mfbt/Assertions.h:2107f27bbb2a2d2adc4cd4a4ae9bed8234b88d5d
"""
)
data = extract_sym_header_data(str(sym_path))
assert data == {
"debug_filename": "js.pdb",
"debug_id": "A7B74D36BC7FECE04C4C44205044422E1",
"code_file": "js.exe",
"code_id": "66BCC3E020DC000",
"generator": "mozilla/dump_syms 2.3.3",
}

def test_mac_module_headeer(self, tmp_path):
sym_path = tmp_path / "libmozglue.dylib.sym"
sym_path.write_bytes(
b"""\
MODULE Mac x86_64 16039459CC413A18B31815B77A73C0E90 libmozglue.dylib
INFO CODE_ID 16039459CC413A18B31815B77A73C0E9
INFO RELEASECHANNEL beta
INFO VERSION 130.0
INFO VENDOR Mozilla
INFO PRODUCTNAME Firefox
INFO BUILDID 20240814121850
INFO GENERATOR mozilla/dump_syms 2.3.3
FILE 0 hg:hg.mozilla.org/releases/mozilla-beta:build/pure_virtual/pure_virtual.c:2107f27bbb2a2d2adc4cd4a4ae9bed8234b88d5d
FILE 1 hg:hg.mozilla.org/releases/mozilla-beta:memory/build/zone.c:2107f27bbb2a2d2adc4cd4a4ae9bed8234b88d5d
"""
)
data = extract_sym_header_data(str(sym_path))
assert data == {
"debug_filename": "basic-opt64.pdb",
"debug_id": "2B02EEDFFB7C497B9F3A107E5193B3652",
"code_file": "basic-opt64.dll",
"code_id": "5DDC1E9E8B000",
"generator": "mozilla/dump_syms XYZ",
"debug_filename": "libmozglue.dylib",
"debug_id": "16039459CC413A18B31815B77A73C0E90",
"code_file": "",
"code_id": "16039459CC413A18B31815B77A73C0E9",
"generator": "mozilla/dump_syms 2.3.3",
}

def test_linux_header_missing_generator(self, tmp_path):
Expand All @@ -49,24 +79,28 @@ def test_linux_header_missing_generator(self, tmp_path):

def test_linux_header(self, tmp_path):
"""Verify linux module sym file headers with no code_file"""
sym_path = tmp_path / "basic.dbg.sym"
sym_path = tmp_path / "libnss3.so.sym"
sym_path.write_bytes(
b"""\
MODULE Linux x86_64 20AD60B0B4C68177552708AA192E77390 basic.full
INFO CODE_ID B060AD20C6B47781552708AA192E7739FAC7C84A
INFO GENERATOR mozilla/dump_syms XYZ
FILE 0 /home/calixte/dev/mozilla/dump_syms.calixteman/test_data/linux/basic.cpp
PUBLIC 1000 0 _init
PUBLIC 1020 0 <.plt ELF section in basic.dbg>
MODULE Linux x86_64 A4CE852C227BB0DBB18BD1B5D75C51430 libnss3.so
INFO CODE_ID 2C85CEA47B22DBB0B18BD1B5D75C5143DE916497
INFO RELEASECHANNEL beta
INFO VERSION 130.0
INFO VENDOR Mozilla
INFO PRODUCTNAME Fennec
INFO BUILDID 20240814121850
INFO GENERATOR mozilla/dump_syms 2.3.3
FILE 0 s3:gecko-generated-sources:d6462856db6d74cac7a8c3828d23875b0b668975ee2a5b37015c86ce722a1c88164de511853077a5652b435e1ed37079b307ade8744777a21022400bb396c2a7/build/unix/elfhack/inject/x86_64-android.c:
FILE 1 /builds/worker/fetches/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/bits/fortify/string.h
"""
)
data = extract_sym_header_data(str(sym_path))
assert data == {
"debug_filename": "basic.full",
"debug_id": "20AD60B0B4C68177552708AA192E77390",
"debug_filename": "libnss3.so",
"debug_id": "A4CE852C227BB0DBB18BD1B5D75C51430",
"code_file": "",
"code_id": "B060AD20C6B47781552708AA192E7739FAC7C84A",
"generator": "mozilla/dump_syms XYZ",
"code_id": "2C85CEA47B22DBB0B18BD1B5D75C5143DE916497",
"generator": "mozilla/dump_syms 2.3.3",
}

def test_sym_parse_error(self, tmp_path):
Expand Down