Skip to content

Commit 4335e77

Browse files
authored
Enable reporting of undefined symbols in wasm-ld (#10350)
When enabled, wasm-ld will report more detailed information about the undefined symbols such as which object file/function required the symbols. This works by generating the list of all possible JS-defined symbols and passing that list to wasm-ld so it can determine which symbols are permitted to be undefined. The cost of this feature is that we run the js compiler twice, once before wasm-ld and once after the link once wasm-emscripten-finalize has been run. On my machine this takes around 700 so I'm not enabling this by default yet.
1 parent 00eda2b commit 4335e77

File tree

8 files changed

+130
-45
lines changed

8 files changed

+130
-45
lines changed

ChangeLog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ See docs/process.md for how version tagging works.
1717

1818
Current Trunk
1919
-------------
20+
- Add LLD_REPORT_UNDEFINED option that should allow for more detailed
21+
diagnostics when symbols are undefined at link time. This currently has
22+
some limitations and is not enabled by default. For example, EM_JS symbols
23+
are reported as undefined at link time, as are `__invoke_*` functions.
2024

2125
v1.39.7: 02/03/2020
2226
-------------------

emcc.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,41 @@ def ensure_archive_index(archive_file):
512512
run_process([shared.LLVM_RANLIB, archive_file])
513513

514514

515+
def get_all_js_library_funcs(temp_files):
516+
# Runs the js compiler to generate a list of all functions available in the JS
517+
# libraries. This must be done separately for each linker invocation since the
518+
# list of library functions depends on what settings are used.
519+
# TODO(sbc): Find a way to optimize this. Potentially we could add a super-set
520+
# mode of the js compiler that would generate a list of all possible symbols
521+
# that could be checked in.
522+
old_full = shared.Settings.INCLUDE_FULL_LIBRARY
523+
old_linkable = shared.Settings.LINKABLE
524+
try:
525+
# Temporarily define INCLUDE_FULL_LIBRARY since we want a full list
526+
# of all available JS library functions.
527+
shared.Settings.INCLUDE_FULL_LIBRARY = True
528+
# Temporarily set LINKABLE so that the jscompiler doesn't report
529+
# undefined symbols itself.
530+
shared.Settings.LINKABLE = True
531+
emscripten.generate_struct_info()
532+
glue, forwarded_data = emscripten.compile_settings(temp_files)
533+
forwarded_json = json.loads(forwarded_data)
534+
library_fns = forwarded_json['Functions']['libraryFunctions']
535+
library_fns_list = []
536+
for name in library_fns:
537+
if shared.is_c_symbol(name):
538+
name = shared.demangle_c_symbol_name(name)
539+
library_fns_list.append(name)
540+
# TODO(sbc): wasm-ld shouldn't be reporting errors for symbols
541+
# such as __wasi_fd_write which are defined with import_name attributes
542+
# but it currently does. Remove this once we fix wasm-ld.
543+
library_fns_list.append('__wasi_' + name)
544+
finally:
545+
shared.Settings.INCLUDE_FULL_LIBRARY = old_full
546+
shared.Settings.LINKABLE = old_linkable
547+
return library_fns_list
548+
549+
515550
#
516551
# Main run() function
517552
#
@@ -2210,7 +2245,14 @@ def get_final():
22102245
lto_level = options.opt_level
22112246
else:
22122247
lto_level = 0
2213-
final = shared.Building.link_lld(linker_inputs, DEFAULT_FINAL, lto_level=lto_level)
2248+
all_externals = None
2249+
if shared.Settings.LLD_REPORT_UNDEFINED:
2250+
all_externals = get_all_js_library_funcs(misc_temp_files)
2251+
log_time('JS symbol generation')
2252+
# TODO(sbc): This is an incomplete list of __invoke functions. Perhaps add
2253+
# support for wildcard to wasm-ld.
2254+
all_externals += ['emscripten_longjmp_jmpbuf', '__invoke_void', '__invoke_i32_i8*_...']
2255+
final = shared.Building.link_lld(linker_inputs, DEFAULT_FINAL, lto_level=lto_level, all_external_symbols=all_externals)
22142256
else:
22152257
final = shared.Building.link(linker_inputs, DEFAULT_FINAL, force_archive_contents=force_archive_contents, just_calculate=just_calculate)
22162258
else:

emscripten.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2769,11 +2769,8 @@ def normalize_line_endings(text):
27692769
return text
27702770

27712771

2772-
def run(infile, outfile, memfile):
2773-
temp_files = get_configuration().get_temp_files()
2774-
infile, outfile = substitute_response_files([infile, outfile])
2775-
2776-
if not shared.Settings.BOOTSTRAPPING_STRUCT_INFO:
2772+
def generate_struct_info():
2773+
if not shared.Settings.STRUCT_INFO and not shared.Settings.BOOTSTRAPPING_STRUCT_INFO:
27772774
generated_struct_info_name = 'generated_struct_info.json'
27782775

27792776
def generate_struct_info():
@@ -2785,6 +2782,12 @@ def generate_struct_info():
27852782
shared.Settings.STRUCT_INFO = shared.Cache.get(generated_struct_info_name, generate_struct_info)
27862783
# do we need an else, to define it for the bootstrap case?
27872784

2785+
2786+
def run(infile, outfile, memfile):
2787+
temp_files = get_configuration().get_temp_files()
2788+
infile, outfile = substitute_response_files([infile, outfile])
2789+
generate_struct_info()
2790+
27882791
outfile_obj = open(outfile, 'w')
27892792

27902793
emscripter = emscript_wasm_backend if shared.Settings.WASM_BACKEND else emscript_fastcomp

src/jsifier.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,15 @@ function JSify(data, functionsOnly) {
162162
return;
163163
} else if ((!LibraryManager.library.hasOwnProperty(ident) && !LibraryManager.library.hasOwnProperty(ident + '__inline')) || SIDE_MODULE) {
164164
if (!(finalName in IMPLEMENTED_FUNCTIONS) && !LINKABLE) {
165+
var msg = 'undefined symbol: ' + ident;
165166
if (ERROR_ON_UNDEFINED_SYMBOLS) {
166-
error('undefined symbol: ' + ident);
167+
error(msg);
168+
if (WASM_BACKEND) {
169+
warnOnce('Link with `-s LLD_REPORT_UNDEFINED` to get more information on undefined symbols');
170+
}
167171
warnOnce('To disable errors for undefined symbols use `-s ERROR_ON_UNDEFINED_SYMBOLS=0`')
168172
} else if (VERBOSE || WARN_ON_UNDEFINED_SYMBOLS) {
169-
warn('undefined symbol: ' + ident);
173+
warn(msg);
170174
}
171175
}
172176
if (!RELOCATABLE) {

src/library.js

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4802,40 +4802,41 @@ LibraryManager.library = {
48024802
{{{ makeDynCall('vii') }}}(func, Math.min(base, end), Math.max(base, end));
48034803
},
48044804

4805-
// misc definitions to avoid unnecessary unresolved symbols from fastcomp
4805+
// misc definitions to avoid unnecessary unresolved symbols being reported
4806+
// by fastcomp or wasm-ld
48064807
#if SUPPORT_LONGJMP
4807-
emscripten_prep_setjmp: true,
4808-
emscripten_cleanup_setjmp: true,
4809-
emscripten_check_longjmp: true,
4810-
emscripten_get_longjmp_result: true,
4811-
emscripten_setjmp: true,
4808+
emscripten_prep_setjmp: function() {},
4809+
emscripten_cleanup_setjmp: function() {},
4810+
emscripten_check_longjmp: function() {},
4811+
emscripten_get_longjmp_result: function() {},
4812+
emscripten_setjmp: function() {},
48124813
#endif
4813-
emscripten_preinvoke: true,
4814-
emscripten_postinvoke: true,
4815-
emscripten_resume: true,
4816-
emscripten_landingpad: true,
4817-
getHigh32: true,
4818-
setHigh32: true,
4819-
FtoILow: true,
4820-
FtoIHigh: true,
4821-
DtoILow: true,
4822-
DtoIHigh: true,
4823-
BDtoILow: true,
4824-
BDtoIHigh: true,
4825-
SItoF: true,
4826-
UItoF: true,
4827-
SItoD: true,
4828-
UItoD: true,
4829-
BItoD: true,
4830-
llvm_dbg_value: true,
4831-
llvm_debugtrap: true,
4832-
llvm_ctlz_i32: true,
4833-
emscripten_asm_const: true,
4834-
emscripten_asm_const_int: true,
4835-
emscripten_asm_const_double: true,
4836-
emscripten_asm_const_int_sync_on_main_thread: true,
4837-
emscripten_asm_const_double_sync_on_main_thread: true,
4838-
emscripten_asm_const_async_on_main_thread: true,
4814+
emscripten_preinvoke: function() {},
4815+
emscripten_postinvoke: function() {},
4816+
emscripten_resume: function() {},
4817+
emscripten_landingpad: function() {},
4818+
getHigh32: function() {},
4819+
setHigh32: function() {},
4820+
FtoILow: function() {},
4821+
FtoIHigh: function() {},
4822+
DtoILow: function() {},
4823+
DtoIHigh: function() {},
4824+
BDtoILow: function() {},
4825+
BDtoIHigh: function() {},
4826+
SItoF: function() {},
4827+
UItoF: function() {},
4828+
SItoD: function() {},
4829+
UItoD: function() {},
4830+
BItoD: function() {},
4831+
llvm_dbg_value: function() {},
4832+
llvm_debugtrap: function() {},
4833+
llvm_ctlz_i32: function() {},
4834+
emscripten_asm_const: function() {},
4835+
emscripten_asm_const_int: function() {},
4836+
emscripten_asm_const_double: function() {},
4837+
emscripten_asm_const_int_sync_on_main_thread: function() {},
4838+
emscripten_asm_const_double_sync_on_main_thread: function() {},
4839+
emscripten_asm_const_async_on_main_thread: function() {},
48394840

48404841
// ======== compiled code from system/lib/compiler-rt , see readme therein
48414842
__muldsi3__asm: true,

src/settings.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,6 +1733,13 @@ var DISABLE_EXCEPTION_THROWING = 0;
17331733
// across function boundaries.
17341734
var USE_OFFSET_CONVERTER = 0;
17351735

1736+
// If set to 1, the JS compiler is run before wasm-ld so that the linker can
1737+
// report undefined symbols within the binary. Without this option the linker
1738+
// doesn't know which symbols might be defined in JS and so reporting of undefined
1739+
// symbols is delayed until the JS compiler is run.
1740+
// [link]
1741+
var LLD_REPORT_UNDEFINED = 0;
1742+
17361743
//===========================================
17371744
// Internal, used for testing only, from here
17381745
//===========================================

tests/test_other.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10302,3 +10302,10 @@ def test_signature_mismatch(self):
1030210302
self.assertContained('function signature mismatch: foo', stderr)
1030310303
self.expect_fail([PYTHON, EMCC, '-Wl,--fatal-warnings', 'a.c', 'b.c'])
1030410304
self.expect_fail([PYTHON, EMCC, '-s', 'STRICT', 'a.c', 'b.c'])
10305+
10306+
@no_fastcomp('lld only')
10307+
def test_lld_report_undefined(self):
10308+
create_test_file('main.c', 'void foo(); int main() { foo(); return 0; }')
10309+
stderr = self.expect_fail([PYTHON, EMCC, '-s', 'LLD_REPORT_UNDEFINED', 'main.c'])
10310+
self.assertContained('wasm-ld: error:', stderr)
10311+
self.assertContained('main_0.o: undefined symbol: foo', stderr)

tools/shared.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,10 @@ def demangle_c_symbol_name(name):
14471447
return name[1:] if name.startswith('_') else '$' + name
14481448

14491449

1450+
def is_c_symbol(name):
1451+
return name.startswith('_')
1452+
1453+
14501454
def treat_as_user_function(name):
14511455
if name.startswith('dynCall_'):
14521456
return False
@@ -1881,7 +1885,7 @@ def link_llvm(linker_inputs, target):
18811885
return target
18821886

18831887
@staticmethod
1884-
def link_lld(args, target, opts=[], lto_level=0):
1888+
def link_lld(args, target, opts=[], lto_level=0, all_external_symbols=None):
18851889
if not os.path.exists(WASM_LD):
18861890
exit_with_error('linker binary not found in LLVM directory: %s', WASM_LD)
18871891
# runs lld to link things.
@@ -1903,10 +1907,17 @@ def link_lld(args, target, opts=[], lto_level=0):
19031907
WASM_LD,
19041908
'-o',
19051909
target,
1906-
'--allow-undefined',
19071910
'--lto-O%d' % lto_level,
19081911
] + args
19091912

1913+
if all_external_symbols:
1914+
undefs = configuration.get_temp_files().get('.undefined').name
1915+
with open(undefs, 'w') as f:
1916+
f.write('\n'.join(all_external_symbols))
1917+
cmd.append('--allow-undefined-file=%s' % undefs)
1918+
else:
1919+
cmd.append('--allow-undefined')
1920+
19101921
# wasi does not import the memory (but for JS it is efficient to do so,
19111922
# as it allows us to set up memory, preload files, etc. even before the
19121923
# wasm module arrives)
@@ -1941,8 +1952,14 @@ def link_lld(args, target, opts=[], lto_level=0):
19411952

19421953
cmd += ['--export', '__data_end']
19431954

1944-
for export in Settings.EXPORTED_FUNCTIONS:
1945-
cmd += ['--export', export[1:]] # Strip the leading underscore
1955+
c_exports = [e for e in Settings.EXPORTED_FUNCTIONS if is_c_symbol(e)]
1956+
# Strip the leading underscores
1957+
c_exports = [demangle_c_symbol_name(e) for e in c_exports]
1958+
if all_external_symbols:
1959+
# Filter out external/JS symbols
1960+
c_exports = [e for e in c_exports if e not in all_external_symbols]
1961+
for export in c_exports:
1962+
cmd += ['--export', export]
19461963

19471964
if Settings.RELOCATABLE:
19481965
if Settings.SIDE_MODULE:
@@ -2314,7 +2331,7 @@ def get_safe_internalize():
23142331
if len(internalize_list) > 8192:
23152332
logger.debug('using response file for EXPORTED_FUNCTIONS in internalize')
23162333
finalized_exports = '\n'.join([exp[1:] for exp in exps])
2317-
internalize_list_file = configuration.get_temp_files().get(suffix='.response').name
2334+
internalize_list_file = configuration.get_temp_files().get('.response').name
23182335
with open(internalize_list_file, 'w') as f:
23192336
f.write(finalized_exports)
23202337
internalize_public_api += 'file=' + internalize_list_file

0 commit comments

Comments
 (0)