diff --git a/ChangeLog.md b/ChangeLog.md index e0b3f132c9b09..a1e01edfeb4a9 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,10 @@ See docs/process.md for how version tagging works. Current Trunk ------------- +- Add LLD_REPORT_UNDEFINED option that should allow for more detailed + diagnostics when symbols are undefined at link time. This currently has + some limitations and is not enabled by default. For example, EM_JS symbols + are reported as undefined at link time, as are `__invoke_*` functions. v1.39.7: 02/03/2020 ------------------- diff --git a/emcc.py b/emcc.py index 4ba8a6b9c3583..4211fc71b3a91 100755 --- a/emcc.py +++ b/emcc.py @@ -512,6 +512,41 @@ def ensure_archive_index(archive_file): run_process([shared.LLVM_RANLIB, archive_file]) +def get_all_js_library_funcs(temp_files): + # Runs the js compiler to generate a list of all functions available in the JS + # libraries. This must be done separately for each linker invocation since the + # list of library functions depends on what settings are used. + # TODO(sbc): Find a way to optimize this. Potentially we could add a super-set + # mode of the js compiler that would generate a list of all possible symbols + # that could be checked in. + old_full = shared.Settings.INCLUDE_FULL_LIBRARY + old_linkable = shared.Settings.LINKABLE + try: + # Temporarily define INCLUDE_FULL_LIBRARY since we want a full list + # of all available JS library functions. + shared.Settings.INCLUDE_FULL_LIBRARY = True + # Temporarily set LINKABLE so that the jscompiler doesn't report + # undefined symbols itself. 
+ shared.Settings.LINKABLE = True + emscripten.generate_struct_info() + glue, forwarded_data = emscripten.compile_settings(temp_files) + forwarded_json = json.loads(forwarded_data) + library_fns = forwarded_json['Functions']['libraryFunctions'] + library_fns_list = [] + for name in library_fns: + if shared.is_c_symbol(name): + name = shared.demangle_c_symbol_name(name) + library_fns_list.append(name) + # TODO(sbc): wasm-ld shouldn't be reporting errors for symbols + # such as __wasi_fd_write which are defined with import_name attributes + # but it currently does. Remove this once we fix wasm-ld. + library_fns_list.append('__wasi_' + name) + finally: + shared.Settings.INCLUDE_FULL_LIBRARY = old_full + shared.Settings.LINKABLE = old_linkable + return library_fns_list + + # # Main run() function # @@ -2210,7 +2245,14 @@ def get_final(): lto_level = options.opt_level else: lto_level = 0 - final = shared.Building.link_lld(linker_inputs, DEFAULT_FINAL, lto_level=lto_level) + all_externals = None + if shared.Settings.LLD_REPORT_UNDEFINED: + all_externals = get_all_js_library_funcs(misc_temp_files) + log_time('JS symbol generation') + # TODO(sbc): This is an incomplete list of __invoke functions. Perhaps add + # support for wildcard to wasm-ld. 
+ all_externals += ['emscripten_longjmp_jmpbuf', '__invoke_void', '__invoke_i32_i8*_...'] + final = shared.Building.link_lld(linker_inputs, DEFAULT_FINAL, lto_level=lto_level, all_external_symbols=all_externals) else: final = shared.Building.link(linker_inputs, DEFAULT_FINAL, force_archive_contents=force_archive_contents, just_calculate=just_calculate) else: diff --git a/emscripten.py b/emscripten.py index 912352d9d7b82..53b14d6297368 100644 --- a/emscripten.py +++ b/emscripten.py @@ -2769,11 +2769,8 @@ def normalize_line_endings(text): return text -def run(infile, outfile, memfile): - temp_files = get_configuration().get_temp_files() - infile, outfile = substitute_response_files([infile, outfile]) - - if not shared.Settings.BOOTSTRAPPING_STRUCT_INFO: +def generate_struct_info(): + if not shared.Settings.STRUCT_INFO and not shared.Settings.BOOTSTRAPPING_STRUCT_INFO: generated_struct_info_name = 'generated_struct_info.json' def generate_struct_info(): @@ -2785,6 +2782,12 @@ def generate_struct_info(): shared.Settings.STRUCT_INFO = shared.Cache.get(generated_struct_info_name, generate_struct_info) # do we need an else, to define it for the bootstrap case? 
+ +def run(infile, outfile, memfile): + temp_files = get_configuration().get_temp_files() + infile, outfile = substitute_response_files([infile, outfile]) + generate_struct_info() + outfile_obj = open(outfile, 'w') emscripter = emscript_wasm_backend if shared.Settings.WASM_BACKEND else emscript_fastcomp diff --git a/src/jsifier.js b/src/jsifier.js index 85ffd0d32d065..20ebf346f3575 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -162,11 +162,15 @@ function JSify(data, functionsOnly) { return; } else if ((!LibraryManager.library.hasOwnProperty(ident) && !LibraryManager.library.hasOwnProperty(ident + '__inline')) || SIDE_MODULE) { if (!(finalName in IMPLEMENTED_FUNCTIONS) && !LINKABLE) { + var msg = 'undefined symbol: ' + ident; if (ERROR_ON_UNDEFINED_SYMBOLS) { - error('undefined symbol: ' + ident); + error(msg); + if (WASM_BACKEND) { + warnOnce('Link with `-s LLD_REPORT_UNDEFINED` to get more information on undefined symbols'); + } warnOnce('To disable errors for undefined symbols use `-s ERROR_ON_UNDEFINED_SYMBOLS=0`') } else if (VERBOSE || WARN_ON_UNDEFINED_SYMBOLS) { - warn('undefined symbol: ' + ident); + warn(msg); } } if (!RELOCATABLE) { diff --git a/src/library.js b/src/library.js index 005ce6f41cfb8..328b5c468822f 100644 --- a/src/library.js +++ b/src/library.js @@ -4791,40 +4791,41 @@ LibraryManager.library = { {{{ makeDynCall('vii') }}}(func, Math.min(base, end), Math.max(base, end)); }, - // misc definitions to avoid unnecessary unresolved symbols from fastcomp + // misc definitions to avoid unnecessary unresolved symbols being reported + // by fastcomp or wasm-ld #if SUPPORT_LONGJMP - emscripten_prep_setjmp: true, - emscripten_cleanup_setjmp: true, - emscripten_check_longjmp: true, - emscripten_get_longjmp_result: true, - emscripten_setjmp: true, + emscripten_prep_setjmp: function() {}, + emscripten_cleanup_setjmp: function() {}, + emscripten_check_longjmp: function() {}, + emscripten_get_longjmp_result: function() {}, + emscripten_setjmp: 
function() {}, #endif - emscripten_preinvoke: true, - emscripten_postinvoke: true, - emscripten_resume: true, - emscripten_landingpad: true, - getHigh32: true, - setHigh32: true, - FtoILow: true, - FtoIHigh: true, - DtoILow: true, - DtoIHigh: true, - BDtoILow: true, - BDtoIHigh: true, - SItoF: true, - UItoF: true, - SItoD: true, - UItoD: true, - BItoD: true, - llvm_dbg_value: true, - llvm_debugtrap: true, - llvm_ctlz_i32: true, - emscripten_asm_const: true, - emscripten_asm_const_int: true, - emscripten_asm_const_double: true, - emscripten_asm_const_int_sync_on_main_thread: true, - emscripten_asm_const_double_sync_on_main_thread: true, - emscripten_asm_const_async_on_main_thread: true, + emscripten_preinvoke: function() {}, + emscripten_postinvoke: function() {}, + emscripten_resume: function() {}, + emscripten_landingpad: function() {}, + getHigh32: function() {}, + setHigh32: function() {}, + FtoILow: function() {}, + FtoIHigh: function() {}, + DtoILow: function() {}, + DtoIHigh: function() {}, + BDtoILow: function() {}, + BDtoIHigh: function() {}, + SItoF: function() {}, + UItoF: function() {}, + SItoD: function() {}, + UItoD: function() {}, + BItoD: function() {}, + llvm_dbg_value: function() {}, + llvm_debugtrap: function() {}, + llvm_ctlz_i32: function() {}, + emscripten_asm_const: function() {}, + emscripten_asm_const_int: function() {}, + emscripten_asm_const_double: function() {}, + emscripten_asm_const_int_sync_on_main_thread: function() {}, + emscripten_asm_const_double_sync_on_main_thread: function() {}, + emscripten_asm_const_async_on_main_thread: function() {}, // ======== compiled code from system/lib/compiler-rt , see readme therein __muldsi3__asm: true, diff --git a/src/settings.js b/src/settings.js index d09f24b31f9fd..ac691f2b3b9c2 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1733,6 +1733,13 @@ var DISABLE_EXCEPTION_THROWING = 0; // across function boundaries. 
var USE_OFFSET_CONVERTER = 0; +// If set to 1, the JS compiler is run before wasm-ld so that the linker can +// report undefined symbols within the binary. Without this option the linker +// doesn't know which symbols might be defined in JS and so reporting of undefined +// symbols is delayed until the JS compiler is run. +// [link] +var LLD_REPORT_UNDEFINED = 0; + //=========================================== // Internal, used for testing only, from here //=========================================== diff --git a/tests/test_other.py b/tests/test_other.py index ec8b47a230851..7f52181ddd9c6 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -10301,3 +10301,10 @@ def test_signature_mismatch(self): self.assertContained('function signature mismatch: foo', stderr) self.expect_fail([PYTHON, EMCC, '-Wl,--fatal-warnings', 'a.c', 'b.c']) self.expect_fail([PYTHON, EMCC, '-s', 'STRICT', 'a.c', 'b.c']) + + @no_fastcomp('lld only') + def test_lld_report_undefined(self): + create_test_file('main.c', 'void foo(); int main() { foo(); return 0; }') + stderr = self.expect_fail([PYTHON, EMCC, '-s', 'LLD_REPORT_UNDEFINED', 'main.c']) + self.assertContained('wasm-ld: error:', stderr) + self.assertContained('main_0.o: undefined symbol: foo', stderr) diff --git a/tools/shared.py b/tools/shared.py index 45fe677accdf3..12d315bb03c9d 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1447,6 +1447,10 @@ def demangle_c_symbol_name(name): return name[1:] if name.startswith('_') else '$' + name +def is_c_symbol(name): + return name.startswith('_') + + def treat_as_user_function(name): if name.startswith('dynCall_'): return False @@ -1881,7 +1885,7 @@ def link_llvm(linker_inputs, target): return target @staticmethod - def link_lld(args, target, opts=[], lto_level=0): + def link_lld(args, target, opts=[], lto_level=0, all_external_symbols=None): if not os.path.exists(WASM_LD): exit_with_error('linker binary not found in LLVM directory: %s', WASM_LD) # runs lld to link things. 
@@ -1903,10 +1907,17 @@ def link_lld(args, target, opts=[], lto_level=0): WASM_LD, '-o', target, - '--allow-undefined', '--lto-O%d' % lto_level, ] + args + if all_external_symbols: + undefs = configuration.get_temp_files().get('.undefined').name + with open(undefs, 'w') as f: + f.write('\n'.join(all_external_symbols)) + cmd.append('--allow-undefined-file=%s' % undefs) + else: + cmd.append('--allow-undefined') + # wasi does not import the memory (but for JS it is efficient to do so, # as it allows us to set up memory, preload files, etc. even before the # wasm module arrives) @@ -1941,8 +1952,14 @@ def link_lld(args, target, opts=[], lto_level=0): cmd += ['--export', '__data_end'] - for export in Settings.EXPORTED_FUNCTIONS: - cmd += ['--export', export[1:]] # Strip the leading underscore + c_exports = [e for e in Settings.EXPORTED_FUNCTIONS if is_c_symbol(e)] + # Strip the leading underscores + c_exports = [demangle_c_symbol_name(e) for e in c_exports] + if all_external_symbols: + # Filter out external/JS symbols + c_exports = [e for e in c_exports if e not in all_external_symbols] + for export in c_exports: + cmd += ['--export', export] if Settings.RELOCATABLE: if Settings.SIDE_MODULE: @@ -2314,7 +2331,7 @@ def get_safe_internalize(): if len(internalize_list) > 8192: logger.debug('using response file for EXPORTED_FUNCTIONS in internalize') finalized_exports = '\n'.join([exp[1:] for exp in exps]) - internalize_list_file = configuration.get_temp_files().get(suffix='.response').name + internalize_list_file = configuration.get_temp_files().get('.response').name with open(internalize_list_file, 'w') as f: f.write(finalized_exports) internalize_public_api += 'file=' + internalize_list_file