From c8117e51bb23a18127f6a5769280b72749647ccb Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Wed, 1 Nov 2023 20:52:18 -0700 Subject: [PATCH] Refactor line_endings.py and apply --output-eol to worker.js. NFC Most of the code in this file is used only for testing and is not part of emscripten proper, so move that code into test/line_endings.py Move the remaining utility functions to tools/utils.py. Update `write_file` utility so it can write a file with the correct line endings. This simplifies the callers who want to write a file with specific line endings. --- ChangeLog.md | 3 ++ docs/emcc.txt | 10 ++--- emcc.py | 6 +-- site/source/docs/tools_reference/emcc.rst | 4 +- test/common.py | 3 +- {tools => test}/line_endings.py | 30 +++---------- test/test_other.py | 52 +++++++++++------------ tools/link.py | 14 +++--- tools/minimal_runtime_shell.py | 4 +- tools/utils.py | 18 ++++++-- 10 files changed, 68 insertions(+), 76 deletions(-) rename {tools => test}/line_endings.py (85%) diff --git a/ChangeLog.md b/ChangeLog.md index 3680ae7a1b512..381a9190f56c8 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,9 @@ See docs/process.md for more on how version tagging works. 4.0.4 (in development) ---------------------- +- The `--output_eol` command line flag was renamed `--output-eol` for + consistency with other flags. The old name continues to work as an alias. + (#20735) 4.0.3 - 02/07/25 ---------------- diff --git a/docs/emcc.txt b/docs/emcc.txt index 10b216fde55f2..8d05274ab35d4 100644 --- a/docs/emcc.txt +++ b/docs/emcc.txt @@ -574,12 +574,12 @@ Options that are modified or new in *emcc* are listed below: [compile] Tells *emcc* to emit an object file which can then be linked with other object files to produce an executable. -"--output_eol windows|linux" +"--output-eol windows|linux" [link] Specifies the line ending to generate for the text files - that are outputted. 
If "--output_eol windows" is passed, the final - output files will have Windows rn line endings in them. With "-- - output_eol linux", the final generated files will be written with - Unix n line endings. + that are outputted. If "--output-eol windows" is passed, the final + output files will have Windows "\r\n" line endings in them. With " + --output-eol linux", the final generated files will be written with + Unix "\n" line endings. "--cflags" [other] Prints out the flags "emcc" would pass to "clang" to diff --git a/emcc.py b/emcc.py index f02cb7cfa849e..623ce79028abd 100644 --- a/emcc.py +++ b/emcc.py @@ -82,7 +82,7 @@ '--bind', '--closure', '--cpuprofiler', '--embed-file', '--emit-symbol-map', '--emrun', '--exclude-file', '--extern-post-js', '--extern-pre-js', '--ignore-dynamic-linking', '--js-library', - '--js-transform', '--oformat', '--output_eol', + '--js-transform', '--oformat', '--output_eol', '--output-eol', '--post-js', '--pre-js', '--preload-file', '--profiling-funcs', '--proxy-to-worker', '--shell-file', '--source-map-base', '--threadprofiler', '--use-preload-plugins' @@ -1348,14 +1348,14 @@ def consume_arg_file(): exit_with_error('--default-obj-ext is no longer supported by emcc') elif arg.startswith('-fsanitize=cfi'): exit_with_error('emscripten does not currently support -fsanitize=cfi') - elif check_arg('--output_eol'): + elif check_arg('--output_eol') or check_arg('--output-eol'): style = consume_arg() if style.lower() == 'windows': options.output_eol = '\r\n' elif style.lower() == 'linux': options.output_eol = '\n' else: - exit_with_error(f'Invalid value "{style}" to --output_eol!') + exit_with_error(f'invalid value for --output-eol: `{style}`') # Record PTHREADS setting because it controls whether --shared-memory is passed to lld elif arg == '-pthread': settings.PTHREADS = 1 diff --git a/site/source/docs/tools_reference/emcc.rst b/site/source/docs/tools_reference/emcc.rst index 8f107b8f73255..bf8083186b9f3 100644 --- 
a/site/source/docs/tools_reference/emcc.rst +++ b/site/source/docs/tools_reference/emcc.rst @@ -566,9 +566,9 @@ Options that are modified or new in *emcc* are listed below: [compile] Tells *emcc* to emit an object file which can then be linked with other object files to produce an executable. -``--output_eol windows|linux`` +``--output-eol windows|linux`` [link] - Specifies the line ending to generate for the text files that are outputted. If "--output_eol windows" is passed, the final output files will have Windows \r\n line endings in them. With "--output_eol linux", the final generated files will be written with Unix \n line endings. + Specifies the line ending to generate for the text files that are outputted. If "--output-eol windows" is passed, the final output files will have Windows ``\r\n`` line endings in them. With "--output-eol linux", the final generated files will be written with Unix ``\n`` line endings. ``--cflags`` [other] diff --git a/test/common.py b/test/common.py index 4c7387e28ec82..ff3981345dbdc 100644 --- a/test/common.py +++ b/test/common.py @@ -33,11 +33,12 @@ import clang_native import jsrun +import line_endings from tools.shared import EMCC, EMXX, DEBUG, EMCONFIGURE, EMCMAKE from tools.shared import get_canonical_temp_dir, path_from_root from tools.utils import MACOS, WINDOWS, read_file, read_binary, write_binary, exit_with_error from tools.settings import COMPILE_TIME_SETTINGS -from tools import shared, feature_matrix, line_endings, building, config, utils +from tools import shared, feature_matrix, building, config, utils logger = logging.getLogger('common') diff --git a/tools/line_endings.py b/test/line_endings.py similarity index 85% rename from tools/line_endings.py rename to test/line_endings.py index 9d883d8a52264..1198f2c25fc4a 100755 --- a/tools/line_endings.py +++ b/test/line_endings.py @@ -7,22 +7,7 @@ import os import sys - -def convert_line_endings(text, from_eol, to_eol): - if from_eol == to_eol: - return text - return 
text.replace(from_eol, to_eol) - - -def convert_line_endings_in_file(filename, from_eol, to_eol): - if from_eol == to_eol: - return # No conversion needed - - with open(filename, 'rb') as f: - text = f.read() - text = convert_line_endings(text, from_eol.encode(), to_eol.encode()) - with open(filename, 'wb') as f: - f.write(text) +from tools import utils def check_line_endings(filename, expect_only=None, print_errors=True, print_info=False): @@ -38,8 +23,7 @@ def check_line_endings(filename, expect_only=None, print_errors=True, print_info print('File not found: ' + filename, file=sys.stderr) return 1 - with open(filename, 'rb') as f: - data = f.read() + data = utils.read_binary(filename) index = data.find(b"\r\r\n") if index != -1: @@ -75,15 +59,15 @@ def check_line_endings(filename, expect_only=None, print_errors=True, print_info old_macos_line_ending_example = data[index - 50:index + 50].replace(b'\r', b'\\r').replace(b'\n', b'\\n') if print_errors: print('File \'' + filename + '\' contains OLD macOS line endings "\\r"', file=sys.stderr) - print("Content around an OLD macOS line ending location: '" + old_macos_line_ending_example + "'", file=sys.stderr) + print("Content around an OLD macOS line ending location: '" + old_macos_line_ending_example.decode('utf-8') + "'", file=sys.stderr) # We don't want to use the old macOS (9.x) line endings anywhere. return 1 if has_dos_line_endings and has_unix_line_endings: if print_errors: print('File \'' + filename + '\' contains both DOS "\\r\\n" and UNIX "\\n" line endings! 
(' + str(dos_line_ending_count) + ' DOS line endings, ' + str(unix_line_ending_count) + ' UNIX line endings)', file=sys.stderr) - print("Content around a DOS line ending location: '" + dos_line_ending_example + "'", file=sys.stderr) - print("Content around an UNIX line ending location: '" + unix_line_ending_example + "'", file=sys.stderr) + print("Content around a DOS line ending location: '" + dos_line_ending_example.decode('utf-8') + "'", file=sys.stderr) + print("Content around an UNIX line ending location: '" + unix_line_ending_example.decode('utf-8') + "'", file=sys.stderr) # Mixed line endings return 1 @@ -96,13 +80,13 @@ def check_line_endings(filename, expect_only=None, print_errors=True, print_info if expect_only == '\n' and has_dos_line_endings: if print_errors: print('File \'' + filename + '\' contains DOS "\\r\\n" line endings! (' + str(dos_line_ending_count) + ' DOS line endings), but expected only UNIX line endings!', file=sys.stderr) - print("Content around a DOS line ending location: '" + dos_line_ending_example + "'", file=sys.stderr) + print("Content around a DOS line ending location: '" + dos_line_ending_example.decode('utf-8') + "'", file=sys.stderr) return 1 # DOS line endings, but expected UNIX if expect_only == '\r\n' and has_unix_line_endings: if print_errors: print('File \'' + filename + '\' contains UNIX "\\n" line endings! 
(' + str(unix_line_ending_count) + ' UNIX line endings), but expected only DOS line endings!', file=sys.stderr) - print("Content around a UNIX line ending location: '" + unix_line_ending_example + "'", file=sys.stderr) + print("Content around a UNIX line ending location: '" + unix_line_ending_example.decode('utf-8') + "'", file=sys.stderr) return 1 # UNIX line endings, but expected DOS return 0 diff --git a/test/test_other.py b/test/test_other.py index cd1d2b33eb50f..460799ad06d85 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -46,7 +46,7 @@ import common import jsrun import clang_native -from tools import line_endings +import line_endings from tools import webassembly from tools.settings import settings @@ -8703,28 +8703,24 @@ def test_disable_inlining(self): self.assertContained('foo', output) @crossplatform - def test_output_eol(self): - for params in ([], ['--proxy-to-worker'], ['--proxy-to-worker', '-sWASM=0']): - for output_suffix in ('html', 'js'): - for eol in ('windows', 'linux'): - files = ['a.js'] - if output_suffix == 'html': - files += ['a.html'] - cmd = [EMCC, test_file('hello_world.c'), '-o', 'a.' 
+ output_suffix, '--output_eol', eol] + params - self.run_process(cmd) - for f in files: - print(str(cmd) + ' ' + str(params) + ' ' + eol + ' ' + f) - self.assertExists(f) - if eol == 'linux': - expected_ending = '\n' - else: - expected_ending = '\r\n' - - ret = line_endings.check_line_endings(f, expect_only=expected_ending) - self.assertEqual(ret, 0) + @parameterized({ + '': ([],), + 'proxy_to_worker': (['--proxy-to-worker'],), + 'proxy_to_worker_wasm2js': (['--proxy-to-worker', '-sWASM=0'],), + }) + def test_output_eol(self, params): + for eol in ('windows', 'linux'): + self.clear() + print('checking eol: ', eol) + self.run_process([EMCC, test_file('hello_world.c'), '-o', 'a.html', '--output-eol', eol] + params) + for f in ['a.html', 'a.js']: + self.assertExists(f) + if eol == 'linux': + expected_ending = '\n' + else: + expected_ending = '\r\n' - for f in files: - delete_file(f) + self.assertEqual(line_endings.check_line_endings(f, expect_only=expected_ending), 0, f'expected only {eol} line endings in {f}') def test_binaryen_warn_mem(self): # if user changes INITIAL_MEMORY at runtime, the wasm module may not accept the memory import if @@ -8906,7 +8902,7 @@ def test_unoptimized_code_size(self): # under control to a certain extent. This test allows us to track major # changes to the size of the unoptimized and unminified code size. # Run with `--rebase` when this test fails.
- self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output_eol=linux']) + self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output-eol=linux']) self.check_expected_size_in_file('wasm', test_file('other/test_unoptimized_code_size.wasm.size'), os.path.getsize('hello_world.wasm')) @@ -8914,7 +8910,7 @@ def test_unoptimized_code_size(self): test_file('other/test_unoptimized_code_size.js.size'), os.path.getsize('hello_world.js')) - self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output_eol=linux', '-sASSERTIONS=0'], output_basename='no_asserts') + self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output-eol=linux', '-sASSERTIONS=0'], output_basename='no_asserts') self.check_expected_size_in_file('wasm', test_file('other/test_unoptimized_code_size_no_asserts.wasm.size'), os.path.getsize('no_asserts.wasm')) @@ -8922,7 +8918,7 @@ def test_unoptimized_code_size(self): test_file('other/test_unoptimized_code_size_no_asserts.js.size'), os.path.getsize('no_asserts.js')) - self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output_eol=linux', '-sSTRICT'], output_basename='strict') + self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output-eol=linux', '-sSTRICT'], output_basename='strict') self.check_expected_size_in_file('wasm', test_file('other/test_unoptimized_code_size_strict.wasm.size'), os.path.getsize('strict.wasm')) @@ -8939,7 +8935,7 @@ def run_codesize_test(self, filename, args=[], expected_exists=[], expected_not_ expected_basename = test_file('other/codesize', self.id().split('.')[-1]) # Run once without closure and parse output to find wasmImports - build_cmd = [compiler_for(filename), filename, '--output_eol=linux', '--emit-minification-map=minify.map'] + args + self.get_emcc_args() + build_cmd = [compiler_for(filename), filename, '--output-eol=linux', '--emit-minification-map=minify.map'] + args + self.get_emcc_args() self.run_process(build_cmd + ['-g2']) # find the imports we send from JS # TODO(sbc): 
Find a way to do that that doesn't depend on internal details of @@ -11411,7 +11407,7 @@ def test_minimal_runtime_code_size(self, test_name, js, compare_js_output=False) '-sGL_ENABLE_GET_PROC_ADDRESS=0', '-sNO_FILESYSTEM', '-sSTRICT', - '--output_eol', 'linux', + '--output-eol', 'linux', '-Oz', '--closure=1', '-DNDEBUG', @@ -11962,7 +11958,7 @@ def test_main_reads_params(self): def test_INCOMING_MODULE_JS_API(self): def test(args): - self.run_process([EMCC, test_file('hello_world.c'), '-O3', '--closure=1', '-sENVIRONMENT=node,shell', '--output_eol=linux'] + args) + self.run_process([EMCC, test_file('hello_world.c'), '-O3', '--closure=1', '-sENVIRONMENT=node,shell', '--output-eol=linux'] + args) for engine in config.JS_ENGINES: self.assertContained('hello, world!', self.run_js('a.out.js', engine=engine)) return os.path.getsize('a.out.js') diff --git a/tools/link.py b/tools/link.py index ca05ed8e76bdf..fde4c509ab299 100644 --- a/tools/link.py +++ b/tools/link.py @@ -41,8 +41,6 @@ from .settings import settings, default_setting, user_settings, JS_ONLY_SETTINGS, DEPRECATED_SETTINGS from .minimal_runtime_shell import generate_minimal_runtime_html -import tools.line_endings - logger = logging.getLogger('link') DEFAULT_SHELL_HTML = utils.path_from_root('src/shell.html') @@ -2090,7 +2088,7 @@ def create_worker_file(input_file, target_dir, output_file, options): contents = building.acorn_optimizer(output_file, ['--minify-whitespace'], return_output=True, worker_js=True) write_file(output_file, contents) - tools.line_endings.convert_line_endings_in_file(output_file, os.linesep, options.output_eol) + utils.convert_line_endings_in_file(output_file, options.output_eol) @ToolchainProfiler.profile_block('final emitting') @@ -2160,14 +2158,14 @@ def phase_final_emitting(options, target, js_target, wasm_target): generate_html(target, options, js_target, target_basename, wasm_target) elif settings.PROXY_TO_WORKER: - generate_worker_js(target, js_target, target_basename) + 
generate_worker_js(target, options, js_target, target_basename) if settings.SPLIT_MODULE: diagnostics.warning('experimental', 'the SPLIT_MODULE setting is experimental and subject to change') do_split_module(wasm_target, options) if not settings.SINGLE_FILE: - tools.line_endings.convert_line_endings_in_file(js_target, os.linesep, options.output_eol) + utils.convert_line_endings_in_file(js_target, options.output_eol) if options.executable: make_js_executable(js_target) @@ -2672,10 +2670,10 @@ def generate_html(target, options, js_target, target_basename, wasm_target): if settings.MINIFY_HTML and (settings.OPT_LEVEL >= 1 or settings.SHRINK_LEVEL >= 1): minify_html(target) - tools.line_endings.convert_line_endings_in_file(target, os.linesep, options.output_eol) + utils.convert_line_endings_in_file(target, options.output_eol) -def generate_worker_js(target, js_target, target_basename): +def generate_worker_js(target, options, js_target, target_basename): if settings.SINGLE_FILE: # compiler output is embedded as base64 data URL proxy_worker_filename = get_subresource_location_js(js_target) @@ -2686,7 +2684,7 @@ def generate_worker_js(target, js_target, target_basename): proxy_worker_filename = (settings.PROXY_TO_WORKER_FILENAME or worker_target_basename) + '.js' target_contents = worker_js_script(proxy_worker_filename) - write_file(target, target_contents) + utils.write_file(target, target_contents, options.output_eol) def worker_js_script(proxy_worker_filename): diff --git a/tools/minimal_runtime_shell.py b/tools/minimal_runtime_shell.py index f53fa345f27c8..8963ea8d30cd7 100644 --- a/tools/minimal_runtime_shell.py +++ b/tools/minimal_runtime_shell.py @@ -8,7 +8,6 @@ sys.path.insert(0, __rootdir__) from . import shared -from . import line_endings from . import utils from . 
import feature_matrix from .settings import settings @@ -211,5 +210,4 @@ def generate_minimal_runtime_html(target, options, js_target, target_basename): else: js_contents = '' shell = shell.replace('{{{ JS_CONTENTS_IN_SINGLE_FILE_BUILD }}}', js_contents) - shell = line_endings.convert_line_endings(shell, '\n', options.output_eol) - utils.write_file(target, shell) + utils.write_file(target, shell, options.output_eol) diff --git a/tools/utils.py b/tools/utils.py index 8bbd937d9e55e..2a4ee1a1d76b4 100644 --- a/tools/utils.py +++ b/tools/utils.py @@ -47,6 +47,14 @@ def removeprefix(string, prefix): return string +def convert_line_endings_in_file(filename, to_eol): + if to_eol == os.linesep: + return # No conversion needed + + text = read_file(filename) + write_file(filename, text, line_endings=to_eol) + + def read_file(file_path): """Read from a file opened in text mode""" with open(file_path, encoding='utf-8') as fh: @@ -59,10 +67,14 @@ def read_binary(file_path): return fh.read() -def write_file(file_path, text): +def write_file(file_path, text, line_endings=None): """Write to a file opened in text mode""" - with open(file_path, 'w', encoding='utf-8') as fh: - fh.write(text) + if line_endings and line_endings != os.linesep: + text = text.replace('\n', line_endings) + write_binary(file_path, text.encode('utf-8')) + else: + with open(file_path, 'w', encoding='utf-8') as fh: + fh.write(text) def write_binary(file_path, contents):