From c8117e51bb23a18127f6a5769280b72749647ccb Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 1 Nov 2023 20:52:18 -0700
Subject: [PATCH] Refactor line_endings.py and apply --output-eol to worker.js.
 NFC

Most of the code in this file is used only for testing and is not part
of emscripten proper, so move that code into test/line_endings.py

Move the remaining utility functions to tools/utils.py.

Update `write_file` utility so it can write a file with the correct
line endings. This simplifies the callers who want to write a file
with specific line endings.
---
 ChangeLog.md                              |  3 ++
 docs/emcc.txt                             | 10 ++---
 emcc.py                                   |  6 +--
 site/source/docs/tools_reference/emcc.rst |  4 +-
 test/common.py                            |  3 +-
 {tools => test}/line_endings.py           | 30 +++----------
 test/test_other.py                        | 52 +++++++++++------------
 tools/link.py                             | 14 +++---
 tools/minimal_runtime_shell.py            |  4 +-
 tools/utils.py                            | 18 ++++++--
 10 files changed, 68 insertions(+), 76 deletions(-)
 rename {tools => test}/line_endings.py (85%)

diff --git a/ChangeLog.md b/ChangeLog.md
index 3680ae7a1b512..381a9190f56c8 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -20,6 +20,9 @@ See docs/process.md for more on how version tagging works.
 
 4.0.4 (in development)
 ----------------------
+- The `--output_eol` command line flag was renamed `--output-eol` for
+  consistency with other flags. The old name continues to work as an alias.
+  (#20735)
 
 4.0.3 - 02/07/25
 ----------------
diff --git a/docs/emcc.txt b/docs/emcc.txt
index 10b216fde55f2..8d05274ab35d4 100644
--- a/docs/emcc.txt
+++ b/docs/emcc.txt
@@ -574,12 +574,12 @@ Options that are modified or new in *emcc* are listed below:
    [compile] Tells *emcc* to emit an object file which can then be
    linked with other object files to produce an executable.
 
-"--output_eol windows|linux"
+"--output-eol windows|linux"
    [link] Specifies the line ending to generate for the text files
-   that are outputted. If "--output_eol windows" is passed, the final
-   output files will have Windows rn line endings in them. With "--
-   output_eol linux", the final generated files will be written with
-   Unix n line endings.
+   that are outputted. If "--output-eol windows" is passed, the final
+   output files will have Windows "\r\n" line endings in them. With "
+   --output-eol linux", the final generated files will be written with
+   Unix "\n" line endings.
 
 "--cflags"
    [other] Prints out the flags "emcc" would pass to "clang" to
diff --git a/emcc.py b/emcc.py
index f02cb7cfa849e..623ce79028abd 100644
--- a/emcc.py
+++ b/emcc.py
@@ -82,7 +82,7 @@
     '--bind', '--closure', '--cpuprofiler', '--embed-file',
     '--emit-symbol-map', '--emrun', '--exclude-file', '--extern-post-js',
     '--extern-pre-js', '--ignore-dynamic-linking', '--js-library',
-    '--js-transform', '--oformat', '--output_eol',
+    '--js-transform', '--oformat', '--output_eol', '--output-eol',
     '--post-js', '--pre-js', '--preload-file', '--profiling-funcs',
     '--proxy-to-worker', '--shell-file', '--source-map-base',
     '--threadprofiler', '--use-preload-plugins'
@@ -1348,14 +1348,14 @@ def consume_arg_file():
       exit_with_error('--default-obj-ext is no longer supported by emcc')
     elif arg.startswith('-fsanitize=cfi'):
       exit_with_error('emscripten does not currently support -fsanitize=cfi')
-    elif check_arg('--output_eol'):
+    elif check_arg('--output_eol') or check_arg('--output-eol'):
       style = consume_arg()
       if style.lower() == 'windows':
         options.output_eol = '\r\n'
       elif style.lower() == 'linux':
         options.output_eol = '\n'
       else:
-        exit_with_error(f'Invalid value "{style}" to --output_eol!')
+        exit_with_error(f'invalid value for --output-eol: `{style}`')
     # Record PTHREADS setting because it controls whether --shared-memory is passed to lld
     elif arg == '-pthread':
       settings.PTHREADS = 1
diff --git a/site/source/docs/tools_reference/emcc.rst b/site/source/docs/tools_reference/emcc.rst
index 8f107b8f73255..bf8083186b9f3 100644
--- a/site/source/docs/tools_reference/emcc.rst
+++ b/site/source/docs/tools_reference/emcc.rst
@@ -566,9 +566,9 @@ Options that are modified or new in *emcc* are listed below:
   [compile]
   Tells *emcc* to emit an object file which can then be linked with other object files to produce an executable.
 
-``--output_eol windows|linux``
+``--output-eol windows|linux``
   [link]
-  Specifies the line ending to generate for the text files that are outputted. If "--output_eol windows" is passed, the final output files will have Windows \r\n line endings in them. With "--output_eol linux", the final generated files will be written with Unix \n line endings.
+  Specifies the line ending to generate for the text files that are outputted. If "--output-eol windows" is passed, the final output files will have Windows ``\r\n`` line endings in them. With "--output-eol linux", the final generated files will be written with Unix ``\n`` line endings.
 
 ``--cflags``
   [other]
diff --git a/test/common.py b/test/common.py
index 4c7387e28ec82..ff3981345dbdc 100644
--- a/test/common.py
+++ b/test/common.py
@@ -33,11 +33,12 @@
 
 import clang_native
 import jsrun
+import line_endings
 from tools.shared import EMCC, EMXX, DEBUG, EMCONFIGURE, EMCMAKE
 from tools.shared import get_canonical_temp_dir, path_from_root
 from tools.utils import MACOS, WINDOWS, read_file, read_binary, write_binary, exit_with_error
 from tools.settings import COMPILE_TIME_SETTINGS
-from tools import shared, feature_matrix, line_endings, building, config, utils
+from tools import shared, feature_matrix, building, config, utils
 
 logger = logging.getLogger('common')
 
diff --git a/tools/line_endings.py b/test/line_endings.py
similarity index 85%
rename from tools/line_endings.py
rename to test/line_endings.py
index 9d883d8a52264..1198f2c25fc4a 100755
--- a/tools/line_endings.py
+++ b/test/line_endings.py
@@ -7,22 +7,7 @@
 import os
 import sys
 
-
-def convert_line_endings(text, from_eol, to_eol):
-  if from_eol == to_eol:
-    return text
-  return text.replace(from_eol, to_eol)
-
-
-def convert_line_endings_in_file(filename, from_eol, to_eol):
-  if from_eol == to_eol:
-    return # No conversion needed
-
-  with open(filename, 'rb') as f:
-    text = f.read()
-  text = convert_line_endings(text, from_eol.encode(), to_eol.encode())
-  with open(filename, 'wb') as f:
-    f.write(text)
+from tools import utils
 
 
 def check_line_endings(filename, expect_only=None, print_errors=True, print_info=False):
@@ -38,8 +23,7 @@ def check_line_endings(filename, expect_only=None, print_errors=True, print_info
       print('File not found: ' + filename, file=sys.stderr)
     return 1
 
-  with open(filename, 'rb') as f:
-    data = f.read()
+  data = utils.read_binary(filename)
 
   index = data.find(b"\r\r\n")
   if index != -1:
@@ -75,15 +59,15 @@ def check_line_endings(filename, expect_only=None, print_errors=True, print_info
     old_macos_line_ending_example = data[index - 50:index + 50].replace(b'\r', b'\\r').replace(b'\n', b'\\n')
     if print_errors:
       print('File \'' + filename + '\' contains OLD macOS line endings "\\r"', file=sys.stderr)
-      print("Content around an OLD macOS line ending location: '" + old_macos_line_ending_example + "'", file=sys.stderr)
+      print("Content around an OLD macOS line ending location: '" + old_macos_line_ending_example.decode('utf-8') + "'", file=sys.stderr)
     # We don't want to use the old macOS (9.x) line endings anywhere.
     return 1
 
   if has_dos_line_endings and has_unix_line_endings:
     if print_errors:
       print('File \'' + filename + '\' contains both DOS "\\r\\n" and UNIX "\\n" line endings! (' + str(dos_line_ending_count) + ' DOS line endings, ' + str(unix_line_ending_count) + ' UNIX line endings)', file=sys.stderr)
-      print("Content around a DOS line ending location: '" + dos_line_ending_example + "'", file=sys.stderr)
-      print("Content around an UNIX line ending location: '" + unix_line_ending_example + "'", file=sys.stderr)
+      print("Content around a DOS line ending location: '" + dos_line_ending_example.decode('utf-8') + "'", file=sys.stderr)
+      print("Content around an UNIX line ending location: '" + unix_line_ending_example.decode('utf-8') + "'", file=sys.stderr)
     # Mixed line endings
     return 1
 
@@ -96,13 +80,13 @@ def check_line_endings(filename, expect_only=None, print_errors=True, print_info
   if expect_only == '\n' and has_dos_line_endings:
     if print_errors:
       print('File \'' + filename + '\' contains DOS "\\r\\n" line endings! (' + str(dos_line_ending_count) + ' DOS line endings), but expected only UNIX line endings!', file=sys.stderr)
-      print("Content around a DOS line ending location: '" + dos_line_ending_example + "'", file=sys.stderr)
+      print("Content around a DOS line ending location: '" + dos_line_ending_example.decode('utf-8') + "'", file=sys.stderr)
     return 1 # DOS line endings, but expected UNIX
 
   if expect_only == '\r\n' and has_unix_line_endings:
     if print_errors:
       print('File \'' + filename + '\' contains UNIX "\\n" line endings! (' + str(unix_line_ending_count) + ' UNIX line endings), but expected only DOS line endings!', file=sys.stderr)
-      print("Content around a UNIX line ending location: '" + unix_line_ending_example + "'", file=sys.stderr)
+      print("Content around a UNIX line ending location: '" + unix_line_ending_example.decode('utf-8') + "'", file=sys.stderr)
     return 1 # UNIX line endings, but expected DOS
 
   return 0
diff --git a/test/test_other.py b/test/test_other.py
index cd1d2b33eb50f..460799ad06d85 100644
--- a/test/test_other.py
+++ b/test/test_other.py
@@ -46,7 +46,7 @@
 import common
 import jsrun
 import clang_native
-from tools import line_endings
+import line_endings
 from tools import webassembly
 from tools.settings import settings
 
@@ -8703,28 +8703,24 @@ def test_disable_inlining(self):
     self.assertContained('foo', output)
 
   @crossplatform
-  def test_output_eol(self):
-    for params in ([], ['--proxy-to-worker'], ['--proxy-to-worker', '-sWASM=0']):
-      for output_suffix in ('html', 'js'):
-        for eol in ('windows', 'linux'):
-          files = ['a.js']
-          if output_suffix == 'html':
-            files += ['a.html']
-          cmd = [EMCC, test_file('hello_world.c'), '-o', 'a.' + output_suffix, '--output_eol', eol] + params
-          self.run_process(cmd)
-          for f in files:
-            print(str(cmd) + ' ' + str(params) + ' ' + eol + ' ' + f)
-            self.assertExists(f)
-            if eol == 'linux':
-              expected_ending = '\n'
-            else:
-              expected_ending = '\r\n'
-
-            ret = line_endings.check_line_endings(f, expect_only=expected_ending)
-            self.assertEqual(ret, 0)
+  @parameterized({
+    '': ([],),
+    'proxy_to_worker': (['--proxy-to-worker'],),
+    'proxy_to_worker_wasm2js': (['--proxy-to-worker', '-sWASM=0'],),
+  })
+  def test_output_eol(self, params):
+    for eol in ('windows', 'linux'):
+      self.clear()
+      print('checking eol: ', eol)
+      self.run_process([EMCC, test_file('hello_world.c'), '-o', 'a.html', '--output-eol', eol] + params)
+      for f in ['a.html', 'a.js']:
+        self.assertExists(f)
+        if eol == 'linux':
+          expected_ending = '\n'
+        else:
+          expected_ending = '\r\n'
 
-          for f in files:
-            delete_file(f)
+        self.assertEqual(line_endings.check_line_endings(f, expect_only=expected_ending), 0, f'expected only {eol} line endings in {f}')
 
   def test_binaryen_warn_mem(self):
     # if user changes INITIAL_MEMORY at runtime, the wasm module may not accept the memory import if
@@ -8906,7 +8902,7 @@ def test_unoptimized_code_size(self):
     # under control to a certain extent.  This test allows us to track major
     # changes to the size of the unoptimized and unminified code size.
     # Run with `--rebase` when this test fails.
-    self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output_eol=linux'])
+    self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output-eol=linux'])
     self.check_expected_size_in_file('wasm',
                                      test_file('other/test_unoptimized_code_size.wasm.size'),
                                      os.path.getsize('hello_world.wasm'))
@@ -8914,7 +8910,7 @@ def test_unoptimized_code_size(self):
                                      test_file('other/test_unoptimized_code_size.js.size'),
                                      os.path.getsize('hello_world.js'))
 
-    self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output_eol=linux', '-sASSERTIONS=0'], output_basename='no_asserts')
+    self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output-eol=linux', '-sASSERTIONS=0'], output_basename='no_asserts')
     self.check_expected_size_in_file('wasm',
                                      test_file('other/test_unoptimized_code_size_no_asserts.wasm.size'),
                                      os.path.getsize('no_asserts.wasm'))
@@ -8922,7 +8918,7 @@ def test_unoptimized_code_size(self):
                                      test_file('other/test_unoptimized_code_size_no_asserts.js.size'),
                                      os.path.getsize('no_asserts.js'))
 
-    self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output_eol=linux', '-sSTRICT'], output_basename='strict')
+    self.build(test_file('hello_world.c'), emcc_args=['-O0', '--output-eol=linux', '-sSTRICT'], output_basename='strict')
     self.check_expected_size_in_file('wasm',
                                      test_file('other/test_unoptimized_code_size_strict.wasm.size'),
                                      os.path.getsize('strict.wasm'))
@@ -8939,7 +8935,7 @@ def run_codesize_test(self, filename, args=[], expected_exists=[], expected_not_
     expected_basename = test_file('other/codesize', self.id().split('.')[-1])
 
     # Run once without closure and parse output to find wasmImports
-    build_cmd = [compiler_for(filename), filename, '--output_eol=linux', '--emit-minification-map=minify.map'] + args + self.get_emcc_args()
+    build_cmd = [compiler_for(filename), filename, '--output-eol=linux', '--emit-minification-map=minify.map'] + args + self.get_emcc_args()
     self.run_process(build_cmd + ['-g2'])
     # find the imports we send from JS
     # TODO(sbc): Find a way to do that that doesn't depend on internal details of
@@ -11411,7 +11407,7 @@ def test_minimal_runtime_code_size(self, test_name, js, compare_js_output=False)
                                '-sGL_ENABLE_GET_PROC_ADDRESS=0',
                                '-sNO_FILESYSTEM',
                                '-sSTRICT',
-                               '--output_eol', 'linux',
+                               '--output-eol', 'linux',
                                '-Oz',
                                '--closure=1',
                                '-DNDEBUG',
@@ -11962,7 +11958,7 @@ def test_main_reads_params(self):
 
   def test_INCOMING_MODULE_JS_API(self):
     def test(args):
-      self.run_process([EMCC, test_file('hello_world.c'), '-O3', '--closure=1', '-sENVIRONMENT=node,shell', '--output_eol=linux'] + args)
+      self.run_process([EMCC, test_file('hello_world.c'), '-O3', '--closure=1', '-sENVIRONMENT=node,shell', '--output-eol=linux'] + args)
       for engine in config.JS_ENGINES:
         self.assertContained('hello, world!', self.run_js('a.out.js', engine=engine))
       return os.path.getsize('a.out.js')
diff --git a/tools/link.py b/tools/link.py
index ca05ed8e76bdf..fde4c509ab299 100644
--- a/tools/link.py
+++ b/tools/link.py
@@ -41,8 +41,6 @@
 from .settings import settings, default_setting, user_settings, JS_ONLY_SETTINGS, DEPRECATED_SETTINGS
 from .minimal_runtime_shell import generate_minimal_runtime_html
 
-import tools.line_endings
-
 logger = logging.getLogger('link')
 
 DEFAULT_SHELL_HTML = utils.path_from_root('src/shell.html')
@@ -2090,7 +2088,7 @@ def create_worker_file(input_file, target_dir, output_file, options):
     contents = building.acorn_optimizer(output_file, ['--minify-whitespace'], return_output=True, worker_js=True)
     write_file(output_file, contents)
 
-  tools.line_endings.convert_line_endings_in_file(output_file, os.linesep, options.output_eol)
+  utils.convert_line_endings_in_file(output_file, options.output_eol)
 
 
 @ToolchainProfiler.profile_block('final emitting')
@@ -2160,14 +2158,14 @@ def phase_final_emitting(options, target, js_target, wasm_target):
     generate_html(target, options, js_target, target_basename,
                   wasm_target)
   elif settings.PROXY_TO_WORKER:
-    generate_worker_js(target, js_target, target_basename)
+    generate_worker_js(target, options, js_target, target_basename)
 
   if settings.SPLIT_MODULE:
     diagnostics.warning('experimental', 'the SPLIT_MODULE setting is experimental and subject to change')
     do_split_module(wasm_target, options)
 
   if not settings.SINGLE_FILE:
-    tools.line_endings.convert_line_endings_in_file(js_target, os.linesep, options.output_eol)
+    utils.convert_line_endings_in_file(js_target, options.output_eol)
 
   if options.executable:
     make_js_executable(js_target)
@@ -2672,10 +2670,10 @@ def generate_html(target, options, js_target, target_basename, wasm_target):
   if settings.MINIFY_HTML and (settings.OPT_LEVEL >= 1 or settings.SHRINK_LEVEL >= 1):
     minify_html(target)
 
-  tools.line_endings.convert_line_endings_in_file(target, os.linesep, options.output_eol)
+  utils.convert_line_endings_in_file(target, options.output_eol)
 
 
-def generate_worker_js(target, js_target, target_basename):
+def generate_worker_js(target, options, js_target, target_basename):
   if settings.SINGLE_FILE:
     # compiler output is embedded as base64 data URL
     proxy_worker_filename = get_subresource_location_js(js_target)
@@ -2686,7 +2684,7 @@ def generate_worker_js(target, js_target, target_basename):
     proxy_worker_filename = (settings.PROXY_TO_WORKER_FILENAME or worker_target_basename) + '.js'
 
   target_contents = worker_js_script(proxy_worker_filename)
-  write_file(target, target_contents)
+  utils.write_file(target, target_contents, options.output_eol)
 
 
 def worker_js_script(proxy_worker_filename):
diff --git a/tools/minimal_runtime_shell.py b/tools/minimal_runtime_shell.py
index f53fa345f27c8..8963ea8d30cd7 100644
--- a/tools/minimal_runtime_shell.py
+++ b/tools/minimal_runtime_shell.py
@@ -8,7 +8,6 @@
 sys.path.insert(0, __rootdir__)
 
 from . import shared
-from . import line_endings
 from . import utils
 from . import feature_matrix
 from .settings import settings
@@ -211,5 +210,4 @@ def generate_minimal_runtime_html(target, options, js_target, target_basename):
   else:
     js_contents = ''
   shell = shell.replace('{{{ JS_CONTENTS_IN_SINGLE_FILE_BUILD }}}', js_contents)
-  shell = line_endings.convert_line_endings(shell, '\n', options.output_eol)
-  utils.write_file(target, shell)
+  utils.write_file(target, shell, options.output_eol)
diff --git a/tools/utils.py b/tools/utils.py
index 8bbd937d9e55e..2a4ee1a1d76b4 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -47,6 +47,14 @@ def removeprefix(string, prefix):
   return string
 
 
+def convert_line_endings_in_file(filename, to_eol):
+  if to_eol == os.linesep:
+    return # No conversion needed
+
+  text = read_file(filename)
+  write_file(filename, text, line_endings=to_eol)
+
+
 def read_file(file_path):
   """Read from a file opened in text mode"""
   with open(file_path, encoding='utf-8') as fh:
@@ -59,10 +67,14 @@ def read_binary(file_path):
     return fh.read()
 
 
-def write_file(file_path, text):
+def write_file(file_path, text, line_endings=None):
   """Write to a file opened in text mode"""
-  with open(file_path, 'w', encoding='utf-8') as fh:
-    fh.write(text)
+  if line_endings and line_endings != os.linesep:
+    text = text.replace('\n', line_endings)
+    write_binary(file_path, text.encode('utf-8'))
+  else:
+    with open(file_path, 'w', encoding='utf-8') as fh:
+      fh.write(text)
 
 
 def write_binary(file_path, contents):