Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unwrap emcc wrapper script to its underlying clang invocation #154

Merged
merged 11 commits into from
Jan 4, 2024
11 changes: 10 additions & 1 deletion BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,13 @@ filegroup(
# Implementation:
# If you are looking into the implementation, start with the overview in ImplementationReadme.md.

exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
exports_files(["refresh.template.py", "check_python_version.template.py", "print_args.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.

filegroup(
name = "print_args",
srcs = select({
"@bazel_tools//src/conditions:host_windows": [":print_args.bat"],
"//conditions:default": [":print_args.sh"],
}),
visibility = ["//visibility:public"],
)
3 changes: 3 additions & 0 deletions print_args.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@ECHO OFF

py -3 %HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY% %*
19 changes: 19 additions & 0 deletions print_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Prints the arguments passed to the script
"""

import sys


def main():
print('===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS===')
for arg in sys.argv[1:]:
print(arg)
print('===HEDRON_COMPILE_COMMANDS_END_ARGS===')

# We purposely return a non-zero exit code to have the emcc process exit after running this fake clang wrapper.
sys.exit(1)


if __name__ == '__main__':
main()
3 changes: 3 additions & 0 deletions print_args.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

exec python3 "${HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY}" "$@"
96 changes: 71 additions & 25 deletions refresh.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,17 +283,6 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke
return headers, should_cache


@functools.lru_cache(maxsize=None)
def _get_clang_or_gcc():
"""Returns clang or gcc, if you have one of them on your path."""
if shutil.which('clang'):
return 'clang'
elif shutil.which('gcc'):
return 'gcc'
else:
return None


def windows_list2cmdline(seq):
"""
Copied from list2cmdline in https://github.com/python/cpython/blob/main/Lib/subprocess.py because we need it but it's not exported as part of the public API.
Expand Down Expand Up @@ -567,18 +556,7 @@ def _get_headers(compile_action, source_path: str):
if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
headers, should_cache = _get_headers_msvc(compile_action.arguments, source_path)
else:
# Emscripten is tricky. There isn't an easy way to make it emcc run without lots of environment variables.
# So...rather than doing our usual script unwrapping, we just swap in clang/gcc and use that to get headers, knowing that they'll accept the same argument format.
# You can unwrap emcc.sh to emcc.py via next(pathlib.Path('external').glob('emscripten_bin_*/emscripten/emcc.py')).as_posix()
# But then the underlying emcc needs a configuration file that itself depends on lots of environment variables.
# If we ever pick this back up, note that you can supply that config via compile_args += ["--em-config", "external/emsdk/emscripten_toolchain/emscripten_config"]
args = compile_action.arguments
if args[0].endswith('emcc.sh') or args[0].endswith('emcc.bat'):
alternate_compiler = _get_clang_or_gcc()
if not alternate_compiler: return set() # Skip getting headers.
args = args.copy()
args[0] = alternate_compiler
headers, should_cache = _get_headers_gcc(args, source_path, compile_action.actionKey)
headers, should_cache = _get_headers_gcc(compile_action.arguments, source_path, compile_action.actionKey)

# Cache for future use
if output_file and should_cache:
Expand Down Expand Up @@ -767,6 +745,73 @@ def _apple_platform_patch(compile_args: typing.List[str]):
return compile_args


def _get_sysroot(args: typing.List[str]):
for idx, arg in enumerate(args):
if arg == '--sysroot' or arg == '-isysroot':
if idx + 1 < len(args):
return pathlib.PurePath(args[idx + 1])
elif arg.startswith('--sysroot='):
return pathlib.PurePath(arg[len('--sysroot='):])
elif arg.startswith('-isysroot'):
return pathlib.PurePath(arg[len('-isysroot'):])
return None


def _emscripten_platform_patch(compile_args: typing.List[str]):
"""De-Bazel the command into something clangd can parse.

This function has fixes specific to Emscripten platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not
"""
emcc_driver = pathlib.Path(compile_args[0])
if not emcc_driver.name.startswith('emcc'):
return compile_args

workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
sysroot = _get_sysroot(compile_args)
assert sysroot, f'Emscripten sysroot not detected in CMD: {compile_args}'

def get_workspace_root(path_from_execroot: pathlib.PurePath):
assert path_from_execroot.parts[0] == 'external'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super minor, but I suppose this might not be external if invoked within the emsdk...would we want to return . rather than assert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I fixed it.

return pathlib.PurePath('external') / path_from_execroot.parts[1]

environment = {
'EXT_BUILD_ROOT': str(workspace_absolute),
'EM_BIN_PATH': str(get_workspace_root(sysroot)),
cpsauer marked this conversation as resolved.
Show resolved Hide resolved
'EM_CONFIG_PATH': str(get_workspace_root(emcc_driver) / 'emscripten_toolchain' / 'emscripten_config'),
'EMCC_SKIP_SANITY_CHECK': '1',
'EM_COMPILER_WRAPPER': str(pathlib.PurePath({print_args_executable})),
'HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY': str(pathlib.PurePath({print_args_py})),
'PATH': os.environ['PATH'],
}

# We run the emcc process with the environment variable EM_COMPILER_WRAPPER to intercept the command line arguments passed to `clang`.
emcc_process = subprocess.run(
compile_args,
# MIN_PY=3.7: Replace PIPEs with capture_output.
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=environment,
encoding=locale.getpreferredencoding(),
check=False, # We explicitly ignore errors and carry on.
)

lines = emcc_process.stdout.splitlines()

# Parse the arguments from the output of the emcc process.
if BEGIN_ARGS_MARKER in lines:
begin_args_idx = lines.index(BEGIN_ARGS_MARKER)
end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
args = lines[begin_args_idx + 1:end_args_idx]
clang_driver = pathlib.PurePath(args[0])
if _is_relative_to(clang_driver, workspace_absolute):
args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
return args

assert False, f'Failed to parse emcc output: {emcc_process.stderr}'
BEGIN_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS==='
END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='


def _all_platform_patch(compile_args: typing.List[str]):
"""Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
# clangd writes module cache files to the wrong place
Expand Down Expand Up @@ -1017,10 +1062,11 @@ def _get_cpp_command_for_files(compile_action):

Undo Bazel-isms and figures out which files clangd should apply the command to.
"""
# Patch command by platform
compile_action.arguments = _all_platform_patch(compile_action.arguments)
# Patch command by platform, revealing any hidden arguments.
compile_action.arguments = _apple_platform_patch(compile_action.arguments)
compile_action.arguments = _emscripten_platform_patch(compile_action.arguments)
# Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed.
compile_action.arguments = _all_platform_patch(compile_action.arguments)

source_files, header_files = _get_files(compile_action)

Expand Down
5 changes: 5 additions & 0 deletions refresh_compile_commands.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def refresh_compile_commands(
version_checker_script_name,
script_name,
],
data = ["@hedron_compile_commands//:print_args"],
imports = [''], # Allows binary to import templated script, even if this macro is being called inside a sub package. See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/137
**kwargs
)
Expand All @@ -115,6 +116,8 @@ def _expand_template_impl(ctx):
" {windows_default_include_paths}": "\n".join([" %r," % path for path in find_cpp_toolchain(ctx).built_in_include_directories]), # find_cpp_toolchain is from https://docs.bazel.build/versions/main/integrating-with-rules-cc.html
"{exclude_headers}": repr(ctx.attr.exclude_headers),
"{exclude_external_sources}": repr(ctx.attr.exclude_external_sources),
"{print_args_executable}": repr(ctx.executable._print_args_executable.path),
"{print_args_py}": repr(ctx.file._print_args_py.path),
},
)
return DefaultInfo(files = depset([script]))
Expand All @@ -125,6 +128,8 @@ _expand_template = rule(
"exclude_external_sources": attr.bool(default = False),
"exclude_headers": attr.string(values = ["all", "external", ""]), # "" needed only for compatibility with Bazel < 3.6.0
"_script_template": attr.label(allow_single_file = True, default = "refresh.template.py"),
"_print_args_executable": attr.label(executable = True, allow_single_file = True, cfg = "target", default = "print_args"),
"_print_args_py": attr.label(allow_single_file = True, default = "print_args.py"),
# For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330
# Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the sucessor to 6.0
"_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),
Expand Down