Skip to content

Commit

Permalink
Detect the ISA extension when building Embree and adjust its build configuration to match
Browse files Browse the repository at this point in the history

Embree has specialized implementations for various ISA extensions to
improve performance. Its code uses preprocessor definitions to detect
which extension to use. Typically, its code is compiled multiple times,
with different extensions enabled, and, at runtime, it detects which one
should be used.

Godot's build system doesn't do this; it simply compiles Embree once,
for the base SSE2 instruction set, and uses that. However, the build system
doesn't guarantee that Embree is actually built for SSE2. If Godot is compiled for
a newer instruction set (such as by using `-march=x86-64-v3` with
GCC/Clang, or `/arch:AVX2` with MSVC), Embree will end up with mixed
code paths, and compilation will fail. (Additionally, Godot's copy of
the Embree source code omits files that are not used by SSE2 builds, but
are needed for AVX builds, which causes more build errors.)

This commit fixes the compilation issues by finding the highest Embree
ISA extension target that's compatible with the compiler flags set
by the user, and adjusting the build settings accordingly.
  • Loading branch information
MonterraByte committed Aug 24, 2024
1 parent f53212c commit f2aba40
Showing 1 changed file with 117 additions and 17 deletions.
134 changes: 117 additions & 17 deletions modules/raycast/SCsub
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env python

import re
import subprocess

Import("env")
Import("env_modules")

Expand Down Expand Up @@ -59,24 +62,59 @@ if env["builtin_embree"]:
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
]

thirdparty_sources = [thirdparty_dir + file for file in embree_src]
embree_avx_src = [
"kernels/geometry/primitive8.cpp",
"kernels/bvh/bvh_intersector1_bvh8.cpp",
"kernels/bvh/bvh_intersector_hybrid4_bvh8.cpp",
"kernels/bvh/bvh_intersector_hybrid8_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid8_bvh8.cpp",
]

env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"])
env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"]) # No assert() even in debug builds.
embree_avx512_src = [
"kernels/bvh/bvh_intersector_hybrid16_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid16_bvh8.cpp",
]

if not env.msvc:
if env["arch"] in ["x86_64", "x86_32"]:
env_raycast.Append(CCFLAGS=["-msse2", "-mxsave"])
if env.msvc:
flags = env_raycast.subst("$CCFLAGS $CXXFLAGS")
m = re.search(r"/arch:(AVX512|AVX2|AVX)", flags)
if m is not None:
isa = m.group(1).lower()
else:
isa = "sse2"
else:
env_isa_test = env_raycast.Clone()
env_isa_test.Append(CCFLAGS=["-E", "-dM", "-x c++"])
command = env_isa_test.subst(env_isa_test["CXXCOM"], source=File('-'), target=File('-'))
defines = subprocess.check_output(command, input='', encoding='utf-8', shell=True)

def is_defined(name: str) -> bool:
    """Return True if the captured preprocessor dump defines the macro ``name``.

    Reads ``defines`` from the enclosing scope: the text captured from the
    compiler invocation above, which is run with ``-E -dM`` and is expected to
    contain one ``#define NAME VALUE`` entry per line.

    The name is matched as a whole token, so ``__AVX__`` does not match a
    line that defines ``__AVX2__``.
    """
    # Escape the name so it is always compared literally, and only accept
    # horizontal whitespace after `#define` so a match cannot span two lines.
    pattern = rf"^#define[ \t]+{re.escape(name)}(?:\s|$)"
    return re.search(pattern, defines, flags=re.MULTILINE) is not None

if is_defined("__F16C__") and is_defined("__AVX2__") and is_defined("__FMA__") and is_defined("__LZCNT__") and is_defined("__BMI__") and is_defined("__BMI2__"):
if is_defined("__AVX512F__") and is_defined("__AVX512DQ__") and is_defined("__AVX512CD__") and is_defined("__AVX512BW__") and is_defined("__AVX512VL__"):
# Embree also enables AVX2 support when targeting AVX512.
isa = "avx512"
else:
isa = "avx2"
elif is_defined("__AVX__"):
isa = "avx"
elif is_defined("__SSE4_1__") and is_defined("__SSE4_2__"):
isa = "sse42"
else:
isa = "sse2"

if env["platform"] == "windows":
env_raycast.Append(CCFLAGS=["-mstackrealign"])
if isa not in ["sse2", "sse42"]:
embree_src += embree_avx_src

if env["platform"] == "windows":
if env.msvc:
env.Append(LINKFLAGS=["psapi.lib"])
else:
env.Append(LIBS=["psapi"])
if isa == "avx512":
embree_src += embree_avx512_src

thirdparty_sources = [thirdparty_dir + file for file in embree_src]

env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"])
env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"]) # No assert() even in debug builds.

if env.msvc: # Disable bogus warning about intentional struct padding.
env_raycast.Append(CCFLAGS=["/wd4324"])
Expand All @@ -85,10 +123,72 @@ if env["builtin_embree"]:
env_thirdparty.force_optimization_on_debug()
env_thirdparty.disable_warnings()
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
env_thirdparty.Append(CPPDEFINES=["EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])

# These defines are used for MSVC (to signal SSE support) and for ARM (to enable use of NEON in Embree code).
sse2_defines = ["__SSE__", "__SSE2__"]
sse42_defines = ["__SSE4_1__", "__SSE4_2__"]
if env.msvc:
sse42_defines = sse2_defines + ["__SSE3__", "__SSSE3__"] + sse42_defines
avx_defines = ["__AVX__", "__BMI__", "__BMI2__", "__LZCNT__"] + sse42_defines
avx2_defines = ["__AVX2__"] + avx_defines

if not env.msvc:
# To avoid issues when a certain ISA is partially enabled (for example, when using `-mavx512f` but not `-mavx512vl`),
# explicitly disable ISAs higher than the target one.
no_avx512_flags = ["-mno-avx512f", "-mno-avx512dq", "-mno-avx512cd", "-mno-avx512bw", "-mno-avx512vl"]
no_avx2_flags = ["-mno-f16c", "-mno-avx2", "-mno-fma", "-mno-lzcnt", "-mno-bmi", "-mno-bmi2"]
no_avx_flags = ["-mno-avx"]
no_sse42_flags = ["-mno-sse4.2"]

sse2_flags = no_sse42_flags + no_avx_flags + no_avx2_flags + no_avx512_flags
sse42_flags = no_avx_flags + no_avx2_flags + no_avx512_flags
avx_flags = no_avx2_flags + no_avx512_flags
avx2_flags = no_avx512_flags

arm = env["arch"] in ["arm32", "arm64"]
if isa == "sse2":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE2"])

if env.msvc or arm:
env_thirdparty.Append(CPPDEFINES=sse2_defines)
else:
env_thirdparty.Append(CCFLAGS=sse2_flags)

if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32":
env_thirdparty.Append(CCFLAGS=["-mstackrealign"])
elif isa == "sse42":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE42"])

if env["arch"] != "x86_64" or env.msvc:
# Embree needs those, it will automatically use SSE2NEON in ARM
env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"])
if env.msvc or arm:
env_thirdparty.Append(CPPDEFINES=sse42_defines)
else:
env_thirdparty.Append(CCFLAGS=sse42_flags)

if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32":
env_thirdparty.Append(CCFLAGS=["-mstackrealign"])
elif isa == "avx":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX"])

if arm:
env_thirdparty.Append(CPPDEFINES=avx_defines)
elif not env.msvc:
env_thirdparty.Append(CCFLAGS=avx_flags)
elif isa == "avx2":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX2"])

if arm:
env_thirdparty.Append(CPPDEFINES=avx2_defines)
elif not env.msvc:
env_thirdparty.Append(CCFLAGS=avx2_flags)
elif isa == "avx512":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX512"])

if env["platform"] == "windows":
if env.msvc:
env.Append(LINKFLAGS=["psapi.lib"])
else:
env.Append(LIBS=["psapi"])

if env["platform"] == "web":
env_thirdparty.Append(CXXFLAGS=["-msimd128"])
Expand Down

0 comments on commit f2aba40

Please sign in to comment.