From d6a70d66a9ca1897446af07392b2c586ec2eeba9 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 26 Aug 2025 15:48:07 -0700
Subject: [PATCH 1/3] Update

[ghstack-poisoned]
---
 kernels/portable/cpu/op_amax.cpp              |  3 ++-
 kernels/portable/cpu/op_amin.cpp              |  3 ++-
 kernels/portable/cpu/op_argmax.cpp            |  3 ++-
 kernels/portable/cpu/op_argmin.cpp            |  3 ++-
 kernels/portable/cpu/op_max.cpp               |  7 ++++---
 kernels/portable/cpu/op_min.cpp               |  7 ++++---
 kernels/portable/cpu/op_relu.cpp              |  5 ++++-
 kernels/portable/cpu/op_sign.cpp              |  3 ++-
 kernels/portable/cpu/op_topk.cpp              |  4 +++-
 kernels/portable/cpu/util/math_util.h         | 21 ++++++++++++++++++-
 .../kernels/portable/op_registration_util.bzl | 11 ++++++++++
 tools/cmake/preset/windows.cmake              | 10 +++------
 12 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/kernels/portable/cpu/op_amax.cpp b/kernels/portable/cpu/op_amax.cpp
index 192fad5c908..8ae395f3c81 100644
--- a/kernels/portable/cpu/op_amax.cpp
+++ b/kernels/portable/cpu/op_amax.cpp
@@ -9,6 +9,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -55,7 +56,7 @@ Tensor& amax_out(
   for (const auto out_ix : c10::irange(begin, end)) {
     out_data[out_ix] = plan.execute<CTYPE>(
         [](CTYPE v, CTYPE max_v) {
-          return std::isnan(v) || v > max_v ? v : max_v;
+          return utils::isnan_override(v) || v > max_v ? v : max_v;
         },
         out_ix);
   }
diff --git a/kernels/portable/cpu/op_amin.cpp b/kernels/portable/cpu/op_amin.cpp
index d4e9be4f4e0..dc077e2dc44 100644
--- a/kernels/portable/cpu/op_amin.cpp
+++ b/kernels/portable/cpu/op_amin.cpp
@@ -8,6 +8,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -54,7 +55,7 @@ Tensor& amin_out(
   for (const auto out_ix : c10::irange(begin, end)) {
     out_data[out_ix] = plan.execute<CTYPE>(
         [](CTYPE v, CTYPE min_v) {
-          return std::isnan(v) || v < min_v ? v : min_v;
+          return utils::isnan_override(v) || v < min_v ? v : min_v;
         },
         out_ix);
   }
diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp
index 0e62c049082..e9a561366f7 100644
--- a/kernels/portable/cpu/op_argmax.cpp
+++ b/kernels/portable/cpu/op_argmax.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -58,7 +59,7 @@ Tensor& argmax_out(
         // the below condition as written is equivalent to
         // !isnan(accval) && (isnan(v) || v > acc_val). See
         // argument in op_argmin.cpp.
-        if (!std::isnan(acc_val) && !(v <= acc_val)) {
+        if (!utils::isnan_override(acc_val) && !(v <= acc_val)) {
           acc_val = v;
           acc_ix = ix;
         }
diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp
index d422610769f..fda9463c5ee 100644
--- a/kernels/portable/cpu/op_argmin.cpp
+++ b/kernels/portable/cpu/op_argmin.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -65,7 +66,7 @@ Tensor& argmin_out(
         // - false, so the result is true. The result is trivially
         // - true for the above condition that uses isnan(v) as
         // - well.
-        if (!std::isnan(acc_val) && !(v >= acc_val)) {
+        if (!utils::isnan_override(acc_val) && !(v >= acc_val)) {
           acc_val = v;
           acc_ix = ix;
         }
diff --git a/kernels/portable/cpu/op_max.cpp b/kernels/portable/cpu/op_max.cpp
index 3f4a1d27c0e..cdea0834806 100644
--- a/kernels/portable/cpu/op_max.cpp
+++ b/kernels/portable/cpu/op_max.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -88,8 +89,8 @@ std::tuple<Tensor&, Tensor&> max_out(
   for (const auto out_ix : c10::irange(begin, end)) {
     std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
         [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-          if (!std::isnan(acc_val) &&
-              (std::isnan(v) || v > acc_val)) {
+          if (!utils::isnan_override(acc_val) &&
+              (utils::isnan_override(v) || v > acc_val)) {
             acc_val = v;
             acc_ix = ix;
           }
@@ -132,7 +133,7 @@ max_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
     data_out[0] = lower_bound<CTYPE_OUT>();
     for (const auto i : c10::irange(in.numel())) {
       CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
-      if (std::isnan(val)) {
+      if (utils::isnan_override(val)) {
         data_out[0] = val;
         break;
       }
diff --git a/kernels/portable/cpu/op_min.cpp b/kernels/portable/cpu/op_min.cpp
index 8b70bcd40f5..d4d59d04128 100644
--- a/kernels/portable/cpu/op_min.cpp
+++ b/kernels/portable/cpu/op_min.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -88,8 +89,8 @@ std::tuple<Tensor&, Tensor&> min_out(
   for (const auto out_ix : c10::irange(begin, end)) {
     std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
         [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-          if (!std::isnan(acc_val) &&
-              (std::isnan(v) || v < acc_val)) {
+          if (!utils::isnan_override(acc_val) &&
+              (utils::isnan_override(v) || v < acc_val)) {
             acc_val = v;
             acc_ix = ix;
           }
@@ -132,7 +133,7 @@ min_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
     data_out[0] = upper_bound<CTYPE_OUT>();
     for (const auto i : c10::irange(in.numel())) {
       CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
-      if (std::isnan(val)) {
+      if (utils::isnan_override(val)) {
         data_out[0] = val;
         break;
       }
diff --git a/kernels/portable/cpu/op_relu.cpp b/kernels/portable/cpu/op_relu.cpp
index 973542a2a77..4b848fa17e4 100644
--- a/kernels/portable/cpu/op_relu.cpp
+++ b/kernels/portable/cpu/op_relu.cpp
@@ -9,6 +9,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
@@ -45,7 +46,9 @@ Tensor& relu_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "relu.out", CTYPE, [&]() {
     apply_unary_map_fn(
         [](const CTYPE val_in) {
-          return (std::isnan(val_in) || val_in >= CTYPE(0)) ? val_in : CTYPE(0);
+          return (utils::isnan_override(val_in) || val_in >= CTYPE(0))
+              ? val_in
val_in + : CTYPE(0); }, in.const_data_ptr(), out.mutable_data_ptr(), diff --git a/kernels/portable/cpu/op_sign.cpp b/kernels/portable/cpu/op_sign.cpp index e6945094973..56d07133539 100644 --- a/kernels/portable/cpu/op_sign.cpp +++ b/kernels/portable/cpu/op_sign.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -42,7 +43,7 @@ Tensor& sign_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "sign.out", CTYPE, [&] { apply_unary_map_fn( [](const CTYPE val_in) { - if (std::isnan(val_in)) { + if (utils::isnan_override(val_in)) { return val_in; } else { return static_cast((val_in > 0) - (val_in < 0)); diff --git a/kernels/portable/cpu/op_topk.cpp b/kernels/portable/cpu/op_topk.cpp index e35e67193bf..e2143ce78d5 100644 --- a/kernels/portable/cpu/op_topk.cpp +++ b/kernels/portable/cpu/op_topk.cpp @@ -10,6 +10,8 @@ #include #include +#include +#include #include namespace torch { @@ -62,7 +64,7 @@ bool float_less_than(T x, T y) { if constexpr (std::is_integral_v) { return x < y; } - return (!std::isnan(x) && std::isnan(y)) || x < y; + return (!utils::isnan_override(x) && utils::isnan_override(y)) || x < y; } template > diff --git a/kernels/portable/cpu/util/math_util.h b/kernels/portable/cpu/util/math_util.h index 2c4828b9e6e..a3a64997a5f 100644 --- a/kernels/portable/cpu/util/math_util.h +++ b/kernels/portable/cpu/util/math_util.h @@ -8,10 +8,14 @@ #pragma once +#include + #if defined(ET_USE_PYTORCH_HEADERS) && ET_USE_PYTORCH_HEADERS #include #endif +#include + namespace torch { namespace executor { namespace native { @@ -29,7 +33,8 @@ template < typename std::enable_if::value, bool>::type = true> INT_T floor_divide(INT_T a, INT_T b) { const auto quot = a / b; - if (std::signbit(a) == std::signbit(b)) { + // MSVC does not like signbit on integral types. + if ((a < 0) == (b < 0)) { return quot; } const auto rem = a % b; @@ -52,6 +57,20 @@ FLOAT_T floor_divide(FLOAT_T a, FLOAT_T b) { return div; } +/** + * A wrapper around std::isnan that works with MSVC. When building with MSVC, + * std::isnan calls with integer inputs fail to compile due to ambiguous + * overload resolution. + */ +template +bool isnan_override(T a) { + if constexpr (!std::is_integral_v) { + return std::isnan(a); + } else { + return false; + } +} + /** * Override min/max so we can emulate PyTorch's behavior with NaN entries. 
  */
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
index a0394113126..158d2cd2769 100644
--- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -279,6 +279,7 @@ ATEN_OPS = (
         deps = [
             "//executorch/runtime/core/exec_aten/util:scalar_type_util",
             "//executorch/runtime/core/exec_aten/util:tensor_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
            "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -288,6 +289,7 @@ ATEN_OPS = (
            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
            "//executorch/runtime/core/exec_aten/util:tensor_util",
            "//executorch/kernels/portable/cpu/util:index_util",
+           "//executorch/kernels/portable/cpu/util:math_util",
            "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -311,12 +313,14 @@ ATEN_OPS = (
     op_target(
         name = "op_argmax",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
     op_target(
         name = "op_argmin",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -839,6 +843,7 @@ ATEN_OPS = (
     op_target(
         name = "op_max",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -876,6 +881,7 @@ ATEN_OPS = (
     op_target(
         name = "op_min",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -1052,6 +1058,7 @@ ATEN_OPS = (
         name = "op_relu",
         deps = [
             "//executorch/kernels/portable/cpu/util:functional_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
         ],
     ),
     op_target(
@@ -1162,6 +1169,7 @@ ATEN_OPS = (
         name = "op_sign",
         deps = [
             "//executorch/kernels/portable/cpu/util:functional_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
         ],
     ),
     op_target(
@@ -1270,6 +1278,9 @@ ATEN_OPS = (
     ),
     op_target(
         name = "op_topk",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
+        ]
     ),
     op_target(
         name = "op_transpose_copy",
diff --git a/tools/cmake/preset/windows.cmake b/tools/cmake/preset/windows.cmake
index fb44ed56494..5cf26a21caf 100644
--- a/tools/cmake/preset/windows.cmake
+++ b/tools/cmake/preset/windows.cmake
@@ -5,19 +5,15 @@
 # LICENSE file in the root directory of this source tree.
 
 # keep sorted
+set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
+set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
+set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
 
 # Below options are not yet buildable on Windows, but should be.
-set(EXECUTORCH_BUILD_PORTABLE_OPS
-    OFF
-    CACHE BOOL ""
-)
-# set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER ON)
-# set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
-# set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
 # set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)

From 18866f43b9442ef7d5ffb892f4109de974c861c3 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 26 Aug 2025 15:56:42 -0700
Subject: [PATCH 2/3] Update

[ghstack-poisoned]
---
 .github/workflows/build-presets.yml       | 5 ++++-
 backends/xnnpack/cmake/Dependencies.cmake | 8 ++++++++
 tools/cmake/preset/windows.cmake          | 4 +---
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-presets.yml b/.github/workflows/build-presets.yml
index 76ec7dfd42d..794c715eaf7 100644
--- a/.github/workflows/build-presets.yml
+++ b/.github/workflows/build-presets.yml
@@ -109,7 +109,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        preset: [windows]
+        preset: [pybind, windows]
     with:
       job-name: build
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -122,14 +122,17 @@
         Set-PSDebug -Trace 1
         \$ErrorActionPreference = 'Stop'
         \$PSNativeCommandUseErrorActionPreference = \$true
+
         conda create --yes --quiet -n et python=3.12
         conda activate et
         python install_requirements.py
+
         cmake --preset ${{ matrix.preset }} -T ClangCL
         if (\$LASTEXITCODE -ne 0) {
           Write-Host "CMake configuration was unsuccessful. Exit code: \$LASTEXITCODE."
           exit \$LASTEXITCODE
         }
+
         \$numCores = [System.Environment]::GetEnvironmentVariable('NUMBER_OF_PROCESSORS') - 1
         cmake --build cmake-out -j \$numCores
         if (\$LASTEXITCODE -ne 0) {
diff --git a/backends/xnnpack/cmake/Dependencies.cmake b/backends/xnnpack/cmake/Dependencies.cmake
index 8d5d0845430..ce25f5cec22 100644
--- a/backends/xnnpack/cmake/Dependencies.cmake
+++ b/backends/xnnpack/cmake/Dependencies.cmake
@@ -55,6 +55,14 @@ else()
   )
 endif()
 
+if(WIN32)
+  # These XNNPACK options don't currently build on Windows.
+  set_overridable_option(XNNPACK_ENABLE_AVX256SKX OFF)
+  set_overridable_option(XNNPACK_ENABLE_AVX256VNNI OFF)
+  set_overridable_option(XNNPACK_ENABLE_AVX256VNNIGFNI OFF)
+  set_overridable_option(XNNPACK_ENABLE_AVX512BF16 OFF)
+endif()
+
 set(XNNPACK_BUILD_ALL_MICROKERNELS
     OFF
     CACHE BOOL ""
diff --git a/tools/cmake/preset/windows.cmake b/tools/cmake/preset/windows.cmake
index 5cf26a21caf..b75a5af578e 100644
--- a/tools/cmake/preset/windows.cmake
+++ b/tools/cmake/preset/windows.cmake
@@ -14,6 +14,4 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
 set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
 set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
-
-# Below options are not yet buildable on Windows, but should be.
-# set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)
+set_overridable_option(EXECUTORCH_BUILD_XNNPACK ON)

From e7423a45b46ed35b9ec4276de722313636fe7323 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 26 Aug 2025 16:18:10 -0700
Subject: [PATCH 3/3] Update

[ghstack-poisoned]
---
 extension/data_loader/CMakeLists.txt | 5 +++++
 tools/cmake/preset/pybind.cmake      | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/extension/data_loader/CMakeLists.txt b/extension/data_loader/CMakeLists.txt
index 104cd23c977..a5e7a0c4a81 100644
--- a/extension/data_loader/CMakeLists.txt
+++ b/extension/data_loader/CMakeLists.txt
@@ -24,6 +24,11 @@ if(NOT ET_HAVE_SYS_MMAN_H AND NOT WIN32)
       "extension/data_loader/mmap_data_loader.cpp"
   )
 endif()
+if(WIN32)
+  list(APPEND _extension_data_loader__srcs
+       "extension/data_loader/mman_windows.cpp"
+  )
+endif()
 list(TRANSFORM _extension_data_loader__srcs PREPEND "${EXECUTORCH_ROOT}/")
 add_library(extension_data_loader ${_extension_data_loader__srcs})
 target_link_libraries(extension_data_loader executorch_core)
diff --git a/tools/cmake/preset/pybind.cmake b/tools/cmake/preset/pybind.cmake
index e13fe026ef2..c7ad94cd8be 100644
--- a/tools/cmake/preset/pybind.cmake
+++ b/tools/cmake/preset/pybind.cmake
@@ -21,12 +21,13 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
 set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
-set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TRAINING ON)
 
 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
   set_overridable_option(EXECUTORCH_BUILD_COREML ON)
+  set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TRAINING ON)
 elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
   set_overridable_option(EXECUTORCH_BUILD_COREML ON)
+  set_overridable_option(EXECUTORCH_BUILD_EXTENSION_TRAINING ON)
 elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL
        "WIN32"
 )
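
----------------------------------------------------------------
Note on the isnan_override technique introduced by math_util.h in patch 1/3:
the snippet below is a minimal, self-contained sketch written for this note,
not code taken from the patch; the name safe_isnan and the sample values are
hypothetical. It shows why the `if constexpr` split fixes the MSVC build: the
std::isnan call in the discarded branch is never instantiated for integral T,
so MSVC never has to resolve the ambiguous std::isnan(int) overload, and
integral inputs simply report "not NaN".

    // Hypothetical standalone demo of the MSVC-safe isnan wrapper pattern.
    #include <cassert>
    #include <cmath>
    #include <limits>
    #include <type_traits>

    template <typename T>
    bool safe_isnan(T a) {  // mirrors utils::isnan_override from math_util.h
      if constexpr (!std::is_integral_v<T>) {
        return std::isnan(a);  // instantiated only for floating-point T
      } else {
        return false;  // integral values can never be NaN
      }
    }

    int main() {
      assert(!safe_isnan(42));  // int: compiles cleanly on MSVC, always false
      assert(safe_isnan(std::numeric_limits<float>::quiet_NaN()));
      assert(!safe_isnan(1.0));  // finite double
      return 0;
    }

The same instantiation argument explains the float_less_than change in
op_topk.cpp: its early `if constexpr (std::is_integral_v<T>) return x < y;`
does not discard the later isnan expression, so that line still needs the
integer-safe wrapper.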